{ "best_metric": 0.12187993537495234, "best_model_checkpoint": "./checkpoints/w2v-pa-v2/checkpoint-133200", "epoch": 4.905879787775274, "eval_steps": 600, "global_step": 135000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00036339850279816846, "grad_norm": 12.712770462036133, "learning_rate": 3.633985027981685e-08, "loss": 8.1015, "step": 10 }, { "epoch": 0.0007267970055963369, "grad_norm": 14.929631233215332, "learning_rate": 7.26797005596337e-08, "loss": 8.5111, "step": 20 }, { "epoch": 0.0010901955083945055, "grad_norm": 12.774781227111816, "learning_rate": 1.0901955083945056e-07, "loss": 8.71, "step": 30 }, { "epoch": 0.0014535940111926739, "grad_norm": 15.27083969116211, "learning_rate": 1.417254160912857e-07, "loss": 9.2894, "step": 40 }, { "epoch": 0.0018169925139908424, "grad_norm": 31.209775924682617, "learning_rate": 1.7806526637110256e-07, "loss": 8.6779, "step": 50 }, { "epoch": 0.002180391016789011, "grad_norm": 12.960335731506348, "learning_rate": 2.1440511665091943e-07, "loss": 8.0399, "step": 60 }, { "epoch": 0.002543789519587179, "grad_norm": 14.452157020568848, "learning_rate": 2.5074496693073626e-07, "loss": 7.9357, "step": 70 }, { "epoch": 0.0029071880223853477, "grad_norm": 12.74867057800293, "learning_rate": 2.8708481721055307e-07, "loss": 8.0764, "step": 80 }, { "epoch": 0.0032705865251835163, "grad_norm": 19.905397415161133, "learning_rate": 3.2342466749036993e-07, "loss": 8.2522, "step": 90 }, { "epoch": 0.003633985027981685, "grad_norm": null, "learning_rate": 3.5613053274220513e-07, "loss": 7.8161, "step": 100 }, { "epoch": 0.003997383530779853, "grad_norm": 15.877684593200684, "learning_rate": 3.92470383022022e-07, "loss": 7.4866, "step": 110 }, { "epoch": 0.004360782033578022, "grad_norm": 19.216800689697266, "learning_rate": 4.2881023330183885e-07, "loss": 7.0324, "step": 120 }, { "epoch": 0.0047241805363761906, "grad_norm": 16.937118530273438, 
"learning_rate": 4.651500835816557e-07, "loss": 6.7873, "step": 130 }, { "epoch": 0.005087579039174358, "grad_norm": 27.858692169189453, "learning_rate": 5.014899338614725e-07, "loss": 7.2063, "step": 140 }, { "epoch": 0.005450977541972527, "grad_norm": null, "learning_rate": 5.341957991133076e-07, "loss": 6.5827, "step": 150 }, { "epoch": 0.005814376044770695, "grad_norm": 21.252164840698242, "learning_rate": 5.669016643651428e-07, "loss": 7.819, "step": 160 }, { "epoch": 0.006177774547568864, "grad_norm": 20.977886199951172, "learning_rate": 6.032415146449597e-07, "loss": 5.4741, "step": 170 }, { "epoch": 0.0065411730503670325, "grad_norm": 35.25390625, "learning_rate": 6.395813649247765e-07, "loss": 5.7225, "step": 180 }, { "epoch": 0.006904571553165201, "grad_norm": 7.165033340454102, "learning_rate": 6.759212152045934e-07, "loss": 4.7475, "step": 190 }, { "epoch": 0.00726797005596337, "grad_norm": 14.877301216125488, "learning_rate": 7.122610654844103e-07, "loss": 4.6375, "step": 200 }, { "epoch": 0.007631368558761538, "grad_norm": 5.826667785644531, "learning_rate": 7.486009157642272e-07, "loss": 4.356, "step": 210 }, { "epoch": 0.007994767061559707, "grad_norm": 6.022212982177734, "learning_rate": 7.84940766044044e-07, "loss": 4.2138, "step": 220 }, { "epoch": 0.008358165564357875, "grad_norm": 4.790489196777344, "learning_rate": 8.212806163238608e-07, "loss": 4.0662, "step": 230 }, { "epoch": 0.008721564067156044, "grad_norm": 4.448057174682617, "learning_rate": 8.576204666036777e-07, "loss": 3.9507, "step": 240 }, { "epoch": 0.009084962569954213, "grad_norm": 47.487003326416016, "learning_rate": 8.939603168834945e-07, "loss": 3.9576, "step": 250 }, { "epoch": 0.009448361072752381, "grad_norm": 8.64856243133545, "learning_rate": 9.303001671633114e-07, "loss": 3.7102, "step": 260 }, { "epoch": 0.009811759575550548, "grad_norm": 8.821709632873535, "learning_rate": 9.66640017443128e-07, "loss": 3.6644, "step": 270 }, { "epoch": 0.010175158078348717, 
"grad_norm": 14.071539878845215, "learning_rate": 1.002979867722945e-06, "loss": 3.6909, "step": 280 }, { "epoch": 0.010538556581146885, "grad_norm": 6.68039083480835, "learning_rate": 1.0393197180027619e-06, "loss": 3.6458, "step": 290 }, { "epoch": 0.010901955083945054, "grad_norm": 20.664649963378906, "learning_rate": 1.0756595682825787e-06, "loss": 3.6332, "step": 300 }, { "epoch": 0.011265353586743222, "grad_norm": 2.5272624492645264, "learning_rate": 1.1119994185623955e-06, "loss": 3.4835, "step": 310 }, { "epoch": 0.01162875208954139, "grad_norm": 8.353235244750977, "learning_rate": 1.1483392688422123e-06, "loss": 3.4892, "step": 320 }, { "epoch": 0.01199215059233956, "grad_norm": 7.0964531898498535, "learning_rate": 1.1846791191220293e-06, "loss": 3.5586, "step": 330 }, { "epoch": 0.012355549095137728, "grad_norm": 4.734161376953125, "learning_rate": 1.2210189694018461e-06, "loss": 3.4996, "step": 340 }, { "epoch": 0.012718947597935897, "grad_norm": 47.409996032714844, "learning_rate": 1.257358819681663e-06, "loss": 3.5565, "step": 350 }, { "epoch": 0.013082346100734065, "grad_norm": 2.880244016647339, "learning_rate": 1.2936986699614797e-06, "loss": 3.4154, "step": 360 }, { "epoch": 0.013445744603532234, "grad_norm": 6.637233734130859, "learning_rate": 1.3300385202412968e-06, "loss": 3.4119, "step": 370 }, { "epoch": 0.013809143106330402, "grad_norm": 11.791736602783203, "learning_rate": 1.3663783705211136e-06, "loss": 3.4347, "step": 380 }, { "epoch": 0.01417254160912857, "grad_norm": 8.274836540222168, "learning_rate": 1.4027182208009304e-06, "loss": 3.4253, "step": 390 }, { "epoch": 0.01453594011192674, "grad_norm": 10.09929084777832, "learning_rate": 1.4390580710807472e-06, "loss": 3.4702, "step": 400 }, { "epoch": 0.014899338614724908, "grad_norm": 6.32951545715332, "learning_rate": 1.4753979213605642e-06, "loss": 3.3513, "step": 410 }, { "epoch": 0.015262737117523077, "grad_norm": 2.4888486862182617, "learning_rate": 1.511737771640381e-06, "loss": 
3.3421, "step": 420 }, { "epoch": 0.015626135620321245, "grad_norm": 3.02103328704834, "learning_rate": 1.5480776219201978e-06, "loss": 3.397, "step": 430 }, { "epoch": 0.015989534123119414, "grad_norm": 7.464268207550049, "learning_rate": 1.5844174722000146e-06, "loss": 3.3582, "step": 440 }, { "epoch": 0.016352932625917582, "grad_norm": 18.908123016357422, "learning_rate": 1.6207573224798317e-06, "loss": 3.4034, "step": 450 }, { "epoch": 0.01671633112871575, "grad_norm": 2.487326145172119, "learning_rate": 1.6570971727596485e-06, "loss": 3.2229, "step": 460 }, { "epoch": 0.01707972963151392, "grad_norm": 2.3999946117401123, "learning_rate": 1.6934370230394653e-06, "loss": 3.2185, "step": 470 }, { "epoch": 0.017443128134312088, "grad_norm": 5.007234573364258, "learning_rate": 1.729776873319282e-06, "loss": 3.2069, "step": 480 }, { "epoch": 0.017806526637110257, "grad_norm": 6.393301963806152, "learning_rate": 1.766116723599099e-06, "loss": 3.0687, "step": 490 }, { "epoch": 0.018169925139908425, "grad_norm": 45.44938278198242, "learning_rate": 1.802456573878916e-06, "loss": 3.112, "step": 500 }, { "epoch": 0.018533323642706594, "grad_norm": 7.32182502746582, "learning_rate": 1.8387964241587327e-06, "loss": 2.951, "step": 510 }, { "epoch": 0.018896722145504762, "grad_norm": 3.3864173889160156, "learning_rate": 1.8751362744385495e-06, "loss": 2.8879, "step": 520 }, { "epoch": 0.019260120648302927, "grad_norm": 5.429958343505859, "learning_rate": 1.911476124718366e-06, "loss": 2.7393, "step": 530 }, { "epoch": 0.019623519151101096, "grad_norm": 5.3577985763549805, "learning_rate": 1.947815974998183e-06, "loss": 2.4813, "step": 540 }, { "epoch": 0.019986917653899264, "grad_norm": 13.970659255981445, "learning_rate": 1.9841558252779998e-06, "loss": 2.3787, "step": 550 }, { "epoch": 0.020350316156697433, "grad_norm": 5.2666754722595215, "learning_rate": 2.0204956755578166e-06, "loss": 2.207, "step": 560 }, { "epoch": 0.0207137146594956, "grad_norm": 4.184991359710693, 
"learning_rate": 2.0568355258376334e-06, "loss": 2.0383, "step": 570 }, { "epoch": 0.02107711316229377, "grad_norm": 6.312343597412109, "learning_rate": 2.09317537611745e-06, "loss": 1.8416, "step": 580 }, { "epoch": 0.02144051166509194, "grad_norm": 4.754147529602051, "learning_rate": 2.1295152263972674e-06, "loss": 1.6002, "step": 590 }, { "epoch": 0.021803910167890107, "grad_norm": 21.47913360595703, "learning_rate": 2.1658550766770842e-06, "loss": 1.6015, "step": 600 }, { "epoch": 0.021803910167890107, "eval_loss": 1.5154471397399902, "eval_runtime": 180.9184, "eval_samples_per_second": 40.98, "eval_steps_per_second": 5.124, "eval_wer": 0.7997531177954872, "step": 600 }, { "epoch": 0.022167308670688276, "grad_norm": 4.2374348640441895, "learning_rate": 2.202194926956901e-06, "loss": 1.4842, "step": 610 }, { "epoch": 0.022530707173486444, "grad_norm": 4.392132759094238, "learning_rate": 2.238534777236718e-06, "loss": 1.3776, "step": 620 }, { "epoch": 0.022894105676284613, "grad_norm": 4.682064533233643, "learning_rate": 2.2748746275165347e-06, "loss": 1.3177, "step": 630 }, { "epoch": 0.02325750417908278, "grad_norm": 4.8396077156066895, "learning_rate": 2.3112144777963515e-06, "loss": 1.0737, "step": 640 }, { "epoch": 0.02362090268188095, "grad_norm": 33.27382278442383, "learning_rate": 2.3475543280761683e-06, "loss": 1.3046, "step": 650 }, { "epoch": 0.02398430118467912, "grad_norm": 5.410325050354004, "learning_rate": 2.383894178355985e-06, "loss": 1.1021, "step": 660 }, { "epoch": 0.024347699687477287, "grad_norm": 3.9523680210113525, "learning_rate": 2.420234028635802e-06, "loss": 1.0602, "step": 670 }, { "epoch": 0.024711098190275456, "grad_norm": 9.141073226928711, "learning_rate": 2.456573878915619e-06, "loss": 1.0631, "step": 680 }, { "epoch": 0.025074496693073624, "grad_norm": 5.3534626960754395, "learning_rate": 2.492913729195436e-06, "loss": 0.8968, "step": 690 }, { "epoch": 0.025437895195871793, "grad_norm": 32.30677795410156, "learning_rate": 
2.5292535794752527e-06, "loss": 1.0439, "step": 700 }, { "epoch": 0.02580129369866996, "grad_norm": 4.310474872589111, "learning_rate": 2.5655934297550696e-06, "loss": 0.954, "step": 710 }, { "epoch": 0.02616469220146813, "grad_norm": 5.586440563201904, "learning_rate": 2.6019332800348864e-06, "loss": 1.0031, "step": 720 }, { "epoch": 0.0265280907042663, "grad_norm": 3.6927313804626465, "learning_rate": 2.638273130314703e-06, "loss": 0.7956, "step": 730 }, { "epoch": 0.026891489207064467, "grad_norm": 4.270529747009277, "learning_rate": 2.67461298059452e-06, "loss": 0.8874, "step": 740 }, { "epoch": 0.027254887709862636, "grad_norm": 23.553489685058594, "learning_rate": 2.710952830874337e-06, "loss": 0.8523, "step": 750 }, { "epoch": 0.027618286212660804, "grad_norm": 5.342041492462158, "learning_rate": 2.747292681154154e-06, "loss": 0.9029, "step": 760 }, { "epoch": 0.027981684715458973, "grad_norm": 3.3802621364593506, "learning_rate": 2.783632531433971e-06, "loss": 0.8378, "step": 770 }, { "epoch": 0.02834508321825714, "grad_norm": 6.378807067871094, "learning_rate": 2.8199723817137876e-06, "loss": 0.8085, "step": 780 }, { "epoch": 0.02870848172105531, "grad_norm": 4.007000923156738, "learning_rate": 2.8563122319936045e-06, "loss": 0.8218, "step": 790 }, { "epoch": 0.02907188022385348, "grad_norm": 68.16226196289062, "learning_rate": 2.8926520822734213e-06, "loss": 1.2055, "step": 800 }, { "epoch": 0.029435278726651647, "grad_norm": 6.70043420791626, "learning_rate": 2.928991932553238e-06, "loss": 0.7641, "step": 810 }, { "epoch": 0.029798677229449816, "grad_norm": 5.498161315917969, "learning_rate": 2.965331782833055e-06, "loss": 0.7739, "step": 820 }, { "epoch": 0.030162075732247984, "grad_norm": 9.515852928161621, "learning_rate": 3.0016716331128717e-06, "loss": 0.8293, "step": 830 }, { "epoch": 0.030525474235046153, "grad_norm": 13.3881196975708, "learning_rate": 3.0380114833926885e-06, "loss": 0.5597, "step": 840 }, { "epoch": 0.03088887273784432, 
"grad_norm": 13.670549392700195, "learning_rate": 3.0743513336725057e-06, "loss": 0.7658, "step": 850 }, { "epoch": 0.03125227124064249, "grad_norm": 3.58305287361145, "learning_rate": 3.1106911839523226e-06, "loss": 0.7036, "step": 860 }, { "epoch": 0.031615669743440655, "grad_norm": 4.119450569152832, "learning_rate": 3.147031034232139e-06, "loss": 0.6842, "step": 870 }, { "epoch": 0.03197906824623883, "grad_norm": 6.412299156188965, "learning_rate": 3.183370884511956e-06, "loss": 0.7148, "step": 880 }, { "epoch": 0.03234246674903699, "grad_norm": 8.700023651123047, "learning_rate": 3.2197107347917726e-06, "loss": 1.4861, "step": 890 }, { "epoch": 0.032705865251835164, "grad_norm": 18.78075408935547, "learning_rate": 3.25605058507159e-06, "loss": 0.7162, "step": 900 }, { "epoch": 0.03306926375463333, "grad_norm": 4.078335762023926, "learning_rate": 3.292390435351406e-06, "loss": 0.643, "step": 910 }, { "epoch": 0.0334326622574315, "grad_norm": 6.603452682495117, "learning_rate": 3.3287302856312234e-06, "loss": 0.6623, "step": 920 }, { "epoch": 0.03379606076022967, "grad_norm": 5.817732334136963, "learning_rate": 3.3650701359110402e-06, "loss": 0.6265, "step": 930 }, { "epoch": 0.03415945926302784, "grad_norm": 8.310086250305176, "learning_rate": 3.4014099861908575e-06, "loss": 0.5343, "step": 940 }, { "epoch": 0.034522857765826004, "grad_norm": null, "learning_rate": 3.4341158514426923e-06, "loss": 0.6999, "step": 950 }, { "epoch": 0.034886256268624176, "grad_norm": 4.416926860809326, "learning_rate": 3.4704557017225087e-06, "loss": 0.6688, "step": 960 }, { "epoch": 0.03524965477142234, "grad_norm": 3.2407495975494385, "learning_rate": 3.506795552002326e-06, "loss": 0.5808, "step": 970 }, { "epoch": 0.03561305327422051, "grad_norm": null, "learning_rate": 3.539501417254161e-06, "loss": 3.0266, "step": 980 }, { "epoch": 0.03597645177701868, "grad_norm": 8.086112022399902, "learning_rate": 3.575841267533978e-06, "loss": 0.5829, "step": 990 }, { "epoch": 
0.03633985027981685, "grad_norm": 252.45077514648438, "learning_rate": 3.612181117813795e-06, "loss": 0.7371, "step": 1000 }, { "epoch": 0.036703248782615015, "grad_norm": 3.5969936847686768, "learning_rate": 3.648520968093612e-06, "loss": 0.6632, "step": 1010 }, { "epoch": 0.03706664728541319, "grad_norm": 3.0116841793060303, "learning_rate": 3.6848608183734285e-06, "loss": 0.537, "step": 1020 }, { "epoch": 0.03743004578821135, "grad_norm": 5.494657039642334, "learning_rate": 3.7212006686532457e-06, "loss": 0.5422, "step": 1030 }, { "epoch": 0.037793444291009524, "grad_norm": 21.526798248291016, "learning_rate": 3.757540518933062e-06, "loss": 0.5003, "step": 1040 }, { "epoch": 0.03815684279380769, "grad_norm": 80.90055084228516, "learning_rate": 3.7938803692128793e-06, "loss": 0.6566, "step": 1050 }, { "epoch": 0.038520241296605855, "grad_norm": 3.7678096294403076, "learning_rate": 3.830220219492696e-06, "loss": 0.5758, "step": 1060 }, { "epoch": 0.03888363979940403, "grad_norm": 4.526616096496582, "learning_rate": 3.866560069772512e-06, "loss": 0.5648, "step": 1070 }, { "epoch": 0.03924703830220219, "grad_norm": 4.571674346923828, "learning_rate": 3.90289992005233e-06, "loss": 0.5864, "step": 1080 }, { "epoch": 0.039610436805000364, "grad_norm": 5.295219421386719, "learning_rate": 3.939239770332146e-06, "loss": 0.4476, "step": 1090 }, { "epoch": 0.03997383530779853, "grad_norm": 16.631162643432617, "learning_rate": 3.975579620611963e-06, "loss": 0.6198, "step": 1100 }, { "epoch": 0.0403372338105967, "grad_norm": 4.685397624969482, "learning_rate": 4.01191947089178e-06, "loss": 0.7512, "step": 1110 }, { "epoch": 0.040700632313394866, "grad_norm": 3.333232879638672, "learning_rate": 4.048259321171597e-06, "loss": 0.5087, "step": 1120 }, { "epoch": 0.04106403081619304, "grad_norm": 5.501911640167236, "learning_rate": 4.084599171451414e-06, "loss": 0.5772, "step": 1130 }, { "epoch": 0.0414274293189912, "grad_norm": 8.066693305969238, "learning_rate": 
4.120939021731231e-06, "loss": 0.4641, "step": 1140 }, { "epoch": 0.041790827821789375, "grad_norm": 13.463829040527344, "learning_rate": 4.1572788720110474e-06, "loss": 0.5192, "step": 1150 }, { "epoch": 0.04215422632458754, "grad_norm": 4.132773399353027, "learning_rate": 4.193618722290864e-06, "loss": 0.4696, "step": 1160 }, { "epoch": 0.04251762482738571, "grad_norm": 6.176777362823486, "learning_rate": 4.229958572570681e-06, "loss": 0.4851, "step": 1170 }, { "epoch": 0.04288102333018388, "grad_norm": 8.26610279083252, "learning_rate": 4.266298422850498e-06, "loss": 0.4967, "step": 1180 }, { "epoch": 0.04324442183298205, "grad_norm": 3.9725544452667236, "learning_rate": 4.302638273130315e-06, "loss": 0.431, "step": 1190 }, { "epoch": 0.043607820335780215, "grad_norm": 22.353294372558594, "learning_rate": 4.338978123410132e-06, "loss": 0.6523, "step": 1200 }, { "epoch": 0.043607820335780215, "eval_loss": 0.5945897102355957, "eval_runtime": 180.5168, "eval_samples_per_second": 41.071, "eval_steps_per_second": 5.135, "eval_wer": 0.3718390908925881, "step": 1200 }, { "epoch": 0.04397121883857839, "grad_norm": 3.7954189777374268, "learning_rate": 4.375317973689948e-06, "loss": 0.4511, "step": 1210 }, { "epoch": 0.04433461734137655, "grad_norm": 5.583435535430908, "learning_rate": 4.411657823969766e-06, "loss": 1.4019, "step": 1220 }, { "epoch": 0.044698015844174724, "grad_norm": 8.544243812561035, "learning_rate": 4.447997674249582e-06, "loss": 0.4467, "step": 1230 }, { "epoch": 0.04506141434697289, "grad_norm": 3.8716418743133545, "learning_rate": 4.4843375245293996e-06, "loss": 0.4347, "step": 1240 }, { "epoch": 0.04542481284977106, "grad_norm": 19.459606170654297, "learning_rate": 4.5206773748092155e-06, "loss": 0.616, "step": 1250 }, { "epoch": 0.045788211352569226, "grad_norm": 5.474793434143066, "learning_rate": 4.557017225089033e-06, "loss": 0.4689, "step": 1260 }, { "epoch": 0.0461516098553674, "grad_norm": 4.705495834350586, "learning_rate": 
4.593357075368849e-06, "loss": 0.4623, "step": 1270 }, { "epoch": 0.04651500835816556, "grad_norm": 6.779942035675049, "learning_rate": 4.629696925648667e-06, "loss": 0.4418, "step": 1280 }, { "epoch": 0.046878406860963735, "grad_norm": 6.802936553955078, "learning_rate": 4.666036775928484e-06, "loss": 0.4429, "step": 1290 }, { "epoch": 0.0472418053637619, "grad_norm": 17.47754669189453, "learning_rate": 4.7023766262083004e-06, "loss": 0.596, "step": 1300 }, { "epoch": 0.04760520386656007, "grad_norm": 4.036036968231201, "learning_rate": 4.738716476488117e-06, "loss": 0.4362, "step": 1310 }, { "epoch": 0.04796860236935824, "grad_norm": 6.022701740264893, "learning_rate": 4.775056326767934e-06, "loss": 0.5092, "step": 1320 }, { "epoch": 0.04833200087215641, "grad_norm": 5.533923625946045, "learning_rate": 4.811396177047751e-06, "loss": 0.4358, "step": 1330 }, { "epoch": 0.048695399374954575, "grad_norm": 3.4037017822265625, "learning_rate": 4.847736027327568e-06, "loss": 0.3684, "step": 1340 }, { "epoch": 0.04905879787775275, "grad_norm": 13.625974655151367, "learning_rate": 4.8840758776073845e-06, "loss": 0.583, "step": 1350 }, { "epoch": 0.04942219638055091, "grad_norm": 3.597294330596924, "learning_rate": 4.920415727887201e-06, "loss": 0.4561, "step": 1360 }, { "epoch": 0.049785594883349084, "grad_norm": 2.8846936225891113, "learning_rate": 4.956755578167018e-06, "loss": 0.409, "step": 1370 }, { "epoch": 0.05014899338614725, "grad_norm": 5.500187397003174, "learning_rate": 4.993095428446836e-06, "loss": 0.4531, "step": 1380 }, { "epoch": 0.05051239188894542, "grad_norm": 3.1203413009643555, "learning_rate": 5.029435278726652e-06, "loss": 0.4004, "step": 1390 }, { "epoch": 0.050875790391743586, "grad_norm": 98.18115234375, "learning_rate": 5.0657751290064685e-06, "loss": 0.522, "step": 1400 }, { "epoch": 0.05123918889454175, "grad_norm": 3.901418924331665, "learning_rate": 5.102114979286285e-06, "loss": 0.4041, "step": 1410 }, { "epoch": 0.05160258739733992, 
"grad_norm": 4.045637130737305, "learning_rate": 5.138454829566102e-06, "loss": 0.4051, "step": 1420 }, { "epoch": 0.05196598590013809, "grad_norm": 6.835183143615723, "learning_rate": 5.174794679845919e-06, "loss": 0.4937, "step": 1430 }, { "epoch": 0.05232938440293626, "grad_norm": 7.708272457122803, "learning_rate": 5.211134530125736e-06, "loss": 0.3818, "step": 1440 }, { "epoch": 0.052692782905734425, "grad_norm": 24.2607364654541, "learning_rate": 5.247474380405553e-06, "loss": 0.5445, "step": 1450 }, { "epoch": 0.0530561814085326, "grad_norm": 3.3517005443573, "learning_rate": 5.283814230685369e-06, "loss": 0.4079, "step": 1460 }, { "epoch": 0.05341957991133076, "grad_norm": 12.727778434753418, "learning_rate": 5.320154080965187e-06, "loss": 0.4285, "step": 1470 }, { "epoch": 0.053782978414128935, "grad_norm": 4.984294891357422, "learning_rate": 5.356493931245003e-06, "loss": 0.5006, "step": 1480 }, { "epoch": 0.0541463769169271, "grad_norm": 3.3041558265686035, "learning_rate": 5.392833781524821e-06, "loss": 0.3729, "step": 1490 }, { "epoch": 0.05450977541972527, "grad_norm": 38.074546813964844, "learning_rate": 5.429173631804637e-06, "loss": 0.5401, "step": 1500 }, { "epoch": 0.05487317392252344, "grad_norm": 5.649720668792725, "learning_rate": 5.465513482084454e-06, "loss": 0.3879, "step": 1510 }, { "epoch": 0.05523657242532161, "grad_norm": 3.107583522796631, "learning_rate": 5.501853332364271e-06, "loss": 0.4144, "step": 1520 }, { "epoch": 0.055599970928119774, "grad_norm": 19.246564865112305, "learning_rate": 5.538193182644088e-06, "loss": 0.4314, "step": 1530 }, { "epoch": 0.055963369430917946, "grad_norm": 4.72367525100708, "learning_rate": 5.574533032923905e-06, "loss": 0.3576, "step": 1540 }, { "epoch": 0.05632676793371611, "grad_norm": 25.88886260986328, "learning_rate": 5.6108728832037215e-06, "loss": 0.5385, "step": 1550 }, { "epoch": 0.05669016643651428, "grad_norm": 3.1524956226348877, "learning_rate": 5.647212733483538e-06, "loss": 0.4075, 
"step": 1560 }, { "epoch": 0.05705356493931245, "grad_norm": 3.883281707763672, "learning_rate": 5.683552583763355e-06, "loss": 0.4242, "step": 1570 }, { "epoch": 0.05741696344211062, "grad_norm": 16.935935974121094, "learning_rate": 5.719892434043172e-06, "loss": 0.6194, "step": 1580 }, { "epoch": 0.057780361944908785, "grad_norm": 4.23909330368042, "learning_rate": 5.756232284322989e-06, "loss": 0.4206, "step": 1590 }, { "epoch": 0.05814376044770696, "grad_norm": 16.6039981842041, "learning_rate": 5.7925721346028056e-06, "loss": 0.4854, "step": 1600 }, { "epoch": 0.05850715895050512, "grad_norm": 2.5220890045166016, "learning_rate": 5.828911984882622e-06, "loss": 0.4186, "step": 1610 }, { "epoch": 0.058870557453303295, "grad_norm": 3.075101613998413, "learning_rate": 5.865251835162439e-06, "loss": 0.8877, "step": 1620 }, { "epoch": 0.05923395595610146, "grad_norm": 5.511383056640625, "learning_rate": 5.901591685442257e-06, "loss": 0.4219, "step": 1630 }, { "epoch": 0.05959735445889963, "grad_norm": 2.9449989795684814, "learning_rate": 5.937931535722073e-06, "loss": 0.2992, "step": 1640 }, { "epoch": 0.0599607529616978, "grad_norm": 31.823612213134766, "learning_rate": 5.9742713860018905e-06, "loss": 0.5939, "step": 1650 }, { "epoch": 0.06032415146449597, "grad_norm": 4.240995407104492, "learning_rate": 6.010611236281706e-06, "loss": 0.4176, "step": 1660 }, { "epoch": 0.060687549967294134, "grad_norm": 2.6084980964660645, "learning_rate": 6.046951086561524e-06, "loss": 0.3542, "step": 1670 }, { "epoch": 0.061050948470092306, "grad_norm": 8.318774223327637, "learning_rate": 6.08329093684134e-06, "loss": 0.3968, "step": 1680 }, { "epoch": 0.06141434697289047, "grad_norm": 5.18604850769043, "learning_rate": 6.119630787121158e-06, "loss": 0.3879, "step": 1690 }, { "epoch": 0.06177774547568864, "grad_norm": 51.732086181640625, "learning_rate": 6.1559706374009745e-06, "loss": 0.5025, "step": 1700 }, { "epoch": 0.06214114397848681, "grad_norm": 2.5876500606536865, 
"learning_rate": 6.192310487680791e-06, "loss": 0.3558, "step": 1710 }, { "epoch": 0.06250454248128498, "grad_norm": 5.071794033050537, "learning_rate": 6.228650337960608e-06, "loss": 0.3534, "step": 1720 }, { "epoch": 0.06286794098408315, "grad_norm": 11.539891242980957, "learning_rate": 6.264990188240424e-06, "loss": 0.4628, "step": 1730 }, { "epoch": 0.06323133948688131, "grad_norm": 3.275383710861206, "learning_rate": 6.301330038520243e-06, "loss": 0.3368, "step": 1740 }, { "epoch": 0.06359473798967948, "grad_norm": 41.4942741394043, "learning_rate": 6.3376698888000586e-06, "loss": 0.53, "step": 1750 }, { "epoch": 0.06395813649247765, "grad_norm": 3.0071399211883545, "learning_rate": 6.374009739079875e-06, "loss": 0.3623, "step": 1760 }, { "epoch": 0.06432153499527582, "grad_norm": 3.385955333709717, "learning_rate": 6.410349589359692e-06, "loss": 0.3476, "step": 1770 }, { "epoch": 0.06468493349807398, "grad_norm": 3.872527599334717, "learning_rate": 6.446689439639508e-06, "loss": 0.3168, "step": 1780 }, { "epoch": 0.06504833200087215, "grad_norm": 4.668768882751465, "learning_rate": 6.483029289919327e-06, "loss": 0.3813, "step": 1790 }, { "epoch": 0.06541173050367033, "grad_norm": 69.33656311035156, "learning_rate": 6.519369140199143e-06, "loss": 0.4557, "step": 1800 }, { "epoch": 0.06541173050367033, "eval_loss": 0.5579342246055603, "eval_runtime": 180.31, "eval_samples_per_second": 41.118, "eval_steps_per_second": 5.141, "eval_wer": 0.34055221740156477, "step": 1800 }, { "epoch": 0.0657751290064685, "grad_norm": 4.2848381996154785, "learning_rate": 6.555708990478959e-06, "loss": 0.5481, "step": 1810 }, { "epoch": 0.06613852750926666, "grad_norm": 11.31700325012207, "learning_rate": 6.592048840758775e-06, "loss": 0.3479, "step": 1820 }, { "epoch": 0.06650192601206482, "grad_norm": 6.088991165161133, "learning_rate": 6.628388691038594e-06, "loss": 0.3994, "step": 1830 }, { "epoch": 0.066865324514863, "grad_norm": 4.342681407928467, "learning_rate": 
6.66472854131841e-06, "loss": 0.2953, "step": 1840 }, { "epoch": 0.06722872301766117, "grad_norm": 75.97467041015625, "learning_rate": 6.701068391598227e-06, "loss": 0.4807, "step": 1850 }, { "epoch": 0.06759212152045933, "grad_norm": 3.8739049434661865, "learning_rate": 6.7374082418780435e-06, "loss": 0.3963, "step": 1860 }, { "epoch": 0.0679555200232575, "grad_norm": 2.6209168434143066, "learning_rate": 6.773748092157861e-06, "loss": 0.422, "step": 1870 }, { "epoch": 0.06831891852605568, "grad_norm": 17.530773162841797, "learning_rate": 6.810087942437678e-06, "loss": 0.3939, "step": 1880 }, { "epoch": 0.06868231702885384, "grad_norm": 3.475748300552368, "learning_rate": 6.846427792717494e-06, "loss": 0.2996, "step": 1890 }, { "epoch": 0.06904571553165201, "grad_norm": 20.979995727539062, "learning_rate": 6.882767642997311e-06, "loss": 0.4528, "step": 1900 }, { "epoch": 0.06940911403445017, "grad_norm": 3.8432774543762207, "learning_rate": 6.919107493277128e-06, "loss": 0.2959, "step": 1910 }, { "epoch": 0.06977251253724835, "grad_norm": 7.830467700958252, "learning_rate": 6.955447343556945e-06, "loss": 0.3378, "step": 1920 }, { "epoch": 0.07013591104004652, "grad_norm": 15.633039474487305, "learning_rate": 6.991787193836762e-06, "loss": 0.4, "step": 1930 }, { "epoch": 0.07049930954284468, "grad_norm": 13.628314971923828, "learning_rate": 7.028127044116578e-06, "loss": 0.3255, "step": 1940 }, { "epoch": 0.07086270804564285, "grad_norm": 33.001773834228516, "learning_rate": 7.064466894396396e-06, "loss": 0.4367, "step": 1950 }, { "epoch": 0.07122610654844103, "grad_norm": 3.5115041732788086, "learning_rate": 7.100806744676212e-06, "loss": 0.3279, "step": 1960 }, { "epoch": 0.07158950505123919, "grad_norm": 3.0497541427612305, "learning_rate": 7.137146594956029e-06, "loss": 2.8797, "step": 1970 }, { "epoch": 0.07195290355403736, "grad_norm": 6.17769718170166, "learning_rate": 7.173486445235845e-06, "loss": 0.3534, "step": 1980 }, { "epoch": 0.07231630205683552, 
"grad_norm": 5.4114789962768555, "learning_rate": 7.209826295515664e-06, "loss": 0.3309, "step": 1990 }, { "epoch": 0.0726797005596337, "grad_norm": 11.600439071655273, "learning_rate": 7.24616614579548e-06, "loss": 0.4382, "step": 2000 }, { "epoch": 0.07304309906243187, "grad_norm": 3.4476027488708496, "learning_rate": 7.2825059960752965e-06, "loss": 0.3487, "step": 2010 }, { "epoch": 0.07340649756523003, "grad_norm": 5.642564296722412, "learning_rate": 7.318845846355113e-06, "loss": 0.3513, "step": 2020 }, { "epoch": 0.0737698960680282, "grad_norm": 7.132052898406982, "learning_rate": 7.355185696634931e-06, "loss": 0.4564, "step": 2030 }, { "epoch": 0.07413329457082637, "grad_norm": 6.583246231079102, "learning_rate": 7.391525546914748e-06, "loss": 0.3376, "step": 2040 }, { "epoch": 0.07449669307362454, "grad_norm": 23.98805809020996, "learning_rate": 7.427865397194564e-06, "loss": 0.4217, "step": 2050 }, { "epoch": 0.0748600915764227, "grad_norm": 3.9135584831237793, "learning_rate": 7.4642052474743805e-06, "loss": 0.3324, "step": 2060 }, { "epoch": 0.07522349007922087, "grad_norm": 3.4022698402404785, "learning_rate": 7.500545097754198e-06, "loss": 0.3391, "step": 2070 }, { "epoch": 0.07558688858201905, "grad_norm": 8.37547779083252, "learning_rate": 7.536884948034015e-06, "loss": 0.3119, "step": 2080 }, { "epoch": 0.07595028708481721, "grad_norm": 6.2167558670043945, "learning_rate": 7.573224798313831e-06, "loss": 0.3247, "step": 2090 }, { "epoch": 0.07631368558761538, "grad_norm": 81.76036834716797, "learning_rate": 7.609564648593648e-06, "loss": 0.4281, "step": 2100 }, { "epoch": 0.07667708409041354, "grad_norm": 2.8961973190307617, "learning_rate": 7.645904498873465e-06, "loss": 0.3368, "step": 2110 }, { "epoch": 0.07704048259321171, "grad_norm": 4.699477195739746, "learning_rate": 7.682244349153282e-06, "loss": 0.3403, "step": 2120 }, { "epoch": 0.07740388109600989, "grad_norm": 4.429138660430908, "learning_rate": 7.718584199433098e-06, "loss": 0.3182, 
"step": 2130 }, { "epoch": 0.07776727959880805, "grad_norm": 2.7269580364227295, "learning_rate": 7.754924049712916e-06, "loss": 0.2828, "step": 2140 }, { "epoch": 0.07813067810160622, "grad_norm": 15.126232147216797, "learning_rate": 7.791263899992732e-06, "loss": 0.4606, "step": 2150 }, { "epoch": 0.07849407660440438, "grad_norm": 10.14072322845459, "learning_rate": 7.82760375027255e-06, "loss": 0.3451, "step": 2160 }, { "epoch": 0.07885747510720256, "grad_norm": 4.95914363861084, "learning_rate": 7.863943600552365e-06, "loss": 0.3612, "step": 2170 }, { "epoch": 0.07922087361000073, "grad_norm": 4.115192413330078, "learning_rate": 7.900283450832183e-06, "loss": 0.3222, "step": 2180 }, { "epoch": 0.07958427211279889, "grad_norm": 5.405594825744629, "learning_rate": 7.936623301111999e-06, "loss": 0.3474, "step": 2190 }, { "epoch": 0.07994767061559706, "grad_norm": 23.328718185424805, "learning_rate": 7.972963151391817e-06, "loss": 0.4797, "step": 2200 }, { "epoch": 0.08031106911839524, "grad_norm": 3.5595099925994873, "learning_rate": 8.009303001671634e-06, "loss": 0.3305, "step": 2210 }, { "epoch": 0.0806744676211934, "grad_norm": 3.048445463180542, "learning_rate": 8.04564285195145e-06, "loss": 0.318, "step": 2220 }, { "epoch": 0.08103786612399157, "grad_norm": 5.857702732086182, "learning_rate": 8.081982702231266e-06, "loss": 0.3497, "step": 2230 }, { "epoch": 0.08140126462678973, "grad_norm": 3.0092968940734863, "learning_rate": 8.118322552511084e-06, "loss": 0.2995, "step": 2240 }, { "epoch": 0.08176466312958791, "grad_norm": 9.337843894958496, "learning_rate": 8.154662402790902e-06, "loss": 0.4517, "step": 2250 }, { "epoch": 0.08212806163238608, "grad_norm": 3.136950969696045, "learning_rate": 8.191002253070718e-06, "loss": 0.2927, "step": 2260 }, { "epoch": 0.08249146013518424, "grad_norm": 4.228198051452637, "learning_rate": 8.227342103350534e-06, "loss": 1.2185, "step": 2270 }, { "epoch": 0.0828548586379824, "grad_norm": 7.404679298400879, "learning_rate": 
8.263681953630351e-06, "loss": 0.3448, "step": 2280 }, { "epoch": 0.08321825714078059, "grad_norm": 7.873497009277344, "learning_rate": 8.300021803910169e-06, "loss": 0.2965, "step": 2290 }, { "epoch": 0.08358165564357875, "grad_norm": 12.266081809997559, "learning_rate": 8.336361654189985e-06, "loss": 0.4631, "step": 2300 }, { "epoch": 0.08394505414637692, "grad_norm": 3.3576557636260986, "learning_rate": 8.3727015044698e-06, "loss": 0.3339, "step": 2310 }, { "epoch": 0.08430845264917508, "grad_norm": 3.0854902267456055, "learning_rate": 8.40904135474962e-06, "loss": 0.3448, "step": 2320 }, { "epoch": 0.08467185115197326, "grad_norm": 6.1308746337890625, "learning_rate": 8.445381205029436e-06, "loss": 0.386, "step": 2330 }, { "epoch": 0.08503524965477142, "grad_norm": 4.458275318145752, "learning_rate": 8.481721055309252e-06, "loss": 0.2916, "step": 2340 }, { "epoch": 0.08539864815756959, "grad_norm": 25.443647384643555, "learning_rate": 8.51806090558907e-06, "loss": 0.4232, "step": 2350 }, { "epoch": 0.08576204666036776, "grad_norm": 324.4353332519531, "learning_rate": 8.554400755868887e-06, "loss": 2.4995, "step": 2360 }, { "epoch": 0.08612544516316593, "grad_norm": 17.593692779541016, "learning_rate": 8.590740606148703e-06, "loss": 0.2952, "step": 2370 }, { "epoch": 0.0864888436659641, "grad_norm": 3.4646732807159424, "learning_rate": 8.62708045642852e-06, "loss": 0.2961, "step": 2380 }, { "epoch": 0.08685224216876226, "grad_norm": 2.9895999431610107, "learning_rate": 8.663420306708337e-06, "loss": 0.2852, "step": 2390 }, { "epoch": 0.08721564067156043, "grad_norm": 24.221176147460938, "learning_rate": 8.699760156988155e-06, "loss": 0.4343, "step": 2400 }, { "epoch": 0.08721564067156043, "eval_loss": 0.47036415338516235, "eval_runtime": 180.1154, "eval_samples_per_second": 41.162, "eval_steps_per_second": 5.147, "eval_wer": 0.28054713453264835, "step": 2400 }, { "epoch": 0.08757903917435861, "grad_norm": 161.69967651367188, "learning_rate": 
8.73610000726797e-06, "loss": 1.4598, "step": 2410 }, { "epoch": 0.08794243767715677, "grad_norm": 10.37559700012207, "learning_rate": 8.772439857547786e-06, "loss": 0.3042, "step": 2420 }, { "epoch": 0.08830583617995494, "grad_norm": 5.90106725692749, "learning_rate": 8.808779707827604e-06, "loss": 0.3385, "step": 2430 }, { "epoch": 0.0886692346827531, "grad_norm": 9.207955360412598, "learning_rate": 8.845119558107422e-06, "loss": 0.2963, "step": 2440 }, { "epoch": 0.08903263318555127, "grad_norm": 22.280956268310547, "learning_rate": 8.881459408387238e-06, "loss": 0.4505, "step": 2450 }, { "epoch": 0.08939603168834945, "grad_norm": 3.090710401535034, "learning_rate": 8.917799258667055e-06, "loss": 0.3114, "step": 2460 }, { "epoch": 0.08975943019114761, "grad_norm": 4.144134044647217, "learning_rate": 8.954139108946871e-06, "loss": 0.2855, "step": 2470 }, { "epoch": 0.09012282869394578, "grad_norm": 4.343112468719482, "learning_rate": 8.990478959226687e-06, "loss": 0.2906, "step": 2480 }, { "epoch": 0.09048622719674394, "grad_norm": 2.6925292015075684, "learning_rate": 9.026818809506505e-06, "loss": 0.284, "step": 2490 }, { "epoch": 0.09084962569954212, "grad_norm": 29.639341354370117, "learning_rate": 9.063158659786323e-06, "loss": 0.3411, "step": 2500 }, { "epoch": 0.09121302420234029, "grad_norm": 4.425374984741211, "learning_rate": 9.099498510066139e-06, "loss": 0.3041, "step": 2510 }, { "epoch": 0.09157642270513845, "grad_norm": 5.6643195152282715, "learning_rate": 9.135838360345955e-06, "loss": 0.3123, "step": 2520 }, { "epoch": 0.09193982120793662, "grad_norm": 3.9098479747772217, "learning_rate": 9.172178210625772e-06, "loss": 0.3664, "step": 2530 }, { "epoch": 0.0923032197107348, "grad_norm": 3.133389949798584, "learning_rate": 9.20851806090559e-06, "loss": 0.2708, "step": 2540 }, { "epoch": 0.09266661821353296, "grad_norm": 43.00468063354492, "learning_rate": 9.244857911185406e-06, "loss": 0.4215, "step": 2550 }, { "epoch": 0.09303001671633113, 
"grad_norm": 3.1411876678466797, "learning_rate": 9.281197761465222e-06, "loss": 0.2983, "step": 2560 }, { "epoch": 0.09339341521912929, "grad_norm": 3.263828754425049, "learning_rate": 9.317537611745041e-06, "loss": 0.3137, "step": 2570 }, { "epoch": 0.09375681372192747, "grad_norm": 3.618751049041748, "learning_rate": 9.353877462024857e-06, "loss": 0.3279, "step": 2580 }, { "epoch": 0.09412021222472564, "grad_norm": 3.6551568508148193, "learning_rate": 9.390217312304673e-06, "loss": 0.2409, "step": 2590 }, { "epoch": 0.0944836107275238, "grad_norm": 8.680901527404785, "learning_rate": 9.42655716258449e-06, "loss": 0.373, "step": 2600 }, { "epoch": 0.09484700923032197, "grad_norm": 4.761026382446289, "learning_rate": 9.462897012864308e-06, "loss": 0.2777, "step": 2610 }, { "epoch": 0.09521040773312014, "grad_norm": 3.142723321914673, "learning_rate": 9.499236863144124e-06, "loss": 0.2882, "step": 2620 }, { "epoch": 0.09557380623591831, "grad_norm": 2.969968795776367, "learning_rate": 9.53557671342394e-06, "loss": 0.3086, "step": 2630 }, { "epoch": 0.09593720473871648, "grad_norm": 3.754549264907837, "learning_rate": 9.571916563703758e-06, "loss": 0.259, "step": 2640 }, { "epoch": 0.09630060324151464, "grad_norm": 23.7288761138916, "learning_rate": 9.608256413983576e-06, "loss": 0.4284, "step": 2650 }, { "epoch": 0.09666400174431282, "grad_norm": 2.7727372646331787, "learning_rate": 9.644596264263392e-06, "loss": 0.2602, "step": 2660 }, { "epoch": 0.09702740024711098, "grad_norm": 14.707064628601074, "learning_rate": 9.680936114543208e-06, "loss": 0.3059, "step": 2670 }, { "epoch": 0.09739079874990915, "grad_norm": 3.8396642208099365, "learning_rate": 9.717275964823025e-06, "loss": 0.2811, "step": 2680 }, { "epoch": 0.09775419725270731, "grad_norm": 2.9460713863372803, "learning_rate": 9.753615815102843e-06, "loss": 0.2686, "step": 2690 }, { "epoch": 0.0981175957555055, "grad_norm": 20.107336044311523, "learning_rate": 9.789955665382659e-06, "loss": 0.4306, "step": 
2700 }, { "epoch": 0.09848099425830366, "grad_norm": 3.1286280155181885, "learning_rate": 9.826295515662477e-06, "loss": 0.3059, "step": 2710 }, { "epoch": 0.09884439276110182, "grad_norm": 6.160215854644775, "learning_rate": 9.862635365942292e-06, "loss": 0.3046, "step": 2720 }, { "epoch": 0.09920779126389999, "grad_norm": 6.1921186447143555, "learning_rate": 9.89897521622211e-06, "loss": 0.285, "step": 2730 }, { "epoch": 0.09957118976669817, "grad_norm": 13.759759902954102, "learning_rate": 9.935315066501926e-06, "loss": 0.2888, "step": 2740 }, { "epoch": 0.09993458826949633, "grad_norm": 13.92764949798584, "learning_rate": 9.971654916781744e-06, "loss": 0.4266, "step": 2750 }, { "epoch": 0.1002979867722945, "grad_norm": 3.3999857902526855, "learning_rate": 1.000799476706156e-05, "loss": 0.2858, "step": 2760 }, { "epoch": 0.10066138527509266, "grad_norm": 4.103928089141846, "learning_rate": 1.0044334617341377e-05, "loss": 0.262, "step": 2770 }, { "epoch": 0.10102478377789084, "grad_norm": 6.15985107421875, "learning_rate": 1.0080674467621195e-05, "loss": 0.2866, "step": 2780 }, { "epoch": 0.10138818228068901, "grad_norm": 4.904097557067871, "learning_rate": 1.0117014317901011e-05, "loss": 0.5057, "step": 2790 }, { "epoch": 0.10175158078348717, "grad_norm": 15.2875337600708, "learning_rate": 1.0153354168180827e-05, "loss": 0.4345, "step": 2800 }, { "epoch": 0.10211497928628534, "grad_norm": 2.4697763919830322, "learning_rate": 1.0189694018460643e-05, "loss": 0.2693, "step": 2810 }, { "epoch": 0.1024783777890835, "grad_norm": 5.04618501663208, "learning_rate": 1.0226033868740462e-05, "loss": 0.2868, "step": 2820 }, { "epoch": 0.10284177629188168, "grad_norm": 5.851120948791504, "learning_rate": 1.0262373719020278e-05, "loss": 0.3425, "step": 2830 }, { "epoch": 0.10320517479467985, "grad_norm": 2.1007258892059326, "learning_rate": 1.0298713569300094e-05, "loss": 0.2394, "step": 2840 }, { "epoch": 0.10356857329747801, "grad_norm": 23.411701202392578, "learning_rate": 
1.0335053419579912e-05, "loss": 0.4125, "step": 2850 }, { "epoch": 0.10393197180027618, "grad_norm": 4.178852558135986, "learning_rate": 1.037139326985973e-05, "loss": 0.2951, "step": 2860 }, { "epoch": 0.10429537030307436, "grad_norm": 1.7873708009719849, "learning_rate": 1.0407733120139545e-05, "loss": 0.3272, "step": 2870 }, { "epoch": 0.10465876880587252, "grad_norm": 7.603367328643799, "learning_rate": 1.0444072970419361e-05, "loss": 0.2779, "step": 2880 }, { "epoch": 0.10502216730867069, "grad_norm": 3.468761444091797, "learning_rate": 1.0480412820699179e-05, "loss": 0.3007, "step": 2890 }, { "epoch": 0.10538556581146885, "grad_norm": 16.35407829284668, "learning_rate": 1.0516752670978997e-05, "loss": 0.3918, "step": 2900 }, { "epoch": 0.10574896431426703, "grad_norm": 3.4226725101470947, "learning_rate": 1.0553092521258813e-05, "loss": 3.7156, "step": 2910 }, { "epoch": 0.1061123628170652, "grad_norm": 9.006295204162598, "learning_rate": 1.058943237153863e-05, "loss": 0.4075, "step": 2920 }, { "epoch": 0.10647576131986336, "grad_norm": 4.993385314941406, "learning_rate": 1.0625772221818446e-05, "loss": 0.3588, "step": 2930 }, { "epoch": 0.10683915982266153, "grad_norm": 3.7684736251831055, "learning_rate": 1.0662112072098264e-05, "loss": 0.2429, "step": 2940 }, { "epoch": 0.1072025583254597, "grad_norm": 40.301170349121094, "learning_rate": 1.069845192237808e-05, "loss": 0.4739, "step": 2950 }, { "epoch": 0.10756595682825787, "grad_norm": 3.772693157196045, "learning_rate": 1.0734791772657898e-05, "loss": 0.3284, "step": 2960 }, { "epoch": 0.10792935533105603, "grad_norm": 3.0183212757110596, "learning_rate": 1.0771131622937714e-05, "loss": 0.38, "step": 2970 }, { "epoch": 0.1082927538338542, "grad_norm": 6.61776876449585, "learning_rate": 1.0807471473217531e-05, "loss": 0.2793, "step": 2980 }, { "epoch": 0.10865615233665238, "grad_norm": 6.112472057342529, "learning_rate": 1.0843811323497347e-05, "loss": 0.2447, "step": 2990 }, { "epoch": 
0.10901955083945054, "grad_norm": 10.800559997558594, "learning_rate": 1.0880151173777165e-05, "loss": 0.373, "step": 3000 }, { "epoch": 0.10901955083945054, "eval_loss": 0.4652940630912781, "eval_runtime": 180.0765, "eval_samples_per_second": 41.171, "eval_steps_per_second": 5.148, "eval_wer": 0.27681667181004593, "step": 3000 }, { "epoch": 0.10938294934224871, "grad_norm": 7.778831958770752, "learning_rate": 1.091649102405698e-05, "loss": 0.29, "step": 3010 }, { "epoch": 0.10974634784504687, "grad_norm": 2.855592966079712, "learning_rate": 1.0952830874336798e-05, "loss": 0.2411, "step": 3020 }, { "epoch": 0.11010974634784505, "grad_norm": 4.229335784912109, "learning_rate": 1.0989170724616616e-05, "loss": 0.3247, "step": 3030 }, { "epoch": 0.11047314485064322, "grad_norm": 3.8145949840545654, "learning_rate": 1.1025510574896432e-05, "loss": 0.2242, "step": 3040 }, { "epoch": 0.11083654335344138, "grad_norm": 22.571304321289062, "learning_rate": 1.1061850425176248e-05, "loss": 0.3959, "step": 3050 }, { "epoch": 0.11119994185623955, "grad_norm": 2.4706461429595947, "learning_rate": 1.1098190275456066e-05, "loss": 0.2466, "step": 3060 }, { "epoch": 0.11156334035903773, "grad_norm": 4.497069358825684, "learning_rate": 1.1134530125735883e-05, "loss": 2.1968, "step": 3070 }, { "epoch": 0.11192673886183589, "grad_norm": 5.060062885284424, "learning_rate": 1.11708699760157e-05, "loss": 0.2921, "step": 3080 }, { "epoch": 0.11229013736463406, "grad_norm": 2.7882325649261475, "learning_rate": 1.1207209826295515e-05, "loss": 0.2534, "step": 3090 }, { "epoch": 0.11265353586743222, "grad_norm": 9.96241569519043, "learning_rate": 1.1243549676575333e-05, "loss": 0.421, "step": 3100 }, { "epoch": 0.1130169343702304, "grad_norm": 31.262916564941406, "learning_rate": 1.127988952685515e-05, "loss": 0.4048, "step": 3110 }, { "epoch": 0.11338033287302857, "grad_norm": 3.472343921661377, "learning_rate": 1.1316229377134967e-05, "loss": 0.2798, "step": 3120 }, { "epoch": 
0.11374373137582673, "grad_norm": 4.074085235595703, "learning_rate": 1.1352569227414783e-05, "loss": 0.299, "step": 3130 }, { "epoch": 0.1141071298786249, "grad_norm": 2.879512310028076, "learning_rate": 1.1388909077694602e-05, "loss": 0.2137, "step": 3140 }, { "epoch": 0.11447052838142306, "grad_norm": 125.17889404296875, "learning_rate": 1.1425248927974418e-05, "loss": 0.5418, "step": 3150 }, { "epoch": 0.11483392688422124, "grad_norm": 4.171487808227539, "learning_rate": 1.1461588778254234e-05, "loss": 0.2685, "step": 3160 }, { "epoch": 0.1151973253870194, "grad_norm": 2.1496529579162598, "learning_rate": 1.1497928628534051e-05, "loss": 0.2421, "step": 3170 }, { "epoch": 0.11556072388981757, "grad_norm": 2.6266047954559326, "learning_rate": 1.1534268478813867e-05, "loss": 0.3288, "step": 3180 }, { "epoch": 0.11592412239261574, "grad_norm": 3.7677230834960938, "learning_rate": 1.1570608329093685e-05, "loss": 0.3093, "step": 3190 }, { "epoch": 0.11628752089541392, "grad_norm": 9.4945707321167, "learning_rate": 1.1606948179373501e-05, "loss": 0.3066, "step": 3200 }, { "epoch": 0.11665091939821208, "grad_norm": 2.5509915351867676, "learning_rate": 1.1643288029653319e-05, "loss": 0.2615, "step": 3210 }, { "epoch": 0.11701431790101025, "grad_norm": 3.066624641418457, "learning_rate": 1.1679627879933135e-05, "loss": 0.3224, "step": 3220 }, { "epoch": 0.11737771640380841, "grad_norm": 6.494440078735352, "learning_rate": 1.1715967730212952e-05, "loss": 0.3017, "step": 3230 }, { "epoch": 0.11774111490660659, "grad_norm": 3.4675605297088623, "learning_rate": 1.1752307580492768e-05, "loss": 0.2152, "step": 3240 }, { "epoch": 0.11810451340940475, "grad_norm": 15.5110445022583, "learning_rate": 1.1788647430772586e-05, "loss": 0.349, "step": 3250 }, { "epoch": 0.11846791191220292, "grad_norm": 1.972530484199524, "learning_rate": 1.1824987281052402e-05, "loss": 0.2728, "step": 3260 }, { "epoch": 0.11883131041500108, "grad_norm": 4.018677711486816, "learning_rate": 
1.186132713133222e-05, "loss": 0.254, "step": 3270 }, { "epoch": 0.11919470891779926, "grad_norm": 4.95416784286499, "learning_rate": 1.1897666981612037e-05, "loss": 0.2465, "step": 3280 }, { "epoch": 0.11955810742059743, "grad_norm": 3.165599822998047, "learning_rate": 1.1934006831891853e-05, "loss": 0.2537, "step": 3290 }, { "epoch": 0.1199215059233956, "grad_norm": 8.508636474609375, "learning_rate": 1.1970346682171669e-05, "loss": 0.3655, "step": 3300 }, { "epoch": 0.12028490442619376, "grad_norm": 2.3892879486083984, "learning_rate": 1.2006686532451487e-05, "loss": 0.252, "step": 3310 }, { "epoch": 0.12064830292899194, "grad_norm": 3.591564178466797, "learning_rate": 1.2043026382731304e-05, "loss": 0.2401, "step": 3320 }, { "epoch": 0.1210117014317901, "grad_norm": 3.891261577606201, "learning_rate": 1.207936623301112e-05, "loss": 0.2909, "step": 3330 }, { "epoch": 0.12137509993458827, "grad_norm": 4.691511154174805, "learning_rate": 1.2115706083290936e-05, "loss": 0.2304, "step": 3340 }, { "epoch": 0.12173849843738643, "grad_norm": 18.415170669555664, "learning_rate": 1.2152045933570754e-05, "loss": 0.35, "step": 3350 }, { "epoch": 0.12210189694018461, "grad_norm": 3.9105615615844727, "learning_rate": 1.2188385783850572e-05, "loss": 0.3112, "step": 3360 }, { "epoch": 0.12246529544298278, "grad_norm": 3.215313196182251, "learning_rate": 1.2224725634130388e-05, "loss": 0.2492, "step": 3370 }, { "epoch": 0.12282869394578094, "grad_norm": 9.30749225616455, "learning_rate": 1.2261065484410204e-05, "loss": 0.2696, "step": 3380 }, { "epoch": 0.12319209244857911, "grad_norm": 4.9797682762146, "learning_rate": 1.2297405334690023e-05, "loss": 0.2197, "step": 3390 }, { "epoch": 0.12355549095137729, "grad_norm": 19.632797241210938, "learning_rate": 1.2333745184969839e-05, "loss": 0.3411, "step": 3400 }, { "epoch": 0.12391888945417545, "grad_norm": 4.509830474853516, "learning_rate": 1.2370085035249655e-05, "loss": 0.2394, "step": 3410 }, { "epoch": 0.12428228795697362, 
"grad_norm": 2.253514051437378, "learning_rate": 1.2406424885529473e-05, "loss": 0.266, "step": 3420 }, { "epoch": 0.12464568645977178, "grad_norm": 3.123828172683716, "learning_rate": 1.244276473580929e-05, "loss": 0.2675, "step": 3430 }, { "epoch": 0.12500908496256996, "grad_norm": 34.37680435180664, "learning_rate": 1.2479104586089106e-05, "loss": 0.279, "step": 3440 }, { "epoch": 0.1253724834653681, "grad_norm": 10.051690101623535, "learning_rate": 1.2515444436368922e-05, "loss": 0.3364, "step": 3450 }, { "epoch": 0.1257358819681663, "grad_norm": 2.1765711307525635, "learning_rate": 1.255178428664874e-05, "loss": 0.2288, "step": 3460 }, { "epoch": 0.12609928047096447, "grad_norm": 2.4910778999328613, "learning_rate": 1.2588124136928556e-05, "loss": 0.2866, "step": 3470 }, { "epoch": 0.12646267897376262, "grad_norm": 7.379613876342773, "learning_rate": 1.2624463987208373e-05, "loss": 0.2618, "step": 3480 }, { "epoch": 0.1268260774765608, "grad_norm": 2.681814432144165, "learning_rate": 1.266080383748819e-05, "loss": 0.2405, "step": 3490 }, { "epoch": 0.12718947597935895, "grad_norm": 83.93474578857422, "learning_rate": 1.2697143687768007e-05, "loss": 0.338, "step": 3500 }, { "epoch": 0.12755287448215713, "grad_norm": 1.5564826726913452, "learning_rate": 1.2733483538047825e-05, "loss": 0.2305, "step": 3510 }, { "epoch": 0.1279162729849553, "grad_norm": 2.6026437282562256, "learning_rate": 1.2769823388327639e-05, "loss": 0.2618, "step": 3520 }, { "epoch": 0.12827967148775346, "grad_norm": 8.228372573852539, "learning_rate": 1.2806163238607458e-05, "loss": 0.2586, "step": 3530 }, { "epoch": 0.12864306999055164, "grad_norm": 2.643139362335205, "learning_rate": 1.2842503088887276e-05, "loss": 0.2197, "step": 3540 }, { "epoch": 0.12900646849334982, "grad_norm": 400.0296325683594, "learning_rate": 1.287884293916709e-05, "loss": 0.3586, "step": 3550 }, { "epoch": 0.12936986699614797, "grad_norm": 1.6349281072616577, "learning_rate": 1.2915182789446908e-05, "loss": 
0.2364, "step": 3560 }, { "epoch": 0.12973326549894615, "grad_norm": 2.6573753356933594, "learning_rate": 1.2951522639726724e-05, "loss": 0.2195, "step": 3570 }, { "epoch": 0.1300966640017443, "grad_norm": 4.2721686363220215, "learning_rate": 1.2987862490006542e-05, "loss": 0.3092, "step": 3580 }, { "epoch": 0.13046006250454248, "grad_norm": 2.9982502460479736, "learning_rate": 1.302420234028636e-05, "loss": 0.2826, "step": 3590 }, { "epoch": 0.13082346100734066, "grad_norm": 8.903009414672852, "learning_rate": 1.3060542190566175e-05, "loss": 0.3367, "step": 3600 }, { "epoch": 0.13082346100734066, "eval_loss": 0.4490436017513275, "eval_runtime": 179.8743, "eval_samples_per_second": 41.218, "eval_steps_per_second": 5.154, "eval_wer": 0.2664058670829778, "step": 3600 }, { "epoch": 0.1311868595101388, "grad_norm": 2.9746363162994385, "learning_rate": 1.3096882040845993e-05, "loss": 0.2418, "step": 3610 }, { "epoch": 0.131550258012937, "grad_norm": 2.274872303009033, "learning_rate": 1.313322189112581e-05, "loss": 0.3052, "step": 3620 }, { "epoch": 0.13191365651573517, "grad_norm": 7.114847660064697, "learning_rate": 1.3169561741405625e-05, "loss": 0.2821, "step": 3630 }, { "epoch": 0.13227705501853332, "grad_norm": 3.2101128101348877, "learning_rate": 1.3205901591685444e-05, "loss": 0.2223, "step": 3640 }, { "epoch": 0.1326404535213315, "grad_norm": 18.914968490600586, "learning_rate": 1.3242241441965258e-05, "loss": 0.3809, "step": 3650 }, { "epoch": 0.13300385202412965, "grad_norm": 2.399569272994995, "learning_rate": 1.3278581292245076e-05, "loss": 0.2221, "step": 3660 }, { "epoch": 0.13336725052692783, "grad_norm": 5.76792573928833, "learning_rate": 1.3314921142524894e-05, "loss": 0.2487, "step": 3670 }, { "epoch": 0.133730649029726, "grad_norm": 3.6859967708587646, "learning_rate": 1.335126099280471e-05, "loss": 0.2781, "step": 3680 }, { "epoch": 0.13409404753252416, "grad_norm": 2.9653141498565674, "learning_rate": 1.3387600843084527e-05, "loss": 0.2258, "step": 
3690 }, { "epoch": 0.13445744603532234, "grad_norm": 19.170753479003906, "learning_rate": 1.3423940693364345e-05, "loss": 0.3902, "step": 3700 }, { "epoch": 0.13482084453812052, "grad_norm": 2.2880115509033203, "learning_rate": 1.3460280543644161e-05, "loss": 0.2745, "step": 3710 }, { "epoch": 0.13518424304091867, "grad_norm": 2.5196125507354736, "learning_rate": 1.3496620393923979e-05, "loss": 0.2293, "step": 3720 }, { "epoch": 0.13554764154371685, "grad_norm": 3.827986001968384, "learning_rate": 1.3532960244203793e-05, "loss": 0.259, "step": 3730 }, { "epoch": 0.135911040046515, "grad_norm": 3.4211530685424805, "learning_rate": 1.356930009448361e-05, "loss": 0.3256, "step": 3740 }, { "epoch": 0.13627443854931318, "grad_norm": 26.879398345947266, "learning_rate": 1.360563994476343e-05, "loss": 0.3208, "step": 3750 }, { "epoch": 0.13663783705211135, "grad_norm": 2.316091775894165, "learning_rate": 1.3641979795043244e-05, "loss": 0.2316, "step": 3760 }, { "epoch": 0.1370012355549095, "grad_norm": 4.098924160003662, "learning_rate": 1.3678319645323062e-05, "loss": 0.2399, "step": 3770 }, { "epoch": 0.13736463405770769, "grad_norm": 6.9372687339782715, "learning_rate": 1.371465949560288e-05, "loss": 0.2858, "step": 3780 }, { "epoch": 0.13772803256050586, "grad_norm": 2.509535789489746, "learning_rate": 1.3750999345882695e-05, "loss": 0.2113, "step": 3790 }, { "epoch": 0.13809143106330402, "grad_norm": 7.7181077003479, "learning_rate": 1.3787339196162513e-05, "loss": 0.3279, "step": 3800 }, { "epoch": 0.1384548295661022, "grad_norm": 2.6843245029449463, "learning_rate": 1.3823679046442329e-05, "loss": 0.2267, "step": 3810 }, { "epoch": 0.13881822806890035, "grad_norm": 3.05159068107605, "learning_rate": 1.3860018896722147e-05, "loss": 0.229, "step": 3820 }, { "epoch": 0.13918162657169852, "grad_norm": 5.029635429382324, "learning_rate": 1.3896358747001964e-05, "loss": 0.2627, "step": 3830 }, { "epoch": 0.1395450250744967, "grad_norm": 2.8287103176116943, 
"learning_rate": 1.3932698597281779e-05, "loss": 0.2294, "step": 3840 }, { "epoch": 0.13990842357729485, "grad_norm": 24.862224578857422, "learning_rate": 1.3969038447561598e-05, "loss": 0.3198, "step": 3850 }, { "epoch": 0.14027182208009303, "grad_norm": 5.624647617340088, "learning_rate": 1.4005378297841412e-05, "loss": 0.2641, "step": 3860 }, { "epoch": 0.14063522058289118, "grad_norm": 1.6199389696121216, "learning_rate": 1.404171814812123e-05, "loss": 0.2279, "step": 3870 }, { "epoch": 0.14099861908568936, "grad_norm": 2.864058017730713, "learning_rate": 1.4078057998401047e-05, "loss": 0.2448, "step": 3880 }, { "epoch": 0.14136201758848754, "grad_norm": 3.897899627685547, "learning_rate": 1.4114397848680863e-05, "loss": 0.2438, "step": 3890 }, { "epoch": 0.1417254160912857, "grad_norm": 42.4840087890625, "learning_rate": 1.4150737698960681e-05, "loss": 0.3604, "step": 3900 }, { "epoch": 0.14208881459408387, "grad_norm": 1.6532913446426392, "learning_rate": 1.4187077549240499e-05, "loss": 0.2469, "step": 3910 }, { "epoch": 0.14245221309688205, "grad_norm": 2.3755931854248047, "learning_rate": 1.4223417399520315e-05, "loss": 0.2233, "step": 3920 }, { "epoch": 0.1428156115996802, "grad_norm": 5.866461277008057, "learning_rate": 1.4259757249800132e-05, "loss": 0.2952, "step": 3930 }, { "epoch": 0.14317901010247838, "grad_norm": 3.171570301055908, "learning_rate": 1.4296097100079947e-05, "loss": 0.2329, "step": 3940 }, { "epoch": 0.14354240860527653, "grad_norm": 23.302635192871094, "learning_rate": 1.4332436950359764e-05, "loss": 0.3592, "step": 3950 }, { "epoch": 0.1439058071080747, "grad_norm": 2.3609213829040527, "learning_rate": 1.4368776800639584e-05, "loss": 1.1975, "step": 3960 }, { "epoch": 0.1442692056108729, "grad_norm": 2.857872486114502, "learning_rate": 1.4405116650919398e-05, "loss": 0.265, "step": 3970 }, { "epoch": 0.14463260411367104, "grad_norm": 6.918335914611816, "learning_rate": 1.4441456501199216e-05, "loss": 0.4057, "step": 3980 }, { 
"epoch": 0.14499600261646922, "grad_norm": 3.8019461631774902, "learning_rate": 1.4477796351479033e-05, "loss": 0.299, "step": 3990 }, { "epoch": 0.1453594011192674, "grad_norm": 30.963428497314453, "learning_rate": 1.451413620175885e-05, "loss": 0.335, "step": 4000 }, { "epoch": 0.14572279962206555, "grad_norm": 2.3968963623046875, "learning_rate": 1.4550476052038667e-05, "loss": 0.224, "step": 4010 }, { "epoch": 0.14608619812486373, "grad_norm": 6.7229485511779785, "learning_rate": 1.4586815902318481e-05, "loss": 0.2657, "step": 4020 }, { "epoch": 0.14644959662766188, "grad_norm": 17.447879791259766, "learning_rate": 1.46231557525983e-05, "loss": 0.2199, "step": 4030 }, { "epoch": 0.14681299513046006, "grad_norm": 2.020756721496582, "learning_rate": 1.4659495602878118e-05, "loss": 0.31, "step": 4040 }, { "epoch": 0.14717639363325824, "grad_norm": 38.28268814086914, "learning_rate": 1.4695835453157932e-05, "loss": 0.3861, "step": 4050 }, { "epoch": 0.1475397921360564, "grad_norm": 2.085073232650757, "learning_rate": 1.473217530343775e-05, "loss": 0.2078, "step": 4060 }, { "epoch": 0.14790319063885457, "grad_norm": 3.453597068786621, "learning_rate": 1.476851515371757e-05, "loss": 0.296, "step": 4070 }, { "epoch": 0.14826658914165275, "grad_norm": 2.3039424419403076, "learning_rate": 1.4804855003997384e-05, "loss": 0.2346, "step": 4080 }, { "epoch": 0.1486299876444509, "grad_norm": 3.217890977859497, "learning_rate": 1.4841194854277201e-05, "loss": 0.2243, "step": 4090 }, { "epoch": 0.14899338614724908, "grad_norm": 12.48748779296875, "learning_rate": 1.4877534704557017e-05, "loss": 0.3378, "step": 4100 }, { "epoch": 0.14935678465004723, "grad_norm": 2.781388282775879, "learning_rate": 1.4913874554836835e-05, "loss": 0.2167, "step": 4110 }, { "epoch": 0.1497201831528454, "grad_norm": 2.564457893371582, "learning_rate": 1.4950214405116653e-05, "loss": 0.2187, "step": 4120 }, { "epoch": 0.1500835816556436, "grad_norm": 9.590895652770996, "learning_rate": 
1.4986554255396469e-05, "loss": 0.2444, "step": 4130 }, { "epoch": 0.15044698015844174, "grad_norm": 2.8055028915405273, "learning_rate": 1.5022894105676286e-05, "loss": 0.2499, "step": 4140 }, { "epoch": 0.15081037866123992, "grad_norm": 7.157045364379883, "learning_rate": 1.5059233955956104e-05, "loss": 0.361, "step": 4150 }, { "epoch": 0.1511737771640381, "grad_norm": 3.369006633758545, "learning_rate": 1.5095573806235918e-05, "loss": 0.259, "step": 4160 }, { "epoch": 0.15153717566683625, "grad_norm": 5.334355354309082, "learning_rate": 1.5131913656515736e-05, "loss": 0.2797, "step": 4170 }, { "epoch": 0.15190057416963443, "grad_norm": 6.667120456695557, "learning_rate": 1.5168253506795552e-05, "loss": 0.2678, "step": 4180 }, { "epoch": 0.15226397267243258, "grad_norm": 1.7419887781143188, "learning_rate": 1.520459335707537e-05, "loss": 0.2432, "step": 4190 }, { "epoch": 0.15262737117523076, "grad_norm": 7.022573947906494, "learning_rate": 1.5240933207355187e-05, "loss": 0.2955, "step": 4200 }, { "epoch": 0.15262737117523076, "eval_loss": 0.4361402690410614, "eval_runtime": 180.5933, "eval_samples_per_second": 41.054, "eval_steps_per_second": 5.133, "eval_wer": 0.2589540181894095, "step": 4200 }, { "epoch": 0.15299076967802894, "grad_norm": 3.108078718185425, "learning_rate": 1.5277273057635e-05, "loss": 3.6147, "step": 4210 }, { "epoch": 0.1533541681808271, "grad_norm": 2.6063787937164307, "learning_rate": 1.531361290791482e-05, "loss": 0.232, "step": 4220 }, { "epoch": 0.15371756668362527, "grad_norm": 3.581697463989258, "learning_rate": 1.5349952758194637e-05, "loss": 0.2451, "step": 4230 }, { "epoch": 0.15408096518642342, "grad_norm": 2.5910837650299072, "learning_rate": 1.5386292608474453e-05, "loss": 0.2283, "step": 4240 }, { "epoch": 0.1544443636892216, "grad_norm": 70.38739013671875, "learning_rate": 1.5422632458754272e-05, "loss": 0.396, "step": 4250 }, { "epoch": 0.15480776219201978, "grad_norm": 3.5658187866210938, "learning_rate": 
1.5458972309034088e-05, "loss": 0.2116, "step": 4260 }, { "epoch": 0.15517116069481793, "grad_norm": 5.393126487731934, "learning_rate": 1.5495312159313904e-05, "loss": 0.2382, "step": 4270 }, { "epoch": 0.1555345591976161, "grad_norm": 10.135586738586426, "learning_rate": 1.5531652009593723e-05, "loss": 0.2485, "step": 4280 }, { "epoch": 0.15589795770041429, "grad_norm": 2.1143031120300293, "learning_rate": 1.5567991859873536e-05, "loss": 0.1936, "step": 4290 }, { "epoch": 0.15626135620321244, "grad_norm": 20.077383041381836, "learning_rate": 1.5604331710153355e-05, "loss": 0.3818, "step": 4300 }, { "epoch": 0.15662475470601062, "grad_norm": 3.793126344680786, "learning_rate": 1.564067156043317e-05, "loss": 0.2245, "step": 4310 }, { "epoch": 0.15698815320880877, "grad_norm": 3.2057955265045166, "learning_rate": 1.5677011410712987e-05, "loss": 0.2551, "step": 4320 }, { "epoch": 0.15735155171160695, "grad_norm": 5.002716064453125, "learning_rate": 1.5713351260992806e-05, "loss": 0.2951, "step": 4330 }, { "epoch": 0.15771495021440513, "grad_norm": 2.2240726947784424, "learning_rate": 1.5749691111272622e-05, "loss": 0.1993, "step": 4340 }, { "epoch": 0.15807834871720328, "grad_norm": 55.30891036987305, "learning_rate": 1.578603096155244e-05, "loss": 0.2803, "step": 4350 }, { "epoch": 0.15844174722000146, "grad_norm": 1.9186596870422363, "learning_rate": 1.5822370811832258e-05, "loss": 0.2234, "step": 4360 }, { "epoch": 0.15880514572279963, "grad_norm": 1.7817661762237549, "learning_rate": 1.5858710662112074e-05, "loss": 0.2038, "step": 4370 }, { "epoch": 0.15916854422559779, "grad_norm": 3.046330690383911, "learning_rate": 1.589505051239189e-05, "loss": 0.2809, "step": 4380 }, { "epoch": 0.15953194272839596, "grad_norm": 5.43302583694458, "learning_rate": 1.5931390362671706e-05, "loss": 0.1896, "step": 4390 }, { "epoch": 0.15989534123119412, "grad_norm": 12.185855865478516, "learning_rate": 1.596773021295152e-05, "loss": 0.2984, "step": 4400 }, { "epoch": 
0.1602587397339923, "grad_norm": 1.9507842063903809, "learning_rate": 1.600407006323134e-05, "loss": 0.2064, "step": 4410 }, { "epoch": 0.16062213823679047, "grad_norm": 4.536543846130371, "learning_rate": 1.6040409913511157e-05, "loss": 0.2433, "step": 4420 }, { "epoch": 0.16098553673958862, "grad_norm": 3.101174831390381, "learning_rate": 1.6076749763790973e-05, "loss": 0.2746, "step": 4430 }, { "epoch": 0.1613489352423868, "grad_norm": 2.2098021507263184, "learning_rate": 1.6113089614070792e-05, "loss": 0.17, "step": 4440 }, { "epoch": 0.16171233374518498, "grad_norm": 59.360809326171875, "learning_rate": 1.6149429464350608e-05, "loss": 0.379, "step": 4450 }, { "epoch": 0.16207573224798313, "grad_norm": 6.364736557006836, "learning_rate": 1.6185769314630424e-05, "loss": 0.2224, "step": 4460 }, { "epoch": 0.1624391307507813, "grad_norm": 3.2455356121063232, "learning_rate": 1.622210916491024e-05, "loss": 0.2195, "step": 4470 }, { "epoch": 0.16280252925357946, "grad_norm": 6.399629592895508, "learning_rate": 1.625844901519006e-05, "loss": 0.266, "step": 4480 }, { "epoch": 0.16316592775637764, "grad_norm": 16.19785499572754, "learning_rate": 1.6294788865469875e-05, "loss": 0.1836, "step": 4490 }, { "epoch": 0.16352932625917582, "grad_norm": 7.909778594970703, "learning_rate": 1.633112871574969e-05, "loss": 0.6016, "step": 4500 }, { "epoch": 0.16389272476197397, "grad_norm": 2.8134663105010986, "learning_rate": 1.636746856602951e-05, "loss": 0.2148, "step": 4510 }, { "epoch": 0.16425612326477215, "grad_norm": 2.667999505996704, "learning_rate": 1.6403808416309327e-05, "loss": 0.2294, "step": 4520 }, { "epoch": 0.1646195217675703, "grad_norm": 3.355242967605591, "learning_rate": 1.6440148266589143e-05, "loss": 0.2097, "step": 4530 }, { "epoch": 0.16498292027036848, "grad_norm": 2.6241908073425293, "learning_rate": 1.647648811686896e-05, "loss": 0.2337, "step": 4540 }, { "epoch": 0.16534631877316666, "grad_norm": 16.759428024291992, "learning_rate": 
1.6512827967148775e-05, "loss": 0.2944, "step": 4550 }, { "epoch": 0.1657097172759648, "grad_norm": 3.098898410797119, "learning_rate": 1.6549167817428594e-05, "loss": 0.1895, "step": 4560 }, { "epoch": 0.166073115778763, "grad_norm": 4.042644023895264, "learning_rate": 1.658550766770841e-05, "loss": 0.2369, "step": 4570 }, { "epoch": 0.16643651428156117, "grad_norm": 7.174807548522949, "learning_rate": 1.6621847517988226e-05, "loss": 0.2331, "step": 4580 }, { "epoch": 0.16679991278435932, "grad_norm": 2.1805012226104736, "learning_rate": 1.6658187368268045e-05, "loss": 0.2422, "step": 4590 }, { "epoch": 0.1671633112871575, "grad_norm": 18.097871780395508, "learning_rate": 1.6694527218547858e-05, "loss": 0.347, "step": 4600 }, { "epoch": 0.16752670978995565, "grad_norm": 3.48561429977417, "learning_rate": 1.6730867068827677e-05, "loss": 0.2985, "step": 4610 }, { "epoch": 0.16789010829275383, "grad_norm": 1.7519229650497437, "learning_rate": 1.6767206919107496e-05, "loss": 0.2204, "step": 4620 }, { "epoch": 0.168253506795552, "grad_norm": 3.7641661167144775, "learning_rate": 1.680354676938731e-05, "loss": 0.2348, "step": 4630 }, { "epoch": 0.16861690529835016, "grad_norm": 3.0688085556030273, "learning_rate": 1.683988661966713e-05, "loss": 0.2147, "step": 4640 }, { "epoch": 0.16898030380114834, "grad_norm": 25.845094680786133, "learning_rate": 1.6876226469946944e-05, "loss": 0.3671, "step": 4650 }, { "epoch": 0.16934370230394652, "grad_norm": 2.841994524002075, "learning_rate": 1.691256632022676e-05, "loss": 0.2182, "step": 4660 }, { "epoch": 0.16970710080674467, "grad_norm": 1.0501997470855713, "learning_rate": 1.694890617050658e-05, "loss": 0.1791, "step": 4670 }, { "epoch": 0.17007049930954285, "grad_norm": 3.3973441123962402, "learning_rate": 1.6985246020786392e-05, "loss": 0.3338, "step": 4680 }, { "epoch": 0.170433897812341, "grad_norm": 1.8442267179489136, "learning_rate": 1.702158587106621e-05, "loss": 0.2528, "step": 4690 }, { "epoch": 0.17079729631513918, 
"grad_norm": 42.373409271240234, "learning_rate": 1.705792572134603e-05, "loss": 0.2892, "step": 4700 }, { "epoch": 0.17116069481793736, "grad_norm": 6.344671726226807, "learning_rate": 1.7094265571625844e-05, "loss": 0.2474, "step": 4710 }, { "epoch": 0.1715240933207355, "grad_norm": 1.6177664995193481, "learning_rate": 1.7130605421905663e-05, "loss": 0.2364, "step": 4720 }, { "epoch": 0.1718874918235337, "grad_norm": 4.98591423034668, "learning_rate": 1.7166945272185482e-05, "loss": 0.2046, "step": 4730 }, { "epoch": 0.17225089032633187, "grad_norm": 7.943169116973877, "learning_rate": 1.7203285122465295e-05, "loss": 0.293, "step": 4740 }, { "epoch": 0.17261428882913002, "grad_norm": 7.402034759521484, "learning_rate": 1.7239624972745114e-05, "loss": 0.2722, "step": 4750 }, { "epoch": 0.1729776873319282, "grad_norm": 13.290019035339355, "learning_rate": 1.727596482302493e-05, "loss": 0.347, "step": 4760 }, { "epoch": 0.17334108583472635, "grad_norm": 1.8591586351394653, "learning_rate": 1.7312304673304746e-05, "loss": 0.2291, "step": 4770 }, { "epoch": 0.17370448433752453, "grad_norm": 2.5220861434936523, "learning_rate": 1.7348644523584565e-05, "loss": 0.2436, "step": 4780 }, { "epoch": 0.1740678828403227, "grad_norm": 1.8692690134048462, "learning_rate": 1.738498437386438e-05, "loss": 0.1782, "step": 4790 }, { "epoch": 0.17443128134312086, "grad_norm": 12.558557510375977, "learning_rate": 1.7421324224144197e-05, "loss": 0.3347, "step": 4800 }, { "epoch": 0.17443128134312086, "eval_loss": 0.4148472547531128, "eval_runtime": 180.0999, "eval_samples_per_second": 41.166, "eval_steps_per_second": 5.147, "eval_wer": 0.23564543358687168, "step": 4800 }, { "epoch": 0.17479467984591904, "grad_norm": 6.168694972991943, "learning_rate": 1.7457664074424017e-05, "loss": 0.2183, "step": 4810 }, { "epoch": 0.17515807834871722, "grad_norm": 5.153416633605957, "learning_rate": 1.749400392470383e-05, "loss": 0.2689, "step": 4820 }, { "epoch": 0.17552147685151537, "grad_norm": 
2.8500893115997314, "learning_rate": 1.753034377498365e-05, "loss": 0.2848, "step": 4830 }, { "epoch": 0.17588487535431355, "grad_norm": 17.89117431640625, "learning_rate": 1.7566683625263465e-05, "loss": 0.2539, "step": 4840 }, { "epoch": 0.1762482738571117, "grad_norm": 19.455005645751953, "learning_rate": 1.760302347554328e-05, "loss": 0.3166, "step": 4850 }, { "epoch": 0.17661167235990988, "grad_norm": 1.7975777387619019, "learning_rate": 1.76393633258231e-05, "loss": 0.1927, "step": 4860 }, { "epoch": 0.17697507086270806, "grad_norm": 4.6790690422058105, "learning_rate": 1.7675703176102916e-05, "loss": 0.2248, "step": 4870 }, { "epoch": 0.1773384693655062, "grad_norm": 3.2644243240356445, "learning_rate": 1.7712043026382732e-05, "loss": 0.2239, "step": 4880 }, { "epoch": 0.1777018678683044, "grad_norm": 1.9375410079956055, "learning_rate": 1.7748382876662548e-05, "loss": 0.2053, "step": 4890 }, { "epoch": 0.17806526637110254, "grad_norm": 15.435178756713867, "learning_rate": 1.7784722726942367e-05, "loss": 0.2903, "step": 4900 }, { "epoch": 0.17842866487390072, "grad_norm": 2.486330270767212, "learning_rate": 1.7821062577222183e-05, "loss": 0.2598, "step": 4910 }, { "epoch": 0.1787920633766989, "grad_norm": 2.5542314052581787, "learning_rate": 1.7857402427502e-05, "loss": 0.2305, "step": 4920 }, { "epoch": 0.17915546187949705, "grad_norm": 3.6416103839874268, "learning_rate": 1.7893742277781815e-05, "loss": 1.046, "step": 4930 }, { "epoch": 0.17951886038229523, "grad_norm": 1.9395058155059814, "learning_rate": 1.7930082128061634e-05, "loss": 0.2466, "step": 4940 }, { "epoch": 0.1798822588850934, "grad_norm": 7.664824962615967, "learning_rate": 1.796642197834145e-05, "loss": 0.2871, "step": 4950 }, { "epoch": 0.18024565738789156, "grad_norm": 2.0301320552825928, "learning_rate": 1.8002761828621266e-05, "loss": 0.1996, "step": 4960 }, { "epoch": 0.18060905589068973, "grad_norm": 8.371182441711426, "learning_rate": 1.8039101678901082e-05, "loss": 0.1947, "step": 
4970 }, { "epoch": 0.18097245439348789, "grad_norm": 2.6746129989624023, "learning_rate": 1.80754415291809e-05, "loss": 0.2679, "step": 4980 }, { "epoch": 0.18133585289628606, "grad_norm": 3.448202133178711, "learning_rate": 1.8111781379460718e-05, "loss": 0.1859, "step": 4990 }, { "epoch": 0.18169925139908424, "grad_norm": 28.57021141052246, "learning_rate": 1.8148121229740534e-05, "loss": 0.3318, "step": 5000 }, { "epoch": 0.1820626499018824, "grad_norm": 4.731750965118408, "learning_rate": 1.8184461080020353e-05, "loss": 0.2354, "step": 5010 }, { "epoch": 0.18242604840468057, "grad_norm": 1.6815394163131714, "learning_rate": 1.822080093030017e-05, "loss": 0.2075, "step": 5020 }, { "epoch": 0.18278944690747875, "grad_norm": 3.868263006210327, "learning_rate": 1.8257140780579985e-05, "loss": 0.2345, "step": 5030 }, { "epoch": 0.1831528454102769, "grad_norm": 1.964240550994873, "learning_rate": 1.82934806308598e-05, "loss": 0.222, "step": 5040 }, { "epoch": 0.18351624391307508, "grad_norm": 11.881858825683594, "learning_rate": 1.8329820481139617e-05, "loss": 0.3251, "step": 5050 }, { "epoch": 0.18387964241587323, "grad_norm": 1.8463056087493896, "learning_rate": 1.8366160331419436e-05, "loss": 0.2255, "step": 5060 }, { "epoch": 0.1842430409186714, "grad_norm": 2.592672348022461, "learning_rate": 1.8402500181699252e-05, "loss": 0.1904, "step": 5070 }, { "epoch": 0.1846064394214696, "grad_norm": 4.0694074630737305, "learning_rate": 1.8438840031979068e-05, "loss": 0.2, "step": 5080 }, { "epoch": 0.18496983792426774, "grad_norm": 2.101837396621704, "learning_rate": 1.8475179882258887e-05, "loss": 0.1927, "step": 5090 }, { "epoch": 0.18533323642706592, "grad_norm": 22.162702560424805, "learning_rate": 1.8511519732538703e-05, "loss": 0.3481, "step": 5100 }, { "epoch": 0.1856966349298641, "grad_norm": 2.7928340435028076, "learning_rate": 1.854785958281852e-05, "loss": 0.2344, "step": 5110 }, { "epoch": 0.18606003343266225, "grad_norm": 1.8618485927581787, "learning_rate": 
1.858419943309834e-05, "loss": 0.2139, "step": 5120 }, { "epoch": 0.18642343193546043, "grad_norm": 2.9611120223999023, "learning_rate": 1.862053928337815e-05, "loss": 0.2194, "step": 5130 }, { "epoch": 0.18678683043825858, "grad_norm": 5.181276321411133, "learning_rate": 1.865687913365797e-05, "loss": 0.2596, "step": 5140 }, { "epoch": 0.18715022894105676, "grad_norm": 10.01041030883789, "learning_rate": 1.8693218983937787e-05, "loss": 0.3122, "step": 5150 }, { "epoch": 0.18751362744385494, "grad_norm": 4.952126979827881, "learning_rate": 1.8729558834217603e-05, "loss": 0.2183, "step": 5160 }, { "epoch": 0.1878770259466531, "grad_norm": 2.19279146194458, "learning_rate": 1.8765898684497422e-05, "loss": 0.2439, "step": 5170 }, { "epoch": 0.18824042444945127, "grad_norm": 3.5189321041107178, "learning_rate": 1.8802238534777238e-05, "loss": 0.2343, "step": 5180 }, { "epoch": 0.18860382295224945, "grad_norm": 2.0936787128448486, "learning_rate": 1.8838578385057054e-05, "loss": 0.1831, "step": 5190 }, { "epoch": 0.1889672214550476, "grad_norm": 12.835061073303223, "learning_rate": 1.8874918235336873e-05, "loss": 0.2561, "step": 5200 }, { "epoch": 0.18933061995784578, "grad_norm": 1.6738308668136597, "learning_rate": 1.8911258085616686e-05, "loss": 1.0257, "step": 5210 }, { "epoch": 0.18969401846064393, "grad_norm": 2.7661142349243164, "learning_rate": 1.8947597935896505e-05, "loss": 0.2398, "step": 5220 }, { "epoch": 0.1900574169634421, "grad_norm": 4.173921585083008, "learning_rate": 1.8983937786176324e-05, "loss": 0.2157, "step": 5230 }, { "epoch": 0.1904208154662403, "grad_norm": 3.7037158012390137, "learning_rate": 1.9020277636456137e-05, "loss": 0.2182, "step": 5240 }, { "epoch": 0.19078421396903844, "grad_norm": 16.288227081298828, "learning_rate": 1.9056617486735956e-05, "loss": 0.2829, "step": 5250 }, { "epoch": 0.19114761247183662, "grad_norm": 2.0504090785980225, "learning_rate": 1.9092957337015772e-05, "loss": 0.201, "step": 5260 }, { "epoch": 
0.19151101097463477, "grad_norm": 1.2266415357589722, "learning_rate": 1.9129297187295588e-05, "loss": 0.2072, "step": 5270 }, { "epoch": 0.19187440947743295, "grad_norm": 4.910546779632568, "learning_rate": 1.9165637037575408e-05, "loss": 0.1824, "step": 5280 }, { "epoch": 0.19223780798023113, "grad_norm": 3.093318223953247, "learning_rate": 1.9201976887855224e-05, "loss": 0.2471, "step": 5290 }, { "epoch": 0.19260120648302928, "grad_norm": 6.74167013168335, "learning_rate": 1.923831673813504e-05, "loss": 0.2912, "step": 5300 }, { "epoch": 0.19296460498582746, "grad_norm": 2.0540058612823486, "learning_rate": 1.927465658841486e-05, "loss": 0.2599, "step": 5310 }, { "epoch": 0.19332800348862564, "grad_norm": 2.407750129699707, "learning_rate": 1.931099643869467e-05, "loss": 0.2478, "step": 5320 }, { "epoch": 0.1936914019914238, "grad_norm": 5.479567527770996, "learning_rate": 1.934733628897449e-05, "loss": 0.5936, "step": 5330 }, { "epoch": 0.19405480049422197, "grad_norm": 1.912705659866333, "learning_rate": 1.9383676139254307e-05, "loss": 0.215, "step": 5340 }, { "epoch": 0.19441819899702012, "grad_norm": 38.24689865112305, "learning_rate": 1.9420015989534123e-05, "loss": 0.286, "step": 5350 }, { "epoch": 0.1947815974998183, "grad_norm": 3.4196550846099854, "learning_rate": 1.9456355839813942e-05, "loss": 0.4764, "step": 5360 }, { "epoch": 0.19514499600261648, "grad_norm": 1.705702781677246, "learning_rate": 1.9492695690093758e-05, "loss": 0.195, "step": 5370 }, { "epoch": 0.19550839450541463, "grad_norm": 2.7188572883605957, "learning_rate": 1.9529035540373574e-05, "loss": 0.2318, "step": 5380 }, { "epoch": 0.1958717930082128, "grad_norm": 5.217918872833252, "learning_rate": 1.9565375390653393e-05, "loss": 0.2288, "step": 5390 }, { "epoch": 0.196235191511011, "grad_norm": 7.094780921936035, "learning_rate": 1.960171524093321e-05, "loss": 0.3607, "step": 5400 }, { "epoch": 0.196235191511011, "eval_loss": 0.3953820765018463, "eval_runtime": 180.5214, 
"eval_samples_per_second": 41.07, "eval_steps_per_second": 5.135, "eval_wer": 0.23525514186650207, "step": 5400 }, { "epoch": 0.19659859001380914, "grad_norm": 2.379298448562622, "learning_rate": 1.9638055091213025e-05, "loss": 0.1981, "step": 5410 }, { "epoch": 0.19696198851660732, "grad_norm": 1.2755372524261475, "learning_rate": 1.967439494149284e-05, "loss": 0.3185, "step": 5420 }, { "epoch": 0.19732538701940547, "grad_norm": 2.6385338306427, "learning_rate": 1.9710734791772657e-05, "loss": 0.2231, "step": 5430 }, { "epoch": 0.19768878552220365, "grad_norm": 4.030337810516357, "learning_rate": 1.9747074642052477e-05, "loss": 0.2417, "step": 5440 }, { "epoch": 0.19805218402500183, "grad_norm": 10.988908767700195, "learning_rate": 1.9783414492332293e-05, "loss": 0.3163, "step": 5450 }, { "epoch": 0.19841558252779998, "grad_norm": 2.8273231983184814, "learning_rate": 1.981975434261211e-05, "loss": 0.2062, "step": 5460 }, { "epoch": 0.19877898103059816, "grad_norm": 1.880952000617981, "learning_rate": 1.9856094192891928e-05, "loss": 0.2103, "step": 5470 }, { "epoch": 0.19914237953339634, "grad_norm": 12.882647514343262, "learning_rate": 1.9892434043171744e-05, "loss": 0.2513, "step": 5480 }, { "epoch": 0.1995057780361945, "grad_norm": 2.8202428817749023, "learning_rate": 1.992877389345156e-05, "loss": 0.2002, "step": 5490 }, { "epoch": 0.19986917653899267, "grad_norm": 11.30123519897461, "learning_rate": 1.9965113743731376e-05, "loss": 0.3399, "step": 5500 }, { "epoch": 0.20023257504179082, "grad_norm": 3.016954183578491, "learning_rate": 2.0001453594011195e-05, "loss": 0.2016, "step": 5510 }, { "epoch": 0.200595973544589, "grad_norm": 1.3506131172180176, "learning_rate": 2.003779344429101e-05, "loss": 0.6008, "step": 5520 }, { "epoch": 0.20095937204738717, "grad_norm": 3.711284637451172, "learning_rate": 2.0074133294570827e-05, "loss": 0.2297, "step": 5530 }, { "epoch": 0.20132277055018533, "grad_norm": 2.8310322761535645, "learning_rate": 2.0110473144850643e-05, 
"loss": 0.19, "step": 5540 }, { "epoch": 0.2016861690529835, "grad_norm": 14.37038516998291, "learning_rate": 2.0146812995130462e-05, "loss": 0.3418, "step": 5550 }, { "epoch": 0.20204956755578168, "grad_norm": 2.037245988845825, "learning_rate": 2.0183152845410278e-05, "loss": 0.2054, "step": 5560 }, { "epoch": 0.20241296605857984, "grad_norm": 2.47495698928833, "learning_rate": 2.0219492695690094e-05, "loss": 0.2102, "step": 5570 }, { "epoch": 0.20277636456137801, "grad_norm": 5.948564529418945, "learning_rate": 2.025583254596991e-05, "loss": 0.2299, "step": 5580 }, { "epoch": 0.20313976306417617, "grad_norm": 2.010765552520752, "learning_rate": 2.029217239624973e-05, "loss": 0.2214, "step": 5590 }, { "epoch": 0.20350316156697434, "grad_norm": 109.07927703857422, "learning_rate": 2.0328512246529546e-05, "loss": 0.327, "step": 5600 }, { "epoch": 0.20386656006977252, "grad_norm": 2.708141565322876, "learning_rate": 2.036485209680936e-05, "loss": 0.2128, "step": 5610 }, { "epoch": 0.20422995857257067, "grad_norm": 4.145051002502441, "learning_rate": 2.040119194708918e-05, "loss": 1.5499, "step": 5620 }, { "epoch": 0.20459335707536885, "grad_norm": 5.204433917999268, "learning_rate": 2.0437531797368993e-05, "loss": 0.2238, "step": 5630 }, { "epoch": 0.204956755578167, "grad_norm": 3.625671625137329, "learning_rate": 2.0473871647648813e-05, "loss": 0.2009, "step": 5640 }, { "epoch": 0.20532015408096518, "grad_norm": 7.134413719177246, "learning_rate": 2.051021149792863e-05, "loss": 0.3236, "step": 5650 }, { "epoch": 0.20568355258376336, "grad_norm": 3.090585708618164, "learning_rate": 2.0546551348208445e-05, "loss": 0.2245, "step": 5660 }, { "epoch": 0.20604695108656151, "grad_norm": 1.5290725231170654, "learning_rate": 2.0582891198488264e-05, "loss": 0.9725, "step": 5670 }, { "epoch": 0.2064103495893597, "grad_norm": 12.433088302612305, "learning_rate": 2.061923104876808e-05, "loss": 0.2755, "step": 5680 }, { "epoch": 0.20677374809215787, "grad_norm": 
4.399518013000488, "learning_rate": 2.0655570899047896e-05, "loss": 0.2136, "step": 5690 }, { "epoch": 0.20713714659495602, "grad_norm": 12.662751197814941, "learning_rate": 2.0691910749327715e-05, "loss": 0.3022, "step": 5700 }, { "epoch": 0.2075005450977542, "grad_norm": 1.8056265115737915, "learning_rate": 2.0728250599607528e-05, "loss": 0.3538, "step": 5710 }, { "epoch": 0.20786394360055235, "grad_norm": 1.3133045434951782, "learning_rate": 2.0764590449887347e-05, "loss": 0.1829, "step": 5720 }, { "epoch": 0.20822734210335053, "grad_norm": 6.10534143447876, "learning_rate": 2.0800930300167167e-05, "loss": 0.2819, "step": 5730 }, { "epoch": 0.2085907406061487, "grad_norm": 4.327618598937988, "learning_rate": 2.083727015044698e-05, "loss": 0.2029, "step": 5740 }, { "epoch": 0.20895413910894686, "grad_norm": 6.878536224365234, "learning_rate": 2.08736100007268e-05, "loss": 0.3301, "step": 5750 }, { "epoch": 0.20931753761174504, "grad_norm": 2.8301913738250732, "learning_rate": 2.0909949851006614e-05, "loss": 0.2144, "step": 5760 }, { "epoch": 0.20968093611454322, "grad_norm": 2.248054265975952, "learning_rate": 2.094628970128643e-05, "loss": 0.2046, "step": 5770 }, { "epoch": 0.21004433461734137, "grad_norm": 4.619300842285156, "learning_rate": 2.098262955156625e-05, "loss": 0.2487, "step": 5780 }, { "epoch": 0.21040773312013955, "grad_norm": 2.6446404457092285, "learning_rate": 2.1018969401846066e-05, "loss": 0.2222, "step": 5790 }, { "epoch": 0.2107711316229377, "grad_norm": 7.827177047729492, "learning_rate": 2.1055309252125882e-05, "loss": 0.2684, "step": 5800 }, { "epoch": 0.21113453012573588, "grad_norm": 5.37054967880249, "learning_rate": 2.10916491024057e-05, "loss": 0.216, "step": 5810 }, { "epoch": 0.21149792862853406, "grad_norm": 1.5430680513381958, "learning_rate": 2.1127988952685514e-05, "loss": 0.1723, "step": 5820 }, { "epoch": 0.2118613271313322, "grad_norm": 4.355040550231934, "learning_rate": 2.1164328802965333e-05, "loss": 0.3078, "step": 5830 
}, { "epoch": 0.2122247256341304, "grad_norm": 2.70613169670105, "learning_rate": 2.1200668653245152e-05, "loss": 0.1857, "step": 5840 }, { "epoch": 0.21258812413692857, "grad_norm": 17.876861572265625, "learning_rate": 2.1237008503524965e-05, "loss": 0.335, "step": 5850 }, { "epoch": 0.21295152263972672, "grad_norm": 2.048499822616577, "learning_rate": 2.1273348353804784e-05, "loss": 0.2588, "step": 5860 }, { "epoch": 0.2133149211425249, "grad_norm": 2.2033607959747314, "learning_rate": 2.13096882040846e-05, "loss": 0.1973, "step": 5870 }, { "epoch": 0.21367831964532305, "grad_norm": 5.563814640045166, "learning_rate": 2.1346028054364416e-05, "loss": 0.2632, "step": 5880 }, { "epoch": 0.21404171814812123, "grad_norm": 1.4629203081130981, "learning_rate": 2.1382367904644236e-05, "loss": 0.1714, "step": 5890 }, { "epoch": 0.2144051166509194, "grad_norm": 9.641836166381836, "learning_rate": 2.141870775492405e-05, "loss": 0.3329, "step": 5900 }, { "epoch": 0.21476851515371756, "grad_norm": 3.0128610134124756, "learning_rate": 2.1455047605203867e-05, "loss": 0.205, "step": 5910 }, { "epoch": 0.21513191365651574, "grad_norm": 6.38659143447876, "learning_rate": 2.1491387455483687e-05, "loss": 0.2066, "step": 5920 }, { "epoch": 0.2154953121593139, "grad_norm": 3.397566080093384, "learning_rate": 2.15277273057635e-05, "loss": 0.2682, "step": 5930 }, { "epoch": 0.21585871066211207, "grad_norm": 1.8110759258270264, "learning_rate": 2.156406715604332e-05, "loss": 0.2357, "step": 5940 }, { "epoch": 0.21622210916491025, "grad_norm": 12.391556739807129, "learning_rate": 2.1600407006323135e-05, "loss": 0.3043, "step": 5950 }, { "epoch": 0.2165855076677084, "grad_norm": 1.8203914165496826, "learning_rate": 2.163674685660295e-05, "loss": 0.2979, "step": 5960 }, { "epoch": 0.21694890617050658, "grad_norm": 3.362252950668335, "learning_rate": 2.167308670688277e-05, "loss": 0.1667, "step": 5970 }, { "epoch": 0.21731230467330476, "grad_norm": 4.1468000411987305, "learning_rate": 
2.1709426557162586e-05, "loss": 0.3419, "step": 5980 }, { "epoch": 0.2176757031761029, "grad_norm": 2.479288339614868, "learning_rate": 2.1745766407442402e-05, "loss": 0.1938, "step": 5990 }, { "epoch": 0.2180391016789011, "grad_norm": 26.185468673706055, "learning_rate": 2.1782106257722218e-05, "loss": 0.2818, "step": 6000 }, { "epoch": 0.2180391016789011, "eval_loss": 0.4106527864933014, "eval_runtime": 179.9044, "eval_samples_per_second": 41.211, "eval_steps_per_second": 5.153, "eval_wer": 0.2305625646704304, "step": 6000 }, { "epoch": 0.21840250018169924, "grad_norm": 2.2452592849731445, "learning_rate": 2.1818446108002037e-05, "loss": 0.2208, "step": 6010 }, { "epoch": 0.21876589868449742, "grad_norm": 2.273920774459839, "learning_rate": 2.1854785958281853e-05, "loss": 0.2268, "step": 6020 }, { "epoch": 0.2191292971872956, "grad_norm": 1.9621226787567139, "learning_rate": 2.189112580856167e-05, "loss": 0.1965, "step": 6030 }, { "epoch": 0.21949269569009375, "grad_norm": 2.866110324859619, "learning_rate": 2.1927465658841485e-05, "loss": 0.223, "step": 6040 }, { "epoch": 0.21985609419289193, "grad_norm": 15.169930458068848, "learning_rate": 2.1963805509121305e-05, "loss": 0.254, "step": 6050 }, { "epoch": 0.2202194926956901, "grad_norm": 2.174626350402832, "learning_rate": 2.200014535940112e-05, "loss": 0.2056, "step": 6060 }, { "epoch": 0.22058289119848826, "grad_norm": 1.9627354145050049, "learning_rate": 2.2036485209680936e-05, "loss": 0.2211, "step": 6070 }, { "epoch": 0.22094628970128644, "grad_norm": 5.444493770599365, "learning_rate": 2.2072825059960752e-05, "loss": 0.2819, "step": 6080 }, { "epoch": 0.2213096882040846, "grad_norm": 2.5131990909576416, "learning_rate": 2.2109164910240572e-05, "loss": 0.2262, "step": 6090 }, { "epoch": 0.22167308670688277, "grad_norm": 15.716779708862305, "learning_rate": 2.2145504760520388e-05, "loss": 0.2833, "step": 6100 }, { "epoch": 0.22203648520968094, "grad_norm": 1.7514111995697021, "learning_rate": 
2.2181844610800204e-05, "loss": 0.2238, "step": 6110 }, { "epoch": 0.2223998837124791, "grad_norm": 1.8236886262893677, "learning_rate": 2.2218184461080023e-05, "loss": 0.1872, "step": 6120 }, { "epoch": 0.22276328221527728, "grad_norm": 4.081092834472656, "learning_rate": 2.225452431135984e-05, "loss": 0.4926, "step": 6130 }, { "epoch": 0.22312668071807545, "grad_norm": 3.3254685401916504, "learning_rate": 2.2290864161639655e-05, "loss": 0.2179, "step": 6140 }, { "epoch": 0.2234900792208736, "grad_norm": 9.953665733337402, "learning_rate": 2.232720401191947e-05, "loss": 0.3221, "step": 6150 }, { "epoch": 0.22385347772367178, "grad_norm": 3.531538724899292, "learning_rate": 2.2363543862199287e-05, "loss": 0.3639, "step": 6160 }, { "epoch": 0.22421687622646994, "grad_norm": 1.6166915893554688, "learning_rate": 2.2399883712479106e-05, "loss": 0.1906, "step": 6170 }, { "epoch": 0.22458027472926811, "grad_norm": 3.0561792850494385, "learning_rate": 2.2436223562758922e-05, "loss": 0.222, "step": 6180 }, { "epoch": 0.2249436732320663, "grad_norm": 7.607283115386963, "learning_rate": 2.2472563413038738e-05, "loss": 0.1769, "step": 6190 }, { "epoch": 0.22530707173486444, "grad_norm": 38.86745834350586, "learning_rate": 2.2508903263318557e-05, "loss": 0.3523, "step": 6200 }, { "epoch": 0.22567047023766262, "grad_norm": 1.2490432262420654, "learning_rate": 2.2545243113598373e-05, "loss": 0.2241, "step": 6210 }, { "epoch": 0.2260338687404608, "grad_norm": 3.8632936477661133, "learning_rate": 2.258158296387819e-05, "loss": 0.2761, "step": 6220 }, { "epoch": 0.22639726724325895, "grad_norm": 6.057976722717285, "learning_rate": 2.261792281415801e-05, "loss": 0.2534, "step": 6230 }, { "epoch": 0.22676066574605713, "grad_norm": 5.2983551025390625, "learning_rate": 2.265426266443782e-05, "loss": 0.1972, "step": 6240 }, { "epoch": 0.22712406424885528, "grad_norm": 7.395950794219971, "learning_rate": 2.269060251471764e-05, "loss": 0.3446, "step": 6250 }, { "epoch": 
0.22748746275165346, "grad_norm": 2.7409260272979736, "learning_rate": 2.2726942364997457e-05, "loss": 0.1894, "step": 6260 }, { "epoch": 0.22785086125445164, "grad_norm": 1.7545270919799805, "learning_rate": 2.2763282215277273e-05, "loss": 0.2376, "step": 6270 }, { "epoch": 0.2282142597572498, "grad_norm": 112.10614013671875, "learning_rate": 2.2799622065557092e-05, "loss": 2.0322, "step": 6280 }, { "epoch": 0.22857765826004797, "grad_norm": 3.6547396183013916, "learning_rate": 2.2835961915836908e-05, "loss": 0.2942, "step": 6290 }, { "epoch": 0.22894105676284612, "grad_norm": 50.726261138916016, "learning_rate": 2.2872301766116724e-05, "loss": 0.3279, "step": 6300 }, { "epoch": 0.2293044552656443, "grad_norm": 1.2374241352081299, "learning_rate": 2.2908641616396543e-05, "loss": 0.1912, "step": 6310 }, { "epoch": 0.22966785376844248, "grad_norm": 1.6278152465820312, "learning_rate": 2.2944981466676356e-05, "loss": 0.1913, "step": 6320 }, { "epoch": 0.23003125227124063, "grad_norm": 7.58544397354126, "learning_rate": 2.2981321316956175e-05, "loss": 0.2393, "step": 6330 }, { "epoch": 0.2303946507740388, "grad_norm": 1.7094483375549316, "learning_rate": 2.3017661167235995e-05, "loss": 0.2333, "step": 6340 }, { "epoch": 0.230758049276837, "grad_norm": 24.214885711669922, "learning_rate": 2.3054001017515807e-05, "loss": 0.3019, "step": 6350 }, { "epoch": 0.23112144777963514, "grad_norm": 1.962106704711914, "learning_rate": 2.3090340867795626e-05, "loss": 0.8948, "step": 6360 }, { "epoch": 0.23148484628243332, "grad_norm": 1.3703123331069946, "learning_rate": 2.3126680718075442e-05, "loss": 0.1936, "step": 6370 }, { "epoch": 0.23184824478523147, "grad_norm": 7.507201194763184, "learning_rate": 2.316302056835526e-05, "loss": 0.2185, "step": 6380 }, { "epoch": 0.23221164328802965, "grad_norm": 2.6310977935791016, "learning_rate": 2.3199360418635078e-05, "loss": 0.1961, "step": 6390 }, { "epoch": 0.23257504179082783, "grad_norm": 4.186092376708984, "learning_rate": 
2.3235700268914894e-05, "loss": 0.2734, "step": 6400 }, { "epoch": 0.23293844029362598, "grad_norm": 1.817269206047058, "learning_rate": 2.327204011919471e-05, "loss": 0.1966, "step": 6410 }, { "epoch": 0.23330183879642416, "grad_norm": 1.9503989219665527, "learning_rate": 2.330837996947453e-05, "loss": 2.7438, "step": 6420 }, { "epoch": 0.23366523729922234, "grad_norm": 3.1107656955718994, "learning_rate": 2.334471981975434e-05, "loss": 0.2534, "step": 6430 }, { "epoch": 0.2340286358020205, "grad_norm": 5.268273830413818, "learning_rate": 2.338105967003416e-05, "loss": 0.1963, "step": 6440 }, { "epoch": 0.23439203430481867, "grad_norm": 9.586852073669434, "learning_rate": 2.3417399520313977e-05, "loss": 0.2342, "step": 6450 }, { "epoch": 0.23475543280761682, "grad_norm": 3.0218632221221924, "learning_rate": 2.3453739370593793e-05, "loss": 0.231, "step": 6460 }, { "epoch": 0.235118831310415, "grad_norm": 1.9708057641983032, "learning_rate": 2.3490079220873612e-05, "loss": 0.2156, "step": 6470 }, { "epoch": 0.23548222981321318, "grad_norm": 3.6212944984436035, "learning_rate": 2.3526419071153428e-05, "loss": 0.2172, "step": 6480 }, { "epoch": 0.23584562831601133, "grad_norm": 2.5205702781677246, "learning_rate": 2.3562758921433244e-05, "loss": 0.4643, "step": 6490 }, { "epoch": 0.2362090268188095, "grad_norm": 4.1570305824279785, "learning_rate": 2.3599098771713063e-05, "loss": 0.2722, "step": 6500 }, { "epoch": 0.2365724253216077, "grad_norm": 1.8376798629760742, "learning_rate": 2.363543862199288e-05, "loss": 0.2027, "step": 6510 }, { "epoch": 0.23693582382440584, "grad_norm": 2.0464930534362793, "learning_rate": 2.3671778472272695e-05, "loss": 0.183, "step": 6520 }, { "epoch": 0.23729922232720402, "grad_norm": 4.8776469230651855, "learning_rate": 2.370811832255251e-05, "loss": 0.2169, "step": 6530 }, { "epoch": 0.23766262083000217, "grad_norm": 1.5764952898025513, "learning_rate": 2.3744458172832327e-05, "loss": 0.1917, "step": 6540 }, { "epoch": 
0.23802601933280035, "grad_norm": 16.132232666015625, "learning_rate": 2.3780798023112147e-05, "loss": 0.2732, "step": 6550 }, { "epoch": 0.23838941783559853, "grad_norm": 8.105748176574707, "learning_rate": 2.3817137873391963e-05, "loss": 0.2055, "step": 6560 }, { "epoch": 0.23875281633839668, "grad_norm": 2.087362051010132, "learning_rate": 2.385347772367178e-05, "loss": 0.186, "step": 6570 }, { "epoch": 0.23911621484119486, "grad_norm": 2.8280205726623535, "learning_rate": 2.3889817573951598e-05, "loss": 0.201, "step": 6580 }, { "epoch": 0.23947961334399304, "grad_norm": 1.2525794506072998, "learning_rate": 2.3926157424231414e-05, "loss": 0.1893, "step": 6590 }, { "epoch": 0.2398430118467912, "grad_norm": 23.419832229614258, "learning_rate": 2.396249727451123e-05, "loss": 0.2554, "step": 6600 }, { "epoch": 0.2398430118467912, "eval_loss": 0.4065987765789032, "eval_runtime": 179.638, "eval_samples_per_second": 41.272, "eval_steps_per_second": 5.16, "eval_wer": 0.24529380797647357, "step": 6600 }, { "epoch": 0.24020641034958937, "grad_norm": 1.3757339715957642, "learning_rate": 2.3998837124791046e-05, "loss": 0.1962, "step": 6610 }, { "epoch": 0.24056980885238752, "grad_norm": 4.00860071182251, "learning_rate": 2.4035176975070865e-05, "loss": 0.1848, "step": 6620 }, { "epoch": 0.2409332073551857, "grad_norm": 5.544015407562256, "learning_rate": 2.407151682535068e-05, "loss": 0.245, "step": 6630 }, { "epoch": 0.24129660585798388, "grad_norm": 1.0618844032287598, "learning_rate": 2.4107856675630497e-05, "loss": 0.191, "step": 6640 }, { "epoch": 0.24166000436078203, "grad_norm": 125.15505981445312, "learning_rate": 2.4144196525910313e-05, "loss": 0.3055, "step": 6650 }, { "epoch": 0.2420234028635802, "grad_norm": 5.015167713165283, "learning_rate": 2.418053637619013e-05, "loss": 0.2701, "step": 6660 }, { "epoch": 0.24238680136637836, "grad_norm": 3.944514274597168, "learning_rate": 2.421687622646995e-05, "loss": 0.2107, "step": 6670 }, { "epoch": 0.24275019986917654, 
"grad_norm": 3.1539418697357178, "learning_rate": 2.4253216076749764e-05, "loss": 0.232, "step": 6680 }, { "epoch": 0.24311359837197472, "grad_norm": 2.980459213256836, "learning_rate": 2.428955592702958e-05, "loss": 0.2391, "step": 6690 }, { "epoch": 0.24347699687477287, "grad_norm": 35.02157211303711, "learning_rate": 2.43258957773094e-05, "loss": 0.3172, "step": 6700 }, { "epoch": 0.24384039537757105, "grad_norm": 1.606570839881897, "learning_rate": 2.4362235627589216e-05, "loss": 1.5707, "step": 6710 }, { "epoch": 0.24420379388036922, "grad_norm": 3.940394401550293, "learning_rate": 2.439857547786903e-05, "loss": 0.1969, "step": 6720 }, { "epoch": 0.24456719238316738, "grad_norm": 3.8990156650543213, "learning_rate": 2.443491532814885e-05, "loss": 0.2475, "step": 6730 }, { "epoch": 0.24493059088596555, "grad_norm": 2.523500442504883, "learning_rate": 2.4471255178428664e-05, "loss": 0.194, "step": 6740 }, { "epoch": 0.2452939893887637, "grad_norm": 4.920846939086914, "learning_rate": 2.4507595028708483e-05, "loss": 0.2417, "step": 6750 }, { "epoch": 0.24565738789156188, "grad_norm": 2.2269723415374756, "learning_rate": 2.4543934878988302e-05, "loss": 0.2148, "step": 6760 }, { "epoch": 0.24602078639436006, "grad_norm": 1.669722557067871, "learning_rate": 2.4580274729268115e-05, "loss": 0.1979, "step": 6770 }, { "epoch": 0.24638418489715821, "grad_norm": 4.581501007080078, "learning_rate": 2.4616614579547934e-05, "loss": 0.2412, "step": 6780 }, { "epoch": 0.2467475833999564, "grad_norm": 2.6605944633483887, "learning_rate": 2.465295442982775e-05, "loss": 0.1992, "step": 6790 }, { "epoch": 0.24711098190275457, "grad_norm": 7.089646816253662, "learning_rate": 2.4689294280107566e-05, "loss": 0.2789, "step": 6800 }, { "epoch": 0.24747438040555272, "grad_norm": 1.9901385307312012, "learning_rate": 2.4725634130387385e-05, "loss": 1.907, "step": 6810 }, { "epoch": 0.2478377789083509, "grad_norm": 2.5120224952697754, "learning_rate": 2.4761973980667198e-05, "loss": 
0.1908, "step": 6820 }, { "epoch": 0.24820117741114905, "grad_norm": 1.553806185722351, "learning_rate": 2.4794679845919035e-05, "loss": 1.6707, "step": 6830 }, { "epoch": 0.24856457591394723, "grad_norm": 2.130095958709717, "learning_rate": 2.4831019696198855e-05, "loss": 0.222, "step": 6840 }, { "epoch": 0.2489279744167454, "grad_norm": 15.832701683044434, "learning_rate": 2.486735954647867e-05, "loss": 0.4634, "step": 6850 }, { "epoch": 0.24929137291954356, "grad_norm": 1.87086820602417, "learning_rate": 2.4903699396758487e-05, "loss": 0.1887, "step": 6860 }, { "epoch": 0.24965477142234174, "grad_norm": 2.32084584236145, "learning_rate": 2.4940039247038303e-05, "loss": 0.1881, "step": 6870 }, { "epoch": 0.2500181699251399, "grad_norm": 3.3228461742401123, "learning_rate": 2.497637909731812e-05, "loss": 0.264, "step": 6880 }, { "epoch": 0.2503815684279381, "grad_norm": 1.8676607608795166, "learning_rate": 2.5012718947597935e-05, "loss": 0.2102, "step": 6890 }, { "epoch": 0.2507449669307362, "grad_norm": 17.540319442749023, "learning_rate": 2.5049058797877757e-05, "loss": 0.2567, "step": 6900 }, { "epoch": 0.25110836543353443, "grad_norm": 1.6276856660842896, "learning_rate": 2.508539864815757e-05, "loss": 0.1917, "step": 6910 }, { "epoch": 0.2514717639363326, "grad_norm": 2.347691059112549, "learning_rate": 2.5121738498437386e-05, "loss": 0.1998, "step": 6920 }, { "epoch": 0.25183516243913073, "grad_norm": 3.5337650775909424, "learning_rate": 2.5158078348717205e-05, "loss": 0.2418, "step": 6930 }, { "epoch": 0.25219856094192894, "grad_norm": 3.7415404319763184, "learning_rate": 2.519441819899702e-05, "loss": 0.2074, "step": 6940 }, { "epoch": 0.2525619594447271, "grad_norm": 16.603042602539062, "learning_rate": 2.5230758049276837e-05, "loss": 0.3104, "step": 6950 }, { "epoch": 0.25292535794752524, "grad_norm": 1.4864579439163208, "learning_rate": 2.5267097899556656e-05, "loss": 0.1771, "step": 6960 }, { "epoch": 0.25328875645032345, "grad_norm": 
1.7935876846313477, "learning_rate": 2.5303437749836472e-05, "loss": 0.1984, "step": 6970 }, { "epoch": 0.2536521549531216, "grad_norm": 3.187351942062378, "learning_rate": 2.533977760011629e-05, "loss": 0.1828, "step": 6980 }, { "epoch": 0.25401555345591975, "grad_norm": 1.7930549383163452, "learning_rate": 2.5376117450396104e-05, "loss": 0.2132, "step": 6990 }, { "epoch": 0.2543789519587179, "grad_norm": 4.86196231842041, "learning_rate": 2.5412457300675924e-05, "loss": 0.2426, "step": 7000 }, { "epoch": 0.2547423504615161, "grad_norm": 2.784335136413574, "learning_rate": 2.544879715095574e-05, "loss": 1.6557, "step": 7010 }, { "epoch": 0.25510574896431426, "grad_norm": 1.460509181022644, "learning_rate": 2.5485137001235552e-05, "loss": 0.1812, "step": 7020 }, { "epoch": 0.2554691474671124, "grad_norm": 2.5204946994781494, "learning_rate": 2.5521476851515375e-05, "loss": 0.3731, "step": 7030 }, { "epoch": 0.2558325459699106, "grad_norm": 1.6122281551361084, "learning_rate": 2.555781670179519e-05, "loss": 0.2256, "step": 7040 }, { "epoch": 0.25619594447270877, "grad_norm": 8.13974666595459, "learning_rate": 2.5594156552075004e-05, "loss": 0.2756, "step": 7050 }, { "epoch": 0.2565593429755069, "grad_norm": 2.1560494899749756, "learning_rate": 2.5630496402354826e-05, "loss": 0.1869, "step": 7060 }, { "epoch": 0.25692274147830513, "grad_norm": 2.938570737838745, "learning_rate": 2.5666836252634642e-05, "loss": 0.187, "step": 7070 }, { "epoch": 0.2572861399811033, "grad_norm": 1.6697754859924316, "learning_rate": 2.5703176102914455e-05, "loss": 0.1841, "step": 7080 }, { "epoch": 0.25764953848390143, "grad_norm": 2.500377655029297, "learning_rate": 2.5739515953194278e-05, "loss": 0.4097, "step": 7090 }, { "epoch": 0.25801293698669964, "grad_norm": 6.614553928375244, "learning_rate": 2.577585580347409e-05, "loss": 0.2779, "step": 7100 }, { "epoch": 0.2583763354894978, "grad_norm": 2.1538803577423096, "learning_rate": 2.5812195653753906e-05, "loss": 0.2035, "step": 7110 
}, { "epoch": 0.25873973399229594, "grad_norm": 2.64719820022583, "learning_rate": 2.584853550403373e-05, "loss": 0.1815, "step": 7120 }, { "epoch": 0.25910313249509415, "grad_norm": 4.064308166503906, "learning_rate": 2.588487535431354e-05, "loss": 0.2115, "step": 7130 }, { "epoch": 0.2594665309978923, "grad_norm": 4.535513877868652, "learning_rate": 2.5921215204593357e-05, "loss": 0.1733, "step": 7140 }, { "epoch": 0.25982992950069045, "grad_norm": 14.761083602905273, "learning_rate": 2.5957555054873173e-05, "loss": 0.3061, "step": 7150 }, { "epoch": 0.2601933280034886, "grad_norm": 2.902010202407837, "learning_rate": 2.5993894905152993e-05, "loss": 0.2539, "step": 7160 }, { "epoch": 0.2605567265062868, "grad_norm": 2.6499462127685547, "learning_rate": 2.603023475543281e-05, "loss": 0.209, "step": 7170 }, { "epoch": 0.26092012500908496, "grad_norm": 2.0298879146575928, "learning_rate": 2.6066574605712625e-05, "loss": 0.1966, "step": 7180 }, { "epoch": 0.2612835235118831, "grad_norm": 5.285839080810547, "learning_rate": 2.6102914455992444e-05, "loss": 0.2416, "step": 7190 }, { "epoch": 0.2616469220146813, "grad_norm": 14.89932918548584, "learning_rate": 2.613925430627226e-05, "loss": 0.2649, "step": 7200 }, { "epoch": 0.2616469220146813, "eval_loss": 0.43822312355041504, "eval_runtime": 180.398, "eval_samples_per_second": 41.098, "eval_steps_per_second": 5.139, "eval_wer": 0.23023580881151634, "step": 7200 }, { "epoch": 0.26201032051747947, "grad_norm": 2.9772818088531494, "learning_rate": 2.6175594156552076e-05, "loss": 0.2158, "step": 7210 }, { "epoch": 0.2623737190202776, "grad_norm": 1.4703949689865112, "learning_rate": 2.6211934006831895e-05, "loss": 0.1925, "step": 7220 }, { "epoch": 0.2627371175230758, "grad_norm": 2.6034176349639893, "learning_rate": 2.624827385711171e-05, "loss": 0.2065, "step": 7230 }, { "epoch": 0.263100516025874, "grad_norm": 2.8392562866210938, "learning_rate": 2.6284613707391527e-05, "loss": 0.2097, "step": 7240 }, { "epoch": 
0.2634639145286721, "grad_norm": 8.892645835876465, "learning_rate": 2.6320953557671347e-05, "loss": 0.2835, "step": 7250 }, { "epoch": 0.26382731303147033, "grad_norm": 1.616268277168274, "learning_rate": 2.6357293407951162e-05, "loss": 0.1875, "step": 7260 }, { "epoch": 0.2641907115342685, "grad_norm": 2.1791138648986816, "learning_rate": 2.6393633258230975e-05, "loss": 0.1722, "step": 7270 }, { "epoch": 0.26455411003706664, "grad_norm": 2.8691608905792236, "learning_rate": 2.642997310851079e-05, "loss": 0.2377, "step": 7280 }, { "epoch": 0.26491750853986484, "grad_norm": 1.5673551559448242, "learning_rate": 2.6466312958790614e-05, "loss": 0.4404, "step": 7290 }, { "epoch": 0.265280907042663, "grad_norm": 7.296738147735596, "learning_rate": 2.6502652809070426e-05, "loss": 0.3198, "step": 7300 }, { "epoch": 0.26564430554546115, "grad_norm": 6.389322757720947, "learning_rate": 2.6538992659350242e-05, "loss": 0.2041, "step": 7310 }, { "epoch": 0.2660077040482593, "grad_norm": 11.64201831817627, "learning_rate": 2.657533250963006e-05, "loss": 0.2014, "step": 7320 }, { "epoch": 0.2663711025510575, "grad_norm": 4.454049587249756, "learning_rate": 2.6611672359909878e-05, "loss": 0.2295, "step": 7330 }, { "epoch": 0.26673450105385565, "grad_norm": 2.091968297958374, "learning_rate": 2.6648012210189694e-05, "loss": 0.1784, "step": 7340 }, { "epoch": 0.2670978995566538, "grad_norm": 6.904966354370117, "learning_rate": 2.6684352060469513e-05, "loss": 0.3303, "step": 7350 }, { "epoch": 0.267461298059452, "grad_norm": 1.6893994808197021, "learning_rate": 2.672069191074933e-05, "loss": 0.2534, "step": 7360 }, { "epoch": 0.26782469656225016, "grad_norm": 1.3456122875213623, "learning_rate": 2.6757031761029145e-05, "loss": 0.1829, "step": 7370 }, { "epoch": 0.2681880950650483, "grad_norm": 7.959611892700195, "learning_rate": 2.6793371611308964e-05, "loss": 0.2425, "step": 7380 }, { "epoch": 0.2685514935678465, "grad_norm": 1.5833840370178223, "learning_rate": 
2.682971146158878e-05, "loss": 0.1988, "step": 7390 }, { "epoch": 0.2689148920706447, "grad_norm": 19.886600494384766, "learning_rate": 2.6866051311868596e-05, "loss": 0.3563, "step": 7400 }, { "epoch": 0.2692782905734428, "grad_norm": 2.55553936958313, "learning_rate": 2.6902391162148415e-05, "loss": 0.1857, "step": 7410 }, { "epoch": 0.26964168907624103, "grad_norm": 2.125661849975586, "learning_rate": 2.693873101242823e-05, "loss": 0.7398, "step": 7420 }, { "epoch": 0.2700050875790392, "grad_norm": 2.577770233154297, "learning_rate": 2.6975070862708047e-05, "loss": 0.5703, "step": 7430 }, { "epoch": 0.27036848608183733, "grad_norm": 2.3848683834075928, "learning_rate": 2.701141071298786e-05, "loss": 0.173, "step": 7440 }, { "epoch": 0.2707318845846355, "grad_norm": 22.96078109741211, "learning_rate": 2.7047750563267683e-05, "loss": 0.293, "step": 7450 }, { "epoch": 0.2710952830874337, "grad_norm": 3.206329822540283, "learning_rate": 2.70840904135475e-05, "loss": 0.4585, "step": 7460 }, { "epoch": 0.27145868159023184, "grad_norm": 2.251904010772705, "learning_rate": 2.712043026382731e-05, "loss": 0.2196, "step": 7470 }, { "epoch": 0.27182208009303, "grad_norm": 3.7445387840270996, "learning_rate": 2.7156770114107134e-05, "loss": 0.2195, "step": 7480 }, { "epoch": 0.2721854785958282, "grad_norm": 1.5370314121246338, "learning_rate": 2.7193109964386947e-05, "loss": 0.2007, "step": 7490 }, { "epoch": 0.27254887709862635, "grad_norm": 18.44324493408203, "learning_rate": 2.7229449814666763e-05, "loss": 0.3091, "step": 7500 }, { "epoch": 0.2729122756014245, "grad_norm": 1.5792795419692993, "learning_rate": 2.7265789664946585e-05, "loss": 0.1601, "step": 7510 }, { "epoch": 0.2732756741042227, "grad_norm": 9.128384590148926, "learning_rate": 2.7302129515226398e-05, "loss": 0.178, "step": 7520 }, { "epoch": 0.27363907260702086, "grad_norm": 2.2285592555999756, "learning_rate": 2.7338469365506214e-05, "loss": 2.4074, "step": 7530 }, { "epoch": 0.274002471109819, 
"grad_norm": 2.2741541862487793, "learning_rate": 2.7374809215786033e-05, "loss": 0.246, "step": 7540 }, { "epoch": 0.2743658696126172, "grad_norm": 17.185470581054688, "learning_rate": 2.741114906606585e-05, "loss": 0.2577, "step": 7550 }, { "epoch": 0.27472926811541537, "grad_norm": 1.1907752752304077, "learning_rate": 2.7447488916345665e-05, "loss": 0.2073, "step": 7560 }, { "epoch": 0.2750926666182135, "grad_norm": 3.535682201385498, "learning_rate": 2.748382876662548e-05, "loss": 0.2012, "step": 7570 }, { "epoch": 0.27545606512101173, "grad_norm": 3.585460662841797, "learning_rate": 2.75201686169053e-05, "loss": 0.2147, "step": 7580 }, { "epoch": 0.2758194636238099, "grad_norm": 1.9034504890441895, "learning_rate": 2.7556508467185116e-05, "loss": 0.1626, "step": 7590 }, { "epoch": 0.27618286212660803, "grad_norm": 39.66155242919922, "learning_rate": 2.7592848317464932e-05, "loss": 0.2617, "step": 7600 }, { "epoch": 0.2765462606294062, "grad_norm": 1.5698285102844238, "learning_rate": 2.762918816774475e-05, "loss": 0.3136, "step": 7610 }, { "epoch": 0.2769096591322044, "grad_norm": 2.4866106510162354, "learning_rate": 2.7665528018024568e-05, "loss": 0.1971, "step": 7620 }, { "epoch": 0.27727305763500254, "grad_norm": 9.244050025939941, "learning_rate": 2.7701867868304384e-05, "loss": 0.2025, "step": 7630 }, { "epoch": 0.2776364561378007, "grad_norm": 2.1344380378723145, "learning_rate": 2.7738207718584203e-05, "loss": 0.2055, "step": 7640 }, { "epoch": 0.2779998546405989, "grad_norm": 13.503227233886719, "learning_rate": 2.777454756886402e-05, "loss": 0.2671, "step": 7650 }, { "epoch": 0.27836325314339705, "grad_norm": 2.238834857940674, "learning_rate": 2.781088741914383e-05, "loss": 0.1714, "step": 7660 }, { "epoch": 0.2787266516461952, "grad_norm": 0.897280216217041, "learning_rate": 2.7847227269423654e-05, "loss": 0.1615, "step": 7670 }, { "epoch": 0.2790900501489934, "grad_norm": 5.808285713195801, "learning_rate": 2.788356711970347e-05, "loss": 0.2052, 
"step": 7680 }, { "epoch": 0.27945344865179156, "grad_norm": 1.8924663066864014, "learning_rate": 2.7919906969983283e-05, "loss": 0.1769, "step": 7690 }, { "epoch": 0.2798168471545897, "grad_norm": 11.939653396606445, "learning_rate": 2.7956246820263105e-05, "loss": 0.2859, "step": 7700 }, { "epoch": 0.2801802456573879, "grad_norm": 2.5077621936798096, "learning_rate": 2.7992586670542918e-05, "loss": 0.1767, "step": 7710 }, { "epoch": 0.28054364416018607, "grad_norm": 2.0336718559265137, "learning_rate": 2.8028926520822734e-05, "loss": 0.6757, "step": 7720 }, { "epoch": 0.2809070426629842, "grad_norm": 3.9547739028930664, "learning_rate": 2.806526637110255e-05, "loss": 0.2322, "step": 7730 }, { "epoch": 0.28127044116578237, "grad_norm": 1.8082466125488281, "learning_rate": 2.810160622138237e-05, "loss": 0.1758, "step": 7740 }, { "epoch": 0.2816338396685806, "grad_norm": 16.173986434936523, "learning_rate": 2.8137946071662185e-05, "loss": 0.2642, "step": 7750 }, { "epoch": 0.28199723817137873, "grad_norm": 3.341475486755371, "learning_rate": 2.8174285921942e-05, "loss": 3.4407, "step": 7760 }, { "epoch": 0.2823606366741769, "grad_norm": 1.7220288515090942, "learning_rate": 2.821062577222182e-05, "loss": 0.1965, "step": 7770 }, { "epoch": 0.2827240351769751, "grad_norm": 3.8534610271453857, "learning_rate": 2.8246965622501637e-05, "loss": 0.1966, "step": 7780 }, { "epoch": 0.28308743367977324, "grad_norm": 1.962780475616455, "learning_rate": 2.8283305472781453e-05, "loss": 0.1859, "step": 7790 }, { "epoch": 0.2834508321825714, "grad_norm": 40.28166961669922, "learning_rate": 2.8319645323061272e-05, "loss": 0.6588, "step": 7800 }, { "epoch": 0.2834508321825714, "eval_loss": 0.42970865964889526, "eval_runtime": 180.6321, "eval_samples_per_second": 41.045, "eval_steps_per_second": 5.132, "eval_wer": 0.2413455080145951, "step": 7800 }, { "epoch": 0.2838142306853696, "grad_norm": 1.748349666595459, "learning_rate": 2.8355985173341088e-05, "loss": 0.1786, "step": 7810 }, { 
"epoch": 0.28417762918816775, "grad_norm": 2.1137237548828125, "learning_rate": 2.8392325023620904e-05, "loss": 0.1803, "step": 7820 }, { "epoch": 0.2845410276909659, "grad_norm": 1.59931218624115, "learning_rate": 2.8428664873900723e-05, "loss": 0.2107, "step": 7830 }, { "epoch": 0.2849044261937641, "grad_norm": 2.263493061065674, "learning_rate": 2.846500472418054e-05, "loss": 0.1967, "step": 7840 }, { "epoch": 0.28526782469656226, "grad_norm": 20.798656463623047, "learning_rate": 2.8501344574460355e-05, "loss": 0.268, "step": 7850 }, { "epoch": 0.2856312231993604, "grad_norm": 3.0182480812072754, "learning_rate": 2.8537684424740168e-05, "loss": 0.1901, "step": 7860 }, { "epoch": 0.2859946217021586, "grad_norm": 6.6378493309021, "learning_rate": 2.857402427501999e-05, "loss": 0.1804, "step": 7870 }, { "epoch": 0.28635802020495676, "grad_norm": 2.5524067878723145, "learning_rate": 2.8610364125299803e-05, "loss": 0.233, "step": 7880 }, { "epoch": 0.2867214187077549, "grad_norm": 2.6409335136413574, "learning_rate": 2.864670397557962e-05, "loss": 0.1717, "step": 7890 }, { "epoch": 0.28708481721055307, "grad_norm": 6.834221363067627, "learning_rate": 2.868304382585944e-05, "loss": 0.2956, "step": 7900 }, { "epoch": 0.2874482157133513, "grad_norm": 2.760669708251953, "learning_rate": 2.8719383676139254e-05, "loss": 0.1789, "step": 7910 }, { "epoch": 0.2878116142161494, "grad_norm": 1.7543925046920776, "learning_rate": 2.875572352641907e-05, "loss": 0.2041, "step": 7920 }, { "epoch": 0.2881750127189476, "grad_norm": 4.784151077270508, "learning_rate": 2.879206337669889e-05, "loss": 0.2259, "step": 7930 }, { "epoch": 0.2885384112217458, "grad_norm": 2.1769356727600098, "learning_rate": 2.8828403226978706e-05, "loss": 0.2023, "step": 7940 }, { "epoch": 0.28890180972454393, "grad_norm": 9.373051643371582, "learning_rate": 2.886474307725852e-05, "loss": 0.3511, "step": 7950 }, { "epoch": 0.2892652082273421, "grad_norm": 1.895190715789795, "learning_rate": 
2.890108292753834e-05, "loss": 0.1976, "step": 7960 }, { "epoch": 0.2896286067301403, "grad_norm": 3.4400076866149902, "learning_rate": 2.8937422777818157e-05, "loss": 0.1902, "step": 7970 }, { "epoch": 0.28999200523293844, "grad_norm": 9.663911819458008, "learning_rate": 2.8973762628097973e-05, "loss": 0.2551, "step": 7980 }, { "epoch": 0.2903554037357366, "grad_norm": 5.1054463386535645, "learning_rate": 2.9010102478377792e-05, "loss": 0.2001, "step": 7990 }, { "epoch": 0.2907188022385348, "grad_norm": 9.06143569946289, "learning_rate": 2.9046442328657608e-05, "loss": 0.2266, "step": 8000 }, { "epoch": 0.29108220074133295, "grad_norm": 1.604077696800232, "learning_rate": 2.9082782178937424e-05, "loss": 0.1883, "step": 8010 }, { "epoch": 0.2914455992441311, "grad_norm": 2.245687246322632, "learning_rate": 2.911912202921724e-05, "loss": 0.2093, "step": 8020 }, { "epoch": 0.29180899774692926, "grad_norm": 3.8099372386932373, "learning_rate": 2.915546187949706e-05, "loss": 0.2283, "step": 8030 }, { "epoch": 0.29217239624972746, "grad_norm": 2.135115623474121, "learning_rate": 2.9191801729776875e-05, "loss": 0.2369, "step": 8040 }, { "epoch": 0.2925357947525256, "grad_norm": 5.596993446350098, "learning_rate": 2.9228141580056688e-05, "loss": 0.2709, "step": 8050 }, { "epoch": 0.29289919325532376, "grad_norm": 1.3212496042251587, "learning_rate": 2.926448143033651e-05, "loss": 0.1968, "step": 8060 }, { "epoch": 0.29326259175812197, "grad_norm": 1.9241231679916382, "learning_rate": 2.9300821280616327e-05, "loss": 0.3883, "step": 8070 }, { "epoch": 0.2936259902609201, "grad_norm": 4.008016109466553, "learning_rate": 2.933716113089614e-05, "loss": 0.2074, "step": 8080 }, { "epoch": 0.2939893887637183, "grad_norm": 1.5871399641036987, "learning_rate": 2.9373500981175962e-05, "loss": 0.1698, "step": 8090 }, { "epoch": 0.2943527872665165, "grad_norm": 19.480670928955078, "learning_rate": 2.9409840831455774e-05, "loss": 0.4023, "step": 8100 }, { "epoch": 0.29471618576931463, 
"grad_norm": 3.8420443534851074, "learning_rate": 2.944618068173559e-05, "loss": 0.181, "step": 8110 }, { "epoch": 0.2950795842721128, "grad_norm": 1.9951499700546265, "learning_rate": 2.9482520532015413e-05, "loss": 0.2872, "step": 8120 }, { "epoch": 0.295442982774911, "grad_norm": 4.958978176116943, "learning_rate": 2.9518860382295226e-05, "loss": 0.2359, "step": 8130 }, { "epoch": 0.29580638127770914, "grad_norm": 1.5531708002090454, "learning_rate": 2.9555200232575042e-05, "loss": 0.2138, "step": 8140 }, { "epoch": 0.2961697797805073, "grad_norm": 5.297884941101074, "learning_rate": 2.9591540082854864e-05, "loss": 0.2694, "step": 8150 }, { "epoch": 0.2965331782833055, "grad_norm": 1.5989892482757568, "learning_rate": 2.9627879933134677e-05, "loss": 0.1686, "step": 8160 }, { "epoch": 0.29689657678610365, "grad_norm": 3.347722291946411, "learning_rate": 2.9664219783414493e-05, "loss": 0.2206, "step": 8170 }, { "epoch": 0.2972599752889018, "grad_norm": 2.9551491737365723, "learning_rate": 2.970055963369431e-05, "loss": 0.2274, "step": 8180 }, { "epoch": 0.29762337379169995, "grad_norm": 2.527963638305664, "learning_rate": 2.973689948397413e-05, "loss": 0.1731, "step": 8190 }, { "epoch": 0.29798677229449816, "grad_norm": 5.818012714385986, "learning_rate": 2.9773239334253944e-05, "loss": 0.265, "step": 8200 }, { "epoch": 0.2983501707972963, "grad_norm": 1.5580624341964722, "learning_rate": 2.980594519950578e-05, "loss": 2.627, "step": 8210 }, { "epoch": 0.29871356930009446, "grad_norm": 1.6011282205581665, "learning_rate": 2.9842285049785594e-05, "loss": 0.1811, "step": 8220 }, { "epoch": 0.29907696780289267, "grad_norm": 44.825157165527344, "learning_rate": 2.987862490006541e-05, "loss": 0.4799, "step": 8230 }, { "epoch": 0.2994403663056908, "grad_norm": 1.520982027053833, "learning_rate": 2.991496475034523e-05, "loss": 0.1935, "step": 8240 }, { "epoch": 0.29980376480848897, "grad_norm": 6.3379058837890625, "learning_rate": 2.9951304600625046e-05, "loss": 0.2435, 
"step": 8250 }, { "epoch": 0.3001671633112872, "grad_norm": 2.2493958473205566, "learning_rate": 2.998764445090486e-05, "loss": 0.1984, "step": 8260 }, { "epoch": 0.30053056181408533, "grad_norm": 3.234196186065674, "learning_rate": 3.002398430118468e-05, "loss": 0.1785, "step": 8270 }, { "epoch": 0.3008939603168835, "grad_norm": 4.99449348449707, "learning_rate": 3.0060324151464497e-05, "loss": 0.1888, "step": 8280 }, { "epoch": 0.3012573588196817, "grad_norm": 1.8624048233032227, "learning_rate": 3.0096664001744313e-05, "loss": 1.6561, "step": 8290 }, { "epoch": 0.30162075732247984, "grad_norm": 7.615640640258789, "learning_rate": 3.0133003852024132e-05, "loss": 0.2918, "step": 8300 }, { "epoch": 0.301984155825278, "grad_norm": 1.6900697946548462, "learning_rate": 3.0169343702303948e-05, "loss": 0.2255, "step": 8310 }, { "epoch": 0.3023475543280762, "grad_norm": 2.2034566402435303, "learning_rate": 3.0205683552583764e-05, "loss": 0.198, "step": 8320 }, { "epoch": 0.30271095283087435, "grad_norm": 2.044597625732422, "learning_rate": 3.0242023402863583e-05, "loss": 0.1946, "step": 8330 }, { "epoch": 0.3030743513336725, "grad_norm": 1.6171079874038696, "learning_rate": 3.02783632531434e-05, "loss": 0.1935, "step": 8340 }, { "epoch": 0.30343774983647065, "grad_norm": 2.8435897827148438, "learning_rate": 3.0314703103423215e-05, "loss": 0.3876, "step": 8350 }, { "epoch": 0.30380114833926886, "grad_norm": 2.023019552230835, "learning_rate": 3.0351042953703035e-05, "loss": 0.1879, "step": 8360 }, { "epoch": 0.304164546842067, "grad_norm": 1.7610963582992554, "learning_rate": 3.038738280398285e-05, "loss": 0.1901, "step": 8370 }, { "epoch": 0.30452794534486516, "grad_norm": 1.9482131004333496, "learning_rate": 3.0423722654262667e-05, "loss": 0.2119, "step": 8380 }, { "epoch": 0.30489134384766337, "grad_norm": 1.6463958024978638, "learning_rate": 3.046006250454248e-05, "loss": 0.2067, "step": 8390 }, { "epoch": 0.3052547423504615, "grad_norm": 10.607688903808594, 
"learning_rate": 3.0496402354822302e-05, "loss": 0.2709, "step": 8400 }, { "epoch": 0.3052547423504615, "eval_loss": 0.3912598192691803, "eval_runtime": 179.9461, "eval_samples_per_second": 41.201, "eval_steps_per_second": 5.152, "eval_wer": 0.22865648882676493, "step": 8400 }, { "epoch": 0.30561814085325967, "grad_norm": 5.675121307373047, "learning_rate": 3.053274220510212e-05, "loss": 0.1937, "step": 8410 }, { "epoch": 0.3059815393560579, "grad_norm": 1.9001195430755615, "learning_rate": 3.056908205538193e-05, "loss": 0.1668, "step": 8420 }, { "epoch": 0.306344937858856, "grad_norm": 6.807525157928467, "learning_rate": 3.060542190566175e-05, "loss": 0.2077, "step": 8430 }, { "epoch": 0.3067083363616542, "grad_norm": 2.067265272140503, "learning_rate": 3.064176175594157e-05, "loss": 0.1596, "step": 8440 }, { "epoch": 0.3070717348644524, "grad_norm": 15.267791748046875, "learning_rate": 3.067810160622138e-05, "loss": 0.2667, "step": 8450 }, { "epoch": 0.30743513336725053, "grad_norm": 1.367903709411621, "learning_rate": 3.07144414565012e-05, "loss": 0.1819, "step": 8460 }, { "epoch": 0.3077985318700487, "grad_norm": 1.531816840171814, "learning_rate": 3.075078130678102e-05, "loss": 0.1681, "step": 8470 }, { "epoch": 0.30816193037284684, "grad_norm": 3.668304204940796, "learning_rate": 3.078712115706083e-05, "loss": 0.2488, "step": 8480 }, { "epoch": 0.30852532887564504, "grad_norm": 2.2622220516204834, "learning_rate": 3.082346100734065e-05, "loss": 0.1866, "step": 8490 }, { "epoch": 0.3088887273784432, "grad_norm": 6.450117111206055, "learning_rate": 3.085980085762047e-05, "loss": 0.2676, "step": 8500 }, { "epoch": 0.30925212588124135, "grad_norm": 2.096731424331665, "learning_rate": 3.0896140707900284e-05, "loss": 0.1952, "step": 8510 }, { "epoch": 0.30961552438403955, "grad_norm": 1.3809120655059814, "learning_rate": 3.09324805581801e-05, "loss": 0.3478, "step": 8520 }, { "epoch": 0.3099789228868377, "grad_norm": 4.2257585525512695, "learning_rate": 
3.096882040845992e-05, "loss": 0.2126, "step": 8530 }, { "epoch": 0.31034232138963586, "grad_norm": 2.8543758392333984, "learning_rate": 3.1005160258739736e-05, "loss": 0.8169, "step": 8540 }, { "epoch": 0.31070571989243406, "grad_norm": 5.897162437438965, "learning_rate": 3.104150010901955e-05, "loss": 0.2421, "step": 8550 }, { "epoch": 0.3110691183952322, "grad_norm": 1.8980865478515625, "learning_rate": 3.107783995929937e-05, "loss": 0.193, "step": 8560 }, { "epoch": 0.31143251689803036, "grad_norm": 2.113833427429199, "learning_rate": 3.111417980957919e-05, "loss": 0.1553, "step": 8570 }, { "epoch": 0.31179591540082857, "grad_norm": 2.7569572925567627, "learning_rate": 3.1150519659859e-05, "loss": 0.2003, "step": 8580 }, { "epoch": 0.3121593139036267, "grad_norm": 2.480473756790161, "learning_rate": 3.118685951013882e-05, "loss": 0.2173, "step": 8590 }, { "epoch": 0.3125227124064249, "grad_norm": 12.174234390258789, "learning_rate": 3.122319936041864e-05, "loss": 0.3081, "step": 8600 }, { "epoch": 0.3128861109092231, "grad_norm": 2.8075544834136963, "learning_rate": 3.125953921069845e-05, "loss": 0.263, "step": 8610 }, { "epoch": 0.31324950941202123, "grad_norm": 16.535009384155273, "learning_rate": 3.129587906097827e-05, "loss": 0.1968, "step": 8620 }, { "epoch": 0.3136129079148194, "grad_norm": 6.4783711433410645, "learning_rate": 3.133221891125809e-05, "loss": 0.2396, "step": 8630 }, { "epoch": 0.31397630641761753, "grad_norm": 0.945353090763092, "learning_rate": 3.13685587615379e-05, "loss": 0.1623, "step": 8640 }, { "epoch": 0.31433970492041574, "grad_norm": 7.135663032531738, "learning_rate": 3.140489861181772e-05, "loss": 0.3006, "step": 8650 }, { "epoch": 0.3147031034232139, "grad_norm": 1.275896430015564, "learning_rate": 3.144123846209754e-05, "loss": 0.1845, "step": 8660 }, { "epoch": 0.31506650192601204, "grad_norm": 2.1660525798797607, "learning_rate": 3.147757831237735e-05, "loss": 0.1614, "step": 8670 }, { "epoch": 0.31542990042881025, 
"grad_norm": 3.878882646560669, "learning_rate": 3.1513918162657166e-05, "loss": 0.2124, "step": 8680 }, { "epoch": 0.3157932989316084, "grad_norm": 3.452864170074463, "learning_rate": 3.155025801293699e-05, "loss": 0.1659, "step": 8690 }, { "epoch": 0.31615669743440655, "grad_norm": 4.0493292808532715, "learning_rate": 3.1586597863216805e-05, "loss": 0.2653, "step": 8700 }, { "epoch": 0.31652009593720476, "grad_norm": 1.9184757471084595, "learning_rate": 3.162293771349662e-05, "loss": 0.2043, "step": 8710 }, { "epoch": 0.3168834944400029, "grad_norm": 4.22302770614624, "learning_rate": 3.165927756377644e-05, "loss": 0.2005, "step": 8720 }, { "epoch": 0.31724689294280106, "grad_norm": 8.557464599609375, "learning_rate": 3.1695617414056256e-05, "loss": 0.2135, "step": 8730 }, { "epoch": 0.31761029144559927, "grad_norm": 1.6090949773788452, "learning_rate": 3.173195726433607e-05, "loss": 0.1565, "step": 8740 }, { "epoch": 0.3179736899483974, "grad_norm": 35.859737396240234, "learning_rate": 3.1768297114615894e-05, "loss": 0.3239, "step": 8750 }, { "epoch": 0.31833708845119557, "grad_norm": 2.837944507598877, "learning_rate": 3.180463696489571e-05, "loss": 0.1902, "step": 8760 }, { "epoch": 0.3187004869539937, "grad_norm": 1.6548888683319092, "learning_rate": 3.184097681517552e-05, "loss": 0.1732, "step": 8770 }, { "epoch": 0.31906388545679193, "grad_norm": 3.840034246444702, "learning_rate": 3.187731666545534e-05, "loss": 0.2318, "step": 8780 }, { "epoch": 0.3194272839595901, "grad_norm": 3.3684277534484863, "learning_rate": 3.191365651573516e-05, "loss": 0.1794, "step": 8790 }, { "epoch": 0.31979068246238823, "grad_norm": 8.668655395507812, "learning_rate": 3.194999636601497e-05, "loss": 0.2745, "step": 8800 }, { "epoch": 0.32015408096518644, "grad_norm": 1.412441611289978, "learning_rate": 3.198633621629479e-05, "loss": 0.1913, "step": 8810 }, { "epoch": 0.3205174794679846, "grad_norm": 1.6273925304412842, "learning_rate": 3.202267606657461e-05, "loss": 0.1905, 
"step": 8820 }, { "epoch": 0.32088087797078274, "grad_norm": 5.704558372497559, "learning_rate": 3.205901591685442e-05, "loss": 0.2217, "step": 8830 }, { "epoch": 0.32124427647358095, "grad_norm": 2.248072385787964, "learning_rate": 3.209535576713424e-05, "loss": 0.1752, "step": 8840 }, { "epoch": 0.3216076749763791, "grad_norm": 8.330979347229004, "learning_rate": 3.213169561741406e-05, "loss": 0.2693, "step": 8850 }, { "epoch": 0.32197107347917725, "grad_norm": 6.713444709777832, "learning_rate": 3.2168035467693873e-05, "loss": 0.1821, "step": 8860 }, { "epoch": 0.32233447198197546, "grad_norm": 1.7717983722686768, "learning_rate": 3.220437531797369e-05, "loss": 0.1572, "step": 8870 }, { "epoch": 0.3226978704847736, "grad_norm": 3.8419570922851562, "learning_rate": 3.224071516825351e-05, "loss": 0.2168, "step": 8880 }, { "epoch": 0.32306126898757176, "grad_norm": 1.8515948057174683, "learning_rate": 3.2277055018533325e-05, "loss": 0.1474, "step": 8890 }, { "epoch": 0.32342466749036997, "grad_norm": 12.963587760925293, "learning_rate": 3.231339486881314e-05, "loss": 0.2349, "step": 8900 }, { "epoch": 0.3237880659931681, "grad_norm": 1.078845500946045, "learning_rate": 3.2349734719092963e-05, "loss": 0.1968, "step": 8910 }, { "epoch": 0.32415146449596627, "grad_norm": 1.5369044542312622, "learning_rate": 3.2386074569372776e-05, "loss": 0.1681, "step": 8920 }, { "epoch": 0.3245148629987644, "grad_norm": 3.8013484477996826, "learning_rate": 3.242241441965259e-05, "loss": 0.2214, "step": 8930 }, { "epoch": 0.3248782615015626, "grad_norm": 2.0259406566619873, "learning_rate": 3.2458754269932415e-05, "loss": 0.4227, "step": 8940 }, { "epoch": 0.3252416600043608, "grad_norm": 6.423609256744385, "learning_rate": 3.249509412021223e-05, "loss": 0.2835, "step": 8950 }, { "epoch": 0.32560505850715893, "grad_norm": 2.363159656524658, "learning_rate": 3.253143397049204e-05, "loss": 0.2038, "step": 8960 }, { "epoch": 0.32596845700995714, "grad_norm": 2.4034435749053955, 
"learning_rate": 3.256777382077186e-05, "loss": 0.1907, "step": 8970 }, { "epoch": 0.3263318555127553, "grad_norm": 4.032980442047119, "learning_rate": 3.260411367105168e-05, "loss": 0.1973, "step": 8980 }, { "epoch": 0.32669525401555344, "grad_norm": 6.102022647857666, "learning_rate": 3.264045352133149e-05, "loss": 0.197, "step": 8990 }, { "epoch": 0.32705865251835164, "grad_norm": 35.67893981933594, "learning_rate": 3.267679337161131e-05, "loss": 0.2682, "step": 9000 }, { "epoch": 0.32705865251835164, "eval_loss": 0.40712428092956543, "eval_runtime": 179.2194, "eval_samples_per_second": 41.368, "eval_steps_per_second": 5.172, "eval_wer": 0.226941020567466, "step": 9000 }, { "epoch": 0.3274220510211498, "grad_norm": 1.8014717102050781, "learning_rate": 3.271313322189113e-05, "loss": 0.1591, "step": 9010 }, { "epoch": 0.32778544952394795, "grad_norm": 1.7404965162277222, "learning_rate": 3.274947307217094e-05, "loss": 0.17, "step": 9020 }, { "epoch": 0.32814884802674615, "grad_norm": 3.7020771503448486, "learning_rate": 3.278581292245076e-05, "loss": 0.2225, "step": 9030 }, { "epoch": 0.3285122465295443, "grad_norm": 1.045998454093933, "learning_rate": 3.282215277273058e-05, "loss": 0.1681, "step": 9040 }, { "epoch": 0.32887564503234246, "grad_norm": 5.282716751098633, "learning_rate": 3.2858492623010394e-05, "loss": 0.2856, "step": 9050 }, { "epoch": 0.3292390435351406, "grad_norm": 3.3956387042999268, "learning_rate": 3.289483247329021e-05, "loss": 0.1782, "step": 9060 }, { "epoch": 0.3296024420379388, "grad_norm": 1.855603575706482, "learning_rate": 3.293117232357003e-05, "loss": 0.1582, "step": 9070 }, { "epoch": 0.32996584054073697, "grad_norm": 7.214013576507568, "learning_rate": 3.2967512173849845e-05, "loss": 0.1691, "step": 9080 }, { "epoch": 0.3303292390435351, "grad_norm": 3.140125036239624, "learning_rate": 3.3003852024129664e-05, "loss": 0.1872, "step": 9090 }, { "epoch": 0.3306926375463333, "grad_norm": 17.094255447387695, "learning_rate": 
3.304019187440948e-05, "loss": 0.2848, "step": 9100 }, { "epoch": 0.3310560360491315, "grad_norm": 1.9439010620117188, "learning_rate": 3.3076531724689296e-05, "loss": 0.1625, "step": 9110 }, { "epoch": 0.3314194345519296, "grad_norm": 1.609747290611267, "learning_rate": 3.311287157496911e-05, "loss": 0.1915, "step": 9120 }, { "epoch": 0.33178283305472783, "grad_norm": 4.03629207611084, "learning_rate": 3.314921142524893e-05, "loss": 0.2291, "step": 9130 }, { "epoch": 0.332146231557526, "grad_norm": 1.9643129110336304, "learning_rate": 3.318555127552875e-05, "loss": 0.1747, "step": 9140 }, { "epoch": 0.33250963006032414, "grad_norm": 9.304847717285156, "learning_rate": 3.322189112580856e-05, "loss": 0.2539, "step": 9150 }, { "epoch": 0.33287302856312234, "grad_norm": 1.991467833518982, "learning_rate": 3.325823097608838e-05, "loss": 3.61, "step": 9160 }, { "epoch": 0.3332364270659205, "grad_norm": 2.7127187252044678, "learning_rate": 3.32945708263682e-05, "loss": 0.1985, "step": 9170 }, { "epoch": 0.33359982556871864, "grad_norm": 2.831299304962158, "learning_rate": 3.333091067664801e-05, "loss": 1.7334, "step": 9180 }, { "epoch": 0.33396322407151685, "grad_norm": 1.5434614419937134, "learning_rate": 3.336725052692783e-05, "loss": 0.1718, "step": 9190 }, { "epoch": 0.334326622574315, "grad_norm": 10.254124641418457, "learning_rate": 3.340359037720765e-05, "loss": 0.3246, "step": 9200 }, { "epoch": 0.33469002107711315, "grad_norm": 1.169886589050293, "learning_rate": 3.343993022748746e-05, "loss": 0.1936, "step": 9210 }, { "epoch": 0.3350534195799113, "grad_norm": 3.697627544403076, "learning_rate": 3.347627007776728e-05, "loss": 0.205, "step": 9220 }, { "epoch": 0.3354168180827095, "grad_norm": 3.15781307220459, "learning_rate": 3.35126099280471e-05, "loss": 0.2222, "step": 9230 }, { "epoch": 0.33578021658550766, "grad_norm": 1.903701663017273, "learning_rate": 3.3548949778326914e-05, "loss": 0.1611, "step": 9240 }, { "epoch": 0.3361436150883058, "grad_norm": 
26.77275848388672, "learning_rate": 3.358528962860673e-05, "loss": 0.2872, "step": 9250 }, { "epoch": 0.336507013591104, "grad_norm": 1.588224172592163, "learning_rate": 3.3621629478886546e-05, "loss": 3.404, "step": 9260 }, { "epoch": 0.33687041209390217, "grad_norm": 1.8802090883255005, "learning_rate": 3.3657969329166365e-05, "loss": 0.1715, "step": 9270 }, { "epoch": 0.3372338105967003, "grad_norm": 5.38352632522583, "learning_rate": 3.3694309179446185e-05, "loss": 0.1906, "step": 9280 }, { "epoch": 0.33759720909949853, "grad_norm": 1.736177921295166, "learning_rate": 3.3730649029726e-05, "loss": 0.1881, "step": 9290 }, { "epoch": 0.3379606076022967, "grad_norm": 17.865558624267578, "learning_rate": 3.3766988880005816e-05, "loss": 0.3003, "step": 9300 }, { "epoch": 0.33832400610509483, "grad_norm": 1.532173991203308, "learning_rate": 3.3803328730285636e-05, "loss": 0.188, "step": 9310 }, { "epoch": 0.33868740460789304, "grad_norm": 3.8595352172851562, "learning_rate": 3.383966858056545e-05, "loss": 0.1869, "step": 9320 }, { "epoch": 0.3390508031106912, "grad_norm": 2.5906641483306885, "learning_rate": 3.387600843084527e-05, "loss": 0.1993, "step": 9330 }, { "epoch": 0.33941420161348934, "grad_norm": 2.5224273204803467, "learning_rate": 3.391234828112508e-05, "loss": 0.1935, "step": 9340 }, { "epoch": 0.33977760011628755, "grad_norm": 11.555095672607422, "learning_rate": 3.39486881314049e-05, "loss": 0.2891, "step": 9350 }, { "epoch": 0.3401409986190857, "grad_norm": 1.3724703788757324, "learning_rate": 3.398502798168472e-05, "loss": 0.1656, "step": 9360 }, { "epoch": 0.34050439712188385, "grad_norm": 2.1549072265625, "learning_rate": 3.402136783196453e-05, "loss": 0.1769, "step": 9370 }, { "epoch": 0.340867795624682, "grad_norm": 1.793492317199707, "learning_rate": 3.405770768224435e-05, "loss": 0.2661, "step": 9380 }, { "epoch": 0.3412311941274802, "grad_norm": 4.038620948791504, "learning_rate": 3.409404753252417e-05, "loss": 0.1871, "step": 9390 }, { 
"epoch": 0.34159459263027836, "grad_norm": 31.7847900390625, "learning_rate": 3.413038738280398e-05, "loss": 0.2967, "step": 9400 }, { "epoch": 0.3419579911330765, "grad_norm": 2.398646354675293, "learning_rate": 3.41667272330838e-05, "loss": 0.2086, "step": 9410 }, { "epoch": 0.3423213896358747, "grad_norm": 2.2226221561431885, "learning_rate": 3.4203067083363615e-05, "loss": 0.1665, "step": 9420 }, { "epoch": 0.34268478813867287, "grad_norm": 39.96380615234375, "learning_rate": 3.4239406933643434e-05, "loss": 0.9468, "step": 9430 }, { "epoch": 0.343048186641471, "grad_norm": 1.5465339422225952, "learning_rate": 3.4275746783923254e-05, "loss": 0.1827, "step": 9440 }, { "epoch": 0.3434115851442692, "grad_norm": 7.941345691680908, "learning_rate": 3.4312086634203066e-05, "loss": 0.2786, "step": 9450 }, { "epoch": 0.3437749836470674, "grad_norm": 1.2575476169586182, "learning_rate": 3.4348426484482885e-05, "loss": 0.1764, "step": 9460 }, { "epoch": 0.34413838214986553, "grad_norm": 1.3529596328735352, "learning_rate": 3.4384766334762705e-05, "loss": 0.207, "step": 9470 }, { "epoch": 0.34450178065266374, "grad_norm": 3.2839174270629883, "learning_rate": 3.442110618504252e-05, "loss": 0.2672, "step": 9480 }, { "epoch": 0.3448651791554619, "grad_norm": 3.246384859085083, "learning_rate": 3.445744603532234e-05, "loss": 0.1906, "step": 9490 }, { "epoch": 0.34522857765826004, "grad_norm": 2.595038652420044, "learning_rate": 3.4493785885602156e-05, "loss": 0.2441, "step": 9500 }, { "epoch": 0.3455919761610582, "grad_norm": 1.3803220987319946, "learning_rate": 3.453012573588197e-05, "loss": 0.1745, "step": 9510 }, { "epoch": 0.3459553746638564, "grad_norm": 1.2091724872589111, "learning_rate": 3.456646558616179e-05, "loss": 0.1441, "step": 9520 }, { "epoch": 0.34631877316665455, "grad_norm": 6.582603931427002, "learning_rate": 3.460280543644161e-05, "loss": 0.1835, "step": 9530 }, { "epoch": 0.3466821716694527, "grad_norm": 2.6845383644104004, "learning_rate": 
3.463914528672142e-05, "loss": 0.2048, "step": 9540 }, { "epoch": 0.3470455701722509, "grad_norm": 11.775678634643555, "learning_rate": 3.467548513700123e-05, "loss": 0.2841, "step": 9550 }, { "epoch": 0.34740896867504906, "grad_norm": 2.256279706954956, "learning_rate": 3.471182498728106e-05, "loss": 0.6472, "step": 9560 }, { "epoch": 0.3477723671778472, "grad_norm": 1.4487576484680176, "learning_rate": 3.474816483756087e-05, "loss": 0.2722, "step": 9570 }, { "epoch": 0.3481357656806454, "grad_norm": 3.843964099884033, "learning_rate": 3.4784504687840684e-05, "loss": 0.1855, "step": 9580 }, { "epoch": 0.34849916418344357, "grad_norm": 1.5561772584915161, "learning_rate": 3.48208445381205e-05, "loss": 0.1908, "step": 9590 }, { "epoch": 0.3488625626862417, "grad_norm": 3.757232666015625, "learning_rate": 3.485718438840032e-05, "loss": 0.2198, "step": 9600 }, { "epoch": 0.3488625626862417, "eval_loss": 0.3895765244960785, "eval_runtime": 179.7435, "eval_samples_per_second": 41.248, "eval_steps_per_second": 5.157, "eval_wer": 0.21512335033674007, "step": 9600 }, { "epoch": 0.3492259611890399, "grad_norm": 1.3912307024002075, "learning_rate": 3.4893524238680135e-05, "loss": 0.1616, "step": 9610 }, { "epoch": 0.3495893596918381, "grad_norm": 2.4036080837249756, "learning_rate": 3.4929864088959954e-05, "loss": 0.1579, "step": 9620 }, { "epoch": 0.3499527581946362, "grad_norm": 2.611175537109375, "learning_rate": 3.4966203939239774e-05, "loss": 0.1746, "step": 9630 }, { "epoch": 0.35031615669743443, "grad_norm": 1.4045140743255615, "learning_rate": 3.5002543789519586e-05, "loss": 0.1594, "step": 9640 }, { "epoch": 0.3506795552002326, "grad_norm": 12.708057403564453, "learning_rate": 3.5038883639799406e-05, "loss": 0.3118, "step": 9650 }, { "epoch": 0.35104295370303074, "grad_norm": 3.0364696979522705, "learning_rate": 3.5075223490079225e-05, "loss": 0.3062, "step": 9660 }, { "epoch": 0.3514063522058289, "grad_norm": 1.4527848958969116, "learning_rate": 
3.511156334035904e-05, "loss": 0.1603, "step": 9670 }, { "epoch": 0.3517697507086271, "grad_norm": 5.697939395904541, "learning_rate": 3.514790319063886e-05, "loss": 0.2069, "step": 9680 }, { "epoch": 0.35213314921142524, "grad_norm": 2.1645712852478027, "learning_rate": 3.5184243040918676e-05, "loss": 0.162, "step": 9690 }, { "epoch": 0.3524965477142234, "grad_norm": 8.024601936340332, "learning_rate": 3.522058289119849e-05, "loss": 0.898, "step": 9700 }, { "epoch": 0.3528599462170216, "grad_norm": 1.4516103267669678, "learning_rate": 3.52569227414783e-05, "loss": 0.189, "step": 9710 }, { "epoch": 0.35322334471981975, "grad_norm": 1.0467925071716309, "learning_rate": 3.529326259175813e-05, "loss": 0.1547, "step": 9720 }, { "epoch": 0.3535867432226179, "grad_norm": 3.9237303733825684, "learning_rate": 3.532960244203794e-05, "loss": 0.1968, "step": 9730 }, { "epoch": 0.3539501417254161, "grad_norm": 2.502257823944092, "learning_rate": 3.536594229231775e-05, "loss": 0.1645, "step": 9740 }, { "epoch": 0.35431354022821426, "grad_norm": 30.662227630615234, "learning_rate": 3.540228214259758e-05, "loss": 0.2847, "step": 9750 }, { "epoch": 0.3546769387310124, "grad_norm": 1.7106624841690063, "learning_rate": 3.543862199287739e-05, "loss": 0.1951, "step": 9760 }, { "epoch": 0.3550403372338106, "grad_norm": 2.169036865234375, "learning_rate": 3.5474961843157204e-05, "loss": 0.172, "step": 9770 }, { "epoch": 0.3554037357366088, "grad_norm": 6.116454124450684, "learning_rate": 3.551130169343703e-05, "loss": 0.1934, "step": 9780 }, { "epoch": 0.3557671342394069, "grad_norm": 1.8530545234680176, "learning_rate": 3.554764154371684e-05, "loss": 0.217, "step": 9790 }, { "epoch": 0.3561305327422051, "grad_norm": 11.060449600219727, "learning_rate": 3.5583981393996655e-05, "loss": 0.2145, "step": 9800 }, { "epoch": 0.3564939312450033, "grad_norm": 7.748067378997803, "learning_rate": 3.5620321244276475e-05, "loss": 0.2114, "step": 9810 }, { "epoch": 0.35685732974780143, "grad_norm": 
3.562528610229492, "learning_rate": 3.5656661094556294e-05, "loss": 0.221, "step": 9820 }, { "epoch": 0.3572207282505996, "grad_norm": 2.798417091369629, "learning_rate": 3.5693000944836107e-05, "loss": 0.2071, "step": 9830 }, { "epoch": 0.3575841267533978, "grad_norm": 2.3908724784851074, "learning_rate": 3.5729340795115926e-05, "loss": 0.1678, "step": 9840 }, { "epoch": 0.35794752525619594, "grad_norm": 7.205004692077637, "learning_rate": 3.5765680645395745e-05, "loss": 0.2953, "step": 9850 }, { "epoch": 0.3583109237589941, "grad_norm": 2.5064749717712402, "learning_rate": 3.580202049567556e-05, "loss": 0.197, "step": 9860 }, { "epoch": 0.3586743222617923, "grad_norm": 2.0985934734344482, "learning_rate": 3.583836034595538e-05, "loss": 0.1441, "step": 9870 }, { "epoch": 0.35903772076459045, "grad_norm": 5.256442070007324, "learning_rate": 3.5874700196235197e-05, "loss": 0.203, "step": 9880 }, { "epoch": 0.3594011192673886, "grad_norm": 2.3590219020843506, "learning_rate": 3.591104004651501e-05, "loss": 0.1811, "step": 9890 }, { "epoch": 0.3597645177701868, "grad_norm": 24.96747398376465, "learning_rate": 3.594737989679482e-05, "loss": 0.293, "step": 9900 }, { "epoch": 0.36012791627298496, "grad_norm": 1.727751612663269, "learning_rate": 3.598371974707465e-05, "loss": 0.1896, "step": 9910 }, { "epoch": 0.3604913147757831, "grad_norm": 2.349269151687622, "learning_rate": 3.602005959735446e-05, "loss": 0.1649, "step": 9920 }, { "epoch": 0.3608547132785813, "grad_norm": 3.139385223388672, "learning_rate": 3.605639944763427e-05, "loss": 0.2181, "step": 9930 }, { "epoch": 0.36121811178137947, "grad_norm": 2.1249756813049316, "learning_rate": 3.60927392979141e-05, "loss": 0.1751, "step": 9940 }, { "epoch": 0.3615815102841776, "grad_norm": 3.6616756916046143, "learning_rate": 3.612907914819391e-05, "loss": 0.2729, "step": 9950 }, { "epoch": 0.36194490878697577, "grad_norm": 1.367600440979004, "learning_rate": 3.6165418998473724e-05, "loss": 0.1592, "step": 9960 }, { 
"epoch": 0.362308307289774, "grad_norm": 1.8141239881515503, "learning_rate": 3.620175884875355e-05, "loss": 0.2867, "step": 9970 }, { "epoch": 0.36267170579257213, "grad_norm": 7.0058794021606445, "learning_rate": 3.623809869903336e-05, "loss": 0.207, "step": 9980 }, { "epoch": 0.3630351042953703, "grad_norm": 1.923048734664917, "learning_rate": 3.6274438549313176e-05, "loss": 0.346, "step": 9990 }, { "epoch": 0.3633985027981685, "grad_norm": 16.30779457092285, "learning_rate": 3.6310778399592995e-05, "loss": 0.3107, "step": 10000 }, { "epoch": 0.36376190130096664, "grad_norm": 1.979866862297058, "learning_rate": 3.6347118249872814e-05, "loss": 0.7999, "step": 10010 }, { "epoch": 0.3641252998037648, "grad_norm": 2.7377023696899414, "learning_rate": 3.638345810015263e-05, "loss": 0.2005, "step": 10020 }, { "epoch": 0.364488698306563, "grad_norm": 5.546159744262695, "learning_rate": 3.6419797950432446e-05, "loss": 0.1964, "step": 10030 }, { "epoch": 0.36485209680936115, "grad_norm": 2.2417142391204834, "learning_rate": 3.6456137800712265e-05, "loss": 0.2078, "step": 10040 }, { "epoch": 0.3652154953121593, "grad_norm": 7.2175092697143555, "learning_rate": 3.649247765099208e-05, "loss": 0.291, "step": 10050 }, { "epoch": 0.3655788938149575, "grad_norm": 2.6172754764556885, "learning_rate": 3.65288175012719e-05, "loss": 0.2037, "step": 10060 }, { "epoch": 0.36594229231775566, "grad_norm": 2.0634214878082275, "learning_rate": 3.656515735155172e-05, "loss": 0.1668, "step": 10070 }, { "epoch": 0.3663056908205538, "grad_norm": 3.5431976318359375, "learning_rate": 3.660149720183153e-05, "loss": 0.475, "step": 10080 }, { "epoch": 0.366669089323352, "grad_norm": 2.147472381591797, "learning_rate": 3.663783705211135e-05, "loss": 0.1869, "step": 10090 }, { "epoch": 0.36703248782615017, "grad_norm": 18.726482391357422, "learning_rate": 3.667417690239117e-05, "loss": 0.2773, "step": 10100 }, { "epoch": 0.3673958863289483, "grad_norm": 1.6554090976715088, "learning_rate": 
3.671051675267098e-05, "loss": 0.1707, "step": 10110 }, { "epoch": 0.36775928483174647, "grad_norm": 1.8967760801315308, "learning_rate": 3.674685660295079e-05, "loss": 0.2159, "step": 10120 }, { "epoch": 0.3681226833345447, "grad_norm": 2.3765788078308105, "learning_rate": 3.678319645323061e-05, "loss": 0.2229, "step": 10130 }, { "epoch": 0.3684860818373428, "grad_norm": 5.890452861785889, "learning_rate": 3.681953630351043e-05, "loss": 0.195, "step": 10140 }, { "epoch": 0.368849480340141, "grad_norm": 5.045167446136475, "learning_rate": 3.6855876153790244e-05, "loss": 0.3111, "step": 10150 }, { "epoch": 0.3692128788429392, "grad_norm": 2.37107253074646, "learning_rate": 3.6892216004070064e-05, "loss": 0.1942, "step": 10160 }, { "epoch": 0.36957627734573734, "grad_norm": 1.9943170547485352, "learning_rate": 3.692855585434988e-05, "loss": 0.1906, "step": 10170 }, { "epoch": 0.3699396758485355, "grad_norm": 3.16873836517334, "learning_rate": 3.6964895704629696e-05, "loss": 0.1791, "step": 10180 }, { "epoch": 0.3703030743513337, "grad_norm": 15.252134323120117, "learning_rate": 3.7001235554909515e-05, "loss": 0.3702, "step": 10190 }, { "epoch": 0.37066647285413185, "grad_norm": 8.845834732055664, "learning_rate": 3.7037575405189334e-05, "loss": 0.2765, "step": 10200 }, { "epoch": 0.37066647285413185, "eval_loss": 0.4178149104118347, "eval_runtime": 179.6523, "eval_samples_per_second": 41.269, "eval_steps_per_second": 5.16, "eval_wer": 0.2237551509430537, "step": 10200 }, { "epoch": 0.37102987135693, "grad_norm": 6.2689313888549805, "learning_rate": 3.707391525546915e-05, "loss": 0.1922, "step": 10210 }, { "epoch": 0.3713932698597282, "grad_norm": 1.00067138671875, "learning_rate": 3.7110255105748966e-05, "loss": 0.1535, "step": 10220 }, { "epoch": 0.37175666836252635, "grad_norm": 2.6602060794830322, "learning_rate": 3.7146594956028786e-05, "loss": 0.1959, "step": 10230 }, { "epoch": 0.3721200668653245, "grad_norm": 4.743015766143799, "learning_rate": 
3.71829348063086e-05, "loss": 0.2058, "step": 10240 }, { "epoch": 0.37248346536812266, "grad_norm": 8.304347038269043, "learning_rate": 3.721927465658842e-05, "loss": 0.3027, "step": 10250 }, { "epoch": 0.37284686387092086, "grad_norm": 1.8180521726608276, "learning_rate": 3.725561450686824e-05, "loss": 0.1708, "step": 10260 }, { "epoch": 0.373210262373719, "grad_norm": 2.05625057220459, "learning_rate": 3.729195435714805e-05, "loss": 0.1824, "step": 10270 }, { "epoch": 0.37357366087651717, "grad_norm": 2.426814317703247, "learning_rate": 3.732829420742787e-05, "loss": 0.197, "step": 10280 }, { "epoch": 0.3739370593793154, "grad_norm": 1.658158540725708, "learning_rate": 3.736463405770768e-05, "loss": 0.1578, "step": 10290 }, { "epoch": 0.3743004578821135, "grad_norm": 10.913407325744629, "learning_rate": 3.74009739079875e-05, "loss": 0.2728, "step": 10300 }, { "epoch": 0.3746638563849117, "grad_norm": 1.6443781852722168, "learning_rate": 3.743731375826732e-05, "loss": 0.1656, "step": 10310 }, { "epoch": 0.3750272548877099, "grad_norm": 1.0702744722366333, "learning_rate": 3.747365360854713e-05, "loss": 0.7132, "step": 10320 }, { "epoch": 0.37539065339050803, "grad_norm": 5.8824052810668945, "learning_rate": 3.750999345882695e-05, "loss": 0.2701, "step": 10330 }, { "epoch": 0.3757540518933062, "grad_norm": 4.373916149139404, "learning_rate": 3.754633330910677e-05, "loss": 0.2053, "step": 10340 }, { "epoch": 0.3761174503961044, "grad_norm": 22.25397300720215, "learning_rate": 3.7582673159386584e-05, "loss": 0.2781, "step": 10350 }, { "epoch": 0.37648084889890254, "grad_norm": 1.8272254467010498, "learning_rate": 3.7619013009666403e-05, "loss": 0.1833, "step": 10360 }, { "epoch": 0.3768442474017007, "grad_norm": 3.286931037902832, "learning_rate": 3.7655352859946216e-05, "loss": 0.1576, "step": 10370 }, { "epoch": 0.3772076459044989, "grad_norm": 5.283690929412842, "learning_rate": 3.7691692710226035e-05, "loss": 0.21, "step": 10380 }, { "epoch": 0.37757104440729705, 
"grad_norm": 1.184476375579834, "learning_rate": 3.7728032560505855e-05, "loss": 0.2597, "step": 10390 }, { "epoch": 0.3779344429100952, "grad_norm": 5.685116767883301, "learning_rate": 3.776437241078567e-05, "loss": 0.2476, "step": 10400 }, { "epoch": 0.37829784141289335, "grad_norm": 1.1873399019241333, "learning_rate": 3.7800712261065487e-05, "loss": 0.1597, "step": 10410 }, { "epoch": 0.37866123991569156, "grad_norm": 1.6136255264282227, "learning_rate": 3.7837052111345306e-05, "loss": 0.188, "step": 10420 }, { "epoch": 0.3790246384184897, "grad_norm": 4.743179798126221, "learning_rate": 3.787339196162512e-05, "loss": 0.1962, "step": 10430 }, { "epoch": 0.37938803692128786, "grad_norm": 2.603379011154175, "learning_rate": 3.790973181190494e-05, "loss": 0.1854, "step": 10440 }, { "epoch": 0.37975143542408607, "grad_norm": 6.267378807067871, "learning_rate": 3.794607166218475e-05, "loss": 0.2569, "step": 10450 }, { "epoch": 0.3801148339268842, "grad_norm": 5.370235919952393, "learning_rate": 3.798241151246457e-05, "loss": 0.1796, "step": 10460 }, { "epoch": 0.3804782324296824, "grad_norm": 2.170964002609253, "learning_rate": 3.801875136274439e-05, "loss": 0.1713, "step": 10470 }, { "epoch": 0.3808416309324806, "grad_norm": 4.134753704071045, "learning_rate": 3.80550912130242e-05, "loss": 0.2269, "step": 10480 }, { "epoch": 0.38120502943527873, "grad_norm": 2.7026259899139404, "learning_rate": 3.809143106330402e-05, "loss": 0.1938, "step": 10490 }, { "epoch": 0.3815684279380769, "grad_norm": 7.368224143981934, "learning_rate": 3.812777091358384e-05, "loss": 0.2617, "step": 10500 }, { "epoch": 0.3819318264408751, "grad_norm": 1.3194938898086548, "learning_rate": 3.816411076386365e-05, "loss": 0.2066, "step": 10510 }, { "epoch": 0.38229522494367324, "grad_norm": 1.901505470275879, "learning_rate": 3.820045061414347e-05, "loss": 0.1716, "step": 10520 }, { "epoch": 0.3826586234464714, "grad_norm": 3.4045536518096924, "learning_rate": 3.823679046442329e-05, "loss": 
0.1625, "step": 10530 }, { "epoch": 0.38302202194926954, "grad_norm": 2.1540184020996094, "learning_rate": 3.8273130314703104e-05, "loss": 0.1829, "step": 10540 }, { "epoch": 0.38338542045206775, "grad_norm": 14.377511024475098, "learning_rate": 3.8309470164982924e-05, "loss": 0.2747, "step": 10550 }, { "epoch": 0.3837488189548659, "grad_norm": 1.9092762470245361, "learning_rate": 3.834581001526274e-05, "loss": 0.1728, "step": 10560 }, { "epoch": 0.38411221745766405, "grad_norm": 1.867458462715149, "learning_rate": 3.8382149865542556e-05, "loss": 0.1752, "step": 10570 }, { "epoch": 0.38447561596046226, "grad_norm": 5.246692657470703, "learning_rate": 3.841848971582237e-05, "loss": 0.1823, "step": 10580 }, { "epoch": 0.3848390144632604, "grad_norm": 2.9294533729553223, "learning_rate": 3.845482956610219e-05, "loss": 0.2052, "step": 10590 }, { "epoch": 0.38520241296605856, "grad_norm": 11.946113586425781, "learning_rate": 3.849116941638201e-05, "loss": 0.309, "step": 10600 }, { "epoch": 0.38556581146885677, "grad_norm": 1.7155182361602783, "learning_rate": 3.852750926666182e-05, "loss": 0.175, "step": 10610 }, { "epoch": 0.3859292099716549, "grad_norm": 1.1520076990127563, "learning_rate": 3.856384911694164e-05, "loss": 0.2129, "step": 10620 }, { "epoch": 0.38629260847445307, "grad_norm": 1.9750351905822754, "learning_rate": 3.860018896722146e-05, "loss": 0.1725, "step": 10630 }, { "epoch": 0.3866560069772513, "grad_norm": 4.309560298919678, "learning_rate": 3.863652881750127e-05, "loss": 0.1516, "step": 10640 }, { "epoch": 0.3870194054800494, "grad_norm": 7.554156303405762, "learning_rate": 3.867286866778109e-05, "loss": 0.3069, "step": 10650 }, { "epoch": 0.3873828039828476, "grad_norm": 3.7965683937072754, "learning_rate": 3.870920851806091e-05, "loss": 0.2014, "step": 10660 }, { "epoch": 0.3877462024856458, "grad_norm": 3.8691935539245605, "learning_rate": 3.874554836834072e-05, "loss": 0.1678, "step": 10670 }, { "epoch": 0.38810960098844394, "grad_norm": 
4.144315719604492, "learning_rate": 3.878188821862054e-05, "loss": 0.2936, "step": 10680 }, { "epoch": 0.3884729994912421, "grad_norm": 1.5667825937271118, "learning_rate": 3.881822806890036e-05, "loss": 0.1871, "step": 10690 }, { "epoch": 0.38883639799404024, "grad_norm": 7.6076788902282715, "learning_rate": 3.885456791918017e-05, "loss": 0.2661, "step": 10700 }, { "epoch": 0.38919979649683845, "grad_norm": 1.7828059196472168, "learning_rate": 3.889090776945999e-05, "loss": 0.1808, "step": 10710 }, { "epoch": 0.3895631949996366, "grad_norm": 7.039370059967041, "learning_rate": 3.892724761973981e-05, "loss": 0.2484, "step": 10720 }, { "epoch": 0.38992659350243475, "grad_norm": 2.1001148223876953, "learning_rate": 3.8963587470019625e-05, "loss": 0.1644, "step": 10730 }, { "epoch": 0.39028999200523296, "grad_norm": 0.9235002398490906, "learning_rate": 3.899992732029944e-05, "loss": 0.172, "step": 10740 }, { "epoch": 0.3906533905080311, "grad_norm": 10.066643714904785, "learning_rate": 3.903626717057926e-05, "loss": 0.2999, "step": 10750 }, { "epoch": 0.39101678901082926, "grad_norm": 2.256965160369873, "learning_rate": 3.9072607020859076e-05, "loss": 0.2116, "step": 10760 }, { "epoch": 0.39138018751362746, "grad_norm": 1.742125153541565, "learning_rate": 3.910894687113889e-05, "loss": 0.1838, "step": 10770 }, { "epoch": 0.3917435860164256, "grad_norm": 5.397392749786377, "learning_rate": 3.9145286721418714e-05, "loss": 0.2213, "step": 10780 }, { "epoch": 0.39210698451922377, "grad_norm": 2.439197540283203, "learning_rate": 3.918162657169853e-05, "loss": 0.1984, "step": 10790 }, { "epoch": 0.392470383022022, "grad_norm": 6.7387895584106445, "learning_rate": 3.921796642197834e-05, "loss": 0.2842, "step": 10800 }, { "epoch": 0.392470383022022, "eval_loss": 0.39516785740852356, "eval_runtime": 180.1522, "eval_samples_per_second": 41.154, "eval_steps_per_second": 5.146, "eval_wer": 0.21758309583023216, "step": 10800 }, { "epoch": 0.3928337815248201, "grad_norm": 
1.5229130983352661, "learning_rate": 3.925430627225816e-05, "loss": 0.1809, "step": 10810 }, { "epoch": 0.3931971800276183, "grad_norm": 1.6385318040847778, "learning_rate": 3.929064612253798e-05, "loss": 0.155, "step": 10820 }, { "epoch": 0.3935605785304164, "grad_norm": 2.403878927230835, "learning_rate": 3.932698597281779e-05, "loss": 0.2837, "step": 10830 }, { "epoch": 0.39392397703321463, "grad_norm": 2.818368434906006, "learning_rate": 3.936332582309761e-05, "loss": 0.2298, "step": 10840 }, { "epoch": 0.3942873755360128, "grad_norm": 6.08942174911499, "learning_rate": 3.939966567337743e-05, "loss": 0.2262, "step": 10850 }, { "epoch": 0.39465077403881094, "grad_norm": 1.2632570266723633, "learning_rate": 3.943600552365724e-05, "loss": 0.2087, "step": 10860 }, { "epoch": 0.39501417254160914, "grad_norm": 2.2119662761688232, "learning_rate": 3.947234537393706e-05, "loss": 0.1974, "step": 10870 }, { "epoch": 0.3953775710444073, "grad_norm": 2.936021089553833, "learning_rate": 3.950868522421688e-05, "loss": 0.1909, "step": 10880 }, { "epoch": 0.39574096954720545, "grad_norm": 1.3898749351501465, "learning_rate": 3.9545025074496693e-05, "loss": 0.184, "step": 10890 }, { "epoch": 0.39610436805000365, "grad_norm": 9.063791275024414, "learning_rate": 3.958136492477651e-05, "loss": 0.338, "step": 10900 }, { "epoch": 0.3964677665528018, "grad_norm": 1.3791584968566895, "learning_rate": 3.961770477505633e-05, "loss": 0.2256, "step": 10910 }, { "epoch": 0.39683116505559995, "grad_norm": 0.9377845525741577, "learning_rate": 3.9654044625336145e-05, "loss": 0.9822, "step": 10920 }, { "epoch": 0.39719456355839816, "grad_norm": 3.9755465984344482, "learning_rate": 3.969038447561596e-05, "loss": 0.2257, "step": 10930 }, { "epoch": 0.3975579620611963, "grad_norm": 1.559699535369873, "learning_rate": 3.9726724325895783e-05, "loss": 0.2116, "step": 10940 }, { "epoch": 0.39792136056399446, "grad_norm": 7.545668601989746, "learning_rate": 3.9763064176175596e-05, "loss": 0.2515, 
"step": 10950 }, { "epoch": 0.39828475906679267, "grad_norm": 1.980197548866272, "learning_rate": 3.979940402645541e-05, "loss": 0.1721, "step": 10960 }, { "epoch": 0.3986481575695908, "grad_norm": 2.5450973510742188, "learning_rate": 3.9835743876735235e-05, "loss": 1.7152, "step": 10970 }, { "epoch": 0.399011556072389, "grad_norm": 3.518233060836792, "learning_rate": 3.987208372701505e-05, "loss": 0.2521, "step": 10980 }, { "epoch": 0.3993749545751871, "grad_norm": 2.678774356842041, "learning_rate": 3.990842357729486e-05, "loss": 0.2025, "step": 10990 }, { "epoch": 0.39973835307798533, "grad_norm": 11.46552848815918, "learning_rate": 3.9944763427574686e-05, "loss": 0.2683, "step": 11000 }, { "epoch": 0.4001017515807835, "grad_norm": 2.3148844242095947, "learning_rate": 3.99811032778545e-05, "loss": 1.5331, "step": 11010 }, { "epoch": 0.40046515008358163, "grad_norm": 1.2145686149597168, "learning_rate": 4.001744312813431e-05, "loss": 0.1931, "step": 11020 }, { "epoch": 0.40082854858637984, "grad_norm": 3.581883192062378, "learning_rate": 4.005378297841413e-05, "loss": 0.18, "step": 11030 }, { "epoch": 0.401191947089178, "grad_norm": 2.4645683765411377, "learning_rate": 4.009012282869395e-05, "loss": 0.2303, "step": 11040 }, { "epoch": 0.40155534559197614, "grad_norm": 13.845566749572754, "learning_rate": 4.012646267897376e-05, "loss": 0.2515, "step": 11050 }, { "epoch": 0.40191874409477435, "grad_norm": 1.6929864883422852, "learning_rate": 4.016280252925358e-05, "loss": 2.9232, "step": 11060 }, { "epoch": 0.4022821425975725, "grad_norm": 1.5453213453292847, "learning_rate": 4.01991423795334e-05, "loss": 0.1703, "step": 11070 }, { "epoch": 0.40264554110037065, "grad_norm": 1.5723987817764282, "learning_rate": 4.0235482229813214e-05, "loss": 0.1694, "step": 11080 }, { "epoch": 0.40300893960316886, "grad_norm": 1.4501444101333618, "learning_rate": 4.027182208009303e-05, "loss": 0.2477, "step": 11090 }, { "epoch": 0.403372338105967, "grad_norm": 20.50950813293457, 
"learning_rate": 4.030816193037285e-05, "loss": 0.2641, "step": 11100 }, { "epoch": 0.40373573660876516, "grad_norm": 1.9846757650375366, "learning_rate": 4.0344501780652665e-05, "loss": 0.1807, "step": 11110 }, { "epoch": 0.40409913511156337, "grad_norm": 1.3933240175247192, "learning_rate": 4.0380841630932484e-05, "loss": 0.1683, "step": 11120 }, { "epoch": 0.4044625336143615, "grad_norm": 2.370534658432007, "learning_rate": 4.0417181481212304e-05, "loss": 0.2476, "step": 11130 }, { "epoch": 0.40482593211715967, "grad_norm": 2.6382100582122803, "learning_rate": 4.0453521331492116e-05, "loss": 0.1723, "step": 11140 }, { "epoch": 0.4051893306199578, "grad_norm": 27.381826400756836, "learning_rate": 4.048986118177193e-05, "loss": 0.2058, "step": 11150 }, { "epoch": 0.40555272912275603, "grad_norm": 1.3622616529464722, "learning_rate": 4.052620103205175e-05, "loss": 0.1744, "step": 11160 }, { "epoch": 0.4059161276255542, "grad_norm": 1.4734828472137451, "learning_rate": 4.056254088233157e-05, "loss": 0.1685, "step": 11170 }, { "epoch": 0.40627952612835233, "grad_norm": 5.694312572479248, "learning_rate": 4.059888073261138e-05, "loss": 0.3549, "step": 11180 }, { "epoch": 0.40664292463115054, "grad_norm": 1.9976438283920288, "learning_rate": 4.06352205828912e-05, "loss": 0.1525, "step": 11190 }, { "epoch": 0.4070063231339487, "grad_norm": 5.735686779022217, "learning_rate": 4.067156043317102e-05, "loss": 0.2642, "step": 11200 }, { "epoch": 0.40736972163674684, "grad_norm": 5.192315101623535, "learning_rate": 4.070790028345083e-05, "loss": 0.1636, "step": 11210 }, { "epoch": 0.40773312013954505, "grad_norm": 2.6324477195739746, "learning_rate": 4.074424013373065e-05, "loss": 0.3451, "step": 11220 }, { "epoch": 0.4080965186423432, "grad_norm": 2.496997356414795, "learning_rate": 4.078057998401047e-05, "loss": 0.1792, "step": 11230 }, { "epoch": 0.40845991714514135, "grad_norm": 3.928255558013916, "learning_rate": 4.081691983429028e-05, "loss": 0.2203, "step": 11240 }, { 
"epoch": 0.40882331564793956, "grad_norm": 14.433273315429688, "learning_rate": 4.08532596845701e-05, "loss": 0.3283, "step": 11250 }, { "epoch": 0.4091867141507377, "grad_norm": 1.9282217025756836, "learning_rate": 4.088959953484992e-05, "loss": 0.2191, "step": 11260 }, { "epoch": 0.40955011265353586, "grad_norm": 1.8360569477081299, "learning_rate": 4.0925939385129734e-05, "loss": 0.1623, "step": 11270 }, { "epoch": 0.409913511156334, "grad_norm": 4.518060207366943, "learning_rate": 4.096227923540955e-05, "loss": 0.2036, "step": 11280 }, { "epoch": 0.4102769096591322, "grad_norm": 1.4292632341384888, "learning_rate": 4.099861908568937e-05, "loss": 0.1515, "step": 11290 }, { "epoch": 0.41064030816193037, "grad_norm": 23.795089721679688, "learning_rate": 4.1034958935969185e-05, "loss": 0.3228, "step": 11300 }, { "epoch": 0.4110037066647285, "grad_norm": 1.7721456289291382, "learning_rate": 4.1071298786249005e-05, "loss": 0.15, "step": 11310 }, { "epoch": 0.4113671051675267, "grad_norm": 3.544579029083252, "learning_rate": 4.110763863652882e-05, "loss": 0.2349, "step": 11320 }, { "epoch": 0.4117305036703249, "grad_norm": 4.25554895401001, "learning_rate": 4.1143978486808636e-05, "loss": 0.5458, "step": 11330 }, { "epoch": 0.41209390217312303, "grad_norm": 3.069894313812256, "learning_rate": 4.1180318337088456e-05, "loss": 0.2131, "step": 11340 }, { "epoch": 0.41245730067592123, "grad_norm": 5.389547348022461, "learning_rate": 4.121665818736827e-05, "loss": 0.2895, "step": 11350 }, { "epoch": 0.4128206991787194, "grad_norm": 2.308717727661133, "learning_rate": 4.125299803764809e-05, "loss": 0.209, "step": 11360 }, { "epoch": 0.41318409768151754, "grad_norm": 2.071504831314087, "learning_rate": 4.12893378879279e-05, "loss": 0.225, "step": 11370 }, { "epoch": 0.41354749618431574, "grad_norm": 10.397724151611328, "learning_rate": 4.132567773820772e-05, "loss": 0.5041, "step": 11380 }, { "epoch": 0.4139108946871139, "grad_norm": 3.3916842937469482, "learning_rate": 
4.136201758848754e-05, "loss": 0.2055, "step": 11390 }, { "epoch": 0.41427429318991205, "grad_norm": 27.703519821166992, "learning_rate": 4.139835743876735e-05, "loss": 0.3002, "step": 11400 }, { "epoch": 0.41427429318991205, "eval_loss": 0.40216270089149475, "eval_runtime": 180.385, "eval_samples_per_second": 41.101, "eval_steps_per_second": 5.139, "eval_wer": 0.22132263510447112, "step": 11400 }, { "epoch": 0.41463769169271025, "grad_norm": 3.024658203125, "learning_rate": 4.143469728904717e-05, "loss": 0.168, "step": 11410 }, { "epoch": 0.4150010901955084, "grad_norm": 2.899369478225708, "learning_rate": 4.147103713932699e-05, "loss": 3.0252, "step": 11420 }, { "epoch": 0.41536448869830656, "grad_norm": 3.960700511932373, "learning_rate": 4.15073769896068e-05, "loss": 0.1972, "step": 11430 }, { "epoch": 0.4157278872011047, "grad_norm": 1.542468786239624, "learning_rate": 4.154371683988662e-05, "loss": 0.1971, "step": 11440 }, { "epoch": 0.4160912857039029, "grad_norm": 16.871423721313477, "learning_rate": 4.158005669016644e-05, "loss": 0.2768, "step": 11450 }, { "epoch": 0.41645468420670106, "grad_norm": 3.142385721206665, "learning_rate": 4.1616396540446254e-05, "loss": 0.2173, "step": 11460 }, { "epoch": 0.4168180827094992, "grad_norm": 0.9852932095527649, "learning_rate": 4.1652736390726074e-05, "loss": 0.2529, "step": 11470 }, { "epoch": 0.4171814812122974, "grad_norm": 2.4834413528442383, "learning_rate": 4.1689076241005886e-05, "loss": 0.9175, "step": 11480 }, { "epoch": 0.4175448797150956, "grad_norm": 2.7286272048950195, "learning_rate": 4.1725416091285705e-05, "loss": 0.1864, "step": 11490 }, { "epoch": 0.4179082782178937, "grad_norm": 5.711360454559326, "learning_rate": 4.1761755941565525e-05, "loss": 0.2285, "step": 11500 }, { "epoch": 0.41827167672069193, "grad_norm": 1.160866379737854, "learning_rate": 4.179809579184534e-05, "loss": 0.1959, "step": 11510 }, { "epoch": 0.4186350752234901, "grad_norm": 2.5051305294036865, "learning_rate": 
4.183443564212516e-05, "loss": 0.185, "step": 11520 }, { "epoch": 0.41899847372628823, "grad_norm": 3.641874313354492, "learning_rate": 4.1870775492404976e-05, "loss": 0.199, "step": 11530 }, { "epoch": 0.41936187222908644, "grad_norm": 1.676038146018982, "learning_rate": 4.190711534268479e-05, "loss": 0.1895, "step": 11540 }, { "epoch": 0.4197252707318846, "grad_norm": 11.47658634185791, "learning_rate": 4.194345519296461e-05, "loss": 0.246, "step": 11550 }, { "epoch": 0.42008866923468274, "grad_norm": 1.7632570266723633, "learning_rate": 4.197979504324443e-05, "loss": 0.1761, "step": 11560 }, { "epoch": 0.4204520677374809, "grad_norm": 2.2994728088378906, "learning_rate": 4.201613489352424e-05, "loss": 0.1799, "step": 11570 }, { "epoch": 0.4208154662402791, "grad_norm": 3.964228391647339, "learning_rate": 4.205247474380406e-05, "loss": 0.7376, "step": 11580 }, { "epoch": 0.42117886474307725, "grad_norm": 1.866466760635376, "learning_rate": 4.208881459408387e-05, "loss": 0.2293, "step": 11590 }, { "epoch": 0.4215422632458754, "grad_norm": 4.722428798675537, "learning_rate": 4.212515444436369e-05, "loss": 0.2303, "step": 11600 }, { "epoch": 0.4219056617486736, "grad_norm": 2.8812968730926514, "learning_rate": 4.2161494294643504e-05, "loss": 0.1628, "step": 11610 }, { "epoch": 0.42226906025147176, "grad_norm": 8.05451488494873, "learning_rate": 4.219783414492332e-05, "loss": 0.1978, "step": 11620 }, { "epoch": 0.4226324587542699, "grad_norm": 3.4176700115203857, "learning_rate": 4.223417399520314e-05, "loss": 0.1986, "step": 11630 }, { "epoch": 0.4229958572570681, "grad_norm": 5.204764366149902, "learning_rate": 4.2270513845482955e-05, "loss": 0.1959, "step": 11640 }, { "epoch": 0.42335925575986627, "grad_norm": 6.184700965881348, "learning_rate": 4.2306853695762774e-05, "loss": 0.2822, "step": 11650 }, { "epoch": 0.4237226542626644, "grad_norm": 2.288935422897339, "learning_rate": 4.2343193546042594e-05, "loss": 0.2073, "step": 11660 }, { "epoch": 
0.42408605276546263, "grad_norm": 3.8856844902038574, "learning_rate": 4.2379533396322406e-05, "loss": 0.2134, "step": 11670 }, { "epoch": 0.4244494512682608, "grad_norm": 4.048069953918457, "learning_rate": 4.2415873246602226e-05, "loss": 0.1922, "step": 11680 }, { "epoch": 0.42481284977105893, "grad_norm": 1.466927409172058, "learning_rate": 4.2452213096882045e-05, "loss": 0.1653, "step": 11690 }, { "epoch": 0.42517624827385714, "grad_norm": 35.94015121459961, "learning_rate": 4.248855294716186e-05, "loss": 0.2398, "step": 11700 }, { "epoch": 0.4255396467766553, "grad_norm": 2.575195789337158, "learning_rate": 4.252489279744168e-05, "loss": 0.2241, "step": 11710 }, { "epoch": 0.42590304527945344, "grad_norm": 1.4232568740844727, "learning_rate": 4.2561232647721496e-05, "loss": 0.1817, "step": 11720 }, { "epoch": 0.4262664437822516, "grad_norm": 2.8543412685394287, "learning_rate": 4.259757249800131e-05, "loss": 0.2094, "step": 11730 }, { "epoch": 0.4266298422850498, "grad_norm": 0.85033118724823, "learning_rate": 4.263391234828113e-05, "loss": 0.1578, "step": 11740 }, { "epoch": 0.42699324078784795, "grad_norm": 7.382369041442871, "learning_rate": 4.267025219856095e-05, "loss": 0.2763, "step": 11750 }, { "epoch": 0.4273566392906461, "grad_norm": 1.3994635343551636, "learning_rate": 4.270659204884076e-05, "loss": 0.199, "step": 11760 }, { "epoch": 0.4277200377934443, "grad_norm": 1.4978888034820557, "learning_rate": 4.274293189912057e-05, "loss": 0.1804, "step": 11770 }, { "epoch": 0.42808343629624246, "grad_norm": 5.206210136413574, "learning_rate": 4.27792717494004e-05, "loss": 0.2483, "step": 11780 }, { "epoch": 0.4284468347990406, "grad_norm": 1.4130820035934448, "learning_rate": 4.281561159968021e-05, "loss": 0.1792, "step": 11790 }, { "epoch": 0.4288102333018388, "grad_norm": 2.60227370262146, "learning_rate": 4.2851951449960024e-05, "loss": 0.214, "step": 11800 }, { "epoch": 0.42917363180463697, "grad_norm": 1.8874465227127075, "learning_rate": 
4.288829130023985e-05, "loss": 0.1894, "step": 11810 }, { "epoch": 0.4295370303074351, "grad_norm": 2.921766519546509, "learning_rate": 4.292463115051966e-05, "loss": 0.1608, "step": 11820 }, { "epoch": 0.4299004288102333, "grad_norm": 2.812821626663208, "learning_rate": 4.2960971000799475e-05, "loss": 0.2381, "step": 11830 }, { "epoch": 0.4302638273130315, "grad_norm": 1.8063637018203735, "learning_rate": 4.2997310851079295e-05, "loss": 0.198, "step": 11840 }, { "epoch": 0.43062722581582963, "grad_norm": 16.433927536010742, "learning_rate": 4.3033650701359114e-05, "loss": 0.3015, "step": 11850 }, { "epoch": 0.4309906243186278, "grad_norm": 1.295142650604248, "learning_rate": 4.3069990551638927e-05, "loss": 0.1678, "step": 11860 }, { "epoch": 0.431354022821426, "grad_norm": 135.4871063232422, "learning_rate": 4.3106330401918746e-05, "loss": 1.8542, "step": 11870 }, { "epoch": 0.43171742132422414, "grad_norm": 2.3314764499664307, "learning_rate": 4.3142670252198565e-05, "loss": 0.1983, "step": 11880 }, { "epoch": 0.4320808198270223, "grad_norm": 1.6635117530822754, "learning_rate": 4.317901010247838e-05, "loss": 0.1737, "step": 11890 }, { "epoch": 0.4324442183298205, "grad_norm": 32.102664947509766, "learning_rate": 4.32153499527582e-05, "loss": 0.3092, "step": 11900 }, { "epoch": 0.43280761683261865, "grad_norm": 2.3491451740264893, "learning_rate": 4.3251689803038017e-05, "loss": 0.1849, "step": 11910 }, { "epoch": 0.4331710153354168, "grad_norm": 3.8088629245758057, "learning_rate": 4.328802965331783e-05, "loss": 0.2023, "step": 11920 }, { "epoch": 0.433534413838215, "grad_norm": 2.7132246494293213, "learning_rate": 4.332436950359764e-05, "loss": 0.1935, "step": 11930 }, { "epoch": 0.43389781234101316, "grad_norm": 1.2917368412017822, "learning_rate": 4.336070935387747e-05, "loss": 0.1918, "step": 11940 }, { "epoch": 0.4342612108438113, "grad_norm": 9.690601348876953, "learning_rate": 4.339704920415728e-05, "loss": 0.3059, "step": 11950 }, { "epoch": 
0.4346246093466095, "grad_norm": 1.2652380466461182, "learning_rate": 4.343338905443709e-05, "loss": 0.1587, "step": 11960 }, { "epoch": 0.43498800784940767, "grad_norm": 0.9622058272361755, "learning_rate": 4.346972890471692e-05, "loss": 0.1755, "step": 11970 }, { "epoch": 0.4353514063522058, "grad_norm": 5.316989898681641, "learning_rate": 4.350606875499673e-05, "loss": 0.1794, "step": 11980 }, { "epoch": 0.435714804855004, "grad_norm": 3.428891181945801, "learning_rate": 4.3542408605276544e-05, "loss": 0.2105, "step": 11990 }, { "epoch": 0.4360782033578022, "grad_norm": 12.879768371582031, "learning_rate": 4.357874845555637e-05, "loss": 0.2904, "step": 12000 }, { "epoch": 0.4360782033578022, "eval_loss": 0.3918191194534302, "eval_runtime": 180.0676, "eval_samples_per_second": 41.173, "eval_steps_per_second": 5.148, "eval_wer": 0.22659611160527893, "step": 12000 }, { "epoch": 0.4364416018606003, "grad_norm": 2.0471973419189453, "learning_rate": 4.361508830583618e-05, "loss": 0.2544, "step": 12010 }, { "epoch": 0.4368050003633985, "grad_norm": 1.3883107900619507, "learning_rate": 4.3651428156115995e-05, "loss": 0.1957, "step": 12020 }, { "epoch": 0.4371683988661967, "grad_norm": 1.786475419998169, "learning_rate": 4.368776800639582e-05, "loss": 0.1732, "step": 12030 }, { "epoch": 0.43753179736899483, "grad_norm": 3.3099594116210938, "learning_rate": 4.3724107856675634e-05, "loss": 0.1871, "step": 12040 }, { "epoch": 0.437895195871793, "grad_norm": 9.09699535369873, "learning_rate": 4.376044770695545e-05, "loss": 0.2745, "step": 12050 }, { "epoch": 0.4382585943745912, "grad_norm": 2.0993807315826416, "learning_rate": 4.3796787557235266e-05, "loss": 0.2076, "step": 12060 }, { "epoch": 0.43862199287738934, "grad_norm": 27.799428939819336, "learning_rate": 4.3833127407515085e-05, "loss": 0.548, "step": 12070 }, { "epoch": 0.4389853913801875, "grad_norm": 3.8897557258605957, "learning_rate": 4.38694672577949e-05, "loss": 0.1799, "step": 12080 }, { "epoch": 
0.4393487898829857, "grad_norm": 3.4620189666748047, "learning_rate": 4.390580710807472e-05, "loss": 0.1735, "step": 12090 }, { "epoch": 0.43971218838578385, "grad_norm": 9.587783813476562, "learning_rate": 4.394214695835454e-05, "loss": 0.3344, "step": 12100 }, { "epoch": 0.440075586888582, "grad_norm": 1.2581641674041748, "learning_rate": 4.397848680863435e-05, "loss": 0.1863, "step": 12110 }, { "epoch": 0.4404389853913802, "grad_norm": 1.3624401092529297, "learning_rate": 4.401482665891417e-05, "loss": 3.7692, "step": 12120 }, { "epoch": 0.44080238389417836, "grad_norm": 2.0099213123321533, "learning_rate": 4.405116650919399e-05, "loss": 0.1999, "step": 12130 }, { "epoch": 0.4411657823969765, "grad_norm": 2.7499871253967285, "learning_rate": 4.40875063594738e-05, "loss": 0.1854, "step": 12140 }, { "epoch": 0.4415291808997747, "grad_norm": 6.473042964935303, "learning_rate": 4.412384620975361e-05, "loss": 0.2843, "step": 12150 }, { "epoch": 0.44189257940257287, "grad_norm": 3.845900535583496, "learning_rate": 4.416018606003344e-05, "loss": 0.1747, "step": 12160 }, { "epoch": 0.442255977905371, "grad_norm": 1.4052759408950806, "learning_rate": 4.419652591031325e-05, "loss": 0.16, "step": 12170 }, { "epoch": 0.4426193764081692, "grad_norm": 3.5824673175811768, "learning_rate": 4.4232865760593064e-05, "loss": 0.7205, "step": 12180 }, { "epoch": 0.4429827749109674, "grad_norm": 1.237358570098877, "learning_rate": 4.426920561087289e-05, "loss": 0.2043, "step": 12190 }, { "epoch": 0.44334617341376553, "grad_norm": 11.106649398803711, "learning_rate": 4.43055454611527e-05, "loss": 0.2537, "step": 12200 }, { "epoch": 0.4437095719165637, "grad_norm": 1.4566165208816528, "learning_rate": 4.4341885311432516e-05, "loss": 0.174, "step": 12210 }, { "epoch": 0.4440729704193619, "grad_norm": 1.4067914485931396, "learning_rate": 4.4378225161712335e-05, "loss": 0.1672, "step": 12220 }, { "epoch": 0.44443636892216004, "grad_norm": 3.1289005279541016, "learning_rate": 
4.4414565011992154e-05, "loss": 0.2459, "step": 12230 }, { "epoch": 0.4447997674249582, "grad_norm": 1.2487775087356567, "learning_rate": 4.445090486227197e-05, "loss": 0.1911, "step": 12240 }, { "epoch": 0.4451631659277564, "grad_norm": 4.373108863830566, "learning_rate": 4.4487244712551786e-05, "loss": 0.265, "step": 12250 }, { "epoch": 0.44552656443055455, "grad_norm": 3.0927655696868896, "learning_rate": 4.4523584562831606e-05, "loss": 0.166, "step": 12260 }, { "epoch": 0.4458899629333527, "grad_norm": 1.4012075662612915, "learning_rate": 4.455992441311142e-05, "loss": 0.1631, "step": 12270 }, { "epoch": 0.4462533614361509, "grad_norm": 3.9944920539855957, "learning_rate": 4.459626426339124e-05, "loss": 0.2616, "step": 12280 }, { "epoch": 0.44661675993894906, "grad_norm": 2.412261962890625, "learning_rate": 4.463260411367106e-05, "loss": 0.1963, "step": 12290 }, { "epoch": 0.4469801584417472, "grad_norm": 8.601739883422852, "learning_rate": 4.466894396395087e-05, "loss": 0.3057, "step": 12300 }, { "epoch": 0.44734355694454536, "grad_norm": 2.1279587745666504, "learning_rate": 4.470528381423069e-05, "loss": 0.1931, "step": 12310 }, { "epoch": 0.44770695544734357, "grad_norm": 2.465534210205078, "learning_rate": 4.474162366451051e-05, "loss": 0.1701, "step": 12320 }, { "epoch": 0.4480703539501417, "grad_norm": 6.147269248962402, "learning_rate": 4.477796351479032e-05, "loss": 0.7176, "step": 12330 }, { "epoch": 0.44843375245293987, "grad_norm": 1.6242046356201172, "learning_rate": 4.481430336507014e-05, "loss": 0.1769, "step": 12340 }, { "epoch": 0.4487971509557381, "grad_norm": 7.065566539764404, "learning_rate": 4.485064321534995e-05, "loss": 0.2967, "step": 12350 }, { "epoch": 0.44916054945853623, "grad_norm": 1.9389359951019287, "learning_rate": 4.488698306562977e-05, "loss": 0.1853, "step": 12360 }, { "epoch": 0.4495239479613344, "grad_norm": 1.011250376701355, "learning_rate": 4.492332291590959e-05, "loss": 0.2036, "step": 12370 }, { "epoch": 
0.4498873464641326, "grad_norm": 2.459062099456787, "learning_rate": 4.4959662766189404e-05, "loss": 0.1865, "step": 12380 }, { "epoch": 0.45025074496693074, "grad_norm": 1.8472875356674194, "learning_rate": 4.499600261646922e-05, "loss": 0.2178, "step": 12390 }, { "epoch": 0.4506141434697289, "grad_norm": 40.6389045715332, "learning_rate": 4.5032342466749036e-05, "loss": 0.2506, "step": 12400 }, { "epoch": 0.4509775419725271, "grad_norm": 3.9729344844818115, "learning_rate": 4.5068682317028855e-05, "loss": 0.1917, "step": 12410 }, { "epoch": 0.45134094047532525, "grad_norm": 1.0262936353683472, "learning_rate": 4.5105022167308675e-05, "loss": 0.2115, "step": 12420 }, { "epoch": 0.4517043389781234, "grad_norm": 1.5356003046035767, "learning_rate": 4.514136201758849e-05, "loss": 0.1907, "step": 12430 }, { "epoch": 0.4520677374809216, "grad_norm": 1.3107296228408813, "learning_rate": 4.5177701867868307e-05, "loss": 0.195, "step": 12440 }, { "epoch": 0.45243113598371976, "grad_norm": 11.025674819946289, "learning_rate": 4.5214041718148126e-05, "loss": 0.2794, "step": 12450 }, { "epoch": 0.4527945344865179, "grad_norm": 1.8793771266937256, "learning_rate": 4.525038156842794e-05, "loss": 0.2143, "step": 12460 }, { "epoch": 0.45315793298931606, "grad_norm": 1.6508142948150635, "learning_rate": 4.528672141870776e-05, "loss": 0.1863, "step": 12470 }, { "epoch": 0.45352133149211427, "grad_norm": 4.942420959472656, "learning_rate": 4.532306126898758e-05, "loss": 0.1997, "step": 12480 }, { "epoch": 0.4538847299949124, "grad_norm": 3.1977925300598145, "learning_rate": 4.535940111926739e-05, "loss": 1.9163, "step": 12490 }, { "epoch": 0.45424812849771057, "grad_norm": 8.74572467803955, "learning_rate": 4.539574096954721e-05, "loss": 0.3186, "step": 12500 }, { "epoch": 0.4546115270005088, "grad_norm": 1.5346311330795288, "learning_rate": 4.543208081982702e-05, "loss": 0.1958, "step": 12510 }, { "epoch": 0.4549749255033069, "grad_norm": 1.622859239578247, "learning_rate": 
4.546842067010684e-05, "loss": 0.1828, "step": 12520 }, { "epoch": 0.4553383240061051, "grad_norm": 1.9394720792770386, "learning_rate": 4.550476052038666e-05, "loss": 0.2198, "step": 12530 }, { "epoch": 0.4557017225089033, "grad_norm": 1.8405578136444092, "learning_rate": 4.554110037066647e-05, "loss": 0.1789, "step": 12540 }, { "epoch": 0.45606512101170144, "grad_norm": 6.24867582321167, "learning_rate": 4.557744022094629e-05, "loss": 0.2593, "step": 12550 }, { "epoch": 0.4564285195144996, "grad_norm": 1.6062959432601929, "learning_rate": 4.561378007122611e-05, "loss": 0.1665, "step": 12560 }, { "epoch": 0.4567919180172978, "grad_norm": 1.1478540897369385, "learning_rate": 4.5650119921505924e-05, "loss": 0.1942, "step": 12570 }, { "epoch": 0.45715531652009594, "grad_norm": 2.0299808979034424, "learning_rate": 4.5686459771785744e-05, "loss": 0.2092, "step": 12580 }, { "epoch": 0.4575187150228941, "grad_norm": 1.6643180847167969, "learning_rate": 4.572279962206556e-05, "loss": 0.1714, "step": 12590 }, { "epoch": 0.45788211352569225, "grad_norm": 10.169012069702148, "learning_rate": 4.5759139472345376e-05, "loss": 0.3101, "step": 12600 }, { "epoch": 0.45788211352569225, "eval_loss": 0.408176064491272, "eval_runtime": 179.7843, "eval_samples_per_second": 41.238, "eval_steps_per_second": 5.156, "eval_wer": 0.24004756113057527, "step": 12600 }, { "epoch": 0.45824551202849045, "grad_norm": 1.8151092529296875, "learning_rate": 4.5795479322625195e-05, "loss": 0.1739, "step": 12610 }, { "epoch": 0.4586089105312886, "grad_norm": 1.1606543064117432, "learning_rate": 4.583181917290501e-05, "loss": 0.1781, "step": 12620 }, { "epoch": 0.45897230903408676, "grad_norm": 2.5139431953430176, "learning_rate": 4.586815902318483e-05, "loss": 0.2101, "step": 12630 }, { "epoch": 0.45933570753688496, "grad_norm": 3.1557183265686035, "learning_rate": 4.590449887346464e-05, "loss": 0.1925, "step": 12640 }, { "epoch": 0.4596991060396831, "grad_norm": 13.978137016296387, "learning_rate": 
4.594083872374446e-05, "loss": 0.3085, "step": 12650 }, { "epoch": 0.46006250454248127, "grad_norm": 1.5187938213348389, "learning_rate": 4.597717857402428e-05, "loss": 0.1909, "step": 12660 }, { "epoch": 0.46042590304527947, "grad_norm": 1.661890983581543, "learning_rate": 4.601351842430409e-05, "loss": 0.1729, "step": 12670 }, { "epoch": 0.4607893015480776, "grad_norm": 5.693175792694092, "learning_rate": 4.604985827458391e-05, "loss": 0.2069, "step": 12680 }, { "epoch": 0.4611527000508758, "grad_norm": 2.5228755474090576, "learning_rate": 4.608619812486373e-05, "loss": 0.1899, "step": 12690 }, { "epoch": 0.461516098553674, "grad_norm": 12.629317283630371, "learning_rate": 4.612253797514354e-05, "loss": 0.2441, "step": 12700 }, { "epoch": 0.46187949705647213, "grad_norm": 1.5003726482391357, "learning_rate": 4.615887782542336e-05, "loss": 0.1845, "step": 12710 }, { "epoch": 0.4622428955592703, "grad_norm": 1.596705675125122, "learning_rate": 4.619521767570318e-05, "loss": 0.1942, "step": 12720 }, { "epoch": 0.4626062940620685, "grad_norm": 4.299325466156006, "learning_rate": 4.623155752598299e-05, "loss": 0.1881, "step": 12730 }, { "epoch": 0.46296969256486664, "grad_norm": 2.242932081222534, "learning_rate": 4.626789737626281e-05, "loss": 0.1655, "step": 12740 }, { "epoch": 0.4633330910676648, "grad_norm": 17.353313446044922, "learning_rate": 4.630423722654263e-05, "loss": 0.3002, "step": 12750 }, { "epoch": 0.46369648957046294, "grad_norm": 1.8967528343200684, "learning_rate": 4.6340577076822444e-05, "loss": 0.1967, "step": 12760 }, { "epoch": 0.46405988807326115, "grad_norm": 1.9839125871658325, "learning_rate": 4.6376916927102264e-05, "loss": 0.1582, "step": 12770 }, { "epoch": 0.4644232865760593, "grad_norm": 1.8139293193817139, "learning_rate": 4.641325677738208e-05, "loss": 0.2527, "step": 12780 }, { "epoch": 0.46478668507885745, "grad_norm": 1.6944659948349, "learning_rate": 4.6449596627661896e-05, "loss": 0.1656, "step": 12790 }, { "epoch": 
0.46515008358165566, "grad_norm": 3.7842020988464355, "learning_rate": 4.648593647794171e-05, "loss": 0.2375, "step": 12800 }, { "epoch": 0.4655134820844538, "grad_norm": 1.8103773593902588, "learning_rate": 4.6522276328221534e-05, "loss": 0.1932, "step": 12810 }, { "epoch": 0.46587688058725196, "grad_norm": 1.4419440031051636, "learning_rate": 4.655861617850135e-05, "loss": 0.1808, "step": 12820 }, { "epoch": 0.46624027909005017, "grad_norm": 6.361825466156006, "learning_rate": 4.659495602878116e-05, "loss": 0.2105, "step": 12830 }, { "epoch": 0.4666036775928483, "grad_norm": 1.4687098264694214, "learning_rate": 4.663129587906098e-05, "loss": 0.18, "step": 12840 }, { "epoch": 0.46696707609564647, "grad_norm": 14.758776664733887, "learning_rate": 4.66676357293408e-05, "loss": 0.3001, "step": 12850 }, { "epoch": 0.4673304745984447, "grad_norm": 1.4836699962615967, "learning_rate": 4.670397557962061e-05, "loss": 0.1713, "step": 12860 }, { "epoch": 0.46769387310124283, "grad_norm": 4.860133171081543, "learning_rate": 4.674031542990043e-05, "loss": 0.1791, "step": 12870 }, { "epoch": 0.468057271604041, "grad_norm": 1.9861228466033936, "learning_rate": 4.677665528018025e-05, "loss": 0.2029, "step": 12880 }, { "epoch": 0.46842067010683913, "grad_norm": 1.9190025329589844, "learning_rate": 4.681299513046006e-05, "loss": 0.1611, "step": 12890 }, { "epoch": 0.46878406860963734, "grad_norm": 4.6381516456604, "learning_rate": 4.684933498073988e-05, "loss": 0.2646, "step": 12900 }, { "epoch": 0.4691474671124355, "grad_norm": 1.2092620134353638, "learning_rate": 4.68856748310197e-05, "loss": 0.1865, "step": 12910 }, { "epoch": 0.46951086561523364, "grad_norm": 2.7816121578216553, "learning_rate": 4.6922014681299513e-05, "loss": 0.2047, "step": 12920 }, { "epoch": 0.46987426411803185, "grad_norm": 0.629324734210968, "learning_rate": 4.6958354531579326e-05, "loss": 0.2404, "step": 12930 }, { "epoch": 0.47023766262083, "grad_norm": 4.156667232513428, "learning_rate": 
4.699469438185915e-05, "loss": 0.1604, "step": 12940 }, { "epoch": 0.47060106112362815, "grad_norm": 1.8534492254257202, "learning_rate": 4.7031034232138965e-05, "loss": 0.2364, "step": 12950 }, { "epoch": 0.47096445962642636, "grad_norm": 1.382408857345581, "learning_rate": 4.706737408241878e-05, "loss": 0.2078, "step": 12960 }, { "epoch": 0.4713278581292245, "grad_norm": 2.499023914337158, "learning_rate": 4.7103713932698603e-05, "loss": 0.1935, "step": 12970 }, { "epoch": 0.47169125663202266, "grad_norm": 2.726032257080078, "learning_rate": 4.7140053782978416e-05, "loss": 0.2143, "step": 12980 }, { "epoch": 0.47205465513482087, "grad_norm": 2.1388118267059326, "learning_rate": 4.717639363325823e-05, "loss": 0.1704, "step": 12990 }, { "epoch": 0.472418053637619, "grad_norm": 5.408501148223877, "learning_rate": 4.7212733483538055e-05, "loss": 0.2492, "step": 13000 }, { "epoch": 0.47278145214041717, "grad_norm": 1.8640841245651245, "learning_rate": 4.724907333381787e-05, "loss": 0.1958, "step": 13010 }, { "epoch": 0.4731448506432154, "grad_norm": 1.4251651763916016, "learning_rate": 4.728541318409768e-05, "loss": 0.1969, "step": 13020 }, { "epoch": 0.4735082491460135, "grad_norm": 2.2603137493133545, "learning_rate": 4.7321753034377506e-05, "loss": 0.1879, "step": 13030 }, { "epoch": 0.4738716476488117, "grad_norm": 1.7813081741333008, "learning_rate": 4.735809288465732e-05, "loss": 0.1627, "step": 13040 }, { "epoch": 0.47423504615160983, "grad_norm": 16.746126174926758, "learning_rate": 4.739443273493713e-05, "loss": 0.3058, "step": 13050 }, { "epoch": 0.47459844465440804, "grad_norm": 2.56193470954895, "learning_rate": 4.743077258521695e-05, "loss": 0.1729, "step": 13060 }, { "epoch": 0.4749618431572062, "grad_norm": 2.1787185668945312, "learning_rate": 4.746711243549677e-05, "loss": 0.1804, "step": 13070 }, { "epoch": 0.47532524166000434, "grad_norm": 3.385338544845581, "learning_rate": 4.750345228577658e-05, "loss": 0.1884, "step": 13080 }, { "epoch": 
0.47568864016280255, "grad_norm": 2.48083233833313, "learning_rate": 4.75397921360564e-05, "loss": 0.1728, "step": 13090 }, { "epoch": 0.4760520386656007, "grad_norm": 47.18072509765625, "learning_rate": 4.757613198633622e-05, "loss": 0.2427, "step": 13100 }, { "epoch": 0.47641543716839885, "grad_norm": 1.3267533779144287, "learning_rate": 4.7612471836616034e-05, "loss": 0.1847, "step": 13110 }, { "epoch": 0.47677883567119705, "grad_norm": 2.098389148712158, "learning_rate": 4.764881168689585e-05, "loss": 0.1682, "step": 13120 }, { "epoch": 0.4771422341739952, "grad_norm": 1.1197071075439453, "learning_rate": 4.768515153717567e-05, "loss": 0.166, "step": 13130 }, { "epoch": 0.47750563267679336, "grad_norm": 1.431281328201294, "learning_rate": 4.7721491387455485e-05, "loss": 0.3262, "step": 13140 }, { "epoch": 0.47786903117959156, "grad_norm": 15.357772827148438, "learning_rate": 4.7757831237735304e-05, "loss": 0.2906, "step": 13150 }, { "epoch": 0.4782324296823897, "grad_norm": 3.03275465965271, "learning_rate": 4.7794171088015124e-05, "loss": 0.207, "step": 13160 }, { "epoch": 0.47859582818518787, "grad_norm": 1.0988962650299072, "learning_rate": 4.7830510938294936e-05, "loss": 0.1788, "step": 13170 }, { "epoch": 0.4789592266879861, "grad_norm": 1.9456548690795898, "learning_rate": 4.786685078857475e-05, "loss": 0.2397, "step": 13180 }, { "epoch": 0.4793226251907842, "grad_norm": 1.7383311986923218, "learning_rate": 4.7903190638854575e-05, "loss": 0.1841, "step": 13190 }, { "epoch": 0.4796860236935824, "grad_norm": 5.512730121612549, "learning_rate": 4.793953048913439e-05, "loss": 0.2708, "step": 13200 }, { "epoch": 0.4796860236935824, "eval_loss": 0.3998795747756958, "eval_runtime": 180.9114, "eval_samples_per_second": 40.981, "eval_steps_per_second": 5.124, "eval_wer": 0.2369433804708915, "step": 13200 }, { "epoch": 0.4800494221963805, "grad_norm": 1.5843122005462646, "learning_rate": 4.79758703394142e-05, "loss": 1.0933, "step": 13210 }, { "epoch": 
0.48041282069917873, "grad_norm": 1.4696934223175049, "learning_rate": 4.8012210189694026e-05, "loss": 0.1771, "step": 13220 }, { "epoch": 0.4807762192019769, "grad_norm": 2.5620357990264893, "learning_rate": 4.804855003997384e-05, "loss": 0.2202, "step": 13230 }, { "epoch": 0.48113961770477504, "grad_norm": 73.08427429199219, "learning_rate": 4.808488989025365e-05, "loss": 0.2471, "step": 13240 }, { "epoch": 0.48150301620757324, "grad_norm": 7.291989803314209, "learning_rate": 4.812122974053347e-05, "loss": 0.2542, "step": 13250 }, { "epoch": 0.4818664147103714, "grad_norm": 1.7582112550735474, "learning_rate": 4.815756959081329e-05, "loss": 0.1884, "step": 13260 }, { "epoch": 0.48222981321316954, "grad_norm": 0.9253680109977722, "learning_rate": 4.81939094410931e-05, "loss": 0.1797, "step": 13270 }, { "epoch": 0.48259321171596775, "grad_norm": 8.042390823364258, "learning_rate": 4.823024929137292e-05, "loss": 0.192, "step": 13280 }, { "epoch": 0.4829566102187659, "grad_norm": 3.2288219928741455, "learning_rate": 4.826658914165274e-05, "loss": 0.2041, "step": 13290 }, { "epoch": 0.48332000872156405, "grad_norm": 7.657989978790283, "learning_rate": 4.8302928991932554e-05, "loss": 0.3034, "step": 13300 }, { "epoch": 0.48368340722436226, "grad_norm": 2.9273271560668945, "learning_rate": 4.833926884221237e-05, "loss": 0.2028, "step": 13310 }, { "epoch": 0.4840468057271604, "grad_norm": 4.2344865798950195, "learning_rate": 4.837560869249219e-05, "loss": 0.1817, "step": 13320 }, { "epoch": 0.48441020422995856, "grad_norm": 4.074464797973633, "learning_rate": 4.8411948542772005e-05, "loss": 0.2197, "step": 13330 }, { "epoch": 0.4847736027327567, "grad_norm": 1.7070029973983765, "learning_rate": 4.8448288393051825e-05, "loss": 0.2374, "step": 13340 }, { "epoch": 0.4851370012355549, "grad_norm": 2.5278494358062744, "learning_rate": 4.8484628243331644e-05, "loss": 0.265, "step": 13350 }, { "epoch": 0.4855003997383531, "grad_norm": 1.4800697565078735, "learning_rate": 
4.8520968093611456e-05, "loss": 0.1597, "step": 13360 }, { "epoch": 0.4858637982411512, "grad_norm": 1.238171935081482, "learning_rate": 4.8557307943891276e-05, "loss": 0.1862, "step": 13370 }, { "epoch": 0.48622719674394943, "grad_norm": 2.7711944580078125, "learning_rate": 4.859364779417109e-05, "loss": 0.1572, "step": 13380 }, { "epoch": 0.4865905952467476, "grad_norm": 2.386011838912964, "learning_rate": 4.862998764445091e-05, "loss": 0.1624, "step": 13390 }, { "epoch": 0.48695399374954573, "grad_norm": 10.38249397277832, "learning_rate": 4.866632749473072e-05, "loss": 0.2182, "step": 13400 }, { "epoch": 0.48731739225234394, "grad_norm": 1.1541043519973755, "learning_rate": 4.870266734501054e-05, "loss": 0.1867, "step": 13410 }, { "epoch": 0.4876807907551421, "grad_norm": 0.7680534720420837, "learning_rate": 4.873900719529036e-05, "loss": 0.1619, "step": 13420 }, { "epoch": 0.48804418925794024, "grad_norm": 2.6120142936706543, "learning_rate": 4.877534704557017e-05, "loss": 1.0657, "step": 13430 }, { "epoch": 0.48840758776073845, "grad_norm": 2.1559348106384277, "learning_rate": 4.881168689584999e-05, "loss": 0.1576, "step": 13440 }, { "epoch": 0.4887709862635366, "grad_norm": 8.222488403320312, "learning_rate": 4.884802674612981e-05, "loss": 0.2596, "step": 13450 }, { "epoch": 0.48913438476633475, "grad_norm": 1.7630010843276978, "learning_rate": 4.888436659640962e-05, "loss": 0.1755, "step": 13460 }, { "epoch": 0.48949778326913296, "grad_norm": 1.489050269126892, "learning_rate": 4.892070644668944e-05, "loss": 0.1844, "step": 13470 }, { "epoch": 0.4898611817719311, "grad_norm": 4.412111759185791, "learning_rate": 4.895704629696926e-05, "loss": 0.2114, "step": 13480 }, { "epoch": 0.49022458027472926, "grad_norm": 2.060366630554199, "learning_rate": 4.8993386147249074e-05, "loss": 0.1932, "step": 13490 }, { "epoch": 0.4905879787775274, "grad_norm": 9.488603591918945, "learning_rate": 4.9029725997528893e-05, "loss": 0.303, "step": 13500 }, { "epoch": 
0.4909513772803256, "grad_norm": 2.295671224594116, "learning_rate": 4.906606584780871e-05, "loss": 0.1583, "step": 13510 }, { "epoch": 0.49131477578312377, "grad_norm": 4.13812255859375, "learning_rate": 4.9102405698088525e-05, "loss": 1.9041, "step": 13520 }, { "epoch": 0.4916781742859219, "grad_norm": 3.7411348819732666, "learning_rate": 4.9138745548368345e-05, "loss": 0.1927, "step": 13530 }, { "epoch": 0.4920415727887201, "grad_norm": 1.523505449295044, "learning_rate": 4.917508539864816e-05, "loss": 0.1721, "step": 13540 }, { "epoch": 0.4924049712915183, "grad_norm": 8.239662170410156, "learning_rate": 4.921142524892798e-05, "loss": 0.3205, "step": 13550 }, { "epoch": 0.49276836979431643, "grad_norm": 1.8316904306411743, "learning_rate": 4.9247765099207796e-05, "loss": 0.172, "step": 13560 }, { "epoch": 0.49313176829711464, "grad_norm": 4.627805233001709, "learning_rate": 4.928410494948761e-05, "loss": 0.1731, "step": 13570 }, { "epoch": 0.4934951667999128, "grad_norm": 4.277485370635986, "learning_rate": 4.932044479976743e-05, "loss": 0.2522, "step": 13580 }, { "epoch": 0.49385856530271094, "grad_norm": 2.131641149520874, "learning_rate": 4.935678465004725e-05, "loss": 0.1766, "step": 13590 }, { "epoch": 0.49422196380550915, "grad_norm": 2.9195988178253174, "learning_rate": 4.939312450032706e-05, "loss": 0.3745, "step": 13600 }, { "epoch": 0.4945853623083073, "grad_norm": 1.5876374244689941, "learning_rate": 4.942946435060688e-05, "loss": 0.1716, "step": 13610 }, { "epoch": 0.49494876081110545, "grad_norm": 4.506389617919922, "learning_rate": 4.946580420088669e-05, "loss": 0.5847, "step": 13620 }, { "epoch": 0.4953121593139036, "grad_norm": 3.497152090072632, "learning_rate": 4.950214405116651e-05, "loss": 0.2179, "step": 13630 }, { "epoch": 0.4956755578167018, "grad_norm": 1.7728289365768433, "learning_rate": 4.953848390144633e-05, "loss": 0.165, "step": 13640 }, { "epoch": 0.49603895631949996, "grad_norm": 12.01921558380127, "learning_rate": 
4.957482375172614e-05, "loss": 0.2447, "step": 13650 }, { "epoch": 0.4964023548222981, "grad_norm": 2.5448553562164307, "learning_rate": 4.961116360200596e-05, "loss": 0.2089, "step": 13660 }, { "epoch": 0.4967657533250963, "grad_norm": 2.3643887042999268, "learning_rate": 4.9647503452285775e-05, "loss": 0.1724, "step": 13670 }, { "epoch": 0.49712915182789447, "grad_norm": 2.096191644668579, "learning_rate": 4.9683843302565594e-05, "loss": 0.1759, "step": 13680 }, { "epoch": 0.4974925503306926, "grad_norm": 0.9760168790817261, "learning_rate": 4.9720183152845414e-05, "loss": 0.1817, "step": 13690 }, { "epoch": 0.4978559488334908, "grad_norm": 3.019702434539795, "learning_rate": 4.9756523003125226e-05, "loss": 0.2275, "step": 13700 }, { "epoch": 0.498219347336289, "grad_norm": 1.0820231437683105, "learning_rate": 4.9789228868377064e-05, "loss": 2.5822, "step": 13710 }, { "epoch": 0.4985827458390871, "grad_norm": 3.2908883094787598, "learning_rate": 4.982556871865688e-05, "loss": 0.1898, "step": 13720 }, { "epoch": 0.49894614434188533, "grad_norm": 3.4303886890411377, "learning_rate": 4.98619085689367e-05, "loss": 0.2295, "step": 13730 }, { "epoch": 0.4993095428446835, "grad_norm": 1.8785525560379028, "learning_rate": 4.9898248419216515e-05, "loss": 0.1699, "step": 13740 }, { "epoch": 0.49967294134748164, "grad_norm": 7.539544105529785, "learning_rate": 4.993458826949633e-05, "loss": 0.2955, "step": 13750 }, { "epoch": 0.5000363398502798, "grad_norm": 1.6091630458831787, "learning_rate": 4.997092811977615e-05, "loss": 0.1696, "step": 13760 }, { "epoch": 0.5003997383530779, "grad_norm": 1.023695945739746, "learning_rate": 4.9999999978456776e-05, "loss": 0.1872, "step": 13770 }, { "epoch": 0.5007631368558761, "grad_norm": 8.364274978637695, "learning_rate": 4.999999922444405e-05, "loss": 0.1844, "step": 13780 }, { "epoch": 0.5011265353586744, "grad_norm": 1.7257829904556274, "learning_rate": 4.99999973932703e-05, "loss": 0.241, "step": 13790 }, { "epoch": 
0.5014899338614724, "grad_norm": 7.256163597106934, "learning_rate": 4.999999448493561e-05, "loss": 0.2714, "step": 13800 }, { "epoch": 0.5014899338614724, "eval_loss": 0.4298999607563019, "eval_runtime": 179.7223, "eval_samples_per_second": 41.253, "eval_steps_per_second": 5.158, "eval_wer": 0.22517109299834806, "step": 13800 }, { "epoch": 0.5018533323642707, "grad_norm": 3.087979316711426, "learning_rate": 4.999999049944011e-05, "loss": 0.3094, "step": 13810 }, { "epoch": 0.5022167308670689, "grad_norm": 1.7626384496688843, "learning_rate": 4.999998543678397e-05, "loss": 0.2521, "step": 13820 }, { "epoch": 0.502580129369867, "grad_norm": 2.257432699203491, "learning_rate": 4.999997929696741e-05, "loss": 0.1913, "step": 13830 }, { "epoch": 0.5029435278726652, "grad_norm": 1.7763293981552124, "learning_rate": 4.999997207999069e-05, "loss": 0.1812, "step": 13840 }, { "epoch": 0.5033069263754634, "grad_norm": 8.228759765625, "learning_rate": 4.9999963785854124e-05, "loss": 0.2953, "step": 13850 }, { "epoch": 0.5036703248782615, "grad_norm": 1.200305461883545, "learning_rate": 4.999995441455807e-05, "loss": 0.3246, "step": 13860 }, { "epoch": 0.5040337233810597, "grad_norm": 1.9264732599258423, "learning_rate": 4.999994396610292e-05, "loss": 0.1749, "step": 13870 }, { "epoch": 0.5043971218838579, "grad_norm": 2.547212839126587, "learning_rate": 4.999993244048915e-05, "loss": 0.2714, "step": 13880 }, { "epoch": 0.504760520386656, "grad_norm": 2.7918379306793213, "learning_rate": 4.999991983771723e-05, "loss": 0.1984, "step": 13890 }, { "epoch": 0.5051239188894542, "grad_norm": 16.789764404296875, "learning_rate": 4.999990615778772e-05, "loss": 0.239, "step": 13900 }, { "epoch": 0.5054873173922524, "grad_norm": 1.1825790405273438, "learning_rate": 4.9999891400701205e-05, "loss": 0.1774, "step": 13910 }, { "epoch": 0.5058507158950505, "grad_norm": 2.1524746417999268, "learning_rate": 4.999987556645832e-05, "loss": 0.1956, "step": 13920 }, { "epoch": 0.5062141143978487, 
"grad_norm": 2.8159048557281494, "learning_rate": 4.999985865505974e-05, "loss": 0.2315, "step": 13930 }, { "epoch": 0.5065775129006469, "grad_norm": 1.7412035465240479, "learning_rate": 4.99998406665062e-05, "loss": 0.2106, "step": 13940 }, { "epoch": 0.506940911403445, "grad_norm": 61.967708587646484, "learning_rate": 4.999982160079848e-05, "loss": 0.3067, "step": 13950 }, { "epoch": 0.5073043099062432, "grad_norm": 2.378682851791382, "learning_rate": 4.9999801457937404e-05, "loss": 0.21, "step": 13960 }, { "epoch": 0.5076677084090414, "grad_norm": 1.3668854236602783, "learning_rate": 4.9999780237923824e-05, "loss": 0.1529, "step": 13970 }, { "epoch": 0.5080311069118395, "grad_norm": 2.2655959129333496, "learning_rate": 4.9999757940758665e-05, "loss": 0.1747, "step": 13980 }, { "epoch": 0.5083945054146377, "grad_norm": 1.5975615978240967, "learning_rate": 4.9999734566442877e-05, "loss": 0.1728, "step": 13990 }, { "epoch": 0.5087579039174358, "grad_norm": 9.869553565979004, "learning_rate": 4.999971011497748e-05, "loss": 0.3207, "step": 14000 }, { "epoch": 0.509121302420234, "grad_norm": 2.3095829486846924, "learning_rate": 4.999968458636353e-05, "loss": 0.1872, "step": 14010 }, { "epoch": 0.5094847009230322, "grad_norm": 2.059575080871582, "learning_rate": 4.999965798060212e-05, "loss": 0.3059, "step": 14020 }, { "epoch": 0.5098480994258303, "grad_norm": 1.7138803005218506, "learning_rate": 4.9999630297694395e-05, "loss": 0.2025, "step": 14030 }, { "epoch": 0.5102114979286285, "grad_norm": 2.831191062927246, "learning_rate": 4.999960153764155e-05, "loss": 0.1685, "step": 14040 }, { "epoch": 0.5105748964314267, "grad_norm": 15.457362174987793, "learning_rate": 4.999957170044482e-05, "loss": 0.2165, "step": 14050 }, { "epoch": 0.5109382949342248, "grad_norm": 3.923633337020874, "learning_rate": 4.999954078610549e-05, "loss": 0.1888, "step": 14060 }, { "epoch": 0.511301693437023, "grad_norm": 0.8243936896324158, "learning_rate": 4.999950879462491e-05, "loss": 0.295, 
"step": 14070 }, { "epoch": 0.5116650919398212, "grad_norm": 2.921447277069092, "learning_rate": 4.9999475726004434e-05, "loss": 0.3208, "step": 14080 }, { "epoch": 0.5120284904426193, "grad_norm": 0.9395463466644287, "learning_rate": 4.99994415802455e-05, "loss": 0.1936, "step": 14090 }, { "epoch": 0.5123918889454175, "grad_norm": 11.025691986083984, "learning_rate": 4.999940635734958e-05, "loss": 0.2581, "step": 14100 }, { "epoch": 0.5127552874482157, "grad_norm": 2.2102460861206055, "learning_rate": 4.999937005731818e-05, "loss": 0.1888, "step": 14110 }, { "epoch": 0.5131186859510138, "grad_norm": 1.6075447797775269, "learning_rate": 4.9999332680152876e-05, "loss": 0.1557, "step": 14120 }, { "epoch": 0.513482084453812, "grad_norm": 3.0174403190612793, "learning_rate": 4.999929422585528e-05, "loss": 0.2137, "step": 14130 }, { "epoch": 0.5138454829566103, "grad_norm": 3.2911272048950195, "learning_rate": 4.999925469442705e-05, "loss": 0.2249, "step": 14140 }, { "epoch": 0.5142088814594084, "grad_norm": 4.0001444816589355, "learning_rate": 4.999921408586986e-05, "loss": 0.2548, "step": 14150 }, { "epoch": 0.5145722799622066, "grad_norm": 2.7695538997650146, "learning_rate": 4.9999172400185504e-05, "loss": 0.2107, "step": 14160 }, { "epoch": 0.5149356784650048, "grad_norm": 1.420189380645752, "learning_rate": 4.999912963737574e-05, "loss": 0.1887, "step": 14170 }, { "epoch": 0.5152990769678029, "grad_norm": 1.4330711364746094, "learning_rate": 4.9999085797442434e-05, "loss": 0.2295, "step": 14180 }, { "epoch": 0.5156624754706011, "grad_norm": 1.9518648386001587, "learning_rate": 4.999904088038747e-05, "loss": 0.181, "step": 14190 }, { "epoch": 0.5160258739733993, "grad_norm": 9.763446807861328, "learning_rate": 4.999899488621278e-05, "loss": 0.2163, "step": 14200 }, { "epoch": 0.5163892724761974, "grad_norm": 1.63487708568573, "learning_rate": 4.999894781492035e-05, "loss": 0.1675, "step": 14210 }, { "epoch": 0.5167526709789956, "grad_norm": 1.3337619304656982, 
"learning_rate": 4.99988996665122e-05, "loss": 1.8258, "step": 14220 }, { "epoch": 0.5171160694817938, "grad_norm": 4.741299152374268, "learning_rate": 4.9998850440990414e-05, "loss": 0.199, "step": 14230 }, { "epoch": 0.5174794679845919, "grad_norm": 2.203994035720825, "learning_rate": 4.9998800138357106e-05, "loss": 0.1666, "step": 14240 }, { "epoch": 0.5178428664873901, "grad_norm": 9.144301414489746, "learning_rate": 4.999874875861444e-05, "loss": 0.2567, "step": 14250 }, { "epoch": 0.5182062649901883, "grad_norm": 1.432627558708191, "learning_rate": 4.9998696301764644e-05, "loss": 0.1842, "step": 14260 }, { "epoch": 0.5185696634929864, "grad_norm": 1.5303106307983398, "learning_rate": 4.999864276780998e-05, "loss": 0.1726, "step": 14270 }, { "epoch": 0.5189330619957846, "grad_norm": 13.468036651611328, "learning_rate": 4.999858815675273e-05, "loss": 0.1927, "step": 14280 }, { "epoch": 0.5192964604985827, "grad_norm": 3.7133965492248535, "learning_rate": 4.999853246859526e-05, "loss": 0.1822, "step": 14290 }, { "epoch": 0.5196598590013809, "grad_norm": 10.077652931213379, "learning_rate": 4.999847570333998e-05, "loss": 0.2847, "step": 14300 }, { "epoch": 0.5200232575041791, "grad_norm": 2.3906922340393066, "learning_rate": 4.9998417860989325e-05, "loss": 0.1962, "step": 14310 }, { "epoch": 0.5203866560069772, "grad_norm": 0.8041434288024902, "learning_rate": 4.999835894154579e-05, "loss": 0.1661, "step": 14320 }, { "epoch": 0.5207500545097754, "grad_norm": 4.1071953773498535, "learning_rate": 4.99982989450119e-05, "loss": 0.2012, "step": 14330 }, { "epoch": 0.5211134530125736, "grad_norm": 0.9645094871520996, "learning_rate": 4.999823787139026e-05, "loss": 0.209, "step": 14340 }, { "epoch": 0.5214768515153717, "grad_norm": 19.18789291381836, "learning_rate": 4.9998175720683506e-05, "loss": 0.3019, "step": 14350 }, { "epoch": 0.5218402500181699, "grad_norm": 1.6560392379760742, "learning_rate": 4.999811249289429e-05, "loss": 0.1696, "step": 14360 }, { "epoch": 
0.5222036485209681, "grad_norm": 1.993741512298584, "learning_rate": 4.999804818802535e-05, "loss": 0.1895, "step": 14370 }, { "epoch": 0.5225670470237662, "grad_norm": 3.4508492946624756, "learning_rate": 4.999798280607947e-05, "loss": 0.2111, "step": 14380 }, { "epoch": 0.5229304455265644, "grad_norm": 8.431037902832031, "learning_rate": 4.999791634705944e-05, "loss": 0.1898, "step": 14390 }, { "epoch": 0.5232938440293626, "grad_norm": 10.659805297851562, "learning_rate": 4.9997848810968137e-05, "loss": 0.4744, "step": 14400 }, { "epoch": 0.5232938440293626, "eval_loss": 0.40915772318840027, "eval_runtime": 179.6286, "eval_samples_per_second": 41.274, "eval_steps_per_second": 5.161, "eval_wer": 0.2273222357361991, "step": 14400 }, { "epoch": 0.5236572425321607, "grad_norm": 1.676483392715454, "learning_rate": 4.999778019780849e-05, "loss": 0.1856, "step": 14410 }, { "epoch": 0.5240206410349589, "grad_norm": 3.4859771728515625, "learning_rate": 4.9997710507583414e-05, "loss": 0.1641, "step": 14420 }, { "epoch": 0.5243840395377571, "grad_norm": 2.583261251449585, "learning_rate": 4.999763974029595e-05, "loss": 0.2545, "step": 14430 }, { "epoch": 0.5247474380405552, "grad_norm": 2.0467324256896973, "learning_rate": 4.999756789594913e-05, "loss": 0.1974, "step": 14440 }, { "epoch": 0.5251108365433534, "grad_norm": 4.777310848236084, "learning_rate": 4.999749497454605e-05, "loss": 0.2653, "step": 14450 }, { "epoch": 0.5254742350461516, "grad_norm": 1.6312458515167236, "learning_rate": 4.999742097608984e-05, "loss": 0.1503, "step": 14460 }, { "epoch": 0.5258376335489497, "grad_norm": 1.1725629568099976, "learning_rate": 4.999734590058371e-05, "loss": 0.1636, "step": 14470 }, { "epoch": 0.526201032051748, "grad_norm": 3.2061386108398438, "learning_rate": 4.999726974803089e-05, "loss": 0.1988, "step": 14480 }, { "epoch": 0.5265644305545462, "grad_norm": 1.7078185081481934, "learning_rate": 4.9997192518434655e-05, "loss": 0.1763, "step": 14490 }, { "epoch": 
0.5269278290573443, "grad_norm": 3.5756313800811768, "learning_rate": 4.999711421179833e-05, "loss": 0.2651, "step": 14500 }, { "epoch": 0.5272912275601425, "grad_norm": 1.8054040670394897, "learning_rate": 4.99970348281253e-05, "loss": 0.1923, "step": 14510 }, { "epoch": 0.5276546260629407, "grad_norm": 2.8949921131134033, "learning_rate": 4.9996954367418976e-05, "loss": 0.1897, "step": 14520 }, { "epoch": 0.5280180245657388, "grad_norm": 2.0020744800567627, "learning_rate": 4.9996872829682825e-05, "loss": 0.2469, "step": 14530 }, { "epoch": 0.528381423068537, "grad_norm": 1.1650570631027222, "learning_rate": 4.999679021492037e-05, "loss": 0.2088, "step": 14540 }, { "epoch": 0.5287448215713352, "grad_norm": 14.624237060546875, "learning_rate": 4.999670652313516e-05, "loss": 0.2918, "step": 14550 }, { "epoch": 0.5291082200741333, "grad_norm": 1.6658445596694946, "learning_rate": 4.99966217543308e-05, "loss": 0.1936, "step": 14560 }, { "epoch": 0.5294716185769315, "grad_norm": 2.0761842727661133, "learning_rate": 4.9996535908510955e-05, "loss": 0.2318, "step": 14570 }, { "epoch": 0.5298350170797297, "grad_norm": 2.475193977355957, "learning_rate": 4.999644898567931e-05, "loss": 0.1682, "step": 14580 }, { "epoch": 0.5301984155825278, "grad_norm": 1.7537975311279297, "learning_rate": 4.9996360985839616e-05, "loss": 0.1528, "step": 14590 }, { "epoch": 0.530561814085326, "grad_norm": 6.486474990844727, "learning_rate": 4.9996271908995666e-05, "loss": 0.2571, "step": 14600 }, { "epoch": 0.5309252125881241, "grad_norm": 2.308250665664673, "learning_rate": 4.9996181755151294e-05, "loss": 0.1764, "step": 14610 }, { "epoch": 0.5312886110909223, "grad_norm": 4.871829032897949, "learning_rate": 4.999609052431039e-05, "loss": 0.3045, "step": 14620 }, { "epoch": 0.5316520095937205, "grad_norm": 2.796844959259033, "learning_rate": 4.999599821647688e-05, "loss": 0.2102, "step": 14630 }, { "epoch": 0.5320154080965186, "grad_norm": 1.9681658744812012, "learning_rate": 
4.999590483165475e-05, "loss": 0.1882, "step": 14640 }, { "epoch": 0.5323788065993168, "grad_norm": 5.858233451843262, "learning_rate": 4.9995810369848006e-05, "loss": 0.314, "step": 14650 }, { "epoch": 0.532742205102115, "grad_norm": 6.469663143157959, "learning_rate": 4.9995714831060736e-05, "loss": 0.2103, "step": 14660 }, { "epoch": 0.5331056036049131, "grad_norm": 1.8543453216552734, "learning_rate": 4.999561821529705e-05, "loss": 0.219, "step": 14670 }, { "epoch": 0.5334690021077113, "grad_norm": 2.222320318222046, "learning_rate": 4.99955205225611e-05, "loss": 0.1879, "step": 14680 }, { "epoch": 0.5338324006105095, "grad_norm": 5.018227577209473, "learning_rate": 4.999542175285711e-05, "loss": 0.1437, "step": 14690 }, { "epoch": 0.5341957991133076, "grad_norm": 6.225541114807129, "learning_rate": 4.999532190618933e-05, "loss": 0.268, "step": 14700 }, { "epoch": 0.5345591976161058, "grad_norm": 1.8122676610946655, "learning_rate": 4.999522098256206e-05, "loss": 0.1644, "step": 14710 }, { "epoch": 0.534922596118904, "grad_norm": 2.4057557582855225, "learning_rate": 4.999511898197966e-05, "loss": 0.1663, "step": 14720 }, { "epoch": 0.5352859946217021, "grad_norm": 1.756697416305542, "learning_rate": 4.9995015904446513e-05, "loss": 0.1771, "step": 14730 }, { "epoch": 0.5356493931245003, "grad_norm": 1.5457457304000854, "learning_rate": 4.999491174996706e-05, "loss": 0.1889, "step": 14740 }, { "epoch": 0.5360127916272985, "grad_norm": 3.108682155609131, "learning_rate": 4.999480651854579e-05, "loss": 0.2063, "step": 14750 }, { "epoch": 0.5363761901300966, "grad_norm": 2.2037875652313232, "learning_rate": 4.9994700210187246e-05, "loss": 0.1579, "step": 14760 }, { "epoch": 0.5367395886328948, "grad_norm": 1.2102454900741577, "learning_rate": 4.9994592824895994e-05, "loss": 0.2361, "step": 14770 }, { "epoch": 0.537102987135693, "grad_norm": 6.5722455978393555, "learning_rate": 4.999448436267667e-05, "loss": 0.2165, "step": 14780 }, { "epoch": 0.5374663856384911, 
"grad_norm": 1.606378197669983, "learning_rate": 4.999437482353395e-05, "loss": 0.1642, "step": 14790 }, { "epoch": 0.5378297841412893, "grad_norm": 24.709177017211914, "learning_rate": 4.999426420747255e-05, "loss": 0.2628, "step": 14800 }, { "epoch": 0.5381931826440876, "grad_norm": 2.543760299682617, "learning_rate": 4.999415251449723e-05, "loss": 0.1883, "step": 14810 }, { "epoch": 0.5385565811468856, "grad_norm": 2.0813279151916504, "learning_rate": 4.999403974461281e-05, "loss": 0.1842, "step": 14820 }, { "epoch": 0.5389199796496839, "grad_norm": 4.744104385375977, "learning_rate": 4.9993925897824144e-05, "loss": 0.1981, "step": 14830 }, { "epoch": 0.5392833781524821, "grad_norm": 3.2407493591308594, "learning_rate": 4.9993810974136146e-05, "loss": 0.2169, "step": 14840 }, { "epoch": 0.5396467766552802, "grad_norm": 13.33681869506836, "learning_rate": 4.999369497355375e-05, "loss": 0.2775, "step": 14850 }, { "epoch": 0.5400101751580784, "grad_norm": 2.3192784786224365, "learning_rate": 4.9993577896081975e-05, "loss": 0.1987, "step": 14860 }, { "epoch": 0.5403735736608766, "grad_norm": 1.6611911058425903, "learning_rate": 4.999345974172586e-05, "loss": 0.188, "step": 14870 }, { "epoch": 0.5407369721636747, "grad_norm": 4.368532180786133, "learning_rate": 4.9993340510490485e-05, "loss": 0.2201, "step": 14880 }, { "epoch": 0.5411003706664729, "grad_norm": 1.4825586080551147, "learning_rate": 4.999322020238099e-05, "loss": 0.185, "step": 14890 }, { "epoch": 0.541463769169271, "grad_norm": 4.346343994140625, "learning_rate": 4.9993098817402564e-05, "loss": 0.2415, "step": 14900 }, { "epoch": 0.5418271676720692, "grad_norm": 1.0175251960754395, "learning_rate": 4.999297635556044e-05, "loss": 0.1991, "step": 14910 }, { "epoch": 0.5421905661748674, "grad_norm": 1.600205421447754, "learning_rate": 4.999285281685989e-05, "loss": 0.1706, "step": 14920 }, { "epoch": 0.5425539646776655, "grad_norm": 4.332497596740723, "learning_rate": 4.999272820130623e-05, "loss": 
0.1964, "step": 14930 }, { "epoch": 0.5429173631804637, "grad_norm": 2.0384531021118164, "learning_rate": 4.999260250890484e-05, "loss": 0.1571, "step": 14940 }, { "epoch": 0.5432807616832619, "grad_norm": 11.780756950378418, "learning_rate": 4.999247573966114e-05, "loss": 0.319, "step": 14950 }, { "epoch": 0.54364416018606, "grad_norm": 2.7058663368225098, "learning_rate": 4.999234789358057e-05, "loss": 0.2009, "step": 14960 }, { "epoch": 0.5440075586888582, "grad_norm": 1.966780662536621, "learning_rate": 4.999221897066866e-05, "loss": 0.177, "step": 14970 }, { "epoch": 0.5443709571916564, "grad_norm": 2.2129642963409424, "learning_rate": 4.999208897093096e-05, "loss": 0.2472, "step": 14980 }, { "epoch": 0.5447343556944545, "grad_norm": 2.726358652114868, "learning_rate": 4.9991957894373064e-05, "loss": 0.2239, "step": 14990 }, { "epoch": 0.5450977541972527, "grad_norm": 28.577600479125977, "learning_rate": 4.999182574100063e-05, "loss": 0.2524, "step": 15000 }, { "epoch": 0.5450977541972527, "eval_loss": 0.3972287178039551, "eval_runtime": 180.8086, "eval_samples_per_second": 41.005, "eval_steps_per_second": 5.127, "eval_wer": 0.2289560150307695, "step": 15000 }, { "epoch": 0.5454611527000509, "grad_norm": 1.9243866205215454, "learning_rate": 4.9991692510819335e-05, "loss": 0.1679, "step": 15010 }, { "epoch": 0.545824551202849, "grad_norm": 1.3926585912704468, "learning_rate": 4.9991558203834944e-05, "loss": 0.1933, "step": 15020 }, { "epoch": 0.5461879497056472, "grad_norm": 5.275027751922607, "learning_rate": 4.999142282005322e-05, "loss": 0.2838, "step": 15030 }, { "epoch": 0.5465513482084454, "grad_norm": 2.142784357070923, "learning_rate": 4.999128635948e-05, "loss": 0.1754, "step": 15040 }, { "epoch": 0.5469147467112435, "grad_norm": 40.32966995239258, "learning_rate": 4.999114882212119e-05, "loss": 0.323, "step": 15050 }, { "epoch": 0.5472781452140417, "grad_norm": 1.056662678718567, "learning_rate": 4.999101020798268e-05, "loss": 0.1462, "step": 15060 }, 
{ "epoch": 0.5476415437168399, "grad_norm": 3.7527568340301514, "learning_rate": 4.9990870517070464e-05, "loss": 0.2106, "step": 15070 }, { "epoch": 0.548004942219638, "grad_norm": 3.396487236022949, "learning_rate": 4.9990729749390555e-05, "loss": 0.1995, "step": 15080 }, { "epoch": 0.5483683407224362, "grad_norm": 1.650519609451294, "learning_rate": 4.999058790494902e-05, "loss": 0.195, "step": 15090 }, { "epoch": 0.5487317392252344, "grad_norm": 16.096418380737305, "learning_rate": 4.9990444983751975e-05, "loss": 0.2705, "step": 15100 }, { "epoch": 0.5490951377280325, "grad_norm": 1.273149847984314, "learning_rate": 4.999030098580556e-05, "loss": 0.2216, "step": 15110 }, { "epoch": 0.5494585362308307, "grad_norm": 1.5414496660232544, "learning_rate": 4.9990155911115995e-05, "loss": 0.1876, "step": 15120 }, { "epoch": 0.549821934733629, "grad_norm": 4.707805633544922, "learning_rate": 4.9990009759689524e-05, "loss": 0.1895, "step": 15130 }, { "epoch": 0.550185333236427, "grad_norm": 2.033162832260132, "learning_rate": 4.9989862531532456e-05, "loss": 0.1705, "step": 15140 }, { "epoch": 0.5505487317392252, "grad_norm": 7.349232196807861, "learning_rate": 4.998971422665112e-05, "loss": 0.2815, "step": 15150 }, { "epoch": 0.5509121302420235, "grad_norm": 1.293078064918518, "learning_rate": 4.9989564845051915e-05, "loss": 0.1789, "step": 15160 }, { "epoch": 0.5512755287448216, "grad_norm": 1.7343147993087769, "learning_rate": 4.998941438674127e-05, "loss": 0.1781, "step": 15170 }, { "epoch": 0.5516389272476198, "grad_norm": 2.440030574798584, "learning_rate": 4.9989262851725674e-05, "loss": 0.1927, "step": 15180 }, { "epoch": 0.5520023257504179, "grad_norm": 2.276111364364624, "learning_rate": 4.998911024001165e-05, "loss": 0.1774, "step": 15190 }, { "epoch": 0.5523657242532161, "grad_norm": 9.360533714294434, "learning_rate": 4.9988956551605783e-05, "loss": 0.2761, "step": 15200 }, { "epoch": 0.5527291227560143, "grad_norm": 3.8025522232055664, "learning_rate": 
4.998880178651468e-05, "loss": 0.2855, "step": 15210 }, { "epoch": 0.5530925212588124, "grad_norm": 3.816631555557251, "learning_rate": 4.998864594474503e-05, "loss": 0.1559, "step": 15220 }, { "epoch": 0.5534559197616106, "grad_norm": 3.2255067825317383, "learning_rate": 4.998848902630353e-05, "loss": 0.1632, "step": 15230 }, { "epoch": 0.5538193182644088, "grad_norm": 1.077268123626709, "learning_rate": 4.9988331031196944e-05, "loss": 0.1969, "step": 15240 }, { "epoch": 0.5541827167672069, "grad_norm": 5.657801151275635, "learning_rate": 4.998817195943209e-05, "loss": 0.2361, "step": 15250 }, { "epoch": 0.5545461152700051, "grad_norm": 1.180039882659912, "learning_rate": 4.998801181101581e-05, "loss": 0.1779, "step": 15260 }, { "epoch": 0.5549095137728033, "grad_norm": 2.12725830078125, "learning_rate": 4.998785058595501e-05, "loss": 0.1505, "step": 15270 }, { "epoch": 0.5552729122756014, "grad_norm": 2.0784361362457275, "learning_rate": 4.998768828425664e-05, "loss": 0.2221, "step": 15280 }, { "epoch": 0.5556363107783996, "grad_norm": 2.0133538246154785, "learning_rate": 4.998752490592768e-05, "loss": 0.1759, "step": 15290 }, { "epoch": 0.5559997092811978, "grad_norm": 3.3181140422821045, "learning_rate": 4.998736045097518e-05, "loss": 0.229, "step": 15300 }, { "epoch": 0.5563631077839959, "grad_norm": 1.2881536483764648, "learning_rate": 4.998719491940622e-05, "loss": 0.1928, "step": 15310 }, { "epoch": 0.5567265062867941, "grad_norm": 1.0155376195907593, "learning_rate": 4.998702831122794e-05, "loss": 0.1986, "step": 15320 }, { "epoch": 0.5570899047895923, "grad_norm": 7.5557661056518555, "learning_rate": 4.998686062644752e-05, "loss": 0.2317, "step": 15330 }, { "epoch": 0.5574533032923904, "grad_norm": 2.3196377754211426, "learning_rate": 4.9986691865072176e-05, "loss": 0.1827, "step": 15340 }, { "epoch": 0.5578167017951886, "grad_norm": 25.910188674926758, "learning_rate": 4.998652202710918e-05, "loss": 0.2824, "step": 15350 }, { "epoch": 0.5581801002979868, 
"grad_norm": 1.0091907978057861, "learning_rate": 4.9986351112565846e-05, "loss": 0.1946, "step": 15360 }, { "epoch": 0.5585434988007849, "grad_norm": 3.0022408962249756, "learning_rate": 4.998617912144956e-05, "loss": 0.2028, "step": 15370 }, { "epoch": 0.5589068973035831, "grad_norm": 2.9837419986724854, "learning_rate": 4.99860060537677e-05, "loss": 0.203, "step": 15380 }, { "epoch": 0.5592702958063813, "grad_norm": 2.238867998123169, "learning_rate": 4.9985831909527746e-05, "loss": 0.1392, "step": 15390 }, { "epoch": 0.5596336943091794, "grad_norm": 3.8585119247436523, "learning_rate": 4.9985656688737205e-05, "loss": 0.2289, "step": 15400 }, { "epoch": 0.5599970928119776, "grad_norm": 2.4951331615448, "learning_rate": 4.998548039140361e-05, "loss": 0.1852, "step": 15410 }, { "epoch": 0.5603604913147758, "grad_norm": 1.8404667377471924, "learning_rate": 4.998530301753455e-05, "loss": 0.1813, "step": 15420 }, { "epoch": 0.5607238898175739, "grad_norm": 2.615247964859009, "learning_rate": 4.9985124567137695e-05, "loss": 0.228, "step": 15430 }, { "epoch": 0.5610872883203721, "grad_norm": 1.2074272632598877, "learning_rate": 4.9984945040220715e-05, "loss": 0.1879, "step": 15440 }, { "epoch": 0.5614506868231703, "grad_norm": 38.466712951660156, "learning_rate": 4.9984764436791355e-05, "loss": 0.3965, "step": 15450 }, { "epoch": 0.5618140853259684, "grad_norm": 1.4196547269821167, "learning_rate": 4.998458275685739e-05, "loss": 0.2061, "step": 15460 }, { "epoch": 0.5621774838287666, "grad_norm": 1.2451281547546387, "learning_rate": 4.998440000042664e-05, "loss": 0.2118, "step": 15470 }, { "epoch": 0.5625408823315647, "grad_norm": 3.7021896839141846, "learning_rate": 4.9984216167507005e-05, "loss": 0.2294, "step": 15480 }, { "epoch": 0.562904280834363, "grad_norm": 2.8826780319213867, "learning_rate": 4.998403125810638e-05, "loss": 0.1654, "step": 15490 }, { "epoch": 0.5632676793371612, "grad_norm": 8.366926193237305, "learning_rate": 4.998384527223274e-05, "loss": 
0.2467, "step": 15500 }, { "epoch": 0.5636310778399592, "grad_norm": 2.2532148361206055, "learning_rate": 4.99836582098941e-05, "loss": 0.2569, "step": 15510 }, { "epoch": 0.5639944763427575, "grad_norm": 2.164987325668335, "learning_rate": 4.998347007109853e-05, "loss": 0.2167, "step": 15520 }, { "epoch": 0.5643578748455557, "grad_norm": 4.651108264923096, "learning_rate": 4.998328085585411e-05, "loss": 0.2138, "step": 15530 }, { "epoch": 0.5647212733483538, "grad_norm": 1.5128902196884155, "learning_rate": 4.9983090564169024e-05, "loss": 0.1821, "step": 15540 }, { "epoch": 0.565084671851152, "grad_norm": 8.516124725341797, "learning_rate": 4.998289919605145e-05, "loss": 0.2546, "step": 15550 }, { "epoch": 0.5654480703539502, "grad_norm": 1.6480666399002075, "learning_rate": 4.9982706751509635e-05, "loss": 0.2069, "step": 15560 }, { "epoch": 0.5658114688567483, "grad_norm": 1.3768938779830933, "learning_rate": 4.998251323055187e-05, "loss": 0.1775, "step": 15570 }, { "epoch": 0.5661748673595465, "grad_norm": 1.8793795108795166, "learning_rate": 4.998231863318651e-05, "loss": 0.14, "step": 15580 }, { "epoch": 0.5665382658623447, "grad_norm": 1.2361701726913452, "learning_rate": 4.9982122959421924e-05, "loss": 0.1797, "step": 15590 }, { "epoch": 0.5669016643651428, "grad_norm": 14.16727352142334, "learning_rate": 4.998192620926655e-05, "loss": 0.3523, "step": 15600 }, { "epoch": 0.5669016643651428, "eval_loss": 0.40661031007766724, "eval_runtime": 180.2598, "eval_samples_per_second": 41.13, "eval_steps_per_second": 5.143, "eval_wer": 0.21753771307204967, "step": 15600 }, { "epoch": 0.567265062867941, "grad_norm": 2.460245370864868, "learning_rate": 4.9981728382728855e-05, "loss": 0.1824, "step": 15610 }, { "epoch": 0.5676284613707392, "grad_norm": 1.603381633758545, "learning_rate": 4.9981529479817366e-05, "loss": 0.1506, "step": 15620 }, { "epoch": 0.5679918598735373, "grad_norm": 3.650087356567383, "learning_rate": 4.9981329500540664e-05, "loss": 0.2351, "step": 
15630 }, { "epoch": 0.5683552583763355, "grad_norm": 2.2338075637817383, "learning_rate": 4.9981128444907354e-05, "loss": 0.1785, "step": 15640 }, { "epoch": 0.5687186568791337, "grad_norm": 7.641642093658447, "learning_rate": 4.998092631292611e-05, "loss": 0.2816, "step": 15650 }, { "epoch": 0.5690820553819318, "grad_norm": 1.5877048969268799, "learning_rate": 4.998072310460562e-05, "loss": 0.1784, "step": 15660 }, { "epoch": 0.56944545388473, "grad_norm": 3.5917787551879883, "learning_rate": 4.998051881995466e-05, "loss": 0.1685, "step": 15670 }, { "epoch": 0.5698088523875282, "grad_norm": 6.459184169769287, "learning_rate": 4.998031345898203e-05, "loss": 0.2031, "step": 15680 }, { "epoch": 0.5701722508903263, "grad_norm": 2.7518184185028076, "learning_rate": 4.9980107021696565e-05, "loss": 0.168, "step": 15690 }, { "epoch": 0.5705356493931245, "grad_norm": 9.814598083496094, "learning_rate": 4.997989950810718e-05, "loss": 0.2778, "step": 15700 }, { "epoch": 0.5708990478959227, "grad_norm": 2.0985398292541504, "learning_rate": 4.9979690918222785e-05, "loss": 0.1864, "step": 15710 }, { "epoch": 0.5712624463987208, "grad_norm": 1.9264591932296753, "learning_rate": 4.997948125205241e-05, "loss": 0.1682, "step": 15720 }, { "epoch": 0.571625844901519, "grad_norm": 4.2961955070495605, "learning_rate": 4.997927050960505e-05, "loss": 0.198, "step": 15730 }, { "epoch": 0.5719892434043172, "grad_norm": 4.524483680725098, "learning_rate": 4.99790586908898e-05, "loss": 0.3235, "step": 15740 }, { "epoch": 0.5723526419071153, "grad_norm": 5.259559154510498, "learning_rate": 4.997884579591578e-05, "loss": 0.335, "step": 15750 }, { "epoch": 0.5727160404099135, "grad_norm": 1.7875639200210571, "learning_rate": 4.997863182469219e-05, "loss": 0.1674, "step": 15760 }, { "epoch": 0.5730794389127116, "grad_norm": 1.1852960586547852, "learning_rate": 4.9978416777228216e-05, "loss": 0.1968, "step": 15770 }, { "epoch": 0.5734428374155098, "grad_norm": 1.253061294555664, "learning_rate": 
4.997820065353314e-05, "loss": 0.2177, "step": 15780 }, { "epoch": 0.573806235918308, "grad_norm": 2.0577871799468994, "learning_rate": 4.9977983453616266e-05, "loss": 0.1498, "step": 15790 }, { "epoch": 0.5741696344211061, "grad_norm": 7.4168901443481445, "learning_rate": 4.997776517748696e-05, "loss": 0.3137, "step": 15800 }, { "epoch": 0.5745330329239043, "grad_norm": 2.9957845211029053, "learning_rate": 4.9977545825154625e-05, "loss": 0.1819, "step": 15810 }, { "epoch": 0.5748964314267025, "grad_norm": 1.251610517501831, "learning_rate": 4.997732539662871e-05, "loss": 0.1633, "step": 15820 }, { "epoch": 0.5752598299295006, "grad_norm": 3.229581594467163, "learning_rate": 4.997710389191871e-05, "loss": 0.1888, "step": 15830 }, { "epoch": 0.5756232284322989, "grad_norm": 1.2718089818954468, "learning_rate": 4.997688131103417e-05, "loss": 0.1938, "step": 15840 }, { "epoch": 0.5759866269350971, "grad_norm": 4.77078104019165, "learning_rate": 4.9976657653984694e-05, "loss": 0.2311, "step": 15850 }, { "epoch": 0.5763500254378952, "grad_norm": 1.9487907886505127, "learning_rate": 4.9976432920779904e-05, "loss": 0.7679, "step": 15860 }, { "epoch": 0.5767134239406934, "grad_norm": 2.1322100162506104, "learning_rate": 4.997620711142948e-05, "loss": 0.204, "step": 15870 }, { "epoch": 0.5770768224434916, "grad_norm": 3.0756008625030518, "learning_rate": 4.997598022594316e-05, "loss": 0.205, "step": 15880 }, { "epoch": 0.5774402209462897, "grad_norm": 2.2399511337280273, "learning_rate": 4.997575226433071e-05, "loss": 0.183, "step": 15890 }, { "epoch": 0.5778036194490879, "grad_norm": 4.17095947265625, "learning_rate": 4.997552322660197e-05, "loss": 0.2209, "step": 15900 }, { "epoch": 0.5781670179518861, "grad_norm": 1.7085528373718262, "learning_rate": 4.9975293112766794e-05, "loss": 0.4196, "step": 15910 }, { "epoch": 0.5785304164546842, "grad_norm": 1.6818984746932983, "learning_rate": 4.99750619228351e-05, "loss": 0.1592, "step": 15920 }, { "epoch": 0.5788938149574824, 
"grad_norm": 2.526503324508667, "learning_rate": 4.9974829656816846e-05, "loss": 0.7523, "step": 15930 }, { "epoch": 0.5792572134602806, "grad_norm": 4.456855297088623, "learning_rate": 4.997459631472205e-05, "loss": 0.1664, "step": 15940 }, { "epoch": 0.5796206119630787, "grad_norm": 28.427839279174805, "learning_rate": 4.9974361896560746e-05, "loss": 0.2891, "step": 15950 }, { "epoch": 0.5799840104658769, "grad_norm": 1.6720882654190063, "learning_rate": 4.997412640234306e-05, "loss": 0.1522, "step": 15960 }, { "epoch": 0.5803474089686751, "grad_norm": 1.6327390670776367, "learning_rate": 4.997388983207911e-05, "loss": 0.1957, "step": 15970 }, { "epoch": 0.5807108074714732, "grad_norm": 1.5792416334152222, "learning_rate": 4.997365218577912e-05, "loss": 0.2325, "step": 15980 }, { "epoch": 0.5810742059742714, "grad_norm": 1.7585738897323608, "learning_rate": 4.9973413463453305e-05, "loss": 0.2023, "step": 15990 }, { "epoch": 0.5814376044770696, "grad_norm": 8.14810562133789, "learning_rate": 4.997317366511196e-05, "loss": 0.2093, "step": 16000 }, { "epoch": 0.5818010029798677, "grad_norm": 3.1430416107177734, "learning_rate": 4.997293279076543e-05, "loss": 0.1742, "step": 16010 }, { "epoch": 0.5821644014826659, "grad_norm": 1.9447312355041504, "learning_rate": 4.997269084042406e-05, "loss": 0.1852, "step": 16020 }, { "epoch": 0.5825277999854641, "grad_norm": 2.1479732990264893, "learning_rate": 4.997244781409831e-05, "loss": 0.2197, "step": 16030 }, { "epoch": 0.5828911984882622, "grad_norm": 3.7066800594329834, "learning_rate": 4.9972203711798625e-05, "loss": 0.1899, "step": 16040 }, { "epoch": 0.5832545969910604, "grad_norm": 4.3598432540893555, "learning_rate": 4.9971958533535544e-05, "loss": 0.237, "step": 16050 }, { "epoch": 0.5836179954938585, "grad_norm": 3.410356283187866, "learning_rate": 4.997171227931962e-05, "loss": 0.1985, "step": 16060 }, { "epoch": 0.5839813939966567, "grad_norm": 1.6299129724502563, "learning_rate": 4.9971464949161454e-05, "loss": 
0.1758, "step": 16070 }, { "epoch": 0.5843447924994549, "grad_norm": 1.993067979812622, "learning_rate": 4.9971216543071716e-05, "loss": 0.1822, "step": 16080 }, { "epoch": 0.584708191002253, "grad_norm": 1.2057979106903076, "learning_rate": 4.9970967061061104e-05, "loss": 0.1953, "step": 16090 }, { "epoch": 0.5850715895050512, "grad_norm": 34.54500961303711, "learning_rate": 4.997071650314037e-05, "loss": 0.277, "step": 16100 }, { "epoch": 0.5854349880078494, "grad_norm": 1.243656039237976, "learning_rate": 4.997046486932031e-05, "loss": 0.172, "step": 16110 }, { "epoch": 0.5857983865106475, "grad_norm": 0.6155187487602234, "learning_rate": 4.997021215961176e-05, "loss": 0.1523, "step": 16120 }, { "epoch": 0.5861617850134457, "grad_norm": 2.0203208923339844, "learning_rate": 4.9969958374025615e-05, "loss": 0.1857, "step": 16130 }, { "epoch": 0.5865251835162439, "grad_norm": 1.1912654638290405, "learning_rate": 4.9969703512572805e-05, "loss": 0.2169, "step": 16140 }, { "epoch": 0.586888582019042, "grad_norm": 3.528538227081299, "learning_rate": 4.9969447575264315e-05, "loss": 0.2428, "step": 16150 }, { "epoch": 0.5872519805218402, "grad_norm": 0.9166990518569946, "learning_rate": 4.996919056211117e-05, "loss": 0.2206, "step": 16160 }, { "epoch": 0.5876153790246385, "grad_norm": 1.4956426620483398, "learning_rate": 4.996893247312444e-05, "loss": 0.168, "step": 16170 }, { "epoch": 0.5879787775274365, "grad_norm": 1.4502993822097778, "learning_rate": 4.996867330831526e-05, "loss": 0.1767, "step": 16180 }, { "epoch": 0.5883421760302348, "grad_norm": 0.9337482452392578, "learning_rate": 4.9968413067694775e-05, "loss": 0.2035, "step": 16190 }, { "epoch": 0.588705574533033, "grad_norm": 13.500269889831543, "learning_rate": 4.996815175127422e-05, "loss": 0.2335, "step": 16200 }, { "epoch": 0.588705574533033, "eval_loss": 0.4428017735481262, "eval_runtime": 180.2159, "eval_samples_per_second": 41.14, "eval_steps_per_second": 5.144, "eval_wer": 0.22281118957285748, "step": 
16200 }, { "epoch": 0.5890689730358311, "grad_norm": 5.091770648956299, "learning_rate": 4.996788935906483e-05, "loss": 0.1884, "step": 16210 }, { "epoch": 0.5894323715386293, "grad_norm": 2.379033327102661, "learning_rate": 4.996762589107793e-05, "loss": 0.196, "step": 16220 }, { "epoch": 0.5897957700414275, "grad_norm": 2.576484203338623, "learning_rate": 4.996736134732487e-05, "loss": 0.2069, "step": 16230 }, { "epoch": 0.5901591685442256, "grad_norm": 1.4235923290252686, "learning_rate": 4.9967095727817035e-05, "loss": 0.1905, "step": 16240 }, { "epoch": 0.5905225670470238, "grad_norm": 7.119918346405029, "learning_rate": 4.9966829032565886e-05, "loss": 0.2803, "step": 16250 }, { "epoch": 0.590885965549822, "grad_norm": 1.1050286293029785, "learning_rate": 4.99665612615829e-05, "loss": 0.1914, "step": 16260 }, { "epoch": 0.5912493640526201, "grad_norm": 1.403601884841919, "learning_rate": 4.9966292414879625e-05, "loss": 0.1649, "step": 16270 }, { "epoch": 0.5916127625554183, "grad_norm": 5.439052104949951, "learning_rate": 4.9966022492467635e-05, "loss": 0.1897, "step": 16280 }, { "epoch": 0.5919761610582165, "grad_norm": 1.0014379024505615, "learning_rate": 4.996575149435857e-05, "loss": 0.1472, "step": 16290 }, { "epoch": 0.5923395595610146, "grad_norm": 9.480517387390137, "learning_rate": 4.99654794205641e-05, "loss": 0.2351, "step": 16300 }, { "epoch": 0.5927029580638128, "grad_norm": 0.8406987190246582, "learning_rate": 4.9965206271095955e-05, "loss": 0.1795, "step": 16310 }, { "epoch": 0.593066356566611, "grad_norm": 1.378169298171997, "learning_rate": 4.996493204596589e-05, "loss": 0.1597, "step": 16320 }, { "epoch": 0.5934297550694091, "grad_norm": 3.9748549461364746, "learning_rate": 4.996465674518573e-05, "loss": 0.2264, "step": 16330 }, { "epoch": 0.5937931535722073, "grad_norm": 2.2626171112060547, "learning_rate": 4.996438036876734e-05, "loss": 0.1647, "step": 16340 }, { "epoch": 0.5941565520750055, "grad_norm": 3.8039205074310303, "learning_rate": 
4.996410291672262e-05, "loss": 0.2204, "step": 16350 }, { "epoch": 0.5945199505778036, "grad_norm": 1.5219416618347168, "learning_rate": 4.996382438906353e-05, "loss": 0.1518, "step": 16360 }, { "epoch": 0.5948833490806018, "grad_norm": 1.4811570644378662, "learning_rate": 4.9963544785802064e-05, "loss": 0.2006, "step": 16370 }, { "epoch": 0.5952467475833999, "grad_norm": 4.7030558586120605, "learning_rate": 4.996326410695028e-05, "loss": 0.2524, "step": 16380 }, { "epoch": 0.5956101460861981, "grad_norm": 1.103624939918518, "learning_rate": 4.996298235252026e-05, "loss": 0.1558, "step": 16390 }, { "epoch": 0.5959735445889963, "grad_norm": 4.654818534851074, "learning_rate": 4.996269952252415e-05, "loss": 0.2746, "step": 16400 }, { "epoch": 0.5963369430917944, "grad_norm": 1.6746747493743896, "learning_rate": 4.996241561697413e-05, "loss": 0.1838, "step": 16410 }, { "epoch": 0.5967003415945926, "grad_norm": 3.1955924034118652, "learning_rate": 4.996213063588245e-05, "loss": 0.1773, "step": 16420 }, { "epoch": 0.5970637400973908, "grad_norm": 1.782669186592102, "learning_rate": 4.996184457926137e-05, "loss": 0.1939, "step": 16430 }, { "epoch": 0.5974271386001889, "grad_norm": 1.2277849912643433, "learning_rate": 4.996155744712322e-05, "loss": 0.1724, "step": 16440 }, { "epoch": 0.5977905371029871, "grad_norm": 25.578798294067383, "learning_rate": 4.996126923948038e-05, "loss": 0.2612, "step": 16450 }, { "epoch": 0.5981539356057853, "grad_norm": 0.984426736831665, "learning_rate": 4.9960979956345254e-05, "loss": 0.1621, "step": 16460 }, { "epoch": 0.5985173341085834, "grad_norm": 2.1299145221710205, "learning_rate": 4.9960689597730315e-05, "loss": 0.161, "step": 16470 }, { "epoch": 0.5988807326113816, "grad_norm": 2.6153085231781006, "learning_rate": 4.996039816364807e-05, "loss": 0.2122, "step": 16480 }, { "epoch": 0.5992441311141798, "grad_norm": 4.464552879333496, "learning_rate": 4.996010565411108e-05, "loss": 0.2417, "step": 16490 }, { "epoch": 
0.5996075296169779, "grad_norm": 26.441349029541016, "learning_rate": 4.995981206913194e-05, "loss": 0.3103, "step": 16500 }, { "epoch": 0.5999709281197761, "grad_norm": 2.353302478790283, "learning_rate": 4.995951740872331e-05, "loss": 1.0256, "step": 16510 }, { "epoch": 0.6003343266225744, "grad_norm": 0.8436356782913208, "learning_rate": 4.995922167289788e-05, "loss": 0.1563, "step": 16520 }, { "epoch": 0.6006977251253725, "grad_norm": 3.3516342639923096, "learning_rate": 4.99589248616684e-05, "loss": 0.2441, "step": 16530 }, { "epoch": 0.6010611236281707, "grad_norm": 2.0286059379577637, "learning_rate": 4.995862697504764e-05, "loss": 0.1767, "step": 16540 }, { "epoch": 0.6014245221309689, "grad_norm": 18.248151779174805, "learning_rate": 4.9958328013048464e-05, "loss": 0.3522, "step": 16550 }, { "epoch": 0.601787920633767, "grad_norm": 2.1514463424682617, "learning_rate": 4.995802797568372e-05, "loss": 0.1771, "step": 16560 }, { "epoch": 0.6021513191365652, "grad_norm": 5.868020534515381, "learning_rate": 4.995772686296635e-05, "loss": 0.1776, "step": 16570 }, { "epoch": 0.6025147176393634, "grad_norm": 4.539637565612793, "learning_rate": 4.9957424674909336e-05, "loss": 0.2002, "step": 16580 }, { "epoch": 0.6028781161421615, "grad_norm": 1.7226190567016602, "learning_rate": 4.99571214115257e-05, "loss": 0.1927, "step": 16590 }, { "epoch": 0.6032415146449597, "grad_norm": 22.087247848510742, "learning_rate": 4.9956817072828485e-05, "loss": 0.249, "step": 16600 }, { "epoch": 0.6036049131477579, "grad_norm": 2.4267120361328125, "learning_rate": 4.995651165883083e-05, "loss": 0.1935, "step": 16610 }, { "epoch": 0.603968311650556, "grad_norm": 2.5284249782562256, "learning_rate": 4.995620516954588e-05, "loss": 0.1495, "step": 16620 }, { "epoch": 0.6043317101533542, "grad_norm": 1.5988596677780151, "learning_rate": 4.995589760498684e-05, "loss": 0.2329, "step": 16630 }, { "epoch": 0.6046951086561524, "grad_norm": 1.0771689414978027, "learning_rate": 
4.9955588965166966e-05, "loss": 0.1634, "step": 16640 }, { "epoch": 0.6050585071589505, "grad_norm": 8.72423267364502, "learning_rate": 4.995527925009956e-05, "loss": 0.27, "step": 16650 }, { "epoch": 0.6054219056617487, "grad_norm": 1.3176789283752441, "learning_rate": 4.9954968459797955e-05, "loss": 1.1913, "step": 16660 }, { "epoch": 0.6057853041645468, "grad_norm": 1.8307547569274902, "learning_rate": 4.9954656594275555e-05, "loss": 0.188, "step": 16670 }, { "epoch": 0.606148702667345, "grad_norm": 2.783604621887207, "learning_rate": 4.9954343653545795e-05, "loss": 0.1791, "step": 16680 }, { "epoch": 0.6065121011701432, "grad_norm": 1.6639970541000366, "learning_rate": 4.9954029637622146e-05, "loss": 0.1829, "step": 16690 }, { "epoch": 0.6068754996729413, "grad_norm": 11.055110931396484, "learning_rate": 4.995371454651815e-05, "loss": 0.2229, "step": 16700 }, { "epoch": 0.6072388981757395, "grad_norm": 1.8166972398757935, "learning_rate": 4.9953398380247384e-05, "loss": 0.1734, "step": 16710 }, { "epoch": 0.6076022966785377, "grad_norm": 4.851889610290527, "learning_rate": 4.995308113882346e-05, "loss": 0.1716, "step": 16720 }, { "epoch": 0.6079656951813358, "grad_norm": 3.0047857761383057, "learning_rate": 4.9952762822260056e-05, "loss": 0.2125, "step": 16730 }, { "epoch": 0.608329093684134, "grad_norm": 1.1506407260894775, "learning_rate": 4.9952443430570887e-05, "loss": 0.171, "step": 16740 }, { "epoch": 0.6086924921869322, "grad_norm": 4.324979782104492, "learning_rate": 4.995212296376971e-05, "loss": 0.2365, "step": 16750 }, { "epoch": 0.6090558906897303, "grad_norm": 1.2295490503311157, "learning_rate": 4.995180142187033e-05, "loss": 0.2002, "step": 16760 }, { "epoch": 0.6094192891925285, "grad_norm": 1.454434871673584, "learning_rate": 4.995147880488661e-05, "loss": 0.1602, "step": 16770 }, { "epoch": 0.6097826876953267, "grad_norm": 2.6185641288757324, "learning_rate": 4.995115511283244e-05, "loss": 0.1904, "step": 16780 }, { "epoch": 
0.6101460861981248, "grad_norm": 1.2603826522827148, "learning_rate": 4.9950830345721774e-05, "loss": 0.1892, "step": 16790 }, { "epoch": 0.610509484700923, "grad_norm": 14.189190864562988, "learning_rate": 4.9950504503568615e-05, "loss": 0.281, "step": 16800 }, { "epoch": 0.610509484700923, "eval_loss": 0.4123116433620453, "eval_runtime": 179.5081, "eval_samples_per_second": 41.302, "eval_steps_per_second": 5.164, "eval_wer": 0.22986367019441972, "step": 16800 }, { "epoch": 0.6108728832037212, "grad_norm": 1.7670204639434814, "learning_rate": 4.995017758638698e-05, "loss": 0.2581, "step": 16810 }, { "epoch": 0.6112362817065193, "grad_norm": 1.2099360227584839, "learning_rate": 4.9949849594190964e-05, "loss": 0.1762, "step": 16820 }, { "epoch": 0.6115996802093175, "grad_norm": 2.7719335556030273, "learning_rate": 4.9949520526994716e-05, "loss": 0.1867, "step": 16830 }, { "epoch": 0.6119630787121157, "grad_norm": 1.5935924053192139, "learning_rate": 4.9949190384812386e-05, "loss": 0.191, "step": 16840 }, { "epoch": 0.6123264772149138, "grad_norm": 3.511439085006714, "learning_rate": 4.994885916765821e-05, "loss": 0.2132, "step": 16850 }, { "epoch": 0.612689875717712, "grad_norm": 1.693789005279541, "learning_rate": 4.994852687554647e-05, "loss": 0.167, "step": 16860 }, { "epoch": 0.6130532742205103, "grad_norm": 2.1199066638946533, "learning_rate": 4.994819350849147e-05, "loss": 0.172, "step": 16870 }, { "epoch": 0.6134166727233084, "grad_norm": 2.724487543106079, "learning_rate": 4.9947859066507575e-05, "loss": 0.2083, "step": 16880 }, { "epoch": 0.6137800712261066, "grad_norm": 0.926547110080719, "learning_rate": 4.99475235496092e-05, "loss": 0.1517, "step": 16890 }, { "epoch": 0.6141434697289048, "grad_norm": 14.503059387207031, "learning_rate": 4.99471869578108e-05, "loss": 0.1945, "step": 16900 }, { "epoch": 0.6145068682317029, "grad_norm": 3.2206919193267822, "learning_rate": 4.994684929112687e-05, "loss": 0.1882, "step": 16910 }, { "epoch": 
0.6148702667345011, "grad_norm": 2.004995107650757, "learning_rate": 4.994651054957198e-05, "loss": 0.1876, "step": 16920 }, { "epoch": 0.6152336652372993, "grad_norm": 2.0580127239227295, "learning_rate": 4.99461707331607e-05, "loss": 0.2104, "step": 16930 }, { "epoch": 0.6155970637400974, "grad_norm": 3.3028602600097656, "learning_rate": 4.9945829841907684e-05, "loss": 0.1494, "step": 16940 }, { "epoch": 0.6159604622428956, "grad_norm": 7.572249412536621, "learning_rate": 4.994548787582761e-05, "loss": 0.2381, "step": 16950 }, { "epoch": 0.6163238607456937, "grad_norm": 1.4220709800720215, "learning_rate": 4.9945144834935234e-05, "loss": 0.1916, "step": 16960 }, { "epoch": 0.6166872592484919, "grad_norm": 1.2397724390029907, "learning_rate": 4.994480071924531e-05, "loss": 0.1593, "step": 16970 }, { "epoch": 0.6170506577512901, "grad_norm": 2.2569403648376465, "learning_rate": 4.9944455528772684e-05, "loss": 0.1984, "step": 16980 }, { "epoch": 0.6174140562540882, "grad_norm": 1.811727523803711, "learning_rate": 4.994410926353221e-05, "loss": 0.1838, "step": 16990 }, { "epoch": 0.6177774547568864, "grad_norm": 2.783061981201172, "learning_rate": 4.9943761923538834e-05, "loss": 0.2217, "step": 17000 }, { "epoch": 0.6181408532596846, "grad_norm": 2.816331148147583, "learning_rate": 4.99434135088075e-05, "loss": 0.1911, "step": 17010 }, { "epoch": 0.6185042517624827, "grad_norm": 1.238916039466858, "learning_rate": 4.9943064019353234e-05, "loss": 0.1854, "step": 17020 }, { "epoch": 0.6188676502652809, "grad_norm": 5.16685152053833, "learning_rate": 4.9942713455191075e-05, "loss": 0.1797, "step": 17030 }, { "epoch": 0.6192310487680791, "grad_norm": 2.486461639404297, "learning_rate": 4.9942361816336146e-05, "loss": 0.1926, "step": 17040 }, { "epoch": 0.6195944472708772, "grad_norm": 9.018515586853027, "learning_rate": 4.994200910280359e-05, "loss": 0.2193, "step": 17050 }, { "epoch": 0.6199578457736754, "grad_norm": 1.804166555404663, "learning_rate": 
4.994165531460861e-05, "loss": 0.1977, "step": 17060 }, { "epoch": 0.6203212442764736, "grad_norm": 1.2862845659255981, "learning_rate": 4.994130045176644e-05, "loss": 0.1493, "step": 17070 }, { "epoch": 0.6206846427792717, "grad_norm": 4.164750576019287, "learning_rate": 4.994094451429237e-05, "loss": 0.2548, "step": 17080 }, { "epoch": 0.6210480412820699, "grad_norm": 1.577255368232727, "learning_rate": 4.994058750220176e-05, "loss": 0.1703, "step": 17090 }, { "epoch": 0.6214114397848681, "grad_norm": 5.805021286010742, "learning_rate": 4.994022941550996e-05, "loss": 0.2976, "step": 17100 }, { "epoch": 0.6217748382876662, "grad_norm": 0.9706230163574219, "learning_rate": 4.993987025423241e-05, "loss": 0.1454, "step": 17110 }, { "epoch": 0.6221382367904644, "grad_norm": 1.4393014907836914, "learning_rate": 4.993951001838459e-05, "loss": 0.1496, "step": 17120 }, { "epoch": 0.6225016352932626, "grad_norm": 1.839086651802063, "learning_rate": 4.993914870798202e-05, "loss": 0.2256, "step": 17130 }, { "epoch": 0.6228650337960607, "grad_norm": 1.8924603462219238, "learning_rate": 4.993878632304027e-05, "loss": 0.1415, "step": 17140 }, { "epoch": 0.6232284322988589, "grad_norm": 12.03149700164795, "learning_rate": 4.993842286357494e-05, "loss": 0.7236, "step": 17150 }, { "epoch": 0.6235918308016571, "grad_norm": 2.0251877307891846, "learning_rate": 4.993805832960171e-05, "loss": 0.1913, "step": 17160 }, { "epoch": 0.6239552293044552, "grad_norm": 2.341251850128174, "learning_rate": 4.993769272113628e-05, "loss": 0.1734, "step": 17170 }, { "epoch": 0.6243186278072534, "grad_norm": 2.517820358276367, "learning_rate": 4.993732603819438e-05, "loss": 0.18, "step": 17180 }, { "epoch": 0.6246820263100517, "grad_norm": 1.6384356021881104, "learning_rate": 4.993695828079184e-05, "loss": 0.1513, "step": 17190 }, { "epoch": 0.6250454248128497, "grad_norm": 10.794693946838379, "learning_rate": 4.993658944894449e-05, "loss": 0.2282, "step": 17200 }, { "epoch": 0.625408823315648, 
"grad_norm": 1.2552087306976318, "learning_rate": 4.9936219542668236e-05, "loss": 0.1938, "step": 17210 }, { "epoch": 0.6257722218184462, "grad_norm": 2.423431634902954, "learning_rate": 4.993584856197899e-05, "loss": 0.1487, "step": 17220 }, { "epoch": 0.6261356203212443, "grad_norm": 1.7924834489822388, "learning_rate": 4.9935476506892763e-05, "loss": 0.195, "step": 17230 }, { "epoch": 0.6264990188240425, "grad_norm": 1.6521999835968018, "learning_rate": 4.9935103377425566e-05, "loss": 0.1652, "step": 17240 }, { "epoch": 0.6268624173268406, "grad_norm": 6.472127437591553, "learning_rate": 4.9934729173593494e-05, "loss": 0.2481, "step": 17250 }, { "epoch": 0.6272258158296388, "grad_norm": 1.8962410688400269, "learning_rate": 4.993435389541265e-05, "loss": 0.1487, "step": 17260 }, { "epoch": 0.627589214332437, "grad_norm": 1.2054486274719238, "learning_rate": 4.993397754289922e-05, "loss": 0.1496, "step": 17270 }, { "epoch": 0.6279526128352351, "grad_norm": 3.9840786457061768, "learning_rate": 4.993360011606941e-05, "loss": 0.1776, "step": 17280 }, { "epoch": 0.6283160113380333, "grad_norm": 0.9625970125198364, "learning_rate": 4.9933221614939485e-05, "loss": 0.1652, "step": 17290 }, { "epoch": 0.6286794098408315, "grad_norm": 11.166252136230469, "learning_rate": 4.993284203952575e-05, "loss": 0.233, "step": 17300 }, { "epoch": 0.6290428083436296, "grad_norm": 2.356268882751465, "learning_rate": 4.9932461389844566e-05, "loss": 0.1498, "step": 17310 }, { "epoch": 0.6294062068464278, "grad_norm": 0.9366337656974792, "learning_rate": 4.993207966591234e-05, "loss": 0.1483, "step": 17320 }, { "epoch": 0.629769605349226, "grad_norm": 5.854847431182861, "learning_rate": 4.9931696867745495e-05, "loss": 0.1603, "step": 17330 }, { "epoch": 0.6301330038520241, "grad_norm": 1.0090773105621338, "learning_rate": 4.9931312995360546e-05, "loss": 0.1475, "step": 17340 }, { "epoch": 0.6304964023548223, "grad_norm": 3.896676540374756, "learning_rate": 4.9930928048774024e-05, "loss": 
0.244, "step": 17350 }, { "epoch": 0.6308598008576205, "grad_norm": 1.1872800588607788, "learning_rate": 4.993054202800252e-05, "loss": 0.1618, "step": 17360 }, { "epoch": 0.6312231993604186, "grad_norm": 1.8078994750976562, "learning_rate": 4.9930154933062654e-05, "loss": 0.1554, "step": 17370 }, { "epoch": 0.6315865978632168, "grad_norm": 1.8264563083648682, "learning_rate": 4.9929766763971126e-05, "loss": 0.162, "step": 17380 }, { "epoch": 0.631949996366015, "grad_norm": 0.6304519176483154, "learning_rate": 4.992937752074465e-05, "loss": 0.209, "step": 17390 }, { "epoch": 0.6323133948688131, "grad_norm": 4.7621917724609375, "learning_rate": 4.992898720339998e-05, "loss": 0.2393, "step": 17400 }, { "epoch": 0.6323133948688131, "eval_loss": 0.3943130671977997, "eval_runtime": 180.0553, "eval_samples_per_second": 41.176, "eval_steps_per_second": 5.148, "eval_wer": 0.21144734692395664, "step": 17400 }, { "epoch": 0.6326767933716113, "grad_norm": 1.1110138893127441, "learning_rate": 4.992859581195396e-05, "loss": 0.143, "step": 17410 }, { "epoch": 0.6330401918744095, "grad_norm": 1.2453794479370117, "learning_rate": 4.992820334642344e-05, "loss": 0.1454, "step": 17420 }, { "epoch": 0.6334035903772076, "grad_norm": 3.669144630432129, "learning_rate": 4.9927809806825335e-05, "loss": 0.2496, "step": 17430 }, { "epoch": 0.6337669888800058, "grad_norm": 2.7898483276367188, "learning_rate": 4.99274151931766e-05, "loss": 0.1614, "step": 17440 }, { "epoch": 0.634130387382804, "grad_norm": 6.725431442260742, "learning_rate": 4.992701950549423e-05, "loss": 0.2622, "step": 17450 }, { "epoch": 0.6344937858856021, "grad_norm": 1.6481575965881348, "learning_rate": 4.992662274379528e-05, "loss": 0.1713, "step": 17460 }, { "epoch": 0.6348571843884003, "grad_norm": 1.3567384481430054, "learning_rate": 4.9926224908096856e-05, "loss": 0.1725, "step": 17470 }, { "epoch": 0.6352205828911985, "grad_norm": 1.8207722902297974, "learning_rate": 4.9925825998416076e-05, "loss": 0.1973, "step": 
17480 }, { "epoch": 0.6355839813939966, "grad_norm": 2.2345893383026123, "learning_rate": 4.9925426014770146e-05, "loss": 0.1847, "step": 17490 }, { "epoch": 0.6359473798967948, "grad_norm": 7.193591594696045, "learning_rate": 4.992502495717629e-05, "loss": 0.2605, "step": 17500 }, { "epoch": 0.636310778399593, "grad_norm": 1.346073865890503, "learning_rate": 4.99246228256518e-05, "loss": 0.1518, "step": 17510 }, { "epoch": 0.6366741769023911, "grad_norm": 1.5637879371643066, "learning_rate": 4.9924219620213995e-05, "loss": 0.1648, "step": 17520 }, { "epoch": 0.6370375754051893, "grad_norm": 3.2450170516967773, "learning_rate": 4.9923815340880236e-05, "loss": 0.1974, "step": 17530 }, { "epoch": 0.6374009739079874, "grad_norm": 0.9553948640823364, "learning_rate": 4.992340998766796e-05, "loss": 0.1694, "step": 17540 }, { "epoch": 0.6377643724107857, "grad_norm": 8.901055335998535, "learning_rate": 4.9923003560594625e-05, "loss": 0.2625, "step": 17550 }, { "epoch": 0.6381277709135839, "grad_norm": 1.7500522136688232, "learning_rate": 4.992259605967774e-05, "loss": 0.1799, "step": 17560 }, { "epoch": 0.638491169416382, "grad_norm": 1.4673160314559937, "learning_rate": 4.9922187484934865e-05, "loss": 0.1698, "step": 17570 }, { "epoch": 0.6388545679191802, "grad_norm": 2.5377135276794434, "learning_rate": 4.992177783638361e-05, "loss": 0.1822, "step": 17580 }, { "epoch": 0.6392179664219784, "grad_norm": 1.660311222076416, "learning_rate": 4.9921367114041625e-05, "loss": 0.1659, "step": 17590 }, { "epoch": 0.6395813649247765, "grad_norm": 8.248649597167969, "learning_rate": 4.9920955317926595e-05, "loss": 0.2384, "step": 17600 }, { "epoch": 0.6399447634275747, "grad_norm": 1.5581409931182861, "learning_rate": 4.992054244805627e-05, "loss": 0.1665, "step": 17610 }, { "epoch": 0.6403081619303729, "grad_norm": 0.9654737710952759, "learning_rate": 4.992012850444844e-05, "loss": 0.3493, "step": 17620 }, { "epoch": 0.640671560433171, "grad_norm": 3.4477317333221436, 
"learning_rate": 4.9919713487120935e-05, "loss": 0.2097, "step": 17630 }, { "epoch": 0.6410349589359692, "grad_norm": 1.3745356798171997, "learning_rate": 4.9919297396091634e-05, "loss": 0.1459, "step": 17640 }, { "epoch": 0.6413983574387674, "grad_norm": 4.813534259796143, "learning_rate": 4.991888023137849e-05, "loss": 0.1905, "step": 17650 }, { "epoch": 0.6417617559415655, "grad_norm": 3.118452310562134, "learning_rate": 4.9918461992999445e-05, "loss": 0.1527, "step": 17660 }, { "epoch": 0.6421251544443637, "grad_norm": 1.8424941301345825, "learning_rate": 4.991804268097253e-05, "loss": 0.1759, "step": 17670 }, { "epoch": 0.6424885529471619, "grad_norm": 7.301458835601807, "learning_rate": 4.9917622295315826e-05, "loss": 0.1662, "step": 17680 }, { "epoch": 0.64285195144996, "grad_norm": 3.133114814758301, "learning_rate": 4.991720083604743e-05, "loss": 0.1692, "step": 17690 }, { "epoch": 0.6432153499527582, "grad_norm": 11.538620948791504, "learning_rate": 4.99167783031855e-05, "loss": 0.2443, "step": 17700 }, { "epoch": 0.6435787484555564, "grad_norm": 1.3739595413208008, "learning_rate": 4.991635469674825e-05, "loss": 0.1465, "step": 17710 }, { "epoch": 0.6439421469583545, "grad_norm": 1.6855549812316895, "learning_rate": 4.991593001675393e-05, "loss": 0.1819, "step": 17720 }, { "epoch": 0.6443055454611527, "grad_norm": 1.692335844039917, "learning_rate": 4.991550426322083e-05, "loss": 0.1654, "step": 17730 }, { "epoch": 0.6446689439639509, "grad_norm": 1.1132971048355103, "learning_rate": 4.9915077436167313e-05, "loss": 0.1688, "step": 17740 }, { "epoch": 0.645032342466749, "grad_norm": 5.6813201904296875, "learning_rate": 4.9914649535611756e-05, "loss": 0.2235, "step": 17750 }, { "epoch": 0.6453957409695472, "grad_norm": 1.5107471942901611, "learning_rate": 4.99142205615726e-05, "loss": 0.1747, "step": 17760 }, { "epoch": 0.6457591394723454, "grad_norm": 2.4552764892578125, "learning_rate": 4.9913790514068316e-05, "loss": 0.1739, "step": 17770 }, { "epoch": 
0.6461225379751435, "grad_norm": 1.5664808750152588, "learning_rate": 4.991335939311744e-05, "loss": 0.1766, "step": 17780 }, { "epoch": 0.6464859364779417, "grad_norm": 2.935850143432617, "learning_rate": 4.9912927198738556e-05, "loss": 0.2148, "step": 17790 }, { "epoch": 0.6468493349807399, "grad_norm": 10.267364501953125, "learning_rate": 4.991249393095028e-05, "loss": 0.2521, "step": 17800 }, { "epoch": 0.647212733483538, "grad_norm": 1.3392564058303833, "learning_rate": 4.9912059589771274e-05, "loss": 0.172, "step": 17810 }, { "epoch": 0.6475761319863362, "grad_norm": 0.895491361618042, "learning_rate": 4.991162417522026e-05, "loss": 0.1379, "step": 17820 }, { "epoch": 0.6479395304891343, "grad_norm": 2.536397695541382, "learning_rate": 4.9911187687315997e-05, "loss": 0.1477, "step": 17830 }, { "epoch": 0.6483029289919325, "grad_norm": 1.7795464992523193, "learning_rate": 4.9910750126077296e-05, "loss": 0.1786, "step": 17840 }, { "epoch": 0.6486663274947307, "grad_norm": 62.683929443359375, "learning_rate": 4.9910311491523e-05, "loss": 0.266, "step": 17850 }, { "epoch": 0.6490297259975288, "grad_norm": 2.0866358280181885, "learning_rate": 4.990987178367201e-05, "loss": 0.1428, "step": 17860 }, { "epoch": 0.649393124500327, "grad_norm": 1.5636661052703857, "learning_rate": 4.990943100254328e-05, "loss": 0.1845, "step": 17870 }, { "epoch": 0.6497565230031253, "grad_norm": 3.540689468383789, "learning_rate": 4.9908989148155796e-05, "loss": 0.2348, "step": 17880 }, { "epoch": 0.6501199215059233, "grad_norm": 1.720421314239502, "learning_rate": 4.990854622052859e-05, "loss": 0.1742, "step": 17890 }, { "epoch": 0.6504833200087216, "grad_norm": 7.7201056480407715, "learning_rate": 4.9908102219680756e-05, "loss": 0.2573, "step": 17900 }, { "epoch": 0.6508467185115198, "grad_norm": 3.826190948486328, "learning_rate": 4.9907701701329876e-05, "loss": 3.6024, "step": 17910 }, { "epoch": 0.6512101170143179, "grad_norm": 2.047307252883911, "learning_rate": 
4.990725566141558e-05, "loss": 0.1551, "step": 17920 }, { "epoch": 0.6515735155171161, "grad_norm": 6.462743282318115, "learning_rate": 4.990680854833626e-05, "loss": 0.2109, "step": 17930 }, { "epoch": 0.6519369140199143, "grad_norm": 1.7611109018325806, "learning_rate": 4.9906360362111184e-05, "loss": 0.1959, "step": 17940 }, { "epoch": 0.6523003125227124, "grad_norm": 5.253514766693115, "learning_rate": 4.9905911102759655e-05, "loss": 0.2436, "step": 17950 }, { "epoch": 0.6526637110255106, "grad_norm": 0.9357771873474121, "learning_rate": 4.9905460770301035e-05, "loss": 0.1664, "step": 17960 }, { "epoch": 0.6530271095283088, "grad_norm": 1.219488263130188, "learning_rate": 4.990500936475472e-05, "loss": 0.2286, "step": 17970 }, { "epoch": 0.6533905080311069, "grad_norm": 2.8499608039855957, "learning_rate": 4.990455688614016e-05, "loss": 0.2664, "step": 17980 }, { "epoch": 0.6537539065339051, "grad_norm": 1.5652077198028564, "learning_rate": 4.990410333447686e-05, "loss": 0.1341, "step": 17990 }, { "epoch": 0.6541173050367033, "grad_norm": 5.98219633102417, "learning_rate": 4.9903648709784356e-05, "loss": 0.2338, "step": 18000 }, { "epoch": 0.6541173050367033, "eval_loss": 0.37892404198646545, "eval_runtime": 180.0524, "eval_samples_per_second": 41.177, "eval_steps_per_second": 5.149, "eval_wer": 0.200864087715795, "step": 18000 }, { "epoch": 0.6544807035395014, "grad_norm": 2.105100154876709, "learning_rate": 4.990319301208223e-05, "loss": 0.1764, "step": 18010 }, { "epoch": 0.6548441020422996, "grad_norm": 1.0867921113967896, "learning_rate": 4.990273624139013e-05, "loss": 0.1507, "step": 18020 }, { "epoch": 0.6552075005450978, "grad_norm": 2.9895503520965576, "learning_rate": 4.9902278397727734e-05, "loss": 0.1479, "step": 18030 }, { "epoch": 0.6555708990478959, "grad_norm": 0.9947407841682434, "learning_rate": 4.990181948111475e-05, "loss": 0.1558, "step": 18040 }, { "epoch": 0.6559342975506941, "grad_norm": 7.774895191192627, "learning_rate": 
4.9901359491570974e-05, "loss": 0.2202, "step": 18050 }, { "epoch": 0.6562976960534923, "grad_norm": 1.8466017246246338, "learning_rate": 4.990089842911622e-05, "loss": 0.1929, "step": 18060 }, { "epoch": 0.6566610945562904, "grad_norm": 0.8435410261154175, "learning_rate": 4.9900436293770345e-05, "loss": 0.1377, "step": 18070 }, { "epoch": 0.6570244930590886, "grad_norm": 3.10648512840271, "learning_rate": 4.989997308555326e-05, "loss": 0.202, "step": 18080 }, { "epoch": 0.6573878915618868, "grad_norm": 1.112806797027588, "learning_rate": 4.989950880448494e-05, "loss": 0.1486, "step": 18090 }, { "epoch": 0.6577512900646849, "grad_norm": 18.821117401123047, "learning_rate": 4.989904345058538e-05, "loss": 0.2677, "step": 18100 }, { "epoch": 0.6581146885674831, "grad_norm": 1.254798412322998, "learning_rate": 4.989857702387463e-05, "loss": 3.5769, "step": 18110 }, { "epoch": 0.6584780870702812, "grad_norm": 0.9956761002540588, "learning_rate": 4.989810952437277e-05, "loss": 0.1958, "step": 18120 }, { "epoch": 0.6588414855730794, "grad_norm": 2.9471828937530518, "learning_rate": 4.9897640952099975e-05, "loss": 0.1988, "step": 18130 }, { "epoch": 0.6592048840758776, "grad_norm": 1.3806344270706177, "learning_rate": 4.989717130707641e-05, "loss": 0.1552, "step": 18140 }, { "epoch": 0.6595682825786757, "grad_norm": 3.0857722759246826, "learning_rate": 4.989670058932231e-05, "loss": 0.2168, "step": 18150 }, { "epoch": 0.6599316810814739, "grad_norm": 1.8781664371490479, "learning_rate": 4.989622879885798e-05, "loss": 0.1571, "step": 18160 }, { "epoch": 0.6602950795842721, "grad_norm": 1.1139156818389893, "learning_rate": 4.9895755935703725e-05, "loss": 0.1365, "step": 18170 }, { "epoch": 0.6606584780870702, "grad_norm": 2.3965742588043213, "learning_rate": 4.9895281999879925e-05, "loss": 0.1879, "step": 18180 }, { "epoch": 0.6610218765898684, "grad_norm": 1.2575726509094238, "learning_rate": 4.9894806991407e-05, "loss": 0.2197, "step": 18190 }, { "epoch": 
0.6613852750926666, "grad_norm": 10.392169952392578, "learning_rate": 4.989433091030542e-05, "loss": 0.2318, "step": 18200 }, { "epoch": 0.6617486735954647, "grad_norm": 0.8268498182296753, "learning_rate": 4.98938537565957e-05, "loss": 0.1416, "step": 18210 }, { "epoch": 0.662112072098263, "grad_norm": 0.9257369637489319, "learning_rate": 4.9893375530298384e-05, "loss": 0.1855, "step": 18220 }, { "epoch": 0.6624754706010612, "grad_norm": 1.7720370292663574, "learning_rate": 4.9892896231434094e-05, "loss": 1.0276, "step": 18230 }, { "epoch": 0.6628388691038593, "grad_norm": 2.2012548446655273, "learning_rate": 4.9892415860023476e-05, "loss": 1.1909, "step": 18240 }, { "epoch": 0.6632022676066575, "grad_norm": 9.690247535705566, "learning_rate": 4.9891934416087224e-05, "loss": 0.2603, "step": 18250 }, { "epoch": 0.6635656661094557, "grad_norm": 2.528682231903076, "learning_rate": 4.989145189964608e-05, "loss": 0.1912, "step": 18260 }, { "epoch": 0.6639290646122538, "grad_norm": 1.4666227102279663, "learning_rate": 4.989096831072084e-05, "loss": 0.2316, "step": 18270 }, { "epoch": 0.664292463115052, "grad_norm": 1.463526725769043, "learning_rate": 4.989048364933234e-05, "loss": 0.1388, "step": 18280 }, { "epoch": 0.6646558616178502, "grad_norm": 1.2156569957733154, "learning_rate": 4.988999791550146e-05, "loss": 0.4086, "step": 18290 }, { "epoch": 0.6650192601206483, "grad_norm": 4.909139156341553, "learning_rate": 4.988951110924913e-05, "loss": 0.2631, "step": 18300 }, { "epoch": 0.6653826586234465, "grad_norm": 1.3692512512207031, "learning_rate": 4.988902323059632e-05, "loss": 0.1525, "step": 18310 }, { "epoch": 0.6657460571262447, "grad_norm": 1.153344988822937, "learning_rate": 4.988853427956406e-05, "loss": 0.1904, "step": 18320 }, { "epoch": 0.6661094556290428, "grad_norm": 2.052828073501587, "learning_rate": 4.988804425617341e-05, "loss": 0.1979, "step": 18330 }, { "epoch": 0.666472854131841, "grad_norm": 1.373213768005371, "learning_rate": 
4.988755316044548e-05, "loss": 0.1836, "step": 18340 }, { "epoch": 0.6668362526346392, "grad_norm": 24.185970306396484, "learning_rate": 4.9887060992401436e-05, "loss": 0.2546, "step": 18350 }, { "epoch": 0.6671996511374373, "grad_norm": 1.702205777168274, "learning_rate": 4.988656775206248e-05, "loss": 0.1433, "step": 18360 }, { "epoch": 0.6675630496402355, "grad_norm": 2.279100179672241, "learning_rate": 4.9886073439449864e-05, "loss": 0.1671, "step": 18370 }, { "epoch": 0.6679264481430337, "grad_norm": 3.928740978240967, "learning_rate": 4.98855780545849e-05, "loss": 0.1506, "step": 18380 }, { "epoch": 0.6682898466458318, "grad_norm": 2.2895402908325195, "learning_rate": 4.988508159748891e-05, "loss": 0.1523, "step": 18390 }, { "epoch": 0.66865324514863, "grad_norm": 10.151689529418945, "learning_rate": 4.98845840681833e-05, "loss": 0.2284, "step": 18400 }, { "epoch": 0.6690166436514282, "grad_norm": 1.268561840057373, "learning_rate": 4.9884085466689504e-05, "loss": 0.171, "step": 18410 }, { "epoch": 0.6693800421542263, "grad_norm": 1.0731265544891357, "learning_rate": 4.9883585793029e-05, "loss": 0.7778, "step": 18420 }, { "epoch": 0.6697434406570245, "grad_norm": 1.0762509107589722, "learning_rate": 4.988308504722332e-05, "loss": 0.1966, "step": 18430 }, { "epoch": 0.6701068391598226, "grad_norm": 6.763409614562988, "learning_rate": 4.9882583229294044e-05, "loss": 0.156, "step": 18440 }, { "epoch": 0.6704702376626208, "grad_norm": 8.312501907348633, "learning_rate": 4.988208033926279e-05, "loss": 0.2573, "step": 18450 }, { "epoch": 0.670833636165419, "grad_norm": 1.7566003799438477, "learning_rate": 4.988157637715122e-05, "loss": 0.1639, "step": 18460 }, { "epoch": 0.6711970346682171, "grad_norm": 2.336911916732788, "learning_rate": 4.988107134298105e-05, "loss": 0.1536, "step": 18470 }, { "epoch": 0.6715604331710153, "grad_norm": 2.2477078437805176, "learning_rate": 4.988056523677405e-05, "loss": 0.2734, "step": 18480 }, { "epoch": 0.6719238316738135, 
"grad_norm": 1.62912917137146, "learning_rate": 4.9880058058552015e-05, "loss": 0.1501, "step": 18490 }, { "epoch": 0.6722872301766116, "grad_norm": 8.896906852722168, "learning_rate": 4.98795498083368e-05, "loss": 0.213, "step": 18500 }, { "epoch": 0.6726506286794098, "grad_norm": 1.804291009902954, "learning_rate": 4.987904048615031e-05, "loss": 0.2175, "step": 18510 }, { "epoch": 0.673014027182208, "grad_norm": 0.9261330366134644, "learning_rate": 4.9878530092014486e-05, "loss": 0.1553, "step": 18520 }, { "epoch": 0.6733774256850061, "grad_norm": 4.854642868041992, "learning_rate": 4.987801862595132e-05, "loss": 0.2065, "step": 18530 }, { "epoch": 0.6737408241878043, "grad_norm": 0.9362125992774963, "learning_rate": 4.987750608798284e-05, "loss": 0.1611, "step": 18540 }, { "epoch": 0.6741042226906026, "grad_norm": 13.348092079162598, "learning_rate": 4.987699247813114e-05, "loss": 0.2834, "step": 18550 }, { "epoch": 0.6744676211934006, "grad_norm": 1.3235937356948853, "learning_rate": 4.987647779641835e-05, "loss": 0.166, "step": 18560 }, { "epoch": 0.6748310196961989, "grad_norm": 1.7941697835922241, "learning_rate": 4.987596204286664e-05, "loss": 0.186, "step": 18570 }, { "epoch": 0.6751944181989971, "grad_norm": 6.945876121520996, "learning_rate": 4.987544521749824e-05, "loss": 0.1859, "step": 18580 }, { "epoch": 0.6755578167017952, "grad_norm": 1.1671024560928345, "learning_rate": 4.98749273203354e-05, "loss": 0.2007, "step": 18590 }, { "epoch": 0.6759212152045934, "grad_norm": 46.817718505859375, "learning_rate": 4.987440835140046e-05, "loss": 0.275, "step": 18600 }, { "epoch": 0.6759212152045934, "eval_loss": 0.4186328053474426, "eval_runtime": 180.6066, "eval_samples_per_second": 41.051, "eval_steps_per_second": 5.133, "eval_wer": 0.21444260896400238, "step": 18600 }, { "epoch": 0.6762846137073916, "grad_norm": 0.9619908928871155, "learning_rate": 4.987388831071575e-05, "loss": 0.2147, "step": 18610 }, { "epoch": 0.6766480122101897, "grad_norm": 
1.139666199684143, "learning_rate": 4.9873367198303714e-05, "loss": 0.2591, "step": 18620 }, { "epoch": 0.6770114107129879, "grad_norm": 2.6673026084899902, "learning_rate": 4.9872845014186776e-05, "loss": 0.2013, "step": 18630 }, { "epoch": 0.6773748092157861, "grad_norm": 1.0486637353897095, "learning_rate": 4.987232175838745e-05, "loss": 0.2326, "step": 18640 }, { "epoch": 0.6777382077185842, "grad_norm": 6.457462787628174, "learning_rate": 4.987179743092827e-05, "loss": 0.2395, "step": 18650 }, { "epoch": 0.6781016062213824, "grad_norm": 3.296480178833008, "learning_rate": 4.987127203183183e-05, "loss": 0.1857, "step": 18660 }, { "epoch": 0.6784650047241806, "grad_norm": 2.828460454940796, "learning_rate": 4.987074556112078e-05, "loss": 0.1391, "step": 18670 }, { "epoch": 0.6788284032269787, "grad_norm": 10.424219131469727, "learning_rate": 4.987021801881779e-05, "loss": 0.1583, "step": 18680 }, { "epoch": 0.6791918017297769, "grad_norm": 5.248502254486084, "learning_rate": 4.986968940494559e-05, "loss": 0.1676, "step": 18690 }, { "epoch": 0.6795552002325751, "grad_norm": 8.20375919342041, "learning_rate": 4.986915971952696e-05, "loss": 0.2844, "step": 18700 }, { "epoch": 0.6799185987353732, "grad_norm": 2.415562152862549, "learning_rate": 4.986862896258473e-05, "loss": 0.1634, "step": 18710 }, { "epoch": 0.6802819972381714, "grad_norm": 1.635680079460144, "learning_rate": 4.986809713414176e-05, "loss": 0.509, "step": 18720 }, { "epoch": 0.6806453957409695, "grad_norm": 10.641048431396484, "learning_rate": 4.986756423422095e-05, "loss": 0.2015, "step": 18730 }, { "epoch": 0.6810087942437677, "grad_norm": 1.3304156064987183, "learning_rate": 4.986703026284529e-05, "loss": 0.1598, "step": 18740 }, { "epoch": 0.6813721927465659, "grad_norm": 4.707154750823975, "learning_rate": 4.986649522003778e-05, "loss": 0.2486, "step": 18750 }, { "epoch": 0.681735591249364, "grad_norm": 1.671863317489624, "learning_rate": 4.9865959105821454e-05, "loss": 0.1628, "step": 18760 
}, { "epoch": 0.6820989897521622, "grad_norm": 2.4183709621429443, "learning_rate": 4.986542192021942e-05, "loss": 0.1636, "step": 18770 }, { "epoch": 0.6824623882549604, "grad_norm": 120.8931884765625, "learning_rate": 4.9864883663254836e-05, "loss": 2.0172, "step": 18780 }, { "epoch": 0.6828257867577585, "grad_norm": 2.785879135131836, "learning_rate": 4.986434433495089e-05, "loss": 0.1669, "step": 18790 }, { "epoch": 0.6831891852605567, "grad_norm": 3.662753105163574, "learning_rate": 4.98638039353308e-05, "loss": 0.3095, "step": 18800 }, { "epoch": 0.6835525837633549, "grad_norm": 1.1632777452468872, "learning_rate": 4.986326246441787e-05, "loss": 0.1632, "step": 18810 }, { "epoch": 0.683915982266153, "grad_norm": 0.9660913348197937, "learning_rate": 4.986271992223543e-05, "loss": 0.1509, "step": 18820 }, { "epoch": 0.6842793807689512, "grad_norm": 2.810391426086426, "learning_rate": 4.986217630880684e-05, "loss": 0.1507, "step": 18830 }, { "epoch": 0.6846427792717494, "grad_norm": 2.008641242980957, "learning_rate": 4.986163162415554e-05, "loss": 0.1858, "step": 18840 }, { "epoch": 0.6850061777745475, "grad_norm": 3.4007887840270996, "learning_rate": 4.986108586830499e-05, "loss": 0.2389, "step": 18850 }, { "epoch": 0.6853695762773457, "grad_norm": 0.8250002861022949, "learning_rate": 4.986053904127871e-05, "loss": 0.1618, "step": 18860 }, { "epoch": 0.685732974780144, "grad_norm": 0.792607307434082, "learning_rate": 4.986004598111927e-05, "loss": 1.5835, "step": 18870 }, { "epoch": 0.686096373282942, "grad_norm": 2.740478038787842, "learning_rate": 4.985949711892404e-05, "loss": 0.2021, "step": 18880 }, { "epoch": 0.6864597717857402, "grad_norm": 1.1361775398254395, "learning_rate": 4.985894718562153e-05, "loss": 0.2244, "step": 18890 }, { "epoch": 0.6868231702885385, "grad_norm": 2.692542314529419, "learning_rate": 4.985839618123543e-05, "loss": 0.2095, "step": 18900 }, { "epoch": 0.6871865687913365, "grad_norm": 1.2691428661346436, "learning_rate": 
4.9857844105789485e-05, "loss": 0.1533, "step": 18910 }, { "epoch": 0.6875499672941348, "grad_norm": 2.087209939956665, "learning_rate": 4.9857290959307483e-05, "loss": 0.1469, "step": 18920 }, { "epoch": 0.687913365796933, "grad_norm": 1.5252209901809692, "learning_rate": 4.985673674181326e-05, "loss": 0.2099, "step": 18930 }, { "epoch": 0.6882767642997311, "grad_norm": 1.81588876247406, "learning_rate": 4.9856181453330685e-05, "loss": 0.174, "step": 18940 }, { "epoch": 0.6886401628025293, "grad_norm": 21.244775772094727, "learning_rate": 4.9855625093883695e-05, "loss": 0.2455, "step": 18950 }, { "epoch": 0.6890035613053275, "grad_norm": 1.53201425075531, "learning_rate": 4.9855067663496255e-05, "loss": 0.1731, "step": 18960 }, { "epoch": 0.6893669598081256, "grad_norm": 0.9922922849655151, "learning_rate": 4.985450916219239e-05, "loss": 0.1569, "step": 18970 }, { "epoch": 0.6897303583109238, "grad_norm": 1.6983296871185303, "learning_rate": 4.985394958999615e-05, "loss": 0.1784, "step": 18980 }, { "epoch": 0.690093756813722, "grad_norm": 2.5069353580474854, "learning_rate": 4.9853388946931654e-05, "loss": 0.1484, "step": 18990 }, { "epoch": 0.6904571553165201, "grad_norm": 52.345367431640625, "learning_rate": 4.985282723302306e-05, "loss": 0.2431, "step": 19000 }, { "epoch": 0.6908205538193183, "grad_norm": 1.5318138599395752, "learning_rate": 4.9852264448294564e-05, "loss": 0.1662, "step": 19010 }, { "epoch": 0.6911839523221164, "grad_norm": 1.5980876684188843, "learning_rate": 4.985170059277041e-05, "loss": 1.3532, "step": 19020 }, { "epoch": 0.6915473508249146, "grad_norm": 2.355023145675659, "learning_rate": 4.9851135666474915e-05, "loss": 0.1688, "step": 19030 }, { "epoch": 0.6919107493277128, "grad_norm": 3.2141480445861816, "learning_rate": 4.98505696694324e-05, "loss": 0.1303, "step": 19040 }, { "epoch": 0.6922741478305109, "grad_norm": 19.482290267944336, "learning_rate": 4.985000260166725e-05, "loss": 0.2337, "step": 19050 }, { "epoch": 
0.6926375463333091, "grad_norm": 0.8456101417541504, "learning_rate": 4.9849434463203915e-05, "loss": 0.1732, "step": 19060 }, { "epoch": 0.6930009448361073, "grad_norm": 2.2158889770507812, "learning_rate": 4.9848865254066856e-05, "loss": 0.1524, "step": 19070 }, { "epoch": 0.6933643433389054, "grad_norm": 2.0843331813812256, "learning_rate": 4.9848294974280605e-05, "loss": 0.1943, "step": 19080 }, { "epoch": 0.6937277418417036, "grad_norm": 2.6970462799072266, "learning_rate": 4.9847723623869734e-05, "loss": 0.1697, "step": 19090 }, { "epoch": 0.6940911403445018, "grad_norm": 9.394730567932129, "learning_rate": 4.984715120285887e-05, "loss": 0.2151, "step": 19100 }, { "epoch": 0.6944545388472999, "grad_norm": 1.922090768814087, "learning_rate": 4.9846577711272656e-05, "loss": 0.1737, "step": 19110 }, { "epoch": 0.6948179373500981, "grad_norm": 1.3870245218276978, "learning_rate": 4.9846003149135815e-05, "loss": 0.1694, "step": 19120 }, { "epoch": 0.6951813358528963, "grad_norm": 1.6474970579147339, "learning_rate": 4.9845427516473104e-05, "loss": 0.219, "step": 19130 }, { "epoch": 0.6955447343556944, "grad_norm": 1.4302411079406738, "learning_rate": 4.984485081330932e-05, "loss": 0.1489, "step": 19140 }, { "epoch": 0.6959081328584926, "grad_norm": 3.888967990875244, "learning_rate": 4.984427303966932e-05, "loss": 0.2425, "step": 19150 }, { "epoch": 0.6962715313612908, "grad_norm": 1.2002874612808228, "learning_rate": 4.984369419557798e-05, "loss": 0.1575, "step": 19160 }, { "epoch": 0.6966349298640889, "grad_norm": 1.9064863920211792, "learning_rate": 4.984311428106025e-05, "loss": 0.1526, "step": 19170 }, { "epoch": 0.6969983283668871, "grad_norm": 1.3838772773742676, "learning_rate": 4.984253329614112e-05, "loss": 0.1601, "step": 19180 }, { "epoch": 0.6973617268696853, "grad_norm": 3.6261801719665527, "learning_rate": 4.984195124084563e-05, "loss": 0.1668, "step": 19190 }, { "epoch": 0.6977251253724834, "grad_norm": 7.647263526916504, "learning_rate": 
4.984136811519884e-05, "loss": 0.1879, "step": 19200 }, { "epoch": 0.6977251253724834, "eval_loss": 0.3865276575088501, "eval_runtime": 179.6651, "eval_samples_per_second": 41.266, "eval_steps_per_second": 5.16, "eval_wer": 0.20815255867990634, "step": 19200 }, { "epoch": 0.6980885238752816, "grad_norm": 1.7563225030899048, "learning_rate": 4.984078391922589e-05, "loss": 0.1481, "step": 19210 }, { "epoch": 0.6984519223780798, "grad_norm": 1.8016029596328735, "learning_rate": 4.984019865295194e-05, "loss": 0.1713, "step": 19220 }, { "epoch": 0.6988153208808779, "grad_norm": 2.0969181060791016, "learning_rate": 4.983961231640221e-05, "loss": 0.1959, "step": 19230 }, { "epoch": 0.6991787193836762, "grad_norm": 1.6823608875274658, "learning_rate": 4.9839024909601964e-05, "loss": 0.1729, "step": 19240 }, { "epoch": 0.6995421178864744, "grad_norm": 11.533753395080566, "learning_rate": 4.983843643257652e-05, "loss": 0.2264, "step": 19250 }, { "epoch": 0.6999055163892725, "grad_norm": 4.1039204597473145, "learning_rate": 4.983784688535122e-05, "loss": 0.1738, "step": 19260 }, { "epoch": 0.7002689148920707, "grad_norm": 1.1051629781723022, "learning_rate": 4.983725626795147e-05, "loss": 0.155, "step": 19270 }, { "epoch": 0.7006323133948689, "grad_norm": 4.303994178771973, "learning_rate": 4.983666458040273e-05, "loss": 0.1593, "step": 19280 }, { "epoch": 0.700995711897667, "grad_norm": 1.2324292659759521, "learning_rate": 4.983607182273047e-05, "loss": 0.1642, "step": 19290 }, { "epoch": 0.7013591104004652, "grad_norm": 6.101926326751709, "learning_rate": 4.983547799496024e-05, "loss": 0.2338, "step": 19300 }, { "epoch": 0.7017225089032633, "grad_norm": 1.1532049179077148, "learning_rate": 4.983488309711763e-05, "loss": 0.1591, "step": 19310 }, { "epoch": 0.7020859074060615, "grad_norm": 0.8216233253479004, "learning_rate": 4.983428712922828e-05, "loss": 0.1489, "step": 19320 }, { "epoch": 0.7024493059088597, "grad_norm": 1.489461064338684, "learning_rate": 
4.983369009131785e-05, "loss": 0.2048, "step": 19330 }, { "epoch": 0.7028127044116578, "grad_norm": 1.0493615865707397, "learning_rate": 4.983309198341207e-05, "loss": 0.1525, "step": 19340 }, { "epoch": 0.703176102914456, "grad_norm": 10.2578706741333, "learning_rate": 4.983249280553672e-05, "loss": 0.2297, "step": 19350 }, { "epoch": 0.7035395014172542, "grad_norm": 1.5366660356521606, "learning_rate": 4.983189255771761e-05, "loss": 0.1644, "step": 19360 }, { "epoch": 0.7039028999200523, "grad_norm": 1.4915844202041626, "learning_rate": 4.9831291239980596e-05, "loss": 0.1599, "step": 19370 }, { "epoch": 0.7042662984228505, "grad_norm": 1.3012590408325195, "learning_rate": 4.98306888523516e-05, "loss": 0.1907, "step": 19380 }, { "epoch": 0.7046296969256487, "grad_norm": 1.5029476881027222, "learning_rate": 4.983008539485656e-05, "loss": 0.1391, "step": 19390 }, { "epoch": 0.7049930954284468, "grad_norm": 3.0202033519744873, "learning_rate": 4.9829480867521495e-05, "loss": 0.2218, "step": 19400 }, { "epoch": 0.705356493931245, "grad_norm": 1.7761317491531372, "learning_rate": 4.9828875270372434e-05, "loss": 0.1605, "step": 19410 }, { "epoch": 0.7057198924340432, "grad_norm": 1.420793890953064, "learning_rate": 4.9828268603435485e-05, "loss": 1.5838, "step": 19420 }, { "epoch": 0.7060832909368413, "grad_norm": 2.079665422439575, "learning_rate": 4.982766086673678e-05, "loss": 0.2146, "step": 19430 }, { "epoch": 0.7064466894396395, "grad_norm": 2.440471887588501, "learning_rate": 4.98270520603025e-05, "loss": 0.1733, "step": 19440 }, { "epoch": 0.7068100879424377, "grad_norm": 7.773731708526611, "learning_rate": 4.982644218415889e-05, "loss": 0.2126, "step": 19450 }, { "epoch": 0.7071734864452358, "grad_norm": 0.9480405449867249, "learning_rate": 4.982583123833221e-05, "loss": 0.1575, "step": 19460 }, { "epoch": 0.707536884948034, "grad_norm": 12.79196548461914, "learning_rate": 4.982521922284881e-05, "loss": 0.2745, "step": 19470 }, { "epoch": 0.7079002834508322, 
"grad_norm": 4.492150783538818, "learning_rate": 4.982460613773502e-05, "loss": 0.1663, "step": 19480 }, { "epoch": 0.7082636819536303, "grad_norm": 1.2373683452606201, "learning_rate": 4.9823991983017295e-05, "loss": 0.1699, "step": 19490 }, { "epoch": 0.7086270804564285, "grad_norm": 5.8804402351379395, "learning_rate": 4.982337675872207e-05, "loss": 0.242, "step": 19500 }, { "epoch": 0.7089904789592267, "grad_norm": 0.9465837478637695, "learning_rate": 4.982276046487586e-05, "loss": 0.1471, "step": 19510 }, { "epoch": 0.7093538774620248, "grad_norm": 1.6178842782974243, "learning_rate": 4.9822143101505226e-05, "loss": 0.1619, "step": 19520 }, { "epoch": 0.709717275964823, "grad_norm": 2.4963414669036865, "learning_rate": 4.9821524668636766e-05, "loss": 0.1426, "step": 19530 }, { "epoch": 0.7100806744676212, "grad_norm": 1.1380610466003418, "learning_rate": 4.982090516629712e-05, "loss": 0.2364, "step": 19540 }, { "epoch": 0.7104440729704193, "grad_norm": 5.2998046875, "learning_rate": 4.982028459451298e-05, "loss": 0.2661, "step": 19550 }, { "epoch": 0.7108074714732175, "grad_norm": 1.1476637125015259, "learning_rate": 4.9819662953311096e-05, "loss": 0.1306, "step": 19560 }, { "epoch": 0.7111708699760158, "grad_norm": 0.7960777878761292, "learning_rate": 4.981904024271824e-05, "loss": 0.1604, "step": 19570 }, { "epoch": 0.7115342684788138, "grad_norm": 1.9035999774932861, "learning_rate": 4.981841646276124e-05, "loss": 0.1728, "step": 19580 }, { "epoch": 0.711897666981612, "grad_norm": 0.9725393056869507, "learning_rate": 4.981779161346699e-05, "loss": 0.2529, "step": 19590 }, { "epoch": 0.7122610654844101, "grad_norm": 5.759589672088623, "learning_rate": 4.98171656948624e-05, "loss": 0.25, "step": 19600 }, { "epoch": 0.7126244639872084, "grad_norm": 1.3716357946395874, "learning_rate": 4.9816538706974434e-05, "loss": 0.1603, "step": 19610 }, { "epoch": 0.7129878624900066, "grad_norm": 1.4253743886947632, "learning_rate": 4.981591064983011e-05, "loss": 0.1496, 
"step": 19620 }, { "epoch": 0.7133512609928047, "grad_norm": 2.4253408908843994, "learning_rate": 4.98152815234565e-05, "loss": 0.1694, "step": 19630 }, { "epoch": 0.7137146594956029, "grad_norm": 1.212689757347107, "learning_rate": 4.9814651327880696e-05, "loss": 0.1869, "step": 19640 }, { "epoch": 0.7140780579984011, "grad_norm": 7.003270626068115, "learning_rate": 4.981402006312986e-05, "loss": 0.2709, "step": 19650 }, { "epoch": 0.7144414565011992, "grad_norm": 1.6173512935638428, "learning_rate": 4.981338772923119e-05, "loss": 0.1651, "step": 19660 }, { "epoch": 0.7148048550039974, "grad_norm": 2.2197723388671875, "learning_rate": 4.981275432621192e-05, "loss": 0.1657, "step": 19670 }, { "epoch": 0.7151682535067956, "grad_norm": 1.8906898498535156, "learning_rate": 4.981211985409936e-05, "loss": 2.3111, "step": 19680 }, { "epoch": 0.7155316520095937, "grad_norm": 3.50747013092041, "learning_rate": 4.981148431292084e-05, "loss": 0.1498, "step": 19690 }, { "epoch": 0.7158950505123919, "grad_norm": 4.080805778503418, "learning_rate": 4.981084770270373e-05, "loss": 0.2094, "step": 19700 }, { "epoch": 0.7162584490151901, "grad_norm": 2.1056652069091797, "learning_rate": 4.981021002347547e-05, "loss": 0.157, "step": 19710 }, { "epoch": 0.7166218475179882, "grad_norm": 1.07776939868927, "learning_rate": 4.980957127526354e-05, "loss": 0.2049, "step": 19720 }, { "epoch": 0.7169852460207864, "grad_norm": 3.5387072563171387, "learning_rate": 4.980893145809546e-05, "loss": 0.1706, "step": 19730 }, { "epoch": 0.7173486445235846, "grad_norm": 1.5516027212142944, "learning_rate": 4.980829057199879e-05, "loss": 0.1371, "step": 19740 }, { "epoch": 0.7177120430263827, "grad_norm": 6.618633270263672, "learning_rate": 4.9807648617001145e-05, "loss": 0.1833, "step": 19750 }, { "epoch": 0.7180754415291809, "grad_norm": 1.7093079090118408, "learning_rate": 4.980700559313019e-05, "loss": 0.1592, "step": 19760 }, { "epoch": 0.7184388400319791, "grad_norm": 1.1217936277389526, 
"learning_rate": 4.9806361500413626e-05, "loss": 0.145, "step": 19770 }, { "epoch": 0.7188022385347772, "grad_norm": 1.869722604751587, "learning_rate": 4.980571633887921e-05, "loss": 0.1605, "step": 19780 }, { "epoch": 0.7191656370375754, "grad_norm": 1.1555829048156738, "learning_rate": 4.980507010855473e-05, "loss": 0.1539, "step": 19790 }, { "epoch": 0.7195290355403736, "grad_norm": 5.0145111083984375, "learning_rate": 4.9804422809468046e-05, "loss": 0.2334, "step": 19800 }, { "epoch": 0.7195290355403736, "eval_loss": 0.394449919462204, "eval_runtime": 180.0311, "eval_samples_per_second": 41.182, "eval_steps_per_second": 5.149, "eval_wer": 0.2100677110752083, "step": 19800 }, { "epoch": 0.7198924340431717, "grad_norm": 1.0865716934204102, "learning_rate": 4.980377444164702e-05, "loss": 0.1569, "step": 19810 }, { "epoch": 0.7202558325459699, "grad_norm": 1.5475140810012817, "learning_rate": 4.980312500511962e-05, "loss": 0.1268, "step": 19820 }, { "epoch": 0.7206192310487681, "grad_norm": 1.9507659673690796, "learning_rate": 4.980247449991381e-05, "loss": 0.2092, "step": 19830 }, { "epoch": 0.7209826295515662, "grad_norm": 1.185339093208313, "learning_rate": 4.980182292605762e-05, "loss": 0.1432, "step": 19840 }, { "epoch": 0.7213460280543644, "grad_norm": 5.294797420501709, "learning_rate": 4.980117028357912e-05, "loss": 0.2459, "step": 19850 }, { "epoch": 0.7217094265571626, "grad_norm": 2.691941976547241, "learning_rate": 4.980051657250645e-05, "loss": 0.1747, "step": 19860 }, { "epoch": 0.7220728250599607, "grad_norm": 1.3377537727355957, "learning_rate": 4.9799861792867756e-05, "loss": 0.1541, "step": 19870 }, { "epoch": 0.7224362235627589, "grad_norm": 3.39907169342041, "learning_rate": 4.979920594469124e-05, "loss": 0.166, "step": 19880 }, { "epoch": 0.722799622065557, "grad_norm": 1.738271951675415, "learning_rate": 4.9798549028005195e-05, "loss": 0.1591, "step": 19890 }, { "epoch": 0.7231630205683552, "grad_norm": 4.062039852142334, "learning_rate": 
4.9797891042837893e-05, "loss": 0.2372, "step": 19900 }, { "epoch": 0.7235264190711534, "grad_norm": 2.46109676361084, "learning_rate": 4.979723198921771e-05, "loss": 0.1606, "step": 19910 }, { "epoch": 0.7238898175739515, "grad_norm": 1.3511689901351929, "learning_rate": 4.9796571867173017e-05, "loss": 0.148, "step": 19920 }, { "epoch": 0.7242532160767498, "grad_norm": 4.831977844238281, "learning_rate": 4.979591067673227e-05, "loss": 0.1832, "step": 19930 }, { "epoch": 0.724616614579548, "grad_norm": 0.9530340433120728, "learning_rate": 4.979524841792397e-05, "loss": 0.1776, "step": 19940 }, { "epoch": 0.724980013082346, "grad_norm": 2.886121988296509, "learning_rate": 4.979458509077663e-05, "loss": 0.217, "step": 19950 }, { "epoch": 0.7253434115851443, "grad_norm": 2.6050822734832764, "learning_rate": 4.979392069531883e-05, "loss": 0.1709, "step": 19960 }, { "epoch": 0.7257068100879425, "grad_norm": 1.1615772247314453, "learning_rate": 4.979325523157921e-05, "loss": 0.1891, "step": 19970 }, { "epoch": 0.7260702085907406, "grad_norm": 5.947473526000977, "learning_rate": 4.979258869958643e-05, "loss": 0.1685, "step": 19980 }, { "epoch": 0.7264336070935388, "grad_norm": 2.2721457481384277, "learning_rate": 4.979192109936922e-05, "loss": 0.1733, "step": 19990 }, { "epoch": 0.726797005596337, "grad_norm": 2.83907413482666, "learning_rate": 4.979125243095635e-05, "loss": 0.2067, "step": 20000 }, { "epoch": 0.7271604040991351, "grad_norm": 1.84774649143219, "learning_rate": 4.9790582694376605e-05, "loss": 0.1634, "step": 20010 }, { "epoch": 0.7275238026019333, "grad_norm": 3.5162901878356934, "learning_rate": 4.978991188965887e-05, "loss": 0.1546, "step": 20020 }, { "epoch": 0.7278872011047315, "grad_norm": 1.3396214246749878, "learning_rate": 4.9789240016832026e-05, "loss": 0.1549, "step": 20030 }, { "epoch": 0.7282505996075296, "grad_norm": 0.8957159519195557, "learning_rate": 4.978856707592503e-05, "loss": 0.4856, "step": 20040 }, { "epoch": 0.7286139981103278, 
"grad_norm": 3.291719913482666, "learning_rate": 4.978789306696688e-05, "loss": 0.1672, "step": 20050 }, { "epoch": 0.728977396613126, "grad_norm": 1.2237446308135986, "learning_rate": 4.978721798998661e-05, "loss": 0.1547, "step": 20060 }, { "epoch": 0.7293407951159241, "grad_norm": 1.5760120153427124, "learning_rate": 4.978654184501331e-05, "loss": 0.1491, "step": 20070 }, { "epoch": 0.7297041936187223, "grad_norm": 2.661914587020874, "learning_rate": 4.978586463207612e-05, "loss": 0.2399, "step": 20080 }, { "epoch": 0.7300675921215205, "grad_norm": 1.4015228748321533, "learning_rate": 4.978518635120421e-05, "loss": 0.1592, "step": 20090 }, { "epoch": 0.7304309906243186, "grad_norm": 11.479881286621094, "learning_rate": 4.9784507002426793e-05, "loss": 0.2478, "step": 20100 }, { "epoch": 0.7307943891271168, "grad_norm": 2.3282432556152344, "learning_rate": 4.9783826585773164e-05, "loss": 0.1565, "step": 20110 }, { "epoch": 0.731157787629915, "grad_norm": 1.0281476974487305, "learning_rate": 4.9783145101272625e-05, "loss": 2.6872, "step": 20120 }, { "epoch": 0.7315211861327131, "grad_norm": 1.4759191274642944, "learning_rate": 4.978246254895455e-05, "loss": 0.1755, "step": 20130 }, { "epoch": 0.7318845846355113, "grad_norm": 1.1100878715515137, "learning_rate": 4.978177892884833e-05, "loss": 0.1519, "step": 20140 }, { "epoch": 0.7322479831383095, "grad_norm": 5.326310157775879, "learning_rate": 4.9781094240983435e-05, "loss": 0.257, "step": 20150 }, { "epoch": 0.7326113816411076, "grad_norm": 8.199230194091797, "learning_rate": 4.978040848538936e-05, "loss": 0.192, "step": 20160 }, { "epoch": 0.7329747801439058, "grad_norm": 1.579663872718811, "learning_rate": 4.9779721662095654e-05, "loss": 0.1738, "step": 20170 }, { "epoch": 0.733338178646704, "grad_norm": 3.319883346557617, "learning_rate": 4.97790337711319e-05, "loss": 0.1809, "step": 20180 }, { "epoch": 0.7337015771495021, "grad_norm": 1.4813331365585327, "learning_rate": 4.977834481252776e-05, "loss": 0.1645, 
"step": 20190 }, { "epoch": 0.7340649756523003, "grad_norm": 4.392731666564941, "learning_rate": 4.9777654786312886e-05, "loss": 0.1897, "step": 20200 }, { "epoch": 0.7344283741550984, "grad_norm": 1.7336299419403076, "learning_rate": 4.9776963692517034e-05, "loss": 0.1751, "step": 20210 }, { "epoch": 0.7347917726578966, "grad_norm": 1.6261765956878662, "learning_rate": 4.977627153116998e-05, "loss": 0.156, "step": 20220 }, { "epoch": 0.7351551711606948, "grad_norm": 1.9801748991012573, "learning_rate": 4.977557830230153e-05, "loss": 0.2069, "step": 20230 }, { "epoch": 0.7355185696634929, "grad_norm": 1.4615390300750732, "learning_rate": 4.977488400594157e-05, "loss": 0.1458, "step": 20240 }, { "epoch": 0.7358819681662911, "grad_norm": 3.78981876373291, "learning_rate": 4.977418864212e-05, "loss": 0.1765, "step": 20250 }, { "epoch": 0.7362453666690894, "grad_norm": 0.813947319984436, "learning_rate": 4.97734922108668e-05, "loss": 0.1482, "step": 20260 }, { "epoch": 0.7366087651718874, "grad_norm": 1.1082271337509155, "learning_rate": 4.977279471221195e-05, "loss": 0.149, "step": 20270 }, { "epoch": 0.7369721636746857, "grad_norm": 4.023866176605225, "learning_rate": 4.9772096146185527e-05, "loss": 0.1797, "step": 20280 }, { "epoch": 0.7373355621774839, "grad_norm": 1.3649333715438843, "learning_rate": 4.977139651281762e-05, "loss": 0.182, "step": 20290 }, { "epoch": 0.737698960680282, "grad_norm": 8.213293075561523, "learning_rate": 4.977069581213837e-05, "loss": 0.2117, "step": 20300 }, { "epoch": 0.7380623591830802, "grad_norm": 1.0769990682601929, "learning_rate": 4.9769994044177976e-05, "loss": 0.1689, "step": 20310 }, { "epoch": 0.7384257576858784, "grad_norm": 1.712949275970459, "learning_rate": 4.9769291208966674e-05, "loss": 0.1402, "step": 20320 }, { "epoch": 0.7387891561886765, "grad_norm": 2.213164806365967, "learning_rate": 4.976858730653473e-05, "loss": 0.193, "step": 20330 }, { "epoch": 0.7391525546914747, "grad_norm": 1.9228605031967163, 
"learning_rate": 4.97678823369125e-05, "loss": 0.1517, "step": 20340 }, { "epoch": 0.7395159531942729, "grad_norm": 8.813825607299805, "learning_rate": 4.976717630013034e-05, "loss": 0.2682, "step": 20350 }, { "epoch": 0.739879351697071, "grad_norm": 1.9778189659118652, "learning_rate": 4.976646919621867e-05, "loss": 0.1701, "step": 20360 }, { "epoch": 0.7402427501998692, "grad_norm": 1.8553961515426636, "learning_rate": 4.976576102520797e-05, "loss": 0.1455, "step": 20370 }, { "epoch": 0.7406061487026674, "grad_norm": 3.1159512996673584, "learning_rate": 4.976505178712874e-05, "loss": 0.2252, "step": 20380 }, { "epoch": 0.7409695472054655, "grad_norm": 1.9035766124725342, "learning_rate": 4.9764341482011545e-05, "loss": 0.1815, "step": 20390 }, { "epoch": 0.7413329457082637, "grad_norm": 2.228940725326538, "learning_rate": 4.976363010988698e-05, "loss": 0.1995, "step": 20400 }, { "epoch": 0.7413329457082637, "eval_loss": 0.35944151878356934, "eval_runtime": 179.8589, "eval_samples_per_second": 41.221, "eval_steps_per_second": 5.154, "eval_wer": 0.200864087715795, "step": 20400 }, { "epoch": 0.7416963442110619, "grad_norm": 1.5204256772994995, "learning_rate": 4.976291767078571e-05, "loss": 1.6497, "step": 20410 }, { "epoch": 0.74205974271386, "grad_norm": 1.3520594835281372, "learning_rate": 4.976220416473842e-05, "loss": 0.1503, "step": 20420 }, { "epoch": 0.7424231412166582, "grad_norm": 2.7322440147399902, "learning_rate": 4.976148959177586e-05, "loss": 0.1784, "step": 20430 }, { "epoch": 0.7427865397194564, "grad_norm": 1.3193668127059937, "learning_rate": 4.9760773951928815e-05, "loss": 0.1685, "step": 20440 }, { "epoch": 0.7431499382222545, "grad_norm": 11.000434875488281, "learning_rate": 4.976005724522812e-05, "loss": 0.2147, "step": 20450 }, { "epoch": 0.7435133367250527, "grad_norm": 1.1825796365737915, "learning_rate": 4.9759339471704656e-05, "loss": 0.2116, "step": 20460 }, { "epoch": 0.7438767352278509, "grad_norm": 1.1518877744674683, 
"learning_rate": 4.975862063138934e-05, "loss": 0.141, "step": 20470 }, { "epoch": 0.744240133730649, "grad_norm": 6.054372310638428, "learning_rate": 4.975790072431316e-05, "loss": 0.1766, "step": 20480 }, { "epoch": 0.7446035322334472, "grad_norm": 1.0629233121871948, "learning_rate": 4.975717975050713e-05, "loss": 0.1641, "step": 20490 }, { "epoch": 0.7449669307362453, "grad_norm": 2.4782843589782715, "learning_rate": 4.97564577100023e-05, "loss": 0.2186, "step": 20500 }, { "epoch": 0.7453303292390435, "grad_norm": 1.5713534355163574, "learning_rate": 4.975573460282979e-05, "loss": 0.1535, "step": 20510 }, { "epoch": 0.7456937277418417, "grad_norm": 0.7279618382453918, "learning_rate": 4.975501042902078e-05, "loss": 0.1372, "step": 20520 }, { "epoch": 0.7460571262446398, "grad_norm": 5.573297500610352, "learning_rate": 4.975428518860643e-05, "loss": 0.161, "step": 20530 }, { "epoch": 0.746420524747438, "grad_norm": 1.022141695022583, "learning_rate": 4.975355888161801e-05, "loss": 0.1645, "step": 20540 }, { "epoch": 0.7467839232502362, "grad_norm": 2.9584996700286865, "learning_rate": 4.9752831508086805e-05, "loss": 0.2085, "step": 20550 }, { "epoch": 0.7471473217530343, "grad_norm": 2.2749557495117188, "learning_rate": 4.975210306804418e-05, "loss": 0.1531, "step": 20560 }, { "epoch": 0.7475107202558325, "grad_norm": 1.877822995185852, "learning_rate": 4.9751373561521484e-05, "loss": 0.1654, "step": 20570 }, { "epoch": 0.7478741187586307, "grad_norm": 7.727886199951172, "learning_rate": 4.975064298855017e-05, "loss": 0.2026, "step": 20580 }, { "epoch": 0.7482375172614288, "grad_norm": 1.2424033880233765, "learning_rate": 4.974991134916171e-05, "loss": 0.1834, "step": 20590 }, { "epoch": 0.748600915764227, "grad_norm": 7.272613525390625, "learning_rate": 4.974917864338764e-05, "loss": 0.2266, "step": 20600 }, { "epoch": 0.7489643142670253, "grad_norm": 0.6424925327301025, "learning_rate": 4.974844487125952e-05, "loss": 0.1496, "step": 20610 }, { "epoch": 
0.7493277127698234, "grad_norm": 2.064819097518921, "learning_rate": 4.974771003280896e-05, "loss": 0.192, "step": 20620 }, { "epoch": 0.7496911112726216, "grad_norm": 2.55157470703125, "learning_rate": 4.974697412806763e-05, "loss": 0.1863, "step": 20630 }, { "epoch": 0.7500545097754198, "grad_norm": 1.10732901096344, "learning_rate": 4.974623715706723e-05, "loss": 0.1452, "step": 20640 }, { "epoch": 0.7504179082782179, "grad_norm": 6.665337562561035, "learning_rate": 4.9745499119839526e-05, "loss": 0.2393, "step": 20650 }, { "epoch": 0.7507813067810161, "grad_norm": 2.315764904022217, "learning_rate": 4.974476001641631e-05, "loss": 0.1724, "step": 20660 }, { "epoch": 0.7511447052838143, "grad_norm": 1.7643327713012695, "learning_rate": 4.974401984682942e-05, "loss": 0.1676, "step": 20670 }, { "epoch": 0.7515081037866124, "grad_norm": 2.556265115737915, "learning_rate": 4.974327861111075e-05, "loss": 0.1706, "step": 20680 }, { "epoch": 0.7518715022894106, "grad_norm": 1.0939987897872925, "learning_rate": 4.9742536309292257e-05, "loss": 0.1514, "step": 20690 }, { "epoch": 0.7522349007922088, "grad_norm": 2.3087685108184814, "learning_rate": 4.97417929414059e-05, "loss": 0.2064, "step": 20700 }, { "epoch": 0.7525982992950069, "grad_norm": 1.6968719959259033, "learning_rate": 4.974104850748372e-05, "loss": 0.65, "step": 20710 }, { "epoch": 0.7529616977978051, "grad_norm": 1.3144559860229492, "learning_rate": 4.974030300755779e-05, "loss": 3.2825, "step": 20720 }, { "epoch": 0.7533250963006033, "grad_norm": 2.346266031265259, "learning_rate": 4.973955644166022e-05, "loss": 0.1621, "step": 20730 }, { "epoch": 0.7536884948034014, "grad_norm": 0.8026605248451233, "learning_rate": 4.973880880982319e-05, "loss": 0.1566, "step": 20740 }, { "epoch": 0.7540518933061996, "grad_norm": 8.70439624786377, "learning_rate": 4.973806011207891e-05, "loss": 0.2671, "step": 20750 }, { "epoch": 0.7544152918089978, "grad_norm": 0.9762817025184631, "learning_rate": 4.973731034845964e-05, 
"loss": 0.1692, "step": 20760 }, { "epoch": 0.7547786903117959, "grad_norm": 1.3316736221313477, "learning_rate": 4.973655951899768e-05, "loss": 0.1605, "step": 20770 }, { "epoch": 0.7551420888145941, "grad_norm": 1.9772186279296875, "learning_rate": 4.9735807623725394e-05, "loss": 0.1551, "step": 20780 }, { "epoch": 0.7555054873173922, "grad_norm": 1.4639058113098145, "learning_rate": 4.9735054662675154e-05, "loss": 0.2075, "step": 20790 }, { "epoch": 0.7558688858201904, "grad_norm": 10.605428695678711, "learning_rate": 4.973430063587943e-05, "loss": 0.2542, "step": 20800 }, { "epoch": 0.7562322843229886, "grad_norm": 1.9553091526031494, "learning_rate": 4.9733545543370684e-05, "loss": 0.1353, "step": 20810 }, { "epoch": 0.7565956828257867, "grad_norm": 2.2855403423309326, "learning_rate": 4.9732789385181466e-05, "loss": 0.5004, "step": 20820 }, { "epoch": 0.7569590813285849, "grad_norm": 1.7468841075897217, "learning_rate": 4.973203216134435e-05, "loss": 0.1433, "step": 20830 }, { "epoch": 0.7573224798313831, "grad_norm": 0.9522268772125244, "learning_rate": 4.973127387189197e-05, "loss": 0.1488, "step": 20840 }, { "epoch": 0.7576858783341812, "grad_norm": 13.445122718811035, "learning_rate": 4.9730514516856996e-05, "loss": 0.2154, "step": 20850 }, { "epoch": 0.7580492768369794, "grad_norm": 1.0712549686431885, "learning_rate": 4.972975409627214e-05, "loss": 0.144, "step": 20860 }, { "epoch": 0.7584126753397776, "grad_norm": 0.6894069314002991, "learning_rate": 4.972899261017017e-05, "loss": 0.1612, "step": 20870 }, { "epoch": 0.7587760738425757, "grad_norm": 2.059844970703125, "learning_rate": 4.9728230058583893e-05, "loss": 0.1664, "step": 20880 }, { "epoch": 0.7591394723453739, "grad_norm": 2.0392911434173584, "learning_rate": 4.972746644154616e-05, "loss": 0.1991, "step": 20890 }, { "epoch": 0.7595028708481721, "grad_norm": 2.9800570011138916, "learning_rate": 4.972670175908989e-05, "loss": 0.2725, "step": 20900 }, { "epoch": 0.7598662693509702, "grad_norm": 
2.390784502029419, "learning_rate": 4.972593601124801e-05, "loss": 0.3158, "step": 20910 }, { "epoch": 0.7602296678537684, "grad_norm": 6.595739364624023, "learning_rate": 4.972516919805352e-05, "loss": 0.1658, "step": 20920 }, { "epoch": 0.7605930663565666, "grad_norm": 2.2043120861053467, "learning_rate": 4.972440131953947e-05, "loss": 0.163, "step": 20930 }, { "epoch": 0.7609564648593647, "grad_norm": 0.9223461747169495, "learning_rate": 4.972363237573894e-05, "loss": 0.1276, "step": 20940 }, { "epoch": 0.761319863362163, "grad_norm": 12.165254592895508, "learning_rate": 4.972286236668505e-05, "loss": 0.2105, "step": 20950 }, { "epoch": 0.7616832618649612, "grad_norm": 1.2093875408172607, "learning_rate": 4.9722091292410984e-05, "loss": 0.1697, "step": 20960 }, { "epoch": 0.7620466603677593, "grad_norm": 0.8847984075546265, "learning_rate": 4.9721396414828535e-05, "loss": 3.043, "step": 20970 }, { "epoch": 0.7624100588705575, "grad_norm": 1.6682274341583252, "learning_rate": 4.9720623316727705e-05, "loss": 0.1841, "step": 20980 }, { "epoch": 0.7627734573733557, "grad_norm": 1.2780869007110596, "learning_rate": 4.971984915350317e-05, "loss": 0.1412, "step": 20990 }, { "epoch": 0.7631368558761538, "grad_norm": 36.68233108520508, "learning_rate": 4.97190739251883e-05, "loss": 0.2059, "step": 21000 }, { "epoch": 0.7631368558761538, "eval_loss": 0.3906314969062805, "eval_runtime": 180.3915, "eval_samples_per_second": 41.1, "eval_steps_per_second": 5.139, "eval_wer": 0.21157441864686768, "step": 21000 }, { "epoch": 0.763500254378952, "grad_norm": 24.036775588989258, "learning_rate": 4.971829763181647e-05, "loss": 0.3942, "step": 21010 }, { "epoch": 0.7638636528817502, "grad_norm": 1.6546601057052612, "learning_rate": 4.971752027342115e-05, "loss": 0.1555, "step": 21020 }, { "epoch": 0.7642270513845483, "grad_norm": 3.100032091140747, "learning_rate": 4.971674185003583e-05, "loss": 0.1917, "step": 21030 }, { "epoch": 0.7645904498873465, "grad_norm": 3.2824084758758545, 
"learning_rate": 4.9715962361694045e-05, "loss": 0.1744, "step": 21040 }, { "epoch": 0.7649538483901447, "grad_norm": 7.680720329284668, "learning_rate": 4.9715181808429376e-05, "loss": 0.2567, "step": 21050 }, { "epoch": 0.7653172468929428, "grad_norm": 1.5478154420852661, "learning_rate": 4.971440019027547e-05, "loss": 0.1949, "step": 21060 }, { "epoch": 0.765680645395741, "grad_norm": 1.1294565200805664, "learning_rate": 4.971361750726598e-05, "loss": 0.1546, "step": 21070 }, { "epoch": 0.7660440438985391, "grad_norm": 3.339749813079834, "learning_rate": 4.971283375943465e-05, "loss": 0.1784, "step": 21080 }, { "epoch": 0.7664074424013373, "grad_norm": 1.9784200191497803, "learning_rate": 4.9712048946815244e-05, "loss": 0.8969, "step": 21090 }, { "epoch": 0.7667708409041355, "grad_norm": 13.550655364990234, "learning_rate": 4.971126306944157e-05, "loss": 0.2037, "step": 21100 }, { "epoch": 0.7671342394069336, "grad_norm": 60.52021408081055, "learning_rate": 4.971047612734749e-05, "loss": 1.0649, "step": 21110 }, { "epoch": 0.7674976379097318, "grad_norm": 1.7544801235198975, "learning_rate": 4.970968812056693e-05, "loss": 0.1619, "step": 21120 }, { "epoch": 0.76786103641253, "grad_norm": 2.0749471187591553, "learning_rate": 4.970889904913382e-05, "loss": 0.1934, "step": 21130 }, { "epoch": 0.7682244349153281, "grad_norm": 2.33097767829895, "learning_rate": 4.970810891308215e-05, "loss": 0.3121, "step": 21140 }, { "epoch": 0.7685878334181263, "grad_norm": 3.5586440563201904, "learning_rate": 4.9707317712445996e-05, "loss": 0.2198, "step": 21150 }, { "epoch": 0.7689512319209245, "grad_norm": 1.7430351972579956, "learning_rate": 4.970652544725942e-05, "loss": 0.1884, "step": 21160 }, { "epoch": 0.7693146304237226, "grad_norm": 1.2475924491882324, "learning_rate": 4.9705732117556574e-05, "loss": 0.183, "step": 21170 }, { "epoch": 0.7696780289265208, "grad_norm": 1.369491457939148, "learning_rate": 4.970493772337164e-05, "loss": 0.1854, "step": 21180 }, { "epoch": 
0.770041427429319, "grad_norm": 1.8093339204788208, "learning_rate": 4.970414226473883e-05, "loss": 0.1389, "step": 21190 }, { "epoch": 0.7704048259321171, "grad_norm": 15.3746919631958, "learning_rate": 4.9703345741692425e-05, "loss": 0.2603, "step": 21200 }, { "epoch": 0.7707682244349153, "grad_norm": 0.9604819416999817, "learning_rate": 4.970254815426675e-05, "loss": 0.1663, "step": 21210 }, { "epoch": 0.7711316229377135, "grad_norm": 1.3457413911819458, "learning_rate": 4.970174950249617e-05, "loss": 0.1784, "step": 21220 }, { "epoch": 0.7714950214405116, "grad_norm": 3.19975209236145, "learning_rate": 4.970094978641509e-05, "loss": 0.2369, "step": 21230 }, { "epoch": 0.7718584199433098, "grad_norm": 1.4974329471588135, "learning_rate": 4.970014900605797e-05, "loss": 0.1553, "step": 21240 }, { "epoch": 0.772221818446108, "grad_norm": 6.426448345184326, "learning_rate": 4.969934716145932e-05, "loss": 0.1848, "step": 21250 }, { "epoch": 0.7725852169489061, "grad_norm": 4.081672668457031, "learning_rate": 4.969854425265368e-05, "loss": 0.2135, "step": 21260 }, { "epoch": 0.7729486154517043, "grad_norm": 0.7796603441238403, "learning_rate": 4.9697740279675635e-05, "loss": 0.2853, "step": 21270 }, { "epoch": 0.7733120139545026, "grad_norm": 1.2303035259246826, "learning_rate": 4.969693524255984e-05, "loss": 0.5319, "step": 21280 }, { "epoch": 0.7736754124573006, "grad_norm": 0.9134958386421204, "learning_rate": 4.9696129141340986e-05, "loss": 0.1789, "step": 21290 }, { "epoch": 0.7740388109600989, "grad_norm": 1.8099846839904785, "learning_rate": 4.969532197605379e-05, "loss": 0.1967, "step": 21300 }, { "epoch": 0.7744022094628971, "grad_norm": 3.75593900680542, "learning_rate": 4.969451374673304e-05, "loss": 0.1908, "step": 21310 }, { "epoch": 0.7747656079656952, "grad_norm": 2.851921319961548, "learning_rate": 4.969370445341355e-05, "loss": 0.1616, "step": 21320 }, { "epoch": 0.7751290064684934, "grad_norm": 2.978349447250366, "learning_rate": 
4.96928940961302e-05, "loss": 0.1682, "step": 21330 }, { "epoch": 0.7754924049712916, "grad_norm": 2.945326089859009, "learning_rate": 4.96920826749179e-05, "loss": 0.1897, "step": 21340 }, { "epoch": 0.7758558034740897, "grad_norm": 5.529159069061279, "learning_rate": 4.9691270189811614e-05, "loss": 0.2351, "step": 21350 }, { "epoch": 0.7762192019768879, "grad_norm": 0.816582441329956, "learning_rate": 4.969045664084634e-05, "loss": 0.255, "step": 21360 }, { "epoch": 0.776582600479686, "grad_norm": 3.373413324356079, "learning_rate": 4.968964202805715e-05, "loss": 0.165, "step": 21370 }, { "epoch": 0.7769459989824842, "grad_norm": 1.4986653327941895, "learning_rate": 4.968882635147912e-05, "loss": 0.1803, "step": 21380 }, { "epoch": 0.7773093974852824, "grad_norm": 4.049030303955078, "learning_rate": 4.968800961114741e-05, "loss": 0.2312, "step": 21390 }, { "epoch": 0.7776727959880805, "grad_norm": 1.8616725206375122, "learning_rate": 4.968719180709721e-05, "loss": 0.2038, "step": 21400 }, { "epoch": 0.7780361944908787, "grad_norm": 0.7410339117050171, "learning_rate": 4.968637293936374e-05, "loss": 0.1736, "step": 21410 }, { "epoch": 0.7783995929936769, "grad_norm": 0.9004227519035339, "learning_rate": 4.968555300798231e-05, "loss": 0.6926, "step": 21420 }, { "epoch": 0.778762991496475, "grad_norm": 1.9912917613983154, "learning_rate": 4.968473201298822e-05, "loss": 0.183, "step": 21430 }, { "epoch": 0.7791263899992732, "grad_norm": 1.5098110437393188, "learning_rate": 4.968390995441686e-05, "loss": 0.1555, "step": 21440 }, { "epoch": 0.7794897885020714, "grad_norm": 1.5687317848205566, "learning_rate": 4.9683086832303655e-05, "loss": 0.199, "step": 21450 }, { "epoch": 0.7798531870048695, "grad_norm": 1.456758975982666, "learning_rate": 4.9682262646684054e-05, "loss": 0.1573, "step": 21460 }, { "epoch": 0.7802165855076677, "grad_norm": 1.152894377708435, "learning_rate": 4.9681437397593575e-05, "loss": 0.136, "step": 21470 }, { "epoch": 0.7805799840104659, 
"grad_norm": 6.458597183227539, "learning_rate": 4.968061108506777e-05, "loss": 0.2111, "step": 21480 }, { "epoch": 0.780943382513264, "grad_norm": 1.3398655652999878, "learning_rate": 4.967978370914226e-05, "loss": 0.1785, "step": 21490 }, { "epoch": 0.7813067810160622, "grad_norm": 12.363832473754883, "learning_rate": 4.967895526985267e-05, "loss": 0.217, "step": 21500 }, { "epoch": 0.7816701795188604, "grad_norm": 3.800936698913574, "learning_rate": 4.967812576723471e-05, "loss": 0.1533, "step": 21510 }, { "epoch": 0.7820335780216585, "grad_norm": 0.9531782865524292, "learning_rate": 4.967729520132411e-05, "loss": 1.353, "step": 21520 }, { "epoch": 0.7823969765244567, "grad_norm": 1.3066377639770508, "learning_rate": 4.967646357215667e-05, "loss": 0.1338, "step": 21530 }, { "epoch": 0.7827603750272549, "grad_norm": 1.1814554929733276, "learning_rate": 4.967563087976821e-05, "loss": 0.1735, "step": 21540 }, { "epoch": 0.783123773530053, "grad_norm": 4.6233367919921875, "learning_rate": 4.967479712419461e-05, "loss": 0.2266, "step": 21550 }, { "epoch": 0.7834871720328512, "grad_norm": 1.366377353668213, "learning_rate": 4.96739623054718e-05, "loss": 0.1595, "step": 21560 }, { "epoch": 0.7838505705356494, "grad_norm": 2.0722217559814453, "learning_rate": 4.967312642363574e-05, "loss": 0.1721, "step": 21570 }, { "epoch": 0.7842139690384475, "grad_norm": 2.186340570449829, "learning_rate": 4.967228947872245e-05, "loss": 0.1653, "step": 21580 }, { "epoch": 0.7845773675412457, "grad_norm": 2.4222512245178223, "learning_rate": 4.9671451470767996e-05, "loss": 0.1446, "step": 21590 }, { "epoch": 0.784940766044044, "grad_norm": 62.15577697753906, "learning_rate": 4.9670612399808467e-05, "loss": 0.2911, "step": 21600 }, { "epoch": 0.784940766044044, "eval_loss": 0.3627218008041382, "eval_runtime": 179.8971, "eval_samples_per_second": 41.212, "eval_steps_per_second": 5.153, "eval_wer": 0.21580409170947773, "step": 21600 }, { "epoch": 0.785304164546842, "grad_norm": 
1.236609935760498, "learning_rate": 4.9669772265880044e-05, "loss": 0.1417, "step": 21610 }, { "epoch": 0.7856675630496402, "grad_norm": 1.2447402477264404, "learning_rate": 4.96689310690189e-05, "loss": 0.1508, "step": 21620 }, { "epoch": 0.7860309615524385, "grad_norm": 4.567975997924805, "learning_rate": 4.966808880926129e-05, "loss": 0.3503, "step": 21630 }, { "epoch": 0.7863943600552366, "grad_norm": 0.9699403047561646, "learning_rate": 4.96672454866435e-05, "loss": 0.1615, "step": 21640 }, { "epoch": 0.7867577585580348, "grad_norm": 11.004621505737305, "learning_rate": 4.966640110120187e-05, "loss": 0.2604, "step": 21650 }, { "epoch": 0.7871211570608329, "grad_norm": 1.3322606086730957, "learning_rate": 4.9665555652972784e-05, "loss": 0.1958, "step": 21660 }, { "epoch": 0.7874845555636311, "grad_norm": 1.0020729303359985, "learning_rate": 4.966470914199266e-05, "loss": 0.1207, "step": 21670 }, { "epoch": 0.7878479540664293, "grad_norm": 3.457019567489624, "learning_rate": 4.9663861568297976e-05, "loss": 0.3774, "step": 21680 }, { "epoch": 0.7882113525692274, "grad_norm": 2.4993362426757812, "learning_rate": 4.9663012931925254e-05, "loss": 0.1537, "step": 21690 }, { "epoch": 0.7885747510720256, "grad_norm": 11.104598999023438, "learning_rate": 4.966216323291106e-05, "loss": 0.2472, "step": 21700 }, { "epoch": 0.7889381495748238, "grad_norm": 1.5027676820755005, "learning_rate": 4.9661312471291996e-05, "loss": 0.154, "step": 21710 }, { "epoch": 0.7893015480776219, "grad_norm": 1.1929068565368652, "learning_rate": 4.9660460647104726e-05, "loss": 0.1416, "step": 21720 }, { "epoch": 0.7896649465804201, "grad_norm": 17.008617401123047, "learning_rate": 4.965960776038594e-05, "loss": 0.3858, "step": 21730 }, { "epoch": 0.7900283450832183, "grad_norm": 1.6043013334274292, "learning_rate": 4.96587538111724e-05, "loss": 0.1624, "step": 21740 }, { "epoch": 0.7903917435860164, "grad_norm": 10.960922241210938, "learning_rate": 4.96578987995009e-05, "loss": 0.2034, "step": 
21750 }, { "epoch": 0.7907551420888146, "grad_norm": 1.4807969331741333, "learning_rate": 4.965704272540826e-05, "loss": 0.1491, "step": 21760 }, { "epoch": 0.7911185405916128, "grad_norm": 0.9724571108818054, "learning_rate": 4.965618558893139e-05, "loss": 0.1455, "step": 21770 }, { "epoch": 0.7914819390944109, "grad_norm": 2.6035313606262207, "learning_rate": 4.965532739010722e-05, "loss": 0.1696, "step": 21780 }, { "epoch": 0.7918453375972091, "grad_norm": 0.7998749017715454, "learning_rate": 4.9654468128972695e-05, "loss": 0.1549, "step": 21790 }, { "epoch": 0.7922087361000073, "grad_norm": 14.13917350769043, "learning_rate": 4.965360780556487e-05, "loss": 0.2124, "step": 21800 }, { "epoch": 0.7925721346028054, "grad_norm": 1.88883638381958, "learning_rate": 4.9652746419920804e-05, "loss": 0.1475, "step": 21810 }, { "epoch": 0.7929355331056036, "grad_norm": 1.5585650205612183, "learning_rate": 4.965188397207761e-05, "loss": 0.1534, "step": 21820 }, { "epoch": 0.7932989316084018, "grad_norm": 2.6418206691741943, "learning_rate": 4.965102046207244e-05, "loss": 0.1608, "step": 21830 }, { "epoch": 0.7936623301111999, "grad_norm": 1.1672085523605347, "learning_rate": 4.965015588994251e-05, "loss": 0.1596, "step": 21840 }, { "epoch": 0.7940257286139981, "grad_norm": 3.009610652923584, "learning_rate": 4.964929025572507e-05, "loss": 0.1805, "step": 21850 }, { "epoch": 0.7943891271167963, "grad_norm": 1.8774985074996948, "learning_rate": 4.964842355945742e-05, "loss": 0.1583, "step": 21860 }, { "epoch": 0.7947525256195944, "grad_norm": 1.1219382286071777, "learning_rate": 4.964755580117689e-05, "loss": 0.1524, "step": 21870 }, { "epoch": 0.7951159241223926, "grad_norm": 6.0511627197265625, "learning_rate": 4.964668698092088e-05, "loss": 0.2349, "step": 21880 }, { "epoch": 0.7954793226251908, "grad_norm": 3.4487464427948, "learning_rate": 4.9645817098726824e-05, "loss": 0.1915, "step": 21890 }, { "epoch": 0.7958427211279889, "grad_norm": 4.096559524536133, 
"learning_rate": 4.9644946154632196e-05, "loss": 0.2067, "step": 21900 }, { "epoch": 0.7962061196307871, "grad_norm": 4.144627571105957, "learning_rate": 4.9644074148674526e-05, "loss": 0.1564, "step": 21910 }, { "epoch": 0.7965695181335853, "grad_norm": 1.3851386308670044, "learning_rate": 4.9643201080891384e-05, "loss": 0.1656, "step": 21920 }, { "epoch": 0.7969329166363834, "grad_norm": 1.3050576448440552, "learning_rate": 4.9642326951320384e-05, "loss": 0.1555, "step": 21930 }, { "epoch": 0.7972963151391816, "grad_norm": 1.578134298324585, "learning_rate": 4.96414517599992e-05, "loss": 0.1637, "step": 21940 }, { "epoch": 0.7976597136419797, "grad_norm": 10.813237190246582, "learning_rate": 4.9640575506965535e-05, "loss": 0.3143, "step": 21950 }, { "epoch": 0.798023112144778, "grad_norm": 0.7118828892707825, "learning_rate": 4.963969819225713e-05, "loss": 0.1581, "step": 21960 }, { "epoch": 0.7983865106475762, "grad_norm": 1.389856219291687, "learning_rate": 4.963881981591182e-05, "loss": 0.1466, "step": 21970 }, { "epoch": 0.7987499091503742, "grad_norm": 1.1921494007110596, "learning_rate": 4.963794037796741e-05, "loss": 0.1604, "step": 21980 }, { "epoch": 0.7991133076531725, "grad_norm": 4.355441093444824, "learning_rate": 4.963705987846182e-05, "loss": 0.1792, "step": 21990 }, { "epoch": 0.7994767061559707, "grad_norm": 8.20235824584961, "learning_rate": 4.963617831743298e-05, "loss": 0.2314, "step": 22000 }, { "epoch": 0.7998401046587688, "grad_norm": 1.3720426559448242, "learning_rate": 4.963529569491887e-05, "loss": 0.1378, "step": 22010 }, { "epoch": 0.800203503161567, "grad_norm": 1.490679383277893, "learning_rate": 4.963441201095752e-05, "loss": 0.1505, "step": 22020 }, { "epoch": 0.8005669016643652, "grad_norm": 1.576416254043579, "learning_rate": 4.963352726558701e-05, "loss": 0.1379, "step": 22030 }, { "epoch": 0.8009303001671633, "grad_norm": 1.547780156135559, "learning_rate": 4.9632641458845454e-05, "loss": 0.1584, "step": 22040 }, { "epoch": 
0.8012936986699615, "grad_norm": 41.95133972167969, "learning_rate": 4.963175459077102e-05, "loss": 0.6762, "step": 22050 }, { "epoch": 0.8016570971727597, "grad_norm": 0.8984355330467224, "learning_rate": 4.963086666140192e-05, "loss": 0.1513, "step": 22060 }, { "epoch": 0.8020204956755578, "grad_norm": 1.6865235567092896, "learning_rate": 4.9629977670776404e-05, "loss": 0.1659, "step": 22070 }, { "epoch": 0.802383894178356, "grad_norm": 5.291965007781982, "learning_rate": 4.96290876189328e-05, "loss": 0.1735, "step": 22080 }, { "epoch": 0.8027472926811542, "grad_norm": 0.9124179482460022, "learning_rate": 4.962819650590943e-05, "loss": 0.163, "step": 22090 }, { "epoch": 0.8031106911839523, "grad_norm": 5.151334762573242, "learning_rate": 4.9627304331744705e-05, "loss": 0.2997, "step": 22100 }, { "epoch": 0.8034740896867505, "grad_norm": 0.7093039155006409, "learning_rate": 4.9626411096477066e-05, "loss": 0.1297, "step": 22110 }, { "epoch": 0.8038374881895487, "grad_norm": 0.7643496990203857, "learning_rate": 4.962551680014499e-05, "loss": 0.1568, "step": 22120 }, { "epoch": 0.8042008866923468, "grad_norm": 2.0619888305664062, "learning_rate": 4.9624621442787005e-05, "loss": 0.1685, "step": 22130 }, { "epoch": 0.804564285195145, "grad_norm": 1.3836963176727295, "learning_rate": 4.9623725024441704e-05, "loss": 0.1597, "step": 22140 }, { "epoch": 0.8049276836979432, "grad_norm": 10.014172554016113, "learning_rate": 4.96228275451477e-05, "loss": 0.2371, "step": 22150 }, { "epoch": 0.8052910822007413, "grad_norm": 0.8201650381088257, "learning_rate": 4.962192900494367e-05, "loss": 0.1457, "step": 22160 }, { "epoch": 0.8056544807035395, "grad_norm": 2.9909164905548096, "learning_rate": 4.962102940386832e-05, "loss": 0.1584, "step": 22170 }, { "epoch": 0.8060178792063377, "grad_norm": 1.8986990451812744, "learning_rate": 4.9620128741960414e-05, "loss": 0.1521, "step": 22180 }, { "epoch": 0.8063812777091358, "grad_norm": 1.2521679401397705, "learning_rate": 
4.9619227019258766e-05, "loss": 0.1398, "step": 22190 }, { "epoch": 0.806744676211934, "grad_norm": 9.087230682373047, "learning_rate": 4.9618324235802214e-05, "loss": 0.2414, "step": 22200 }, { "epoch": 0.806744676211934, "eval_loss": 0.3814217448234558, "eval_runtime": 180.9296, "eval_samples_per_second": 40.977, "eval_steps_per_second": 5.124, "eval_wer": 0.21859059306188394, "step": 22200 }, { "epoch": 0.8071080747147322, "grad_norm": 1.3065155744552612, "learning_rate": 4.9617420391629666e-05, "loss": 0.1382, "step": 22210 }, { "epoch": 0.8074714732175303, "grad_norm": 1.0691299438476562, "learning_rate": 4.961651548678006e-05, "loss": 0.1692, "step": 22220 }, { "epoch": 0.8078348717203285, "grad_norm": 2.515131711959839, "learning_rate": 4.961560952129239e-05, "loss": 0.1719, "step": 22230 }, { "epoch": 0.8081982702231267, "grad_norm": 1.3650884628295898, "learning_rate": 4.9614702495205686e-05, "loss": 0.1918, "step": 22240 }, { "epoch": 0.8085616687259248, "grad_norm": 4.730445384979248, "learning_rate": 4.961379440855903e-05, "loss": 0.2002, "step": 22250 }, { "epoch": 0.808925067228723, "grad_norm": 1.0421544313430786, "learning_rate": 4.9612885261391555e-05, "loss": 0.1544, "step": 22260 }, { "epoch": 0.8092884657315211, "grad_norm": 1.1957643032073975, "learning_rate": 4.961197505374242e-05, "loss": 0.1471, "step": 22270 }, { "epoch": 0.8096518642343193, "grad_norm": 2.936429977416992, "learning_rate": 4.961106378565086e-05, "loss": 0.2068, "step": 22280 }, { "epoch": 0.8100152627371175, "grad_norm": 2.0803070068359375, "learning_rate": 4.961015145715612e-05, "loss": 0.1496, "step": 22290 }, { "epoch": 0.8103786612399156, "grad_norm": 10.564451217651367, "learning_rate": 4.960923806829752e-05, "loss": 0.2549, "step": 22300 }, { "epoch": 0.8107420597427138, "grad_norm": 1.0569120645523071, "learning_rate": 4.9608323619114406e-05, "loss": 0.1624, "step": 22310 }, { "epoch": 0.8111054582455121, "grad_norm": 1.4505226612091064, "learning_rate": 
4.960740810964619e-05, "loss": 0.1523, "step": 22320 }, { "epoch": 0.8114688567483102, "grad_norm": 5.100767135620117, "learning_rate": 4.960649153993231e-05, "loss": 0.1562, "step": 22330 }, { "epoch": 0.8118322552511084, "grad_norm": 2.2787342071533203, "learning_rate": 4.960557391001226e-05, "loss": 0.1691, "step": 22340 }, { "epoch": 0.8121956537539066, "grad_norm": 15.405048370361328, "learning_rate": 4.960465521992558e-05, "loss": 0.2542, "step": 22350 }, { "epoch": 0.8125590522567047, "grad_norm": 0.7388777732849121, "learning_rate": 4.9603735469711845e-05, "loss": 0.1522, "step": 22360 }, { "epoch": 0.8129224507595029, "grad_norm": 0.9490914344787598, "learning_rate": 4.960281465941069e-05, "loss": 0.1317, "step": 22370 }, { "epoch": 0.8132858492623011, "grad_norm": 2.281085252761841, "learning_rate": 4.960189278906179e-05, "loss": 0.1503, "step": 22380 }, { "epoch": 0.8136492477650992, "grad_norm": 0.9328985810279846, "learning_rate": 4.960096985870486e-05, "loss": 0.1556, "step": 22390 }, { "epoch": 0.8140126462678974, "grad_norm": 4.4524617195129395, "learning_rate": 4.960004586837967e-05, "loss": 0.2387, "step": 22400 }, { "epoch": 0.8143760447706956, "grad_norm": 1.5577040910720825, "learning_rate": 4.959912081812603e-05, "loss": 0.1557, "step": 22410 }, { "epoch": 0.8147394432734937, "grad_norm": 2.358896493911743, "learning_rate": 4.95981947079838e-05, "loss": 0.2016, "step": 22420 }, { "epoch": 0.8151028417762919, "grad_norm": 2.1001386642456055, "learning_rate": 4.9597267537992885e-05, "loss": 0.1587, "step": 22430 }, { "epoch": 0.8154662402790901, "grad_norm": 2.7561607360839844, "learning_rate": 4.959633930819323e-05, "loss": 0.1616, "step": 22440 }, { "epoch": 0.8158296387818882, "grad_norm": 4.204514980316162, "learning_rate": 4.959541001862482e-05, "loss": 0.6089, "step": 22450 }, { "epoch": 0.8161930372846864, "grad_norm": 1.3738398551940918, "learning_rate": 4.959447966932771e-05, "loss": 0.1756, "step": 22460 }, { "epoch": 
0.8165564357874846, "grad_norm": 0.705806314945221, "learning_rate": 4.959354826034197e-05, "loss": 0.1213, "step": 22470 }, { "epoch": 0.8169198342902827, "grad_norm": 2.053788661956787, "learning_rate": 4.9592615791707755e-05, "loss": 0.1765, "step": 22480 }, { "epoch": 0.8172832327930809, "grad_norm": 2.0120911598205566, "learning_rate": 4.959168226346521e-05, "loss": 0.1444, "step": 22490 }, { "epoch": 0.8176466312958791, "grad_norm": 6.552361011505127, "learning_rate": 4.959074767565458e-05, "loss": 0.2201, "step": 22500 }, { "epoch": 0.8180100297986772, "grad_norm": 1.3007264137268066, "learning_rate": 4.958981202831613e-05, "loss": 0.1488, "step": 22510 }, { "epoch": 0.8183734283014754, "grad_norm": 1.7885551452636719, "learning_rate": 4.958887532149016e-05, "loss": 2.6491, "step": 22520 }, { "epoch": 0.8187368268042736, "grad_norm": 1.7092806100845337, "learning_rate": 4.9587937555217054e-05, "loss": 0.1946, "step": 22530 }, { "epoch": 0.8191002253070717, "grad_norm": 2.56215238571167, "learning_rate": 4.958699872953719e-05, "loss": 0.1676, "step": 22540 }, { "epoch": 0.8194636238098699, "grad_norm": 2.085753917694092, "learning_rate": 4.958605884449104e-05, "loss": 0.2038, "step": 22550 }, { "epoch": 0.819827022312668, "grad_norm": 0.8225610852241516, "learning_rate": 4.958511790011909e-05, "loss": 0.5185, "step": 22560 }, { "epoch": 0.8201904208154662, "grad_norm": 1.6775872707366943, "learning_rate": 4.9584175896461884e-05, "loss": 0.17, "step": 22570 }, { "epoch": 0.8205538193182644, "grad_norm": 3.4285826683044434, "learning_rate": 4.958323283356001e-05, "loss": 0.164, "step": 22580 }, { "epoch": 0.8209172178210625, "grad_norm": 1.892842411994934, "learning_rate": 4.95822887114541e-05, "loss": 1.2783, "step": 22590 }, { "epoch": 0.8212806163238607, "grad_norm": 4.959444522857666, "learning_rate": 4.9581343530184834e-05, "loss": 0.2062, "step": 22600 }, { "epoch": 0.8216440148266589, "grad_norm": 2.4584267139434814, "learning_rate": 
4.958039728979293e-05, "loss": 0.1443, "step": 22610 }, { "epoch": 0.822007413329457, "grad_norm": 1.118804693222046, "learning_rate": 4.957944999031917e-05, "loss": 0.16, "step": 22620 }, { "epoch": 0.8223708118322552, "grad_norm": 1.5434421300888062, "learning_rate": 4.9578501631804365e-05, "loss": 0.2104, "step": 22630 }, { "epoch": 0.8227342103350535, "grad_norm": 1.3116744756698608, "learning_rate": 4.9577552214289374e-05, "loss": 0.1326, "step": 22640 }, { "epoch": 0.8230976088378515, "grad_norm": 11.34653377532959, "learning_rate": 4.95766017378151e-05, "loss": 0.2231, "step": 22650 }, { "epoch": 0.8234610073406498, "grad_norm": 1.0379194021224976, "learning_rate": 4.957565020242251e-05, "loss": 0.1805, "step": 22660 }, { "epoch": 0.823824405843448, "grad_norm": 1.8218019008636475, "learning_rate": 4.957469760815259e-05, "loss": 0.1287, "step": 22670 }, { "epoch": 0.8241878043462461, "grad_norm": 1.1962164640426636, "learning_rate": 4.957374395504638e-05, "loss": 0.4115, "step": 22680 }, { "epoch": 0.8245512028490443, "grad_norm": 1.9947481155395508, "learning_rate": 4.957278924314499e-05, "loss": 0.1407, "step": 22690 }, { "epoch": 0.8249146013518425, "grad_norm": 25.343172073364258, "learning_rate": 4.957183347248953e-05, "loss": 0.4247, "step": 22700 }, { "epoch": 0.8252779998546406, "grad_norm": 1.4444775581359863, "learning_rate": 4.95708766431212e-05, "loss": 0.1641, "step": 22710 }, { "epoch": 0.8256413983574388, "grad_norm": 1.621640920639038, "learning_rate": 4.9569918755081216e-05, "loss": 0.1289, "step": 22720 }, { "epoch": 0.826004796860237, "grad_norm": 1.018471360206604, "learning_rate": 4.9568959808410854e-05, "loss": 0.1694, "step": 22730 }, { "epoch": 0.8263681953630351, "grad_norm": 3.1913223266601562, "learning_rate": 4.9567999803151424e-05, "loss": 0.1898, "step": 22740 }, { "epoch": 0.8267315938658333, "grad_norm": 8.095772743225098, "learning_rate": 4.956703873934431e-05, "loss": 0.2246, "step": 22750 }, { "epoch": 0.8270949923686315, 
"grad_norm": 1.738887906074524, "learning_rate": 4.956607661703089e-05, "loss": 0.1678, "step": 22760 }, { "epoch": 0.8274583908714296, "grad_norm": 0.9688615202903748, "learning_rate": 4.9565113436252644e-05, "loss": 0.1341, "step": 22770 }, { "epoch": 0.8278217893742278, "grad_norm": 2.2478010654449463, "learning_rate": 4.956414919705106e-05, "loss": 0.1823, "step": 22780 }, { "epoch": 0.828185187877026, "grad_norm": 1.6718928813934326, "learning_rate": 4.956318389946769e-05, "loss": 0.1543, "step": 22790 }, { "epoch": 0.8285485863798241, "grad_norm": 5.168727874755859, "learning_rate": 4.956221754354412e-05, "loss": 0.1795, "step": 22800 }, { "epoch": 0.8285485863798241, "eval_loss": 0.3908107876777649, "eval_runtime": 180.5873, "eval_samples_per_second": 41.055, "eval_steps_per_second": 5.133, "eval_wer": 0.20674569317624847, "step": 22800 }, { "epoch": 0.8289119848826223, "grad_norm": 0.9549854397773743, "learning_rate": 4.956125012932199e-05, "loss": 0.1559, "step": 22810 }, { "epoch": 0.8292753833854205, "grad_norm": 3.2057716846466064, "learning_rate": 4.9560281656842977e-05, "loss": 0.1675, "step": 22820 }, { "epoch": 0.8296387818882186, "grad_norm": 1.7775851488113403, "learning_rate": 4.955931212614882e-05, "loss": 0.1997, "step": 22830 }, { "epoch": 0.8300021803910168, "grad_norm": 1.7028132677078247, "learning_rate": 4.9558341537281274e-05, "loss": 0.1505, "step": 22840 }, { "epoch": 0.8303655788938149, "grad_norm": 2.7027060985565186, "learning_rate": 4.955736989028218e-05, "loss": 0.2009, "step": 22850 }, { "epoch": 0.8307289773966131, "grad_norm": 1.8419814109802246, "learning_rate": 4.955639718519339e-05, "loss": 0.1355, "step": 22860 }, { "epoch": 0.8310923758994113, "grad_norm": 0.8633226156234741, "learning_rate": 4.955542342205682e-05, "loss": 0.178, "step": 22870 }, { "epoch": 0.8314557744022094, "grad_norm": 6.966017723083496, "learning_rate": 4.955444860091442e-05, "loss": 0.1885, "step": 22880 }, { "epoch": 0.8318191729050076, "grad_norm": 
1.9565801620483398, "learning_rate": 4.955347272180819e-05, "loss": 0.1485, "step": 22890 }, { "epoch": 0.8321825714078058, "grad_norm": 22.704593658447266, "learning_rate": 4.9552495784780196e-05, "loss": 0.2294, "step": 22900 }, { "epoch": 0.8325459699106039, "grad_norm": 2.0515658855438232, "learning_rate": 4.95515177898725e-05, "loss": 0.166, "step": 22910 }, { "epoch": 0.8329093684134021, "grad_norm": 2.9277150630950928, "learning_rate": 4.9550538737127275e-05, "loss": 0.8898, "step": 22920 }, { "epoch": 0.8332727669162003, "grad_norm": 3.9280052185058594, "learning_rate": 4.9549558626586676e-05, "loss": 0.171, "step": 22930 }, { "epoch": 0.8336361654189984, "grad_norm": 2.5431272983551025, "learning_rate": 4.954857745829294e-05, "loss": 0.1539, "step": 22940 }, { "epoch": 0.8339995639217966, "grad_norm": 2.815434694290161, "learning_rate": 4.954759523228835e-05, "loss": 0.2126, "step": 22950 }, { "epoch": 0.8343629624245948, "grad_norm": 0.6958141922950745, "learning_rate": 4.9546611948615224e-05, "loss": 0.2069, "step": 22960 }, { "epoch": 0.8347263609273929, "grad_norm": 0.7068191766738892, "learning_rate": 4.9545627607315924e-05, "loss": 0.1287, "step": 22970 }, { "epoch": 0.8350897594301911, "grad_norm": 1.8746801614761353, "learning_rate": 4.954464220843287e-05, "loss": 0.1488, "step": 22980 }, { "epoch": 0.8354531579329894, "grad_norm": 1.5134693384170532, "learning_rate": 4.95436557520085e-05, "loss": 0.1337, "step": 22990 }, { "epoch": 0.8358165564357874, "grad_norm": 4.778042316436768, "learning_rate": 4.9542668238085344e-05, "loss": 0.2172, "step": 23000 }, { "epoch": 0.8361799549385857, "grad_norm": 1.074409008026123, "learning_rate": 4.9541679666705924e-05, "loss": 0.1696, "step": 23010 }, { "epoch": 0.8365433534413839, "grad_norm": 1.6725049018859863, "learning_rate": 4.954069003791286e-05, "loss": 0.136, "step": 23020 }, { "epoch": 0.836906751944182, "grad_norm": 3.194450616836548, "learning_rate": 4.953969935174877e-05, "loss": 0.2067, "step": 
23030 }, { "epoch": 0.8372701504469802, "grad_norm": 7.7923150062561035, "learning_rate": 4.9538707608256345e-05, "loss": 0.1938, "step": 23040 }, { "epoch": 0.8376335489497784, "grad_norm": 8.767574310302734, "learning_rate": 4.953771480747833e-05, "loss": 0.2473, "step": 23050 }, { "epoch": 0.8379969474525765, "grad_norm": 1.3911685943603516, "learning_rate": 4.953672094945748e-05, "loss": 0.1497, "step": 23060 }, { "epoch": 0.8383603459553747, "grad_norm": 0.7775372266769409, "learning_rate": 4.953572603423662e-05, "loss": 0.7581, "step": 23070 }, { "epoch": 0.8387237444581729, "grad_norm": 2.6937413215637207, "learning_rate": 4.9534730061858634e-05, "loss": 0.1849, "step": 23080 }, { "epoch": 0.839087142960971, "grad_norm": 0.7375633716583252, "learning_rate": 4.953373303236642e-05, "loss": 0.1706, "step": 23090 }, { "epoch": 0.8394505414637692, "grad_norm": 3.070746421813965, "learning_rate": 4.953273494580295e-05, "loss": 0.2114, "step": 23100 }, { "epoch": 0.8398139399665674, "grad_norm": 0.7470118403434753, "learning_rate": 4.953173580221121e-05, "loss": 0.13, "step": 23110 }, { "epoch": 0.8401773384693655, "grad_norm": 1.040595531463623, "learning_rate": 4.953073560163426e-05, "loss": 0.2088, "step": 23120 }, { "epoch": 0.8405407369721637, "grad_norm": 3.9858949184417725, "learning_rate": 4.95297343441152e-05, "loss": 0.1528, "step": 23130 }, { "epoch": 0.8409041354749618, "grad_norm": 1.4031178951263428, "learning_rate": 4.952873202969716e-05, "loss": 2.5826, "step": 23140 }, { "epoch": 0.84126753397776, "grad_norm": 16.660646438598633, "learning_rate": 4.952772865842332e-05, "loss": 0.3101, "step": 23150 }, { "epoch": 0.8416309324805582, "grad_norm": 1.21910560131073, "learning_rate": 4.952672423033693e-05, "loss": 0.1326, "step": 23160 }, { "epoch": 0.8419943309833563, "grad_norm": 1.4494057893753052, "learning_rate": 4.952571874548126e-05, "loss": 0.1567, "step": 23170 }, { "epoch": 0.8423577294861545, "grad_norm": 1.1903733015060425, "learning_rate": 
4.952471220389964e-05, "loss": 0.1537, "step": 23180 }, { "epoch": 0.8427211279889527, "grad_norm": 1.0293620824813843, "learning_rate": 4.9523704605635414e-05, "loss": 0.1695, "step": 23190 }, { "epoch": 0.8430845264917508, "grad_norm": 9.536385536193848, "learning_rate": 4.9522695950732025e-05, "loss": 0.2702, "step": 23200 }, { "epoch": 0.843447924994549, "grad_norm": 1.1565468311309814, "learning_rate": 4.9521686239232915e-05, "loss": 0.1452, "step": 23210 }, { "epoch": 0.8438113234973472, "grad_norm": 1.0805953741073608, "learning_rate": 4.9520675471181586e-05, "loss": 0.1478, "step": 23220 }, { "epoch": 0.8441747220001453, "grad_norm": 2.7216696739196777, "learning_rate": 4.95196636466216e-05, "loss": 0.1965, "step": 23230 }, { "epoch": 0.8445381205029435, "grad_norm": 2.2064578533172607, "learning_rate": 4.9518650765596564e-05, "loss": 0.213, "step": 23240 }, { "epoch": 0.8449015190057417, "grad_norm": 11.686285972595215, "learning_rate": 4.951763682815009e-05, "loss": 0.2929, "step": 23250 }, { "epoch": 0.8452649175085398, "grad_norm": 1.6271568536758423, "learning_rate": 4.9516621834325885e-05, "loss": 0.1406, "step": 23260 }, { "epoch": 0.845628316011338, "grad_norm": 2.791619300842285, "learning_rate": 4.951560578416767e-05, "loss": 0.1431, "step": 23270 }, { "epoch": 0.8459917145141362, "grad_norm": 1.9396895170211792, "learning_rate": 4.951458867771923e-05, "loss": 0.1516, "step": 23280 }, { "epoch": 0.8463551130169343, "grad_norm": 0.9364364147186279, "learning_rate": 4.951357051502439e-05, "loss": 0.1935, "step": 23290 }, { "epoch": 0.8467185115197325, "grad_norm": 2.275146007537842, "learning_rate": 4.9512551296127005e-05, "loss": 0.1832, "step": 23300 }, { "epoch": 0.8470819100225307, "grad_norm": 1.4089415073394775, "learning_rate": 4.951153102107101e-05, "loss": 0.1511, "step": 23310 }, { "epoch": 0.8474453085253288, "grad_norm": 1.2446107864379883, "learning_rate": 4.951050968990035e-05, "loss": 0.282, "step": 23320 }, { "epoch": 
0.847808707028127, "grad_norm": 2.595438241958618, "learning_rate": 4.950948730265905e-05, "loss": 0.1643, "step": 23330 }, { "epoch": 0.8481721055309253, "grad_norm": 1.1884585618972778, "learning_rate": 4.950846385939114e-05, "loss": 0.1445, "step": 23340 }, { "epoch": 0.8485355040337234, "grad_norm": 33.609004974365234, "learning_rate": 4.9507439360140716e-05, "loss": 0.185, "step": 23350 }, { "epoch": 0.8488989025365216, "grad_norm": 0.573637068271637, "learning_rate": 4.950641380495194e-05, "loss": 0.1417, "step": 23360 }, { "epoch": 0.8492623010393198, "grad_norm": 1.1126424074172974, "learning_rate": 4.9505387193868975e-05, "loss": 0.1592, "step": 23370 }, { "epoch": 0.8496256995421179, "grad_norm": 2.466045379638672, "learning_rate": 4.9504359526936074e-05, "loss": 0.1507, "step": 23380 }, { "epoch": 0.8499890980449161, "grad_norm": 1.273472547531128, "learning_rate": 4.95033308041975e-05, "loss": 0.174, "step": 23390 }, { "epoch": 0.8503524965477143, "grad_norm": 5.497190475463867, "learning_rate": 4.9502301025697595e-05, "loss": 0.2269, "step": 23400 }, { "epoch": 0.8503524965477143, "eval_loss": 0.3661801218986511, "eval_runtime": 181.0852, "eval_samples_per_second": 40.942, "eval_steps_per_second": 5.119, "eval_wer": 0.198767404287763, "step": 23400 }, { "epoch": 0.8507158950505124, "grad_norm": 0.740798830986023, "learning_rate": 4.950127019148071e-05, "loss": 0.148, "step": 23410 }, { "epoch": 0.8510792935533106, "grad_norm": 1.7785030603408813, "learning_rate": 4.950023830159127e-05, "loss": 0.175, "step": 23420 }, { "epoch": 0.8514426920561087, "grad_norm": 0.7675313949584961, "learning_rate": 4.949920535607374e-05, "loss": 0.1635, "step": 23430 }, { "epoch": 0.8518060905589069, "grad_norm": 0.9880558252334595, "learning_rate": 4.9498171354972617e-05, "loss": 0.1732, "step": 23440 }, { "epoch": 0.8521694890617051, "grad_norm": 5.804686069488525, "learning_rate": 4.9497136298332454e-05, "loss": 0.2142, "step": 23450 }, { "epoch": 0.8525328875645032, 
"grad_norm": 1.063359022140503, "learning_rate": 4.949610018619785e-05, "loss": 0.1529, "step": 23460 }, { "epoch": 0.8528962860673014, "grad_norm": 1.9043885469436646, "learning_rate": 4.949506301861344e-05, "loss": 0.1633, "step": 23470 }, { "epoch": 0.8532596845700996, "grad_norm": 2.0380702018737793, "learning_rate": 4.9494024795623926e-05, "loss": 0.1595, "step": 23480 }, { "epoch": 0.8536230830728977, "grad_norm": 1.65935218334198, "learning_rate": 4.949298551727403e-05, "loss": 0.1526, "step": 23490 }, { "epoch": 0.8539864815756959, "grad_norm": 1.7575215101242065, "learning_rate": 4.9491945183608536e-05, "loss": 0.1924, "step": 23500 }, { "epoch": 0.8543498800784941, "grad_norm": 2.332193374633789, "learning_rate": 4.949090379467226e-05, "loss": 0.1536, "step": 23510 }, { "epoch": 0.8547132785812922, "grad_norm": 1.0475032329559326, "learning_rate": 4.948986135051009e-05, "loss": 0.1322, "step": 23520 }, { "epoch": 0.8550766770840904, "grad_norm": 3.1753509044647217, "learning_rate": 4.948881785116692e-05, "loss": 0.1457, "step": 23530 }, { "epoch": 0.8554400755868886, "grad_norm": 0.7468664646148682, "learning_rate": 4.948777329668772e-05, "loss": 0.1385, "step": 23540 }, { "epoch": 0.8558034740896867, "grad_norm": 6.77406120300293, "learning_rate": 4.9486727687117507e-05, "loss": 0.19, "step": 23550 }, { "epoch": 0.8561668725924849, "grad_norm": 1.6008226871490479, "learning_rate": 4.9485681022501316e-05, "loss": 0.1609, "step": 23560 }, { "epoch": 0.8565302710952831, "grad_norm": 1.1062623262405396, "learning_rate": 4.948463330288425e-05, "loss": 0.1624, "step": 23570 }, { "epoch": 0.8568936695980812, "grad_norm": 1.6599873304367065, "learning_rate": 4.948358452831145e-05, "loss": 0.1532, "step": 23580 }, { "epoch": 0.8572570681008794, "grad_norm": 1.264592170715332, "learning_rate": 4.9482534698828106e-05, "loss": 0.1696, "step": 23590 }, { "epoch": 0.8576204666036776, "grad_norm": 2.027796745300293, "learning_rate": 4.948148381447945e-05, "loss": 
0.1913, "step": 23600 }, { "epoch": 0.8579838651064757, "grad_norm": 1.3213417530059814, "learning_rate": 4.948043187531076e-05, "loss": 0.1517, "step": 23610 }, { "epoch": 0.8583472636092739, "grad_norm": 1.6190669536590576, "learning_rate": 4.9479378881367366e-05, "loss": 0.1517, "step": 23620 }, { "epoch": 0.8587106621120721, "grad_norm": 5.381803512573242, "learning_rate": 4.947832483269464e-05, "loss": 0.1504, "step": 23630 }, { "epoch": 0.8590740606148702, "grad_norm": 3.4807474613189697, "learning_rate": 4.947726972933798e-05, "loss": 0.1887, "step": 23640 }, { "epoch": 0.8594374591176684, "grad_norm": 4.890349864959717, "learning_rate": 4.947621357134287e-05, "loss": 0.219, "step": 23650 }, { "epoch": 0.8598008576204667, "grad_norm": 1.1006419658660889, "learning_rate": 4.947515635875479e-05, "loss": 0.1743, "step": 23660 }, { "epoch": 0.8601642561232647, "grad_norm": 0.9933237433433533, "learning_rate": 4.9474098091619314e-05, "loss": 0.1294, "step": 23670 }, { "epoch": 0.860527654626063, "grad_norm": 3.392524480819702, "learning_rate": 4.947303876998203e-05, "loss": 0.1784, "step": 23680 }, { "epoch": 0.8608910531288612, "grad_norm": 1.466454029083252, "learning_rate": 4.947197839388857e-05, "loss": 0.1828, "step": 23690 }, { "epoch": 0.8612544516316593, "grad_norm": 3.670731544494629, "learning_rate": 4.947091696338465e-05, "loss": 0.1772, "step": 23700 }, { "epoch": 0.8616178501344575, "grad_norm": 1.3586241006851196, "learning_rate": 4.9469854478515976e-05, "loss": 0.1512, "step": 23710 }, { "epoch": 0.8619812486372556, "grad_norm": 0.8312864303588867, "learning_rate": 4.9468790939328336e-05, "loss": 0.1582, "step": 23720 }, { "epoch": 0.8623446471400538, "grad_norm": 0.9825647473335266, "learning_rate": 4.946772634586756e-05, "loss": 0.1662, "step": 23730 }, { "epoch": 0.862708045642852, "grad_norm": 2.7960050106048584, "learning_rate": 4.94666606981795e-05, "loss": 0.226, "step": 23740 }, { "epoch": 0.8630714441456501, "grad_norm": 
5.3017683029174805, "learning_rate": 4.94655939963101e-05, "loss": 0.2065, "step": 23750 }, { "epoch": 0.8634348426484483, "grad_norm": 1.0958201885223389, "learning_rate": 4.946452624030529e-05, "loss": 0.2177, "step": 23760 }, { "epoch": 0.8637982411512465, "grad_norm": 1.0320892333984375, "learning_rate": 4.94634574302111e-05, "loss": 0.1263, "step": 23770 }, { "epoch": 0.8641616396540446, "grad_norm": 1.0401560068130493, "learning_rate": 4.946238756607356e-05, "loss": 0.6474, "step": 23780 }, { "epoch": 0.8645250381568428, "grad_norm": 1.378184199333191, "learning_rate": 4.9461316647938785e-05, "loss": 0.1783, "step": 23790 }, { "epoch": 0.864888436659641, "grad_norm": 7.429476261138916, "learning_rate": 4.9460244675852906e-05, "loss": 0.2744, "step": 23800 }, { "epoch": 0.8652518351624391, "grad_norm": 2.2409234046936035, "learning_rate": 4.945917164986211e-05, "loss": 0.2088, "step": 23810 }, { "epoch": 0.8656152336652373, "grad_norm": 1.1307353973388672, "learning_rate": 4.945809757001264e-05, "loss": 0.1311, "step": 23820 }, { "epoch": 0.8659786321680355, "grad_norm": 1.6061898469924927, "learning_rate": 4.945702243635077e-05, "loss": 0.1683, "step": 23830 }, { "epoch": 0.8663420306708336, "grad_norm": 1.0011060237884521, "learning_rate": 4.945594624892281e-05, "loss": 0.8323, "step": 23840 }, { "epoch": 0.8667054291736318, "grad_norm": 6.631030082702637, "learning_rate": 4.9454869007775154e-05, "loss": 0.177, "step": 23850 }, { "epoch": 0.86706882767643, "grad_norm": 2.8532910346984863, "learning_rate": 4.9453790712954195e-05, "loss": 0.145, "step": 23860 }, { "epoch": 0.8674322261792281, "grad_norm": 2.6437554359436035, "learning_rate": 4.945271136450641e-05, "loss": 0.1496, "step": 23870 }, { "epoch": 0.8677956246820263, "grad_norm": 3.0070180892944336, "learning_rate": 4.945163096247829e-05, "loss": 0.1582, "step": 23880 }, { "epoch": 0.8681590231848245, "grad_norm": 0.8612903356552124, "learning_rate": 4.9450549506916386e-05, "loss": 0.157, "step": 
23890 }, { "epoch": 0.8685224216876226, "grad_norm": 9.475138664245605, "learning_rate": 4.94494669978673e-05, "loss": 0.312, "step": 23900 }, { "epoch": 0.8688858201904208, "grad_norm": 0.789193868637085, "learning_rate": 4.944838343537768e-05, "loss": 0.1385, "step": 23910 }, { "epoch": 0.869249218693219, "grad_norm": 0.9372280240058899, "learning_rate": 4.94472988194942e-05, "loss": 0.1581, "step": 23920 }, { "epoch": 0.8696126171960171, "grad_norm": 4.738519191741943, "learning_rate": 4.94462131502636e-05, "loss": 0.1693, "step": 23930 }, { "epoch": 0.8699760156988153, "grad_norm": 0.9660571217536926, "learning_rate": 4.9445126427732654e-05, "loss": 0.1578, "step": 23940 }, { "epoch": 0.8703394142016135, "grad_norm": 8.137104034423828, "learning_rate": 4.944403865194818e-05, "loss": 0.1857, "step": 23950 }, { "epoch": 0.8707028127044116, "grad_norm": 1.1240946054458618, "learning_rate": 4.944294982295706e-05, "loss": 0.2508, "step": 23960 }, { "epoch": 0.8710662112072098, "grad_norm": 3.6192643642425537, "learning_rate": 4.94418599408062e-05, "loss": 0.1354, "step": 23970 }, { "epoch": 0.871429609710008, "grad_norm": 2.76771879196167, "learning_rate": 4.944076900554256e-05, "loss": 0.1638, "step": 23980 }, { "epoch": 0.8717930082128061, "grad_norm": 1.734529972076416, "learning_rate": 4.9439677017213143e-05, "loss": 0.1414, "step": 23990 }, { "epoch": 0.8721564067156043, "grad_norm": 6.897458553314209, "learning_rate": 4.9438583975864996e-05, "loss": 0.2154, "step": 24000 }, { "epoch": 0.8721564067156043, "eval_loss": 0.37997984886169434, "eval_runtime": 180.3101, "eval_samples_per_second": 41.118, "eval_steps_per_second": 5.141, "eval_wer": 0.20322399114128561, "step": 24000 }, { "epoch": 0.8725198052184026, "grad_norm": 1.5639888048171997, "learning_rate": 4.943748988154523e-05, "loss": 0.1372, "step": 24010 }, { "epoch": 0.8728832037212007, "grad_norm": 4.484424114227295, "learning_rate": 4.943639473430096e-05, "loss": 0.3205, "step": 24020 }, { "epoch": 
0.8732466022239989, "grad_norm": 1.9517849683761597, "learning_rate": 4.9435298534179396e-05, "loss": 0.2085, "step": 24030 }, { "epoch": 0.873610000726797, "grad_norm": 1.3041925430297852, "learning_rate": 4.943420128122776e-05, "loss": 0.1446, "step": 24040 }, { "epoch": 0.8739733992295952, "grad_norm": 29.67850685119629, "learning_rate": 4.943310297549332e-05, "loss": 0.2643, "step": 24050 }, { "epoch": 0.8743367977323934, "grad_norm": 4.462527751922607, "learning_rate": 4.9432003617023405e-05, "loss": 0.2067, "step": 24060 }, { "epoch": 0.8747001962351915, "grad_norm": 1.2176992893218994, "learning_rate": 4.9430903205865384e-05, "loss": 0.1353, "step": 24070 }, { "epoch": 0.8750635947379897, "grad_norm": 2.044191360473633, "learning_rate": 4.9429801742066675e-05, "loss": 0.1632, "step": 24080 }, { "epoch": 0.8754269932407879, "grad_norm": 3.0303845405578613, "learning_rate": 4.942869922567473e-05, "loss": 0.1533, "step": 24090 }, { "epoch": 0.875790391743586, "grad_norm": 4.44179105758667, "learning_rate": 4.942759565673705e-05, "loss": 0.2054, "step": 24100 }, { "epoch": 0.8761537902463842, "grad_norm": 2.158686637878418, "learning_rate": 4.942649103530119e-05, "loss": 0.1457, "step": 24110 }, { "epoch": 0.8765171887491824, "grad_norm": 5.875476837158203, "learning_rate": 4.942538536141473e-05, "loss": 0.1941, "step": 24120 }, { "epoch": 0.8768805872519805, "grad_norm": 1.7252172231674194, "learning_rate": 4.9424278635125335e-05, "loss": 0.155, "step": 24130 }, { "epoch": 0.8772439857547787, "grad_norm": 1.6594487428665161, "learning_rate": 4.9423170856480674e-05, "loss": 0.1736, "step": 24140 }, { "epoch": 0.8776073842575769, "grad_norm": 6.2919697761535645, "learning_rate": 4.9422062025528474e-05, "loss": 0.2313, "step": 24150 }, { "epoch": 0.877970782760375, "grad_norm": 2.1133229732513428, "learning_rate": 4.942095214231651e-05, "loss": 0.1642, "step": 24160 }, { "epoch": 0.8783341812631732, "grad_norm": 1.02867591381073, "learning_rate": 
4.941984120689262e-05, "loss": 0.1554, "step": 24170 }, { "epoch": 0.8786975797659714, "grad_norm": 1.7262704372406006, "learning_rate": 4.941872921930465e-05, "loss": 0.1428, "step": 24180 }, { "epoch": 0.8790609782687695, "grad_norm": 1.095211386680603, "learning_rate": 4.9417616179600526e-05, "loss": 0.1683, "step": 24190 }, { "epoch": 0.8794243767715677, "grad_norm": 9.772414207458496, "learning_rate": 4.94165020878282e-05, "loss": 0.2224, "step": 24200 }, { "epoch": 0.8797877752743659, "grad_norm": 0.6741021871566772, "learning_rate": 4.9415386944035665e-05, "loss": 0.7216, "step": 24210 }, { "epoch": 0.880151173777164, "grad_norm": 0.6714327335357666, "learning_rate": 4.941427074827098e-05, "loss": 0.1321, "step": 24220 }, { "epoch": 0.8805145722799622, "grad_norm": 9.116118431091309, "learning_rate": 4.941315350058223e-05, "loss": 0.1738, "step": 24230 }, { "epoch": 0.8808779707827604, "grad_norm": 1.119581937789917, "learning_rate": 4.941203520101757e-05, "loss": 0.1076, "step": 24240 }, { "epoch": 0.8812413692855585, "grad_norm": 1.5630614757537842, "learning_rate": 4.941091584962516e-05, "loss": 0.1734, "step": 24250 }, { "epoch": 0.8816047677883567, "grad_norm": 3.4376001358032227, "learning_rate": 4.940979544645325e-05, "loss": 0.1567, "step": 24260 }, { "epoch": 0.8819681662911549, "grad_norm": 1.1688649654388428, "learning_rate": 4.94086739915501e-05, "loss": 0.137, "step": 24270 }, { "epoch": 0.882331564793953, "grad_norm": 2.02235746383667, "learning_rate": 4.9407551484964035e-05, "loss": 0.1718, "step": 24280 }, { "epoch": 0.8826949632967512, "grad_norm": 1.7484105825424194, "learning_rate": 4.940642792674341e-05, "loss": 0.1973, "step": 24290 }, { "epoch": 0.8830583617995494, "grad_norm": 7.056839942932129, "learning_rate": 4.940530331693666e-05, "loss": 0.1916, "step": 24300 }, { "epoch": 0.8834217603023475, "grad_norm": 1.4804614782333374, "learning_rate": 4.940417765559221e-05, "loss": 0.1418, "step": 24310 }, { "epoch": 0.8837851588051457, 
"grad_norm": 1.3168327808380127, "learning_rate": 4.940305094275859e-05, "loss": 0.1466, "step": 24320 }, { "epoch": 0.8841485573079438, "grad_norm": 2.4612350463867188, "learning_rate": 4.9401923178484325e-05, "loss": 0.1956, "step": 24330 }, { "epoch": 0.884511955810742, "grad_norm": 0.8389832973480225, "learning_rate": 4.9400794362818005e-05, "loss": 0.1751, "step": 24340 }, { "epoch": 0.8848753543135403, "grad_norm": 2.618521213531494, "learning_rate": 4.939966449580828e-05, "loss": 0.2133, "step": 24350 }, { "epoch": 0.8852387528163383, "grad_norm": 0.767784833908081, "learning_rate": 4.9398533577503826e-05, "loss": 0.1256, "step": 24360 }, { "epoch": 0.8856021513191366, "grad_norm": 1.7649836540222168, "learning_rate": 4.939740160795336e-05, "loss": 0.1925, "step": 24370 }, { "epoch": 0.8859655498219348, "grad_norm": 2.182840347290039, "learning_rate": 4.9396268587205685e-05, "loss": 0.184, "step": 24380 }, { "epoch": 0.8863289483247329, "grad_norm": 1.6524356603622437, "learning_rate": 4.939513451530958e-05, "loss": 0.1582, "step": 24390 }, { "epoch": 0.8866923468275311, "grad_norm": 13.93655776977539, "learning_rate": 4.939399939231394e-05, "loss": 0.1813, "step": 24400 }, { "epoch": 0.8870557453303293, "grad_norm": 1.9153752326965332, "learning_rate": 4.939286321826766e-05, "loss": 0.2093, "step": 24410 }, { "epoch": 0.8874191438331274, "grad_norm": 1.9444178342819214, "learning_rate": 4.9391725993219685e-05, "loss": 0.1489, "step": 24420 }, { "epoch": 0.8877825423359256, "grad_norm": 2.9371562004089355, "learning_rate": 4.939058771721903e-05, "loss": 0.1648, "step": 24430 }, { "epoch": 0.8881459408387238, "grad_norm": 3.127439498901367, "learning_rate": 4.938944839031473e-05, "loss": 0.1756, "step": 24440 }, { "epoch": 0.8885093393415219, "grad_norm": 11.735489845275879, "learning_rate": 4.938830801255588e-05, "loss": 0.2049, "step": 24450 }, { "epoch": 0.8888727378443201, "grad_norm": 1.0685577392578125, "learning_rate": 4.938716658399161e-05, "loss": 
0.147, "step": 24460 }, { "epoch": 0.8892361363471183, "grad_norm": 3.6975417137145996, "learning_rate": 4.93860241046711e-05, "loss": 0.1402, "step": 24470 }, { "epoch": 0.8895995348499164, "grad_norm": 1.703731894493103, "learning_rate": 4.938488057464358e-05, "loss": 0.1418, "step": 24480 }, { "epoch": 0.8899629333527146, "grad_norm": 1.5911983251571655, "learning_rate": 4.938373599395831e-05, "loss": 0.1268, "step": 24490 }, { "epoch": 0.8903263318555128, "grad_norm": 5.278975486755371, "learning_rate": 4.9382590362664613e-05, "loss": 0.2388, "step": 24500 }, { "epoch": 0.8906897303583109, "grad_norm": 1.673403263092041, "learning_rate": 4.9381443680811865e-05, "loss": 0.1568, "step": 24510 }, { "epoch": 0.8910531288611091, "grad_norm": 0.5384930968284607, "learning_rate": 4.938029594844945e-05, "loss": 0.1364, "step": 24520 }, { "epoch": 0.8914165273639073, "grad_norm": 1.4231863021850586, "learning_rate": 4.937914716562683e-05, "loss": 0.1358, "step": 24530 }, { "epoch": 0.8917799258667054, "grad_norm": 1.2151052951812744, "learning_rate": 4.937799733239349e-05, "loss": 0.1673, "step": 24540 }, { "epoch": 0.8921433243695036, "grad_norm": 9.278292655944824, "learning_rate": 4.937684644879899e-05, "loss": 0.2505, "step": 24550 }, { "epoch": 0.8925067228723018, "grad_norm": 2.3570127487182617, "learning_rate": 4.937569451489291e-05, "loss": 0.1447, "step": 24560 }, { "epoch": 0.8928701213750999, "grad_norm": 0.44337037205696106, "learning_rate": 4.937454153072488e-05, "loss": 0.2015, "step": 24570 }, { "epoch": 0.8932335198778981, "grad_norm": 2.4552314281463623, "learning_rate": 4.937338749634458e-05, "loss": 0.1838, "step": 24580 }, { "epoch": 0.8935969183806963, "grad_norm": 0.9864338636398315, "learning_rate": 4.937223241180174e-05, "loss": 0.1356, "step": 24590 }, { "epoch": 0.8939603168834944, "grad_norm": 8.218843460083008, "learning_rate": 4.937107627714612e-05, "loss": 0.2109, "step": 24600 }, { "epoch": 0.8939603168834944, "eval_loss": 
0.38069987297058105, "eval_runtime": 180.4244, "eval_samples_per_second": 41.092, "eval_steps_per_second": 5.138, "eval_wer": 0.20163559460489772, "step": 24600 }, { "epoch": 0.8943237153862926, "grad_norm": 0.7269652485847473, "learning_rate": 4.936991909242753e-05, "loss": 0.1756, "step": 24610 }, { "epoch": 0.8946871138890907, "grad_norm": 0.9835095405578613, "learning_rate": 4.9368760857695836e-05, "loss": 0.1297, "step": 24620 }, { "epoch": 0.8950505123918889, "grad_norm": 3.5632708072662354, "learning_rate": 4.9367601573000944e-05, "loss": 0.146, "step": 24630 }, { "epoch": 0.8954139108946871, "grad_norm": 0.7898311614990234, "learning_rate": 4.93664412383928e-05, "loss": 0.1693, "step": 24640 }, { "epoch": 0.8957773093974852, "grad_norm": 3.8220248222351074, "learning_rate": 4.93652798539214e-05, "loss": 0.1739, "step": 24650 }, { "epoch": 0.8961407079002834, "grad_norm": 0.7946699857711792, "learning_rate": 4.936411741963678e-05, "loss": 0.1271, "step": 24660 }, { "epoch": 0.8965041064030816, "grad_norm": 1.5677101612091064, "learning_rate": 4.936295393558903e-05, "loss": 0.1365, "step": 24670 }, { "epoch": 0.8968675049058797, "grad_norm": 18.39532470703125, "learning_rate": 4.9361789401828285e-05, "loss": 0.2035, "step": 24680 }, { "epoch": 0.897230903408678, "grad_norm": 2.577984094619751, "learning_rate": 4.93606238184047e-05, "loss": 0.127, "step": 24690 }, { "epoch": 0.8975943019114762, "grad_norm": 3.4822871685028076, "learning_rate": 4.9359457185368515e-05, "loss": 0.2335, "step": 24700 }, { "epoch": 0.8979577004142743, "grad_norm": 1.6475412845611572, "learning_rate": 4.935828950277e-05, "loss": 0.1581, "step": 24710 }, { "epoch": 0.8983210989170725, "grad_norm": 2.0972635746002197, "learning_rate": 4.9357120770659446e-05, "loss": 0.1608, "step": 24720 }, { "epoch": 0.8986844974198707, "grad_norm": 3.194946050643921, "learning_rate": 4.9355950989087226e-05, "loss": 0.1911, "step": 24730 }, { "epoch": 0.8990478959226688, "grad_norm": 
1.1382654905319214, "learning_rate": 4.9354780158103744e-05, "loss": 0.1671, "step": 24740 }, { "epoch": 0.899411294425467, "grad_norm": 7.309133052825928, "learning_rate": 4.9353608277759433e-05, "loss": 0.192, "step": 24750 }, { "epoch": 0.8997746929282652, "grad_norm": 1.0215349197387695, "learning_rate": 4.9352435348104786e-05, "loss": 0.1713, "step": 24760 }, { "epoch": 0.9001380914310633, "grad_norm": 2.319836378097534, "learning_rate": 4.935126136919035e-05, "loss": 0.1441, "step": 24770 }, { "epoch": 0.9005014899338615, "grad_norm": 3.443413496017456, "learning_rate": 4.9350086341066716e-05, "loss": 0.2136, "step": 24780 }, { "epoch": 0.9008648884366597, "grad_norm": 0.9862478971481323, "learning_rate": 4.934891026378449e-05, "loss": 0.134, "step": 24790 }, { "epoch": 0.9012282869394578, "grad_norm": 10.3681640625, "learning_rate": 4.934773313739435e-05, "loss": 0.3034, "step": 24800 }, { "epoch": 0.901591685442256, "grad_norm": 0.9848408102989197, "learning_rate": 4.9346554961947014e-05, "loss": 0.1503, "step": 24810 }, { "epoch": 0.9019550839450542, "grad_norm": 1.3456752300262451, "learning_rate": 4.934537573749326e-05, "loss": 2.2881, "step": 24820 }, { "epoch": 0.9023184824478523, "grad_norm": 0.8639931082725525, "learning_rate": 4.9344195464083884e-05, "loss": 0.1565, "step": 24830 }, { "epoch": 0.9026818809506505, "grad_norm": 1.1297109127044678, "learning_rate": 4.9343014141769744e-05, "loss": 0.1338, "step": 24840 }, { "epoch": 0.9030452794534487, "grad_norm": 20.8160343170166, "learning_rate": 4.934183177060173e-05, "loss": 0.2155, "step": 24850 }, { "epoch": 0.9034086779562468, "grad_norm": 0.8113746643066406, "learning_rate": 4.9340648350630804e-05, "loss": 0.126, "step": 24860 }, { "epoch": 0.903772076459045, "grad_norm": 1.7760541439056396, "learning_rate": 4.9339463881907946e-05, "loss": 0.119, "step": 24870 }, { "epoch": 0.9041354749618432, "grad_norm": 1.8657050132751465, "learning_rate": 4.933827836448418e-05, "loss": 0.1772, "step": 24880 
}, { "epoch": 0.9044988734646413, "grad_norm": 1.2576991319656372, "learning_rate": 4.9337091798410594e-05, "loss": 0.1609, "step": 24890 }, { "epoch": 0.9048622719674395, "grad_norm": 4.8249311447143555, "learning_rate": 4.933590418373833e-05, "loss": 1.7033, "step": 24900 }, { "epoch": 0.9052256704702376, "grad_norm": 1.065819501876831, "learning_rate": 4.9334715520518526e-05, "loss": 0.1559, "step": 24910 }, { "epoch": 0.9055890689730358, "grad_norm": 0.961330771446228, "learning_rate": 4.933352580880242e-05, "loss": 0.1459, "step": 24920 }, { "epoch": 0.905952467475834, "grad_norm": 2.0911202430725098, "learning_rate": 4.933233504864126e-05, "loss": 0.2173, "step": 24930 }, { "epoch": 0.9063158659786321, "grad_norm": 0.5074183940887451, "learning_rate": 4.933114324008636e-05, "loss": 0.1544, "step": 24940 }, { "epoch": 0.9066792644814303, "grad_norm": 3.663172483444214, "learning_rate": 4.932995038318907e-05, "loss": 0.2042, "step": 24950 }, { "epoch": 0.9070426629842285, "grad_norm": 1.691545844078064, "learning_rate": 4.9328756478000784e-05, "loss": 0.1616, "step": 24960 }, { "epoch": 0.9074060614870266, "grad_norm": 1.6613342761993408, "learning_rate": 4.9327561524572944e-05, "loss": 0.1212, "step": 24970 }, { "epoch": 0.9077694599898248, "grad_norm": 2.5737128257751465, "learning_rate": 4.9326365522957044e-05, "loss": 0.1753, "step": 24980 }, { "epoch": 0.908132858492623, "grad_norm": 1.717429280281067, "learning_rate": 4.932516847320459e-05, "loss": 0.1436, "step": 24990 }, { "epoch": 0.9084962569954211, "grad_norm": 13.324812889099121, "learning_rate": 4.9323970375367186e-05, "loss": 0.1983, "step": 25000 }, { "epoch": 0.9088596554982193, "grad_norm": 1.374232530593872, "learning_rate": 4.932277122949644e-05, "loss": 0.1588, "step": 25010 }, { "epoch": 0.9092230540010175, "grad_norm": 1.1790850162506104, "learning_rate": 4.932157103564402e-05, "loss": 0.1603, "step": 25020 }, { "epoch": 0.9095864525038156, "grad_norm": 2.7326996326446533, "learning_rate": 
4.932036979386165e-05, "loss": 0.1656, "step": 25030 }, { "epoch": 0.9099498510066139, "grad_norm": 1.2364397048950195, "learning_rate": 4.931916750420107e-05, "loss": 0.2311, "step": 25040 }, { "epoch": 0.9103132495094121, "grad_norm": 3.7070934772491455, "learning_rate": 4.9317964166714095e-05, "loss": 0.2286, "step": 25050 }, { "epoch": 0.9106766480122102, "grad_norm": 2.05336594581604, "learning_rate": 4.931675978145256e-05, "loss": 0.1404, "step": 25060 }, { "epoch": 0.9110400465150084, "grad_norm": 1.3064135313034058, "learning_rate": 4.931555434846837e-05, "loss": 0.1395, "step": 25070 }, { "epoch": 0.9114034450178066, "grad_norm": 1.252254843711853, "learning_rate": 4.931434786781346e-05, "loss": 0.1595, "step": 25080 }, { "epoch": 0.9117668435206047, "grad_norm": 1.399654507637024, "learning_rate": 4.931314033953981e-05, "loss": 0.1495, "step": 25090 }, { "epoch": 0.9121302420234029, "grad_norm": 9.340110778808594, "learning_rate": 4.931193176369945e-05, "loss": 0.2489, "step": 25100 }, { "epoch": 0.9124936405262011, "grad_norm": 1.4071942567825317, "learning_rate": 4.931072214034445e-05, "loss": 0.1409, "step": 25110 }, { "epoch": 0.9128570390289992, "grad_norm": 1.5617743730545044, "learning_rate": 4.9309511469526934e-05, "loss": 0.2026, "step": 25120 }, { "epoch": 0.9132204375317974, "grad_norm": 1.4382219314575195, "learning_rate": 4.930829975129906e-05, "loss": 0.1426, "step": 25130 }, { "epoch": 0.9135838360345956, "grad_norm": 1.0388094186782837, "learning_rate": 4.930708698571303e-05, "loss": 0.132, "step": 25140 }, { "epoch": 0.9139472345373937, "grad_norm": 3.9398436546325684, "learning_rate": 4.9305873172821126e-05, "loss": 0.2257, "step": 25150 }, { "epoch": 0.9143106330401919, "grad_norm": 2.5586395263671875, "learning_rate": 4.930465831267562e-05, "loss": 0.1508, "step": 25160 }, { "epoch": 0.9146740315429901, "grad_norm": 1.6908849477767944, "learning_rate": 4.930344240532886e-05, "loss": 0.1407, "step": 25170 }, { "epoch": 
0.9150374300457882, "grad_norm": 3.980564594268799, "learning_rate": 4.930222545083324e-05, "loss": 0.1749, "step": 25180 }, { "epoch": 0.9154008285485864, "grad_norm": 1.7451142072677612, "learning_rate": 4.930100744924119e-05, "loss": 0.1415, "step": 25190 }, { "epoch": 0.9157642270513845, "grad_norm": 11.09490966796875, "learning_rate": 4.9299788400605194e-05, "loss": 0.248, "step": 25200 }, { "epoch": 0.9157642270513845, "eval_loss": 0.36305877566337585, "eval_runtime": 180.0742, "eval_samples_per_second": 41.172, "eval_steps_per_second": 5.148, "eval_wer": 0.19528200845934612, "step": 25200 }, { "epoch": 0.9161276255541827, "grad_norm": 0.9552545547485352, "learning_rate": 4.929856830497778e-05, "loss": 0.1765, "step": 25210 }, { "epoch": 0.9164910240569809, "grad_norm": 1.0652204751968384, "learning_rate": 4.929734716241151e-05, "loss": 0.1412, "step": 25220 }, { "epoch": 0.916854422559779, "grad_norm": 2.473240375518799, "learning_rate": 4.929612497295899e-05, "loss": 0.1511, "step": 25230 }, { "epoch": 0.9172178210625772, "grad_norm": 2.0563089847564697, "learning_rate": 4.929490173667291e-05, "loss": 0.1562, "step": 25240 }, { "epoch": 0.9175812195653754, "grad_norm": 5.446952819824219, "learning_rate": 4.929367745360593e-05, "loss": 0.2416, "step": 25250 }, { "epoch": 0.9179446180681735, "grad_norm": 0.724795937538147, "learning_rate": 4.929245212381085e-05, "loss": 0.1554, "step": 25260 }, { "epoch": 0.9183080165709717, "grad_norm": 1.0962814092636108, "learning_rate": 4.929122574734043e-05, "loss": 0.1567, "step": 25270 }, { "epoch": 0.9186714150737699, "grad_norm": 1.3689608573913574, "learning_rate": 4.9289998324247524e-05, "loss": 0.1498, "step": 25280 }, { "epoch": 0.919034813576568, "grad_norm": 3.039569139480591, "learning_rate": 4.9288769854585015e-05, "loss": 0.1666, "step": 25290 }, { "epoch": 0.9193982120793662, "grad_norm": 10.71928882598877, "learning_rate": 4.928754033840583e-05, "loss": 0.2487, "step": 25300 }, { "epoch": 
0.9197616105821644, "grad_norm": 0.47624918818473816, "learning_rate": 4.928630977576295e-05, "loss": 0.1457, "step": 25310 }, { "epoch": 0.9201250090849625, "grad_norm": 1.2840664386749268, "learning_rate": 4.9285078166709386e-05, "loss": 0.1437, "step": 25320 }, { "epoch": 0.9204884075877607, "grad_norm": 2.118415117263794, "learning_rate": 4.928384551129822e-05, "loss": 0.1861, "step": 25330 }, { "epoch": 0.9208518060905589, "grad_norm": 0.8363248109817505, "learning_rate": 4.928261180958255e-05, "loss": 0.1494, "step": 25340 }, { "epoch": 0.921215204593357, "grad_norm": 20.23488998413086, "learning_rate": 4.928137706161553e-05, "loss": 0.2414, "step": 25350 }, { "epoch": 0.9215786030961552, "grad_norm": 1.1590826511383057, "learning_rate": 4.928014126745037e-05, "loss": 0.162, "step": 25360 }, { "epoch": 0.9219420015989535, "grad_norm": 1.1986241340637207, "learning_rate": 4.9278904427140315e-05, "loss": 0.1323, "step": 25370 }, { "epoch": 0.9223054001017515, "grad_norm": 5.075083255767822, "learning_rate": 4.927766654073864e-05, "loss": 0.2944, "step": 25380 }, { "epoch": 0.9226687986045498, "grad_norm": 3.1853582859039307, "learning_rate": 4.927642760829871e-05, "loss": 0.1792, "step": 25390 }, { "epoch": 0.923032197107348, "grad_norm": 5.919759273529053, "learning_rate": 4.927518762987388e-05, "loss": 0.2182, "step": 25400 }, { "epoch": 0.9233955956101461, "grad_norm": 1.4639918804168701, "learning_rate": 4.927394660551759e-05, "loss": 0.1277, "step": 25410 }, { "epoch": 0.9237589941129443, "grad_norm": 1.205178141593933, "learning_rate": 4.927270453528331e-05, "loss": 0.1197, "step": 25420 }, { "epoch": 0.9241223926157425, "grad_norm": 1.4328303337097168, "learning_rate": 4.927146141922455e-05, "loss": 0.1522, "step": 25430 }, { "epoch": 0.9244857911185406, "grad_norm": 0.6114678382873535, "learning_rate": 4.927021725739488e-05, "loss": 0.1661, "step": 25440 }, { "epoch": 0.9248491896213388, "grad_norm": 34.13093566894531, "learning_rate": 
4.92689720498479e-05, "loss": 0.2852, "step": 25450 }, { "epoch": 0.925212588124137, "grad_norm": 0.9967424273490906, "learning_rate": 4.9267725796637256e-05, "loss": 0.1433, "step": 25460 }, { "epoch": 0.9255759866269351, "grad_norm": 1.0493268966674805, "learning_rate": 4.926647849781666e-05, "loss": 0.1361, "step": 25470 }, { "epoch": 0.9259393851297333, "grad_norm": 2.582016944885254, "learning_rate": 4.926523015343985e-05, "loss": 0.1829, "step": 25480 }, { "epoch": 0.9263027836325314, "grad_norm": 5.122225284576416, "learning_rate": 4.92639807635606e-05, "loss": 0.1148, "step": 25490 }, { "epoch": 0.9266661821353296, "grad_norm": 8.054966926574707, "learning_rate": 4.9262730328232755e-05, "loss": 0.2363, "step": 25500 }, { "epoch": 0.9270295806381278, "grad_norm": 3.3668735027313232, "learning_rate": 4.926147884751018e-05, "loss": 0.1311, "step": 25510 }, { "epoch": 0.9273929791409259, "grad_norm": 1.0643728971481323, "learning_rate": 4.926022632144681e-05, "loss": 0.1318, "step": 25520 }, { "epoch": 0.9277563776437241, "grad_norm": 1.632354497909546, "learning_rate": 4.9258972750096614e-05, "loss": 0.1958, "step": 25530 }, { "epoch": 0.9281197761465223, "grad_norm": 0.7638659477233887, "learning_rate": 4.9257718133513586e-05, "loss": 0.168, "step": 25540 }, { "epoch": 0.9284831746493204, "grad_norm": 4.14115571975708, "learning_rate": 4.9256462471751796e-05, "loss": 0.1976, "step": 25550 }, { "epoch": 0.9288465731521186, "grad_norm": 39.925689697265625, "learning_rate": 4.925520576486534e-05, "loss": 0.67, "step": 25560 }, { "epoch": 0.9292099716549168, "grad_norm": 1.349623441696167, "learning_rate": 4.9253948012908366e-05, "loss": 0.1475, "step": 25570 }, { "epoch": 0.9295733701577149, "grad_norm": 10.941556930541992, "learning_rate": 4.925268921593508e-05, "loss": 0.1696, "step": 25580 }, { "epoch": 0.9299367686605131, "grad_norm": 1.5406817197799683, "learning_rate": 4.925142937399969e-05, "loss": 0.1444, "step": 25590 }, { "epoch": 0.9303001671633113, 
"grad_norm": 3.9542319774627686, "learning_rate": 4.925016848715651e-05, "loss": 0.216, "step": 25600 }, { "epoch": 0.9306635656661094, "grad_norm": 2.0055665969848633, "learning_rate": 4.924890655545984e-05, "loss": 0.1248, "step": 25610 }, { "epoch": 0.9310269641689076, "grad_norm": 2.145512819290161, "learning_rate": 4.924764357896408e-05, "loss": 0.1278, "step": 25620 }, { "epoch": 0.9313903626717058, "grad_norm": 6.076485633850098, "learning_rate": 4.924637955772361e-05, "loss": 0.1586, "step": 25630 }, { "epoch": 0.9317537611745039, "grad_norm": 0.9482760429382324, "learning_rate": 4.924511449179293e-05, "loss": 0.1547, "step": 25640 }, { "epoch": 0.9321171596773021, "grad_norm": 2.335090398788452, "learning_rate": 4.924384838122653e-05, "loss": 0.1709, "step": 25650 }, { "epoch": 0.9324805581801003, "grad_norm": 2.1309449672698975, "learning_rate": 4.924258122607895e-05, "loss": 0.1425, "step": 25660 }, { "epoch": 0.9328439566828984, "grad_norm": 1.092887282371521, "learning_rate": 4.924131302640482e-05, "loss": 0.1578, "step": 25670 }, { "epoch": 0.9332073551856966, "grad_norm": 0.7325641512870789, "learning_rate": 4.9240043782258746e-05, "loss": 0.1473, "step": 25680 }, { "epoch": 0.9335707536884948, "grad_norm": 1.296338677406311, "learning_rate": 4.9238773493695443e-05, "loss": 0.2279, "step": 25690 }, { "epoch": 0.9339341521912929, "grad_norm": 1.196590542793274, "learning_rate": 4.923750216076963e-05, "loss": 0.1524, "step": 25700 }, { "epoch": 0.9342975506940912, "grad_norm": 1.5417845249176025, "learning_rate": 4.923622978353608e-05, "loss": 0.1385, "step": 25710 }, { "epoch": 0.9346609491968894, "grad_norm": 1.4865704774856567, "learning_rate": 4.923495636204963e-05, "loss": 0.1435, "step": 25720 }, { "epoch": 0.9350243476996875, "grad_norm": 1.6445010900497437, "learning_rate": 4.923368189636513e-05, "loss": 0.223, "step": 25730 }, { "epoch": 0.9353877462024857, "grad_norm": 0.6629343032836914, "learning_rate": 4.9232406386537505e-05, "loss": 
0.1479, "step": 25740 }, { "epoch": 0.9357511447052839, "grad_norm": 8.440834999084473, "learning_rate": 4.923112983262171e-05, "loss": 0.7624, "step": 25750 }, { "epoch": 0.936114543208082, "grad_norm": 1.088809847831726, "learning_rate": 4.922985223467274e-05, "loss": 0.134, "step": 25760 }, { "epoch": 0.9364779417108802, "grad_norm": 1.1839587688446045, "learning_rate": 4.922857359274565e-05, "loss": 0.1284, "step": 25770 }, { "epoch": 0.9368413402136783, "grad_norm": 2.278588056564331, "learning_rate": 4.922729390689553e-05, "loss": 0.1873, "step": 25780 }, { "epoch": 0.9372047387164765, "grad_norm": 1.6524765491485596, "learning_rate": 4.9226013177177515e-05, "loss": 0.1769, "step": 25790 }, { "epoch": 0.9375681372192747, "grad_norm": 18.044713973999023, "learning_rate": 4.922473140364679e-05, "loss": 0.2122, "step": 25800 }, { "epoch": 0.9375681372192747, "eval_loss": 0.3881298005580902, "eval_runtime": 180.2979, "eval_samples_per_second": 41.121, "eval_steps_per_second": 5.141, "eval_wer": 0.1963348884491804, "step": 25800 }, { "epoch": 0.9379315357220728, "grad_norm": 1.1691884994506836, "learning_rate": 4.9223448586358576e-05, "loss": 0.1573, "step": 25810 }, { "epoch": 0.938294934224871, "grad_norm": 1.1012376546859741, "learning_rate": 4.9222164725368156e-05, "loss": 0.1511, "step": 25820 }, { "epoch": 0.9386583327276692, "grad_norm": 2.1937880516052246, "learning_rate": 4.9220879820730844e-05, "loss": 0.1684, "step": 25830 }, { "epoch": 0.9390217312304673, "grad_norm": 1.5964059829711914, "learning_rate": 4.921959387250199e-05, "loss": 0.1897, "step": 25840 }, { "epoch": 0.9393851297332655, "grad_norm": 6.693167209625244, "learning_rate": 4.921830688073701e-05, "loss": 0.2155, "step": 25850 }, { "epoch": 0.9397485282360637, "grad_norm": 1.679046869277954, "learning_rate": 4.921701884549136e-05, "loss": 0.1566, "step": 25860 }, { "epoch": 0.9401119267388618, "grad_norm": 0.648047924041748, "learning_rate": 4.9215729766820536e-05, "loss": 0.1398, "step": 
25870 }, { "epoch": 0.94047532524166, "grad_norm": 0.7286267876625061, "learning_rate": 4.921443964478007e-05, "loss": 0.1598, "step": 25880 }, { "epoch": 0.9408387237444582, "grad_norm": 1.3676726818084717, "learning_rate": 4.921314847942555e-05, "loss": 0.1627, "step": 25890 }, { "epoch": 0.9412021222472563, "grad_norm": 11.982099533081055, "learning_rate": 4.921185627081263e-05, "loss": 0.2181, "step": 25900 }, { "epoch": 0.9415655207500545, "grad_norm": 0.8863544464111328, "learning_rate": 4.9210563018996955e-05, "loss": 0.1296, "step": 25910 }, { "epoch": 0.9419289192528527, "grad_norm": 0.8388992547988892, "learning_rate": 4.9209268724034265e-05, "loss": 0.1406, "step": 25920 }, { "epoch": 0.9422923177556508, "grad_norm": 2.4800333976745605, "learning_rate": 4.9207973385980324e-05, "loss": 0.1694, "step": 25930 }, { "epoch": 0.942655716258449, "grad_norm": 4.2597174644470215, "learning_rate": 4.920667700489093e-05, "loss": 0.9439, "step": 25940 }, { "epoch": 0.9430191147612472, "grad_norm": 5.32108736038208, "learning_rate": 4.920537958082196e-05, "loss": 0.1745, "step": 25950 }, { "epoch": 0.9433825132640453, "grad_norm": 1.3563112020492554, "learning_rate": 4.9204081113829316e-05, "loss": 0.1554, "step": 25960 }, { "epoch": 0.9437459117668435, "grad_norm": 8.575587272644043, "learning_rate": 4.9202781603968926e-05, "loss": 0.2015, "step": 25970 }, { "epoch": 0.9441093102696417, "grad_norm": 6.85026216506958, "learning_rate": 4.920148105129679e-05, "loss": 0.1548, "step": 25980 }, { "epoch": 0.9444727087724398, "grad_norm": 1.2886810302734375, "learning_rate": 4.9200179455868944e-05, "loss": 0.136, "step": 25990 }, { "epoch": 0.944836107275238, "grad_norm": 2.0779457092285156, "learning_rate": 4.919887681774148e-05, "loss": 0.1744, "step": 26000 }, { "epoch": 0.9451995057780362, "grad_norm": null, "learning_rate": 4.919770355196496e-05, "loss": 2.7706, "step": 26010 }, { "epoch": 0.9455629042808343, "grad_norm": 0.9514101147651672, "learning_rate": 
4.919639893286285e-05, "loss": 0.1435, "step": 26020 }, { "epoch": 0.9459263027836325, "grad_norm": 0.3761270046234131, "learning_rate": 4.9195093271224016e-05, "loss": 0.1525, "step": 26030 }, { "epoch": 0.9462897012864308, "grad_norm": 1.2147834300994873, "learning_rate": 4.919378656710469e-05, "loss": 0.1922, "step": 26040 }, { "epoch": 0.9466530997892288, "grad_norm": 15.408570289611816, "learning_rate": 4.919247882056119e-05, "loss": 0.2773, "step": 26050 }, { "epoch": 0.947016498292027, "grad_norm": 2.2306370735168457, "learning_rate": 4.919117003164985e-05, "loss": 0.1446, "step": 26060 }, { "epoch": 0.9473798967948253, "grad_norm": 1.3414242267608643, "learning_rate": 4.918986020042706e-05, "loss": 0.1484, "step": 26070 }, { "epoch": 0.9477432952976234, "grad_norm": 1.9740337133407593, "learning_rate": 4.9188549326949275e-05, "loss": 0.1845, "step": 26080 }, { "epoch": 0.9481066938004216, "grad_norm": 0.7002670764923096, "learning_rate": 4.9187237411272955e-05, "loss": 0.1559, "step": 26090 }, { "epoch": 0.9484700923032197, "grad_norm": 8.308074951171875, "learning_rate": 4.9185924453454635e-05, "loss": 0.223, "step": 26100 }, { "epoch": 0.9488334908060179, "grad_norm": 0.8129051327705383, "learning_rate": 4.9184610453550884e-05, "loss": 0.1459, "step": 26110 }, { "epoch": 0.9491968893088161, "grad_norm": 1.5998592376708984, "learning_rate": 4.918329541161831e-05, "loss": 0.1394, "step": 26120 }, { "epoch": 0.9495602878116142, "grad_norm": 1.8726842403411865, "learning_rate": 4.918197932771359e-05, "loss": 0.1859, "step": 26130 }, { "epoch": 0.9499236863144124, "grad_norm": 1.1915557384490967, "learning_rate": 4.9180662201893424e-05, "loss": 0.1621, "step": 26140 }, { "epoch": 0.9502870848172106, "grad_norm": 6.970126152038574, "learning_rate": 4.917934403421455e-05, "loss": 0.2613, "step": 26150 }, { "epoch": 0.9506504833200087, "grad_norm": 1.0738050937652588, "learning_rate": 4.9178024824733776e-05, "loss": 0.1383, "step": 26160 }, { "epoch": 
0.9510138818228069, "grad_norm": 2.1130123138427734, "learning_rate": 4.9176704573507933e-05, "loss": 0.222, "step": 26170 }, { "epoch": 0.9513772803256051, "grad_norm": 3.1722593307495117, "learning_rate": 4.9175383280593925e-05, "loss": 0.1624, "step": 26180 }, { "epoch": 0.9517406788284032, "grad_norm": 0.9101456999778748, "learning_rate": 4.917406094604865e-05, "loss": 0.16, "step": 26190 }, { "epoch": 0.9521040773312014, "grad_norm": 3.284672260284424, "learning_rate": 4.917273756992911e-05, "loss": 0.1817, "step": 26200 }, { "epoch": 0.9524674758339996, "grad_norm": 2.2083284854888916, "learning_rate": 4.917141315229232e-05, "loss": 0.169, "step": 26210 }, { "epoch": 0.9528308743367977, "grad_norm": 4.354351997375488, "learning_rate": 4.9170087693195335e-05, "loss": 0.2629, "step": 26220 }, { "epoch": 0.9531942728395959, "grad_norm": 2.520522117614746, "learning_rate": 4.916876119269526e-05, "loss": 0.1852, "step": 26230 }, { "epoch": 0.9535576713423941, "grad_norm": 0.8573399186134338, "learning_rate": 4.9167433650849264e-05, "loss": 0.1524, "step": 26240 }, { "epoch": 0.9539210698451922, "grad_norm": 6.314918041229248, "learning_rate": 4.916610506771454e-05, "loss": 0.2685, "step": 26250 }, { "epoch": 0.9542844683479904, "grad_norm": 3.0610973834991455, "learning_rate": 4.916477544334833e-05, "loss": 0.1374, "step": 26260 }, { "epoch": 0.9546478668507886, "grad_norm": 0.9085964560508728, "learning_rate": 4.916344477780793e-05, "loss": 0.1754, "step": 26270 }, { "epoch": 0.9550112653535867, "grad_norm": 3.8550243377685547, "learning_rate": 4.916211307115067e-05, "loss": 0.3546, "step": 26280 }, { "epoch": 0.9553746638563849, "grad_norm": 5.278194904327393, "learning_rate": 4.916078032343392e-05, "loss": 0.1298, "step": 26290 }, { "epoch": 0.9557380623591831, "grad_norm": 2.707965612411499, "learning_rate": 4.9159446534715116e-05, "loss": 0.1689, "step": 26300 }, { "epoch": 0.9561014608619812, "grad_norm": 1.3821223974227905, "learning_rate": 
4.9158111705051716e-05, "loss": 0.2117, "step": 26310 }, { "epoch": 0.9564648593647794, "grad_norm": 1.0195057392120361, "learning_rate": 4.915677583450123e-05, "loss": 0.1151, "step": 26320 }, { "epoch": 0.9568282578675776, "grad_norm": 2.078343629837036, "learning_rate": 4.915543892312124e-05, "loss": 0.1433, "step": 26330 }, { "epoch": 0.9571916563703757, "grad_norm": 1.6972254514694214, "learning_rate": 4.915410097096932e-05, "loss": 0.1307, "step": 26340 }, { "epoch": 0.9575550548731739, "grad_norm": 4.440702438354492, "learning_rate": 4.915276197810313e-05, "loss": 0.1806, "step": 26350 }, { "epoch": 0.9579184533759721, "grad_norm": 0.778567373752594, "learning_rate": 4.9151421944580374e-05, "loss": 0.177, "step": 26360 }, { "epoch": 0.9582818518787702, "grad_norm": 1.2955224514007568, "learning_rate": 4.915008087045877e-05, "loss": 0.1395, "step": 26370 }, { "epoch": 0.9586452503815684, "grad_norm": 2.077195405960083, "learning_rate": 4.9148738755796104e-05, "loss": 0.1583, "step": 26380 }, { "epoch": 0.9590086488843665, "grad_norm": 0.8736408352851868, "learning_rate": 4.914739560065021e-05, "loss": 0.1284, "step": 26390 }, { "epoch": 0.9593720473871648, "grad_norm": 2.9465060234069824, "learning_rate": 4.914605140507895e-05, "loss": 0.1929, "step": 26400 }, { "epoch": 0.9593720473871648, "eval_loss": 0.3894718587398529, "eval_runtime": 180.7535, "eval_samples_per_second": 41.017, "eval_steps_per_second": 5.129, "eval_wer": 0.18666836095630548, "step": 26400 }, { "epoch": 0.959735445889963, "grad_norm": 0.7856747508049011, "learning_rate": 4.9144706169140256e-05, "loss": 0.1548, "step": 26410 }, { "epoch": 0.960098844392761, "grad_norm": 3.3245174884796143, "learning_rate": 4.914335989289208e-05, "loss": 0.1328, "step": 26420 }, { "epoch": 0.9604622428955593, "grad_norm": 4.848336219787598, "learning_rate": 4.914201257639243e-05, "loss": 0.145, "step": 26430 }, { "epoch": 0.9608256413983575, "grad_norm": 2.2661678791046143, "learning_rate": 
4.9140664219699344e-05, "loss": 0.1482, "step": 26440 }, { "epoch": 0.9611890399011556, "grad_norm": 6.279752731323242, "learning_rate": 4.913931482287094e-05, "loss": 0.2087, "step": 26450 }, { "epoch": 0.9615524384039538, "grad_norm": 0.9201165437698364, "learning_rate": 4.913796438596534e-05, "loss": 0.1641, "step": 26460 }, { "epoch": 0.961915836906752, "grad_norm": 1.0935853719711304, "learning_rate": 4.9136612909040746e-05, "loss": 0.1678, "step": 26470 }, { "epoch": 0.9622792354095501, "grad_norm": 5.511369705200195, "learning_rate": 4.913526039215538e-05, "loss": 0.2284, "step": 26480 }, { "epoch": 0.9626426339123483, "grad_norm": 0.8109707832336426, "learning_rate": 4.913390683536751e-05, "loss": 0.1314, "step": 26490 }, { "epoch": 0.9630060324151465, "grad_norm": 20.594274520874023, "learning_rate": 4.9132552238735464e-05, "loss": 0.2612, "step": 26500 }, { "epoch": 0.9633694309179446, "grad_norm": 40.1435661315918, "learning_rate": 4.913119660231761e-05, "loss": 0.5943, "step": 26510 }, { "epoch": 0.9637328294207428, "grad_norm": 0.6886749863624573, "learning_rate": 4.912983992617235e-05, "loss": 0.1445, "step": 26520 }, { "epoch": 0.964096227923541, "grad_norm": 4.847496032714844, "learning_rate": 4.912848221035815e-05, "loss": 0.1645, "step": 26530 }, { "epoch": 0.9644596264263391, "grad_norm": 1.3625943660736084, "learning_rate": 4.912712345493349e-05, "loss": 0.1403, "step": 26540 }, { "epoch": 0.9648230249291373, "grad_norm": 6.022468090057373, "learning_rate": 4.9125763659956934e-05, "loss": 0.2215, "step": 26550 }, { "epoch": 0.9651864234319355, "grad_norm": 1.5577186346054077, "learning_rate": 4.912440282548706e-05, "loss": 0.1401, "step": 26560 }, { "epoch": 0.9655498219347336, "grad_norm": 1.5680512189865112, "learning_rate": 4.91230409515825e-05, "loss": 0.1373, "step": 26570 }, { "epoch": 0.9659132204375318, "grad_norm": 2.3043782711029053, "learning_rate": 4.912167803830193e-05, "loss": 0.1501, "step": 26580 }, { "epoch": 0.96627661894033, 
"grad_norm": 1.5168496370315552, "learning_rate": 4.912031408570409e-05, "loss": 0.1665, "step": 26590 }, { "epoch": 0.9666400174431281, "grad_norm": 10.529095649719238, "learning_rate": 4.911894909384773e-05, "loss": 0.1642, "step": 26600 }, { "epoch": 0.9670034159459263, "grad_norm": 2.458815097808838, "learning_rate": 4.911758306279167e-05, "loss": 3.3307, "step": 26610 }, { "epoch": 0.9673668144487245, "grad_norm": 1.2745519876480103, "learning_rate": 4.911621599259477e-05, "loss": 0.1681, "step": 26620 }, { "epoch": 0.9677302129515226, "grad_norm": 1.877960443496704, "learning_rate": 4.911484788331593e-05, "loss": 0.1445, "step": 26630 }, { "epoch": 0.9680936114543208, "grad_norm": 1.3567255735397339, "learning_rate": 4.911347873501408e-05, "loss": 0.132, "step": 26640 }, { "epoch": 0.968457009957119, "grad_norm": 3.44063138961792, "learning_rate": 4.911210854774825e-05, "loss": 0.205, "step": 26650 }, { "epoch": 0.9688204084599171, "grad_norm": 5.335951805114746, "learning_rate": 4.911073732157744e-05, "loss": 0.1475, "step": 26660 }, { "epoch": 0.9691838069627153, "grad_norm": 3.0675578117370605, "learning_rate": 4.910936505656074e-05, "loss": 0.1602, "step": 26670 }, { "epoch": 0.9695472054655134, "grad_norm": 5.95693826675415, "learning_rate": 4.910799175275729e-05, "loss": 0.1888, "step": 26680 }, { "epoch": 0.9699106039683116, "grad_norm": 1.7128913402557373, "learning_rate": 4.910661741022625e-05, "loss": 0.1402, "step": 26690 }, { "epoch": 0.9702740024711098, "grad_norm": 11.855730056762695, "learning_rate": 4.9105242029026844e-05, "loss": 0.1939, "step": 26700 }, { "epoch": 0.9706374009739079, "grad_norm": 2.21028208732605, "learning_rate": 4.910386560921831e-05, "loss": 0.1365, "step": 26710 }, { "epoch": 0.9710007994767061, "grad_norm": 3.6761391162872314, "learning_rate": 4.910248815085998e-05, "loss": 0.1661, "step": 26720 }, { "epoch": 0.9713641979795044, "grad_norm": 1.9474952220916748, "learning_rate": 4.9101109654011196e-05, "loss": 0.1176, 
"step": 26730 }, { "epoch": 0.9717275964823024, "grad_norm": 4.190001010894775, "learning_rate": 4.909973011873135e-05, "loss": 0.1426, "step": 26740 }, { "epoch": 0.9720909949851007, "grad_norm": 2.785562753677368, "learning_rate": 4.909834954507987e-05, "loss": 0.201, "step": 26750 }, { "epoch": 0.9724543934878989, "grad_norm": 2.296952724456787, "learning_rate": 4.909696793311625e-05, "loss": 1.5478, "step": 26760 }, { "epoch": 0.972817791990697, "grad_norm": 1.471690058708191, "learning_rate": 4.909558528290002e-05, "loss": 0.1254, "step": 26770 }, { "epoch": 0.9731811904934952, "grad_norm": 5.213918685913086, "learning_rate": 4.9094340010048675e-05, "loss": 0.8881, "step": 26780 }, { "epoch": 0.9735445889962934, "grad_norm": 1.5338894128799438, "learning_rate": 4.909295538731665e-05, "loss": 0.1621, "step": 26790 }, { "epoch": 0.9739079874990915, "grad_norm": 4.493140697479248, "learning_rate": 4.909156972650491e-05, "loss": 0.1653, "step": 26800 }, { "epoch": 0.9742713860018897, "grad_norm": 0.9602924585342407, "learning_rate": 4.909018302767313e-05, "loss": 0.1388, "step": 26810 }, { "epoch": 0.9746347845046879, "grad_norm": 1.038445234298706, "learning_rate": 4.9088795290881085e-05, "loss": 0.145, "step": 26820 }, { "epoch": 0.974998183007486, "grad_norm": 3.1368119716644287, "learning_rate": 4.908740651618856e-05, "loss": 0.1732, "step": 26830 }, { "epoch": 0.9753615815102842, "grad_norm": 0.6875894069671631, "learning_rate": 4.908601670365539e-05, "loss": 0.1582, "step": 26840 }, { "epoch": 0.9757249800130824, "grad_norm": 14.604360580444336, "learning_rate": 4.908462585334146e-05, "loss": 0.228, "step": 26850 }, { "epoch": 0.9760883785158805, "grad_norm": 1.819300889968872, "learning_rate": 4.9083233965306694e-05, "loss": 0.5066, "step": 26860 }, { "epoch": 0.9764517770186787, "grad_norm": 5.712610721588135, "learning_rate": 4.908184103961106e-05, "loss": 0.2109, "step": 26870 }, { "epoch": 0.9768151755214769, "grad_norm": 2.8377017974853516, 
"learning_rate": 4.908044707631459e-05, "loss": 0.1417, "step": 26880 }, { "epoch": 0.977178574024275, "grad_norm": 1.0483819246292114, "learning_rate": 4.907905207547733e-05, "loss": 0.1526, "step": 26890 }, { "epoch": 0.9775419725270732, "grad_norm": 3.996112823486328, "learning_rate": 4.907765603715938e-05, "loss": 0.6109, "step": 26900 }, { "epoch": 0.9779053710298714, "grad_norm": 1.336004614830017, "learning_rate": 4.907625896142091e-05, "loss": 0.8939, "step": 26910 }, { "epoch": 0.9782687695326695, "grad_norm": 0.9394060373306274, "learning_rate": 4.907486084832211e-05, "loss": 0.1515, "step": 26920 }, { "epoch": 0.9786321680354677, "grad_norm": 2.356201410293579, "learning_rate": 4.907346169792321e-05, "loss": 0.1567, "step": 26930 }, { "epoch": 0.9789955665382659, "grad_norm": 0.926143229007721, "learning_rate": 4.907206151028449e-05, "loss": 0.1669, "step": 26940 }, { "epoch": 0.979358965041064, "grad_norm": 4.815629482269287, "learning_rate": 4.90706602854663e-05, "loss": 0.2068, "step": 26950 }, { "epoch": 0.9797223635438622, "grad_norm": 1.3679453134536743, "learning_rate": 4.906925802352899e-05, "loss": 0.15, "step": 26960 }, { "epoch": 0.9800857620466603, "grad_norm": 1.1221717596054077, "learning_rate": 4.9067854724533e-05, "loss": 0.223, "step": 26970 }, { "epoch": 0.9804491605494585, "grad_norm": 2.198657512664795, "learning_rate": 4.906645038853878e-05, "loss": 0.1662, "step": 26980 }, { "epoch": 0.9808125590522567, "grad_norm": 1.7014293670654297, "learning_rate": 4.906504501560684e-05, "loss": 0.1601, "step": 26990 }, { "epoch": 0.9811759575550548, "grad_norm": 8.204177856445312, "learning_rate": 4.906363860579774e-05, "loss": 0.2345, "step": 27000 }, { "epoch": 0.9811759575550548, "eval_loss": 0.3534720242023468, "eval_runtime": 180.0448, "eval_samples_per_second": 41.179, "eval_steps_per_second": 5.149, "eval_wer": 0.19012652712981284, "step": 27000 }, { "epoch": 0.981539356057853, "grad_norm": 1.1557930707931519, "learning_rate": 
4.906223115917207e-05, "loss": 0.1357, "step": 27010 }, { "epoch": 0.9819027545606512, "grad_norm": 0.7808053493499756, "learning_rate": 4.906082267579047e-05, "loss": 0.1366, "step": 27020 }, { "epoch": 0.9822661530634493, "grad_norm": 1.4547855854034424, "learning_rate": 4.9059413155713626e-05, "loss": 0.1472, "step": 27030 }, { "epoch": 0.9826295515662475, "grad_norm": 0.5997300148010254, "learning_rate": 4.9058002599002275e-05, "loss": 0.1455, "step": 27040 }, { "epoch": 0.9829929500690457, "grad_norm": 12.663732528686523, "learning_rate": 4.90565910057172e-05, "loss": 0.2728, "step": 27050 }, { "epoch": 0.9833563485718438, "grad_norm": 0.9098420739173889, "learning_rate": 4.9055178375919196e-05, "loss": 0.1467, "step": 27060 }, { "epoch": 0.983719747074642, "grad_norm": 3.4135582447052, "learning_rate": 4.9053764709669156e-05, "loss": 0.2571, "step": 27070 }, { "epoch": 0.9840831455774403, "grad_norm": 1.7984899282455444, "learning_rate": 4.905235000702798e-05, "loss": 0.2026, "step": 27080 }, { "epoch": 0.9844465440802384, "grad_norm": 1.2022316455841064, "learning_rate": 4.9050934268056615e-05, "loss": 0.1378, "step": 27090 }, { "epoch": 0.9848099425830366, "grad_norm": 1.499773621559143, "learning_rate": 4.9049517492816066e-05, "loss": 0.1765, "step": 27100 }, { "epoch": 0.9851733410858348, "grad_norm": 0.8122308254241943, "learning_rate": 4.9048099681367377e-05, "loss": 0.1422, "step": 27110 }, { "epoch": 0.9855367395886329, "grad_norm": 1.203873872756958, "learning_rate": 4.904668083377164e-05, "loss": 0.1436, "step": 27120 }, { "epoch": 0.9859001380914311, "grad_norm": 1.6162346601486206, "learning_rate": 4.9045260950089976e-05, "loss": 0.1667, "step": 27130 }, { "epoch": 0.9862635365942293, "grad_norm": 0.5100680589675903, "learning_rate": 4.904384003038358e-05, "loss": 0.1573, "step": 27140 }, { "epoch": 0.9866269350970274, "grad_norm": 6.364781379699707, "learning_rate": 4.904241807471366e-05, "loss": 0.2245, "step": 27150 }, { "epoch": 
0.9869903335998256, "grad_norm": 0.6124529242515564, "learning_rate": 4.9040995083141495e-05, "loss": 0.2296, "step": 27160 }, { "epoch": 0.9873537321026238, "grad_norm": 1.3477269411087036, "learning_rate": 4.903957105572838e-05, "loss": 0.1173, "step": 27170 }, { "epoch": 0.9877171306054219, "grad_norm": 0.8505461812019348, "learning_rate": 4.903814599253569e-05, "loss": 0.1913, "step": 27180 }, { "epoch": 0.9880805291082201, "grad_norm": 0.928269624710083, "learning_rate": 4.903671989362481e-05, "loss": 0.1449, "step": 27190 }, { "epoch": 0.9884439276110183, "grad_norm": 9.115983963012695, "learning_rate": 4.903529275905719e-05, "loss": 0.2025, "step": 27200 }, { "epoch": 0.9888073261138164, "grad_norm": 0.8631362318992615, "learning_rate": 4.903386458889434e-05, "loss": 0.1313, "step": 27210 }, { "epoch": 0.9891707246166146, "grad_norm": 1.5814399719238281, "learning_rate": 4.9032435383197764e-05, "loss": 0.1547, "step": 27220 }, { "epoch": 0.9895341231194128, "grad_norm": 2.2507669925689697, "learning_rate": 4.9031005142029054e-05, "loss": 2.0754, "step": 27230 }, { "epoch": 0.9898975216222109, "grad_norm": 2.0611305236816406, "learning_rate": 4.902957386544984e-05, "loss": 0.1351, "step": 27240 }, { "epoch": 0.9902609201250091, "grad_norm": 4.219666481018066, "learning_rate": 4.9028141553521785e-05, "loss": 0.194, "step": 27250 }, { "epoch": 0.9906243186278072, "grad_norm": 2.4156904220581055, "learning_rate": 4.90267082063066e-05, "loss": 1.9594, "step": 27260 }, { "epoch": 0.9909877171306054, "grad_norm": 4.805545806884766, "learning_rate": 4.9025273823866046e-05, "loss": 0.1608, "step": 27270 }, { "epoch": 0.9913511156334036, "grad_norm": 3.431521415710449, "learning_rate": 4.902383840626193e-05, "loss": 0.1439, "step": 27280 }, { "epoch": 0.9917145141362017, "grad_norm": 0.9847241640090942, "learning_rate": 4.902240195355609e-05, "loss": 0.1568, "step": 27290 }, { "epoch": 0.9920779126389999, "grad_norm": 4.65169095993042, "learning_rate": 
4.9020964465810426e-05, "loss": 0.2039, "step": 27300 }, { "epoch": 0.9924413111417981, "grad_norm": 1.466956377029419, "learning_rate": 4.9019525943086865e-05, "loss": 0.1649, "step": 27310 }, { "epoch": 0.9928047096445962, "grad_norm": 4.803518772125244, "learning_rate": 4.901808638544739e-05, "loss": 0.1645, "step": 27320 }, { "epoch": 0.9931681081473944, "grad_norm": 3.4496331214904785, "learning_rate": 4.901664579295404e-05, "loss": 0.1751, "step": 27330 }, { "epoch": 0.9935315066501926, "grad_norm": 0.9507334232330322, "learning_rate": 4.9015204165668866e-05, "loss": 0.1228, "step": 27340 }, { "epoch": 0.9938949051529907, "grad_norm": 5.97396993637085, "learning_rate": 4.901376150365399e-05, "loss": 0.2178, "step": 27350 }, { "epoch": 0.9942583036557889, "grad_norm": 1.7720214128494263, "learning_rate": 4.9012317806971573e-05, "loss": 0.1468, "step": 27360 }, { "epoch": 0.9946217021585871, "grad_norm": 0.9194307923316956, "learning_rate": 4.9010873075683825e-05, "loss": 0.1481, "step": 27370 }, { "epoch": 0.9949851006613852, "grad_norm": 2.8458971977233887, "learning_rate": 4.9009427309852986e-05, "loss": 0.1402, "step": 27380 }, { "epoch": 0.9953484991641834, "grad_norm": 1.9232338666915894, "learning_rate": 4.900798050954134e-05, "loss": 0.155, "step": 27390 }, { "epoch": 0.9957118976669816, "grad_norm": 4.017787456512451, "learning_rate": 4.900653267481125e-05, "loss": 0.2279, "step": 27400 }, { "epoch": 0.9960752961697797, "grad_norm": 0.714726448059082, "learning_rate": 4.9005083805725064e-05, "loss": 0.1271, "step": 27410 }, { "epoch": 0.996438694672578, "grad_norm": 0.8059016466140747, "learning_rate": 4.900363390234524e-05, "loss": 0.138, "step": 27420 }, { "epoch": 0.9968020931753762, "grad_norm": 2.650024175643921, "learning_rate": 4.9002182964734234e-05, "loss": 0.1483, "step": 27430 }, { "epoch": 0.9971654916781743, "grad_norm": 1.200749397277832, "learning_rate": 4.900073099295456e-05, "loss": 0.9832, "step": 27440 }, { "epoch": 
0.9975288901809725, "grad_norm": 3.3051798343658447, "learning_rate": 4.8999277987068785e-05, "loss": 0.2136, "step": 27450 }, { "epoch": 0.9978922886837707, "grad_norm": 1.3630801439285278, "learning_rate": 4.899782394713951e-05, "loss": 0.2136, "step": 27460 }, { "epoch": 0.9982556871865688, "grad_norm": 2.5952398777008057, "learning_rate": 4.899636887322939e-05, "loss": 0.1515, "step": 27470 }, { "epoch": 0.998619085689367, "grad_norm": 5.025683879852295, "learning_rate": 4.8994912765401116e-05, "loss": 0.1862, "step": 27480 }, { "epoch": 0.9989824841921652, "grad_norm": 1.1604958772659302, "learning_rate": 4.8993455623717415e-05, "loss": 0.1433, "step": 27490 }, { "epoch": 0.9993458826949633, "grad_norm": 14.864492416381836, "learning_rate": 4.899199744824109e-05, "loss": 0.1752, "step": 27500 }, { "epoch": 0.9997092811977615, "grad_norm": 1.072911024093628, "learning_rate": 4.8990538239034956e-05, "loss": 0.1524, "step": 27510 }, { "epoch": 1.0000726797005597, "grad_norm": 0.41248244047164917, "learning_rate": 4.898907799616188e-05, "loss": 0.1457, "step": 27520 }, { "epoch": 1.000436078203358, "grad_norm": 1.0402699708938599, "learning_rate": 4.89876167196848e-05, "loss": 0.1394, "step": 27530 }, { "epoch": 1.0007994767061559, "grad_norm": 0.8177555203437805, "learning_rate": 4.8986154409666654e-05, "loss": 0.1134, "step": 27540 }, { "epoch": 1.001162875208954, "grad_norm": 1.1209142208099365, "learning_rate": 4.8984691066170465e-05, "loss": 0.1574, "step": 27550 }, { "epoch": 1.0015262737117523, "grad_norm": 1.4969863891601562, "learning_rate": 4.8983226689259264e-05, "loss": 0.1144, "step": 27560 }, { "epoch": 1.0018896722145505, "grad_norm": 0.8014885783195496, "learning_rate": 4.898176127899617e-05, "loss": 0.1616, "step": 27570 }, { "epoch": 1.0022530707173487, "grad_norm": 1.1477352380752563, "learning_rate": 4.89802948354443e-05, "loss": 0.1209, "step": 27580 }, { "epoch": 1.002616469220147, "grad_norm": 1.0199166536331177, "learning_rate": 
4.897882735866686e-05, "loss": 0.1422, "step": 27590 }, { "epoch": 1.002979867722945, "grad_norm": 0.8987438678741455, "learning_rate": 4.897735884872705e-05, "loss": 0.1328, "step": 27600 }, { "epoch": 1.002979867722945, "eval_loss": 0.38110727071762085, "eval_runtime": 180.6687, "eval_samples_per_second": 41.036, "eval_steps_per_second": 5.131, "eval_wer": 0.18068691342785048, "step": 27600 }, { "epoch": 1.003343266225743, "grad_norm": 1.1313180923461914, "learning_rate": 4.897588930568817e-05, "loss": 0.1395, "step": 27610 }, { "epoch": 1.0037066647285413, "grad_norm": 0.6739907264709473, "learning_rate": 4.8974418729613526e-05, "loss": 0.2011, "step": 27620 }, { "epoch": 1.0040700632313395, "grad_norm": 0.986926257610321, "learning_rate": 4.8972947120566475e-05, "loss": 0.1212, "step": 27630 }, { "epoch": 1.0044334617341377, "grad_norm": 0.795300304889679, "learning_rate": 4.8971474478610437e-05, "loss": 0.1404, "step": 27640 }, { "epoch": 1.004796860236936, "grad_norm": 1.7036499977111816, "learning_rate": 4.897000080380885e-05, "loss": 0.1677, "step": 27650 }, { "epoch": 1.005160258739734, "grad_norm": 1.4313631057739258, "learning_rate": 4.896852609622521e-05, "loss": 0.1188, "step": 27660 }, { "epoch": 1.0055236572425321, "grad_norm": 0.5508180260658264, "learning_rate": 4.896705035592306e-05, "loss": 0.1627, "step": 27670 }, { "epoch": 1.0058870557453303, "grad_norm": 2.3307416439056396, "learning_rate": 4.896557358296599e-05, "loss": 0.1173, "step": 27680 }, { "epoch": 1.0062504542481285, "grad_norm": 3.0311474800109863, "learning_rate": 4.896409577741762e-05, "loss": 0.1176, "step": 27690 }, { "epoch": 1.0066138527509267, "grad_norm": 1.8580576181411743, "learning_rate": 4.896261693934163e-05, "loss": 0.1647, "step": 27700 }, { "epoch": 1.0069772512537247, "grad_norm": 1.094754934310913, "learning_rate": 4.896113706880174e-05, "loss": 0.1137, "step": 27710 }, { "epoch": 1.007340649756523, "grad_norm": 0.8240002393722534, "learning_rate": 
4.89596561658617e-05, "loss": 0.1835, "step": 27720 }, { "epoch": 1.0077040482593211, "grad_norm": 1.4678568840026855, "learning_rate": 4.895817423058533e-05, "loss": 0.1612, "step": 27730 }, { "epoch": 1.0080674467621193, "grad_norm": 0.8481863737106323, "learning_rate": 4.8956691263036473e-05, "loss": 0.1211, "step": 27740 }, { "epoch": 1.0084308452649176, "grad_norm": 5.045682907104492, "learning_rate": 4.895520726327903e-05, "loss": 0.2747, "step": 27750 }, { "epoch": 1.0087942437677158, "grad_norm": 2.3443167209625244, "learning_rate": 4.895372223137694e-05, "loss": 0.1437, "step": 27760 }, { "epoch": 1.0091576422705137, "grad_norm": 1.6994588375091553, "learning_rate": 4.895223616739418e-05, "loss": 0.16, "step": 27770 }, { "epoch": 1.009521040773312, "grad_norm": 2.073699712753296, "learning_rate": 4.8950749071394794e-05, "loss": 0.1341, "step": 27780 }, { "epoch": 1.0098844392761102, "grad_norm": 1.1939536333084106, "learning_rate": 4.894926094344284e-05, "loss": 0.1284, "step": 27790 }, { "epoch": 1.0102478377789084, "grad_norm": 1.4820387363433838, "learning_rate": 4.8947771783602444e-05, "loss": 0.1644, "step": 27800 }, { "epoch": 1.0106112362817066, "grad_norm": 1.8140612840652466, "learning_rate": 4.894628159193778e-05, "loss": 0.1681, "step": 27810 }, { "epoch": 1.0109746347845048, "grad_norm": 1.7120946645736694, "learning_rate": 4.894479036851303e-05, "loss": 0.2066, "step": 27820 }, { "epoch": 1.0113380332873028, "grad_norm": 1.0871057510375977, "learning_rate": 4.894329811339247e-05, "loss": 0.1428, "step": 27830 }, { "epoch": 1.011701431790101, "grad_norm": 0.897597074508667, "learning_rate": 4.8941804826640375e-05, "loss": 0.1202, "step": 27840 }, { "epoch": 1.0120648302928992, "grad_norm": 1.2489410638809204, "learning_rate": 4.89403105083211e-05, "loss": 0.1435, "step": 27850 }, { "epoch": 1.0124282287956974, "grad_norm": 1.043281078338623, "learning_rate": 4.893881515849902e-05, "loss": 0.1657, "step": 27860 }, { "epoch": 1.0127916272984956, 
"grad_norm": 1.0345379114151, "learning_rate": 4.893731877723857e-05, "loss": 0.1669, "step": 27870 }, { "epoch": 1.0131550258012938, "grad_norm": 3.5156590938568115, "learning_rate": 4.893582136460423e-05, "loss": 0.1356, "step": 27880 }, { "epoch": 1.0135184243040918, "grad_norm": 1.0468858480453491, "learning_rate": 4.893432292066051e-05, "loss": 0.1201, "step": 27890 }, { "epoch": 1.01388182280689, "grad_norm": 1.3872016668319702, "learning_rate": 4.893282344547197e-05, "loss": 0.1472, "step": 27900 }, { "epoch": 1.0142452213096882, "grad_norm": 0.83976811170578, "learning_rate": 4.893132293910322e-05, "loss": 0.1467, "step": 27910 }, { "epoch": 1.0146086198124864, "grad_norm": 0.625514566898346, "learning_rate": 4.892982140161892e-05, "loss": 0.1661, "step": 27920 }, { "epoch": 1.0149720183152846, "grad_norm": 1.0802186727523804, "learning_rate": 4.892831883308375e-05, "loss": 0.1444, "step": 27930 }, { "epoch": 1.0153354168180828, "grad_norm": 0.567722499370575, "learning_rate": 4.892681523356246e-05, "loss": 0.1003, "step": 27940 }, { "epoch": 1.0156988153208808, "grad_norm": 1.1036186218261719, "learning_rate": 4.892531060311985e-05, "loss": 0.1438, "step": 27950 }, { "epoch": 1.016062213823679, "grad_norm": 1.2610325813293457, "learning_rate": 4.892380494182071e-05, "loss": 0.1478, "step": 27960 }, { "epoch": 1.0164256123264772, "grad_norm": 4.7541913986206055, "learning_rate": 4.892229824972995e-05, "loss": 0.2068, "step": 27970 }, { "epoch": 1.0167890108292754, "grad_norm": 14.794916152954102, "learning_rate": 4.8920790526912464e-05, "loss": 0.4626, "step": 27980 }, { "epoch": 1.0171524093320736, "grad_norm": 0.8083056807518005, "learning_rate": 4.891928177343323e-05, "loss": 0.4919, "step": 27990 }, { "epoch": 1.0175158078348718, "grad_norm": 1.1072735786437988, "learning_rate": 4.8917771989357246e-05, "loss": 0.2863, "step": 28000 }, { "epoch": 1.0178792063376698, "grad_norm": 0.8811991810798645, "learning_rate": 4.891626117474957e-05, "loss": 0.1361, 
"step": 28010 }, { "epoch": 1.018242604840468, "grad_norm": 0.43256062269210815, "learning_rate": 4.8914749329675294e-05, "loss": 0.1668, "step": 28020 }, { "epoch": 1.0186060033432662, "grad_norm": 1.7490280866622925, "learning_rate": 4.891323645419956e-05, "loss": 0.1328, "step": 28030 }, { "epoch": 1.0189694018460644, "grad_norm": 1.5770010948181152, "learning_rate": 4.891172254838755e-05, "loss": 0.1429, "step": 28040 }, { "epoch": 1.0193328003488626, "grad_norm": 0.5603241920471191, "learning_rate": 4.8910207612304495e-05, "loss": 0.1319, "step": 28050 }, { "epoch": 1.0196961988516606, "grad_norm": 1.5490175485610962, "learning_rate": 4.890869164601566e-05, "loss": 0.1292, "step": 28060 }, { "epoch": 1.0200595973544588, "grad_norm": 0.7562422752380371, "learning_rate": 4.8907174649586376e-05, "loss": 0.1978, "step": 28070 }, { "epoch": 1.020422995857257, "grad_norm": 2.67669415473938, "learning_rate": 4.8905656623082e-05, "loss": 0.1367, "step": 28080 }, { "epoch": 1.0207863943600552, "grad_norm": 1.4589964151382446, "learning_rate": 4.890413756656793e-05, "loss": 0.136, "step": 28090 }, { "epoch": 1.0211497928628535, "grad_norm": 0.5042529702186584, "learning_rate": 4.8902617480109626e-05, "loss": 0.1768, "step": 28100 }, { "epoch": 1.0215131913656517, "grad_norm": 3.3886609077453613, "learning_rate": 4.890109636377258e-05, "loss": 0.1827, "step": 28110 }, { "epoch": 1.0218765898684496, "grad_norm": 0.8882365226745605, "learning_rate": 4.889957421762234e-05, "loss": 0.2176, "step": 28120 }, { "epoch": 1.0222399883712479, "grad_norm": 1.5471583604812622, "learning_rate": 4.889805104172447e-05, "loss": 0.1934, "step": 28130 }, { "epoch": 1.022603386874046, "grad_norm": 1.221699595451355, "learning_rate": 4.889652683614461e-05, "loss": 0.1217, "step": 28140 }, { "epoch": 1.0229667853768443, "grad_norm": 1.1075172424316406, "learning_rate": 4.8895001600948444e-05, "loss": 2.944, "step": 28150 }, { "epoch": 1.0233301838796425, "grad_norm": 0.9731149077415466, 
"learning_rate": 4.889347533620167e-05, "loss": 0.1123, "step": 28160 }, { "epoch": 1.0236935823824407, "grad_norm": 0.8448407649993896, "learning_rate": 4.889194804197006e-05, "loss": 0.1755, "step": 28170 }, { "epoch": 1.0240569808852387, "grad_norm": 0.8480188250541687, "learning_rate": 4.8890419718319414e-05, "loss": 0.1434, "step": 28180 }, { "epoch": 1.0244203793880369, "grad_norm": 1.5608705282211304, "learning_rate": 4.8888890365315584e-05, "loss": 0.1309, "step": 28190 }, { "epoch": 1.024783777890835, "grad_norm": 7.765607833862305, "learning_rate": 4.888735998302447e-05, "loss": 0.1584, "step": 28200 }, { "epoch": 1.024783777890835, "eval_loss": 0.3628901541233063, "eval_runtime": 180.7805, "eval_samples_per_second": 41.011, "eval_steps_per_second": 5.128, "eval_wer": 0.1830921996115236, "step": 28200 }, { "epoch": 1.0251471763936333, "grad_norm": 2.208989381790161, "learning_rate": 4.8885828571512e-05, "loss": 0.1206, "step": 28210 }, { "epoch": 1.0255105748964315, "grad_norm": 0.4320629835128784, "learning_rate": 4.8884296130844166e-05, "loss": 0.2776, "step": 28220 }, { "epoch": 1.0258739733992297, "grad_norm": 1.4430392980575562, "learning_rate": 4.888276266108699e-05, "loss": 0.1181, "step": 28230 }, { "epoch": 1.0262373719020277, "grad_norm": 0.893260657787323, "learning_rate": 4.888122816230655e-05, "loss": 0.1141, "step": 28240 }, { "epoch": 1.026600770404826, "grad_norm": 1.9237782955169678, "learning_rate": 4.887969263456895e-05, "loss": 0.1676, "step": 28250 }, { "epoch": 1.026964168907624, "grad_norm": 1.0318949222564697, "learning_rate": 4.8878156077940376e-05, "loss": 0.1256, "step": 28260 }, { "epoch": 1.0273275674104223, "grad_norm": 0.8919249773025513, "learning_rate": 4.8876618492487e-05, "loss": 0.2314, "step": 28270 }, { "epoch": 1.0276909659132205, "grad_norm": 1.31845223903656, "learning_rate": 4.8875079878275085e-05, "loss": 0.1414, "step": 28280 }, { "epoch": 1.0280543644160187, "grad_norm": 8.070326805114746, "learning_rate": 
4.887354023537094e-05, "loss": 0.2391, "step": 28290 }, { "epoch": 1.0284177629188167, "grad_norm": 0.7600485682487488, "learning_rate": 4.887199956384088e-05, "loss": 0.164, "step": 28300 }, { "epoch": 1.028781161421615, "grad_norm": 1.0197162628173828, "learning_rate": 4.88704578637513e-05, "loss": 0.1324, "step": 28310 }, { "epoch": 1.0291445599244131, "grad_norm": 0.5989790558815002, "learning_rate": 4.886891513516861e-05, "loss": 0.2162, "step": 28320 }, { "epoch": 1.0295079584272113, "grad_norm": 1.2145419120788574, "learning_rate": 4.88673713781593e-05, "loss": 1.7629, "step": 28330 }, { "epoch": 1.0298713569300095, "grad_norm": 0.7220103740692139, "learning_rate": 4.8865826592789876e-05, "loss": 0.105, "step": 28340 }, { "epoch": 1.0302347554328075, "grad_norm": 1.2737821340560913, "learning_rate": 4.88642807791269e-05, "loss": 0.172, "step": 28350 }, { "epoch": 1.0305981539356057, "grad_norm": 2.3391408920288086, "learning_rate": 4.886273393723698e-05, "loss": 0.1431, "step": 28360 }, { "epoch": 1.030961552438404, "grad_norm": 1.1937615871429443, "learning_rate": 4.8861186067186756e-05, "loss": 0.1776, "step": 28370 }, { "epoch": 1.0313249509412021, "grad_norm": 0.5789287090301514, "learning_rate": 4.885963716904292e-05, "loss": 0.1412, "step": 28380 }, { "epoch": 1.0316883494440003, "grad_norm": 1.2566107511520386, "learning_rate": 4.885808724287221e-05, "loss": 0.1284, "step": 28390 }, { "epoch": 1.0320517479467985, "grad_norm": 5.225760459899902, "learning_rate": 4.885653628874141e-05, "loss": 0.1411, "step": 28400 }, { "epoch": 1.0324151464495965, "grad_norm": 1.2525557279586792, "learning_rate": 4.885498430671735e-05, "loss": 0.1372, "step": 28410 }, { "epoch": 1.0327785449523947, "grad_norm": 0.5048568844795227, "learning_rate": 4.885343129686688e-05, "loss": 0.1595, "step": 28420 }, { "epoch": 1.033141943455193, "grad_norm": 0.8768513202667236, "learning_rate": 4.8851877259256933e-05, "loss": 0.286, "step": 28430 }, { "epoch": 1.0335053419579912, 
"grad_norm": 1.2799090147018433, "learning_rate": 4.885032219395446e-05, "loss": 0.1431, "step": 28440 }, { "epoch": 1.0338687404607894, "grad_norm": 0.9944593906402588, "learning_rate": 4.8848766101026466e-05, "loss": 0.13, "step": 28450 }, { "epoch": 1.0342321389635876, "grad_norm": 1.3601889610290527, "learning_rate": 4.8847208980539994e-05, "loss": 0.1379, "step": 28460 }, { "epoch": 1.0345955374663856, "grad_norm": 0.6347102522850037, "learning_rate": 4.884565083256213e-05, "loss": 0.1833, "step": 28470 }, { "epoch": 1.0349589359691838, "grad_norm": null, "learning_rate": 4.884424762093241e-05, "loss": 3.779, "step": 28480 }, { "epoch": 1.035322334471982, "grad_norm": 1.6947808265686035, "learning_rate": 4.8842687520905906e-05, "loss": 0.1571, "step": 28490 }, { "epoch": 1.0356857329747802, "grad_norm": 4.521624565124512, "learning_rate": 4.884112639358283e-05, "loss": 0.1429, "step": 28500 }, { "epoch": 1.0360491314775784, "grad_norm": 1.9370489120483398, "learning_rate": 4.883956423903044e-05, "loss": 0.1375, "step": 28510 }, { "epoch": 1.0364125299803766, "grad_norm": 2.3492047786712646, "learning_rate": 4.883800105731606e-05, "loss": 0.1496, "step": 28520 }, { "epoch": 1.0367759284831746, "grad_norm": 1.1862452030181885, "learning_rate": 4.8836436848507026e-05, "loss": 0.1239, "step": 28530 }, { "epoch": 1.0371393269859728, "grad_norm": 2.223708391189575, "learning_rate": 4.883487161267074e-05, "loss": 0.1159, "step": 28540 }, { "epoch": 1.037502725488771, "grad_norm": 5.854187965393066, "learning_rate": 4.8833305349874636e-05, "loss": 0.1732, "step": 28550 }, { "epoch": 1.0378661239915692, "grad_norm": 1.4000542163848877, "learning_rate": 4.883173806018621e-05, "loss": 0.1428, "step": 28560 }, { "epoch": 1.0382295224943674, "grad_norm": 1.8862130641937256, "learning_rate": 4.883016974367298e-05, "loss": 0.2339, "step": 28570 }, { "epoch": 1.0385929209971656, "grad_norm": 1.701545238494873, "learning_rate": 4.8828600400402525e-05, "loss": 0.2063, "step": 
28580 }, { "epoch": 1.0389563194999636, "grad_norm": 3.8795692920684814, "learning_rate": 4.8827030030442466e-05, "loss": 0.1317, "step": 28590 }, { "epoch": 1.0393197180027618, "grad_norm": 1.0597456693649292, "learning_rate": 4.882545863386046e-05, "loss": 0.1783, "step": 28600 }, { "epoch": 1.03968311650556, "grad_norm": 0.8949028849601746, "learning_rate": 4.88238862107242e-05, "loss": 0.1453, "step": 28610 }, { "epoch": 1.0400465150083582, "grad_norm": 0.6270145773887634, "learning_rate": 4.8822312761101456e-05, "loss": 0.2118, "step": 28620 }, { "epoch": 1.0404099135111564, "grad_norm": 0.8819754719734192, "learning_rate": 4.8820738285060016e-05, "loss": 0.1398, "step": 28630 }, { "epoch": 1.0407733120139544, "grad_norm": 1.5963236093521118, "learning_rate": 4.881916278266772e-05, "loss": 0.1592, "step": 28640 }, { "epoch": 1.0411367105167526, "grad_norm": 1.2960532903671265, "learning_rate": 4.8817586253992445e-05, "loss": 0.2044, "step": 28650 }, { "epoch": 1.0415001090195508, "grad_norm": 1.6735124588012695, "learning_rate": 4.881600869910212e-05, "loss": 0.143, "step": 28660 }, { "epoch": 1.041863507522349, "grad_norm": 1.2382493019104004, "learning_rate": 4.8814430118064724e-05, "loss": 0.182, "step": 28670 }, { "epoch": 1.0422269060251472, "grad_norm": 1.614788293838501, "learning_rate": 4.881285051094826e-05, "loss": 0.149, "step": 28680 }, { "epoch": 1.0425903045279454, "grad_norm": 1.549124002456665, "learning_rate": 4.88112698778208e-05, "loss": 0.1238, "step": 28690 }, { "epoch": 1.0429537030307434, "grad_norm": 0.8877584338188171, "learning_rate": 4.8809688218750435e-05, "loss": 0.1541, "step": 28700 }, { "epoch": 1.0433171015335416, "grad_norm": 1.1061103343963623, "learning_rate": 4.8808105533805325e-05, "loss": 0.1209, "step": 28710 }, { "epoch": 1.0436805000363398, "grad_norm": 1.8957878351211548, "learning_rate": 4.880652182305365e-05, "loss": 0.1739, "step": 28720 }, { "epoch": 1.044043898539138, "grad_norm": 0.9069591164588928, 
"learning_rate": 4.880493708656366e-05, "loss": 0.2014, "step": 28730 }, { "epoch": 1.0444072970419362, "grad_norm": 0.7086552381515503, "learning_rate": 4.880335132440364e-05, "loss": 0.1149, "step": 28740 }, { "epoch": 1.0447706955447345, "grad_norm": 0.5514993667602539, "learning_rate": 4.8801764536641883e-05, "loss": 0.163, "step": 28750 }, { "epoch": 1.0451340940475324, "grad_norm": 0.5786269903182983, "learning_rate": 4.880017672334679e-05, "loss": 0.126, "step": 28760 }, { "epoch": 1.0454974925503306, "grad_norm": 0.8554352521896362, "learning_rate": 4.879858788458676e-05, "loss": 0.2564, "step": 28770 }, { "epoch": 1.0458608910531288, "grad_norm": 3.329148769378662, "learning_rate": 4.8796998020430253e-05, "loss": 0.1297, "step": 28780 }, { "epoch": 1.046224289555927, "grad_norm": 1.1520358324050903, "learning_rate": 4.879540713094578e-05, "loss": 0.1156, "step": 28790 }, { "epoch": 1.0465876880587253, "grad_norm": 1.6375194787979126, "learning_rate": 4.879381521620187e-05, "loss": 0.1418, "step": 28800 }, { "epoch": 1.0465876880587253, "eval_loss": 0.35767313838005066, "eval_runtime": 180.3335, "eval_samples_per_second": 41.113, "eval_steps_per_second": 5.14, "eval_wer": 0.18009693757147785, "step": 28800 }, { "epoch": 1.0469510865615235, "grad_norm": 2.6099300384521484, "learning_rate": 4.879222227626712e-05, "loss": 2.0354, "step": 28810 }, { "epoch": 1.0473144850643215, "grad_norm": 0.9497049450874329, "learning_rate": 4.879062831121017e-05, "loss": 0.2014, "step": 28820 }, { "epoch": 1.0476778835671197, "grad_norm": 1.100393533706665, "learning_rate": 4.878903332109969e-05, "loss": 0.1294, "step": 28830 }, { "epoch": 1.0480412820699179, "grad_norm": 0.46238216757774353, "learning_rate": 4.87874373060044e-05, "loss": 0.1103, "step": 28840 }, { "epoch": 1.048404680572716, "grad_norm": 1.111619234085083, "learning_rate": 4.8785840265993085e-05, "loss": 0.1635, "step": 28850 }, { "epoch": 1.0487680790755143, "grad_norm": 1.8693902492523193, 
"learning_rate": 4.8784242201134534e-05, "loss": 0.1145, "step": 28860 }, { "epoch": 1.0491314775783125, "grad_norm": 0.5382725596427917, "learning_rate": 4.878264311149762e-05, "loss": 0.1699, "step": 28870 }, { "epoch": 1.0494948760811105, "grad_norm": 1.3384134769439697, "learning_rate": 4.878104299715123e-05, "loss": 0.1479, "step": 28880 }, { "epoch": 1.0498582745839087, "grad_norm": 3.7112338542938232, "learning_rate": 4.87794418581643e-05, "loss": 2.828, "step": 28890 }, { "epoch": 1.0502216730867069, "grad_norm": 0.8874093890190125, "learning_rate": 4.8777839694605844e-05, "loss": 0.1274, "step": 28900 }, { "epoch": 1.050585071589505, "grad_norm": 2.278064489364624, "learning_rate": 4.877623650654487e-05, "loss": 0.1298, "step": 28910 }, { "epoch": 1.0509484700923033, "grad_norm": 0.8750000596046448, "learning_rate": 4.877463229405046e-05, "loss": 0.2705, "step": 28920 }, { "epoch": 1.0513118685951013, "grad_norm": 0.5634777545928955, "learning_rate": 4.8773027057191735e-05, "loss": 0.13, "step": 28930 }, { "epoch": 1.0516752670978995, "grad_norm": 1.1990102529525757, "learning_rate": 4.877142079603786e-05, "loss": 0.1115, "step": 28940 }, { "epoch": 1.0520386656006977, "grad_norm": 5.793541431427002, "learning_rate": 4.8769813510658035e-05, "loss": 0.1909, "step": 28950 }, { "epoch": 1.052402064103496, "grad_norm": 1.0433887243270874, "learning_rate": 4.876820520112153e-05, "loss": 0.1225, "step": 28960 }, { "epoch": 1.0527654626062941, "grad_norm": 0.8786159753799438, "learning_rate": 4.8766595867497624e-05, "loss": 0.1772, "step": 28970 }, { "epoch": 1.0531288611090923, "grad_norm": 1.1270724534988403, "learning_rate": 4.8764985509855664e-05, "loss": 0.1683, "step": 28980 }, { "epoch": 1.0534922596118903, "grad_norm": 0.9916827082633972, "learning_rate": 4.876337412826504e-05, "loss": 0.1834, "step": 28990 }, { "epoch": 1.0538556581146885, "grad_norm": 1.1295456886291504, "learning_rate": 4.876176172279517e-05, "loss": 0.1677, "step": 29000 }, { "epoch": 
1.0542190566174867, "grad_norm": 1.625546932220459, "learning_rate": 4.876014829351553e-05, "loss": 0.1374, "step": 29010 }, { "epoch": 1.054582455120285, "grad_norm": 0.4282989799976349, "learning_rate": 4.875853384049564e-05, "loss": 0.1826, "step": 29020 }, { "epoch": 1.0549458536230831, "grad_norm": 0.8806937336921692, "learning_rate": 4.875691836380507e-05, "loss": 0.116, "step": 29030 }, { "epoch": 1.0553092521258813, "grad_norm": 0.5082537531852722, "learning_rate": 4.87553018635134e-05, "loss": 0.1867, "step": 29040 }, { "epoch": 1.0556726506286793, "grad_norm": 3.172614336013794, "learning_rate": 4.875368433969031e-05, "loss": 0.1872, "step": 29050 }, { "epoch": 1.0560360491314775, "grad_norm": 4.570537090301514, "learning_rate": 4.875206579240546e-05, "loss": 0.1501, "step": 29060 }, { "epoch": 1.0563994476342757, "grad_norm": 0.9751003980636597, "learning_rate": 4.875044622172862e-05, "loss": 0.1533, "step": 29070 }, { "epoch": 1.056762846137074, "grad_norm": 0.9446988701820374, "learning_rate": 4.874882562772955e-05, "loss": 0.1462, "step": 29080 }, { "epoch": 1.0571262446398721, "grad_norm": 5.769078254699707, "learning_rate": 4.8747204010478086e-05, "loss": 0.1796, "step": 29090 }, { "epoch": 1.0574896431426704, "grad_norm": 6.486478328704834, "learning_rate": 4.8745581370044094e-05, "loss": 0.1674, "step": 29100 }, { "epoch": 1.0578530416454683, "grad_norm": 0.622352123260498, "learning_rate": 4.874395770649748e-05, "loss": 0.137, "step": 29110 }, { "epoch": 1.0582164401482665, "grad_norm": 0.5244133472442627, "learning_rate": 4.8742333019908215e-05, "loss": 0.2035, "step": 29120 }, { "epoch": 1.0585798386510648, "grad_norm": 1.7058534622192383, "learning_rate": 4.87407073103463e-05, "loss": 0.1408, "step": 29130 }, { "epoch": 1.058943237153863, "grad_norm": 0.9428019523620605, "learning_rate": 4.873908057788177e-05, "loss": 0.1128, "step": 29140 }, { "epoch": 1.0593066356566612, "grad_norm": 0.4694746136665344, "learning_rate": 
4.8737452822584724e-05, "loss": 0.142, "step": 29150 }, { "epoch": 1.0596700341594594, "grad_norm": 1.3985977172851562, "learning_rate": 4.873582404452529e-05, "loss": 0.1169, "step": 29160 }, { "epoch": 1.0600334326622574, "grad_norm": 0.8285462856292725, "learning_rate": 4.873419424377366e-05, "loss": 0.1945, "step": 29170 }, { "epoch": 1.0603968311650556, "grad_norm": 1.657012939453125, "learning_rate": 4.8732563420400037e-05, "loss": 0.1904, "step": 29180 }, { "epoch": 1.0607602296678538, "grad_norm": 1.4633735418319702, "learning_rate": 4.87309315744747e-05, "loss": 0.1183, "step": 29190 }, { "epoch": 1.061123628170652, "grad_norm": 0.9722393155097961, "learning_rate": 4.872929870606796e-05, "loss": 0.1608, "step": 29200 }, { "epoch": 1.0614870266734502, "grad_norm": 0.6080673933029175, "learning_rate": 4.872766481525016e-05, "loss": 0.1198, "step": 29210 }, { "epoch": 1.0618504251762482, "grad_norm": 0.7502457499504089, "learning_rate": 4.8726029902091715e-05, "loss": 0.1893, "step": 29220 }, { "epoch": 1.0622138236790464, "grad_norm": 1.7775638103485107, "learning_rate": 4.8724393966663054e-05, "loss": 1.4777, "step": 29230 }, { "epoch": 1.0625772221818446, "grad_norm": 1.1095236539840698, "learning_rate": 4.8722757009034666e-05, "loss": 0.1443, "step": 29240 }, { "epoch": 1.0629406206846428, "grad_norm": 0.6879424452781677, "learning_rate": 4.872111902927709e-05, "loss": 0.2048, "step": 29250 }, { "epoch": 1.063304019187441, "grad_norm": 1.2532442808151245, "learning_rate": 4.8719480027460895e-05, "loss": 0.1343, "step": 29260 }, { "epoch": 1.0636674176902392, "grad_norm": 1.0296350717544556, "learning_rate": 4.87178400036567e-05, "loss": 0.1656, "step": 29270 }, { "epoch": 1.0640308161930372, "grad_norm": 1.0346356630325317, "learning_rate": 4.871619895793517e-05, "loss": 0.1466, "step": 29280 }, { "epoch": 1.0643942146958354, "grad_norm": 1.9428579807281494, "learning_rate": 4.8714556890367e-05, "loss": 0.1496, "step": 29290 }, { "epoch": 
1.0647576131986336, "grad_norm": 2.6400890350341797, "learning_rate": 4.871291380102295e-05, "loss": 0.1857, "step": 29300 }, { "epoch": 1.0651210117014318, "grad_norm": 7.17543888092041, "learning_rate": 4.8711269689973826e-05, "loss": 0.1242, "step": 29310 }, { "epoch": 1.06548441020423, "grad_norm": 1.8619358539581299, "learning_rate": 4.870962455729045e-05, "loss": 0.2137, "step": 29320 }, { "epoch": 1.0658478087070282, "grad_norm": 4.936455726623535, "learning_rate": 4.8707978403043716e-05, "loss": 0.1551, "step": 29330 }, { "epoch": 1.0662112072098262, "grad_norm": 1.2196155786514282, "learning_rate": 4.8706331227304533e-05, "loss": 0.184, "step": 29340 }, { "epoch": 1.0665746057126244, "grad_norm": 2.0982654094696045, "learning_rate": 4.87046830301439e-05, "loss": 0.1566, "step": 29350 }, { "epoch": 1.0669380042154226, "grad_norm": 1.0265774726867676, "learning_rate": 4.8703033811632806e-05, "loss": 0.1263, "step": 29360 }, { "epoch": 1.0673014027182208, "grad_norm": 2.413862705230713, "learning_rate": 4.870138357184233e-05, "loss": 0.135, "step": 29370 }, { "epoch": 1.067664801221019, "grad_norm": 0.800736665725708, "learning_rate": 4.869973231084356e-05, "loss": 0.1169, "step": 29380 }, { "epoch": 1.0680281997238172, "grad_norm": 4.07125186920166, "learning_rate": 4.8698080028707647e-05, "loss": 0.1745, "step": 29390 }, { "epoch": 1.0683915982266152, "grad_norm": 1.1997871398925781, "learning_rate": 4.8696426725505784e-05, "loss": 0.1427, "step": 29400 }, { "epoch": 1.0683915982266152, "eval_loss": 0.3479246199131012, "eval_runtime": 180.3605, "eval_samples_per_second": 41.107, "eval_steps_per_second": 5.14, "eval_wer": 0.1802693920525714, "step": 29400 }, { "epoch": 1.0687549967294134, "grad_norm": 1.5684832334518433, "learning_rate": 4.8694772401309205e-05, "loss": 0.1491, "step": 29410 }, { "epoch": 1.0691183952322116, "grad_norm": 1.3784462213516235, "learning_rate": 4.8693117056189194e-05, "loss": 0.1741, "step": 29420 }, { "epoch": 
1.0694817937350098, "grad_norm": 2.0766236782073975, "learning_rate": 4.869146069021707e-05, "loss": 0.1375, "step": 29430 }, { "epoch": 1.069845192237808, "grad_norm": 0.6553940773010254, "learning_rate": 4.86898033034642e-05, "loss": 0.1371, "step": 29440 }, { "epoch": 1.0702085907406063, "grad_norm": 0.9652252197265625, "learning_rate": 4.868814489600199e-05, "loss": 0.1446, "step": 29450 }, { "epoch": 1.0705719892434042, "grad_norm": 1.123075008392334, "learning_rate": 4.8686485467901896e-05, "loss": 0.1628, "step": 29460 }, { "epoch": 1.0709353877462024, "grad_norm": 1.3370702266693115, "learning_rate": 4.868482501923543e-05, "loss": 0.1822, "step": 29470 }, { "epoch": 1.0712987862490007, "grad_norm": 1.1716543436050415, "learning_rate": 4.868316355007412e-05, "loss": 0.7928, "step": 29480 }, { "epoch": 1.0716621847517989, "grad_norm": 2.255791187286377, "learning_rate": 4.868150106048955e-05, "loss": 0.3897, "step": 29490 }, { "epoch": 1.072025583254597, "grad_norm": 2.2386605739593506, "learning_rate": 4.8679837550553366e-05, "loss": 0.174, "step": 29500 }, { "epoch": 1.072388981757395, "grad_norm": 0.7938382625579834, "learning_rate": 4.867817302033724e-05, "loss": 0.1274, "step": 29510 }, { "epoch": 1.0727523802601933, "grad_norm": 0.8619611263275146, "learning_rate": 4.8676507469912866e-05, "loss": 0.1756, "step": 29520 }, { "epoch": 1.0731157787629915, "grad_norm": 5.2337727546691895, "learning_rate": 4.867484089935205e-05, "loss": 0.1131, "step": 29530 }, { "epoch": 1.0734791772657897, "grad_norm": 1.339237093925476, "learning_rate": 4.867317330872656e-05, "loss": 0.2385, "step": 29540 }, { "epoch": 1.0738425757685879, "grad_norm": 0.9693569540977478, "learning_rate": 4.8671504698108266e-05, "loss": 0.1455, "step": 29550 }, { "epoch": 1.074205974271386, "grad_norm": 0.8684889674186707, "learning_rate": 4.866983506756906e-05, "loss": 0.1516, "step": 29560 }, { "epoch": 1.074569372774184, "grad_norm": 0.5518342852592468, "learning_rate": 
4.866816441718088e-05, "loss": 0.2068, "step": 29570 }, { "epoch": 1.0749327712769823, "grad_norm": 1.049777865409851, "learning_rate": 4.86664927470157e-05, "loss": 0.1473, "step": 29580 }, { "epoch": 1.0752961697797805, "grad_norm": 3.8799684047698975, "learning_rate": 4.8664820057145556e-05, "loss": 0.1398, "step": 29590 }, { "epoch": 1.0756595682825787, "grad_norm": 0.9927829504013062, "learning_rate": 4.866314634764252e-05, "loss": 2.0386, "step": 29600 }, { "epoch": 1.076022966785377, "grad_norm": 1.2022935152053833, "learning_rate": 4.86614716185787e-05, "loss": 0.1539, "step": 29610 }, { "epoch": 1.076386365288175, "grad_norm": 0.7556710243225098, "learning_rate": 4.865979587002625e-05, "loss": 0.1718, "step": 29620 }, { "epoch": 1.076749763790973, "grad_norm": 1.0953086614608765, "learning_rate": 4.865811910205738e-05, "loss": 0.1537, "step": 29630 }, { "epoch": 1.0771131622937713, "grad_norm": 0.49788376688957214, "learning_rate": 4.865644131474434e-05, "loss": 0.1436, "step": 29640 }, { "epoch": 1.0774765607965695, "grad_norm": 2.5504343509674072, "learning_rate": 4.865476250815941e-05, "loss": 0.1535, "step": 29650 }, { "epoch": 1.0778399592993677, "grad_norm": 1.280085802078247, "learning_rate": 4.865308268237492e-05, "loss": 0.1389, "step": 29660 }, { "epoch": 1.078203357802166, "grad_norm": 0.4341859519481659, "learning_rate": 4.865140183746326e-05, "loss": 0.2016, "step": 29670 }, { "epoch": 1.0785667563049641, "grad_norm": 0.744679868221283, "learning_rate": 4.864971997349685e-05, "loss": 0.1216, "step": 29680 }, { "epoch": 1.078930154807762, "grad_norm": 1.449559211730957, "learning_rate": 4.8648037090548154e-05, "loss": 0.1202, "step": 29690 }, { "epoch": 1.0792935533105603, "grad_norm": 3.58284330368042, "learning_rate": 4.8646353188689674e-05, "loss": 0.145, "step": 29700 }, { "epoch": 1.0796569518133585, "grad_norm": 1.7318589687347412, "learning_rate": 4.864466826799398e-05, "loss": 0.1361, "step": 29710 }, { "epoch": 1.0800203503161567, 
"grad_norm": 0.44806694984436035, "learning_rate": 4.864298232853364e-05, "loss": 0.212, "step": 29720 }, { "epoch": 1.080383748818955, "grad_norm": 0.8236504197120667, "learning_rate": 4.864129537038132e-05, "loss": 0.1446, "step": 29730 }, { "epoch": 1.0807471473217531, "grad_norm": 0.920353889465332, "learning_rate": 4.863960739360971e-05, "loss": 0.1472, "step": 29740 }, { "epoch": 1.0811105458245511, "grad_norm": 1.415685772895813, "learning_rate": 4.8637918398291514e-05, "loss": 1.9346, "step": 29750 }, { "epoch": 1.0814739443273493, "grad_norm": 0.7517853379249573, "learning_rate": 4.8636228384499524e-05, "loss": 0.1276, "step": 29760 }, { "epoch": 1.0818373428301475, "grad_norm": 0.5632757544517517, "learning_rate": 4.8634537352306554e-05, "loss": 0.1385, "step": 29770 }, { "epoch": 1.0822007413329457, "grad_norm": 1.5767742395401, "learning_rate": 4.8632845301785455e-05, "loss": 0.1451, "step": 29780 }, { "epoch": 1.082564139835744, "grad_norm": 0.7501896619796753, "learning_rate": 4.8631152233009146e-05, "loss": 0.1337, "step": 29790 }, { "epoch": 1.082927538338542, "grad_norm": 0.7235280871391296, "learning_rate": 4.862945814605056e-05, "loss": 0.1727, "step": 29800 }, { "epoch": 1.0832909368413401, "grad_norm": 0.9608789682388306, "learning_rate": 4.86277630409827e-05, "loss": 0.1346, "step": 29810 }, { "epoch": 1.0836543353441384, "grad_norm": 0.5176007151603699, "learning_rate": 4.862606691787859e-05, "loss": 0.1477, "step": 29820 }, { "epoch": 1.0840177338469366, "grad_norm": 1.1901780366897583, "learning_rate": 4.862436977681133e-05, "loss": 0.1478, "step": 29830 }, { "epoch": 1.0843811323497348, "grad_norm": 4.09995698928833, "learning_rate": 4.8622671617854026e-05, "loss": 0.1369, "step": 29840 }, { "epoch": 1.084744530852533, "grad_norm": 10.049054145812988, "learning_rate": 4.8620972441079855e-05, "loss": 0.8392, "step": 29850 }, { "epoch": 1.085107929355331, "grad_norm": 1.131095051765442, "learning_rate": 4.861927224656202e-05, "loss": 0.1404, 
"step": 29860 }, { "epoch": 1.0854713278581292, "grad_norm": 1.2740205526351929, "learning_rate": 4.861757103437379e-05, "loss": 0.1726, "step": 29870 }, { "epoch": 1.0858347263609274, "grad_norm": 0.9203113317489624, "learning_rate": 4.861586880458845e-05, "loss": 0.1088, "step": 29880 }, { "epoch": 1.0861981248637256, "grad_norm": 0.8646379113197327, "learning_rate": 4.8614165557279345e-05, "loss": 0.124, "step": 29890 }, { "epoch": 1.0865615233665238, "grad_norm": 1.283758521080017, "learning_rate": 4.861246129251987e-05, "loss": 0.1616, "step": 29900 }, { "epoch": 1.086924921869322, "grad_norm": 2.0251550674438477, "learning_rate": 4.861075601038345e-05, "loss": 0.1475, "step": 29910 }, { "epoch": 1.08728832037212, "grad_norm": 0.7173452973365784, "learning_rate": 4.860904971094356e-05, "loss": 0.182, "step": 29920 }, { "epoch": 1.0876517188749182, "grad_norm": 0.7154909372329712, "learning_rate": 4.8607342394273725e-05, "loss": 0.1263, "step": 29930 }, { "epoch": 1.0880151173777164, "grad_norm": 2.5288286209106445, "learning_rate": 4.860563406044749e-05, "loss": 0.1289, "step": 29940 }, { "epoch": 1.0883785158805146, "grad_norm": 1.4772063493728638, "learning_rate": 4.860392470953848e-05, "loss": 0.1556, "step": 29950 }, { "epoch": 1.0887419143833128, "grad_norm": 1.623298168182373, "learning_rate": 4.8602214341620346e-05, "loss": 0.1493, "step": 29960 }, { "epoch": 1.089105312886111, "grad_norm": 0.4302707016468048, "learning_rate": 4.860050295676676e-05, "loss": 0.2303, "step": 29970 }, { "epoch": 1.089468711388909, "grad_norm": 1.4086140394210815, "learning_rate": 4.8598790555051474e-05, "loss": 0.1549, "step": 29980 }, { "epoch": 1.0898321098917072, "grad_norm": 1.1924636363983154, "learning_rate": 4.859707713654828e-05, "loss": 0.1426, "step": 29990 }, { "epoch": 1.0901955083945054, "grad_norm": 0.8468578457832336, "learning_rate": 4.859536270133097e-05, "loss": 0.1607, "step": 30000 }, { "epoch": 1.0901955083945054, "eval_loss": 0.38150739669799805, 
"eval_runtime": 180.8598, "eval_samples_per_second": 40.993, "eval_steps_per_second": 5.126, "eval_wer": 0.18088659756385353, "step": 30000 }, { "epoch": 1.0905589068973036, "grad_norm": 1.3293052911758423, "learning_rate": 4.859364724947345e-05, "loss": 0.1199, "step": 30010 }, { "epoch": 1.0909223054001018, "grad_norm": 1.421976923942566, "learning_rate": 4.859193078104961e-05, "loss": 0.1932, "step": 30020 }, { "epoch": 1.0912857039029, "grad_norm": 5.226151466369629, "learning_rate": 4.8590213296133415e-05, "loss": 0.1365, "step": 30030 }, { "epoch": 1.091649102405698, "grad_norm": 1.6307711601257324, "learning_rate": 4.8588494794798866e-05, "loss": 0.1533, "step": 30040 }, { "epoch": 1.0920125009084962, "grad_norm": 1.190746784210205, "learning_rate": 4.858677527712e-05, "loss": 0.1701, "step": 30050 }, { "epoch": 1.0923758994112944, "grad_norm": 1.1558239459991455, "learning_rate": 4.858505474317091e-05, "loss": 0.1792, "step": 30060 }, { "epoch": 1.0927392979140926, "grad_norm": 0.5284643769264221, "learning_rate": 4.858333319302573e-05, "loss": 0.1789, "step": 30070 }, { "epoch": 1.0931026964168908, "grad_norm": 0.7858747243881226, "learning_rate": 4.858161062675863e-05, "loss": 0.1456, "step": 30080 }, { "epoch": 1.0934660949196888, "grad_norm": 1.2685805559158325, "learning_rate": 4.857988704444383e-05, "loss": 0.1326, "step": 30090 }, { "epoch": 1.093829493422487, "grad_norm": 0.9551296830177307, "learning_rate": 4.8578162446155595e-05, "loss": 0.327, "step": 30100 }, { "epoch": 1.0941928919252852, "grad_norm": 3.6769495010375977, "learning_rate": 4.857643683196823e-05, "loss": 0.1111, "step": 30110 }, { "epoch": 1.0945562904280834, "grad_norm": 0.757580041885376, "learning_rate": 4.8574710201956095e-05, "loss": 0.1622, "step": 30120 }, { "epoch": 1.0949196889308817, "grad_norm": 0.762323796749115, "learning_rate": 4.857298255619357e-05, "loss": 0.1218, "step": 30130 }, { "epoch": 1.0952830874336799, "grad_norm": 0.6065217852592468, "learning_rate": 
4.85712538947551e-05, "loss": 0.1297, "step": 30140 }, { "epoch": 1.0956464859364778, "grad_norm": 1.1257789134979248, "learning_rate": 4.856952421771517e-05, "loss": 0.1862, "step": 30150 }, { "epoch": 1.096009884439276, "grad_norm": 1.128233790397644, "learning_rate": 4.85677935251483e-05, "loss": 0.1377, "step": 30160 }, { "epoch": 1.0963732829420743, "grad_norm": 0.48844701051712036, "learning_rate": 4.856606181712906e-05, "loss": 0.1967, "step": 30170 }, { "epoch": 1.0967366814448725, "grad_norm": 0.682921290397644, "learning_rate": 4.856432909373206e-05, "loss": 0.1268, "step": 30180 }, { "epoch": 1.0971000799476707, "grad_norm": 0.8049948215484619, "learning_rate": 4.856259535503197e-05, "loss": 0.0971, "step": 30190 }, { "epoch": 1.0974634784504689, "grad_norm": 0.6435711979866028, "learning_rate": 4.8560860601103485e-05, "loss": 1.0273, "step": 30200 }, { "epoch": 1.0978268769532669, "grad_norm": 1.014172911643982, "learning_rate": 4.855912483202134e-05, "loss": 0.1137, "step": 30210 }, { "epoch": 1.098190275456065, "grad_norm": 1.4760230779647827, "learning_rate": 4.8557388047860334e-05, "loss": 0.1585, "step": 30220 }, { "epoch": 1.0985536739588633, "grad_norm": 1.4756141901016235, "learning_rate": 4.855565024869529e-05, "loss": 0.1261, "step": 30230 }, { "epoch": 1.0989170724616615, "grad_norm": 2.1977133750915527, "learning_rate": 4.8553911434601085e-05, "loss": 0.1532, "step": 30240 }, { "epoch": 1.0992804709644597, "grad_norm": 1.9084991216659546, "learning_rate": 4.855217160565265e-05, "loss": 0.1611, "step": 30250 }, { "epoch": 1.099643869467258, "grad_norm": 1.3657923936843872, "learning_rate": 4.855043076192494e-05, "loss": 0.1569, "step": 30260 }, { "epoch": 1.1000072679700559, "grad_norm": 0.6824470162391663, "learning_rate": 4.8548688903492943e-05, "loss": 0.1632, "step": 30270 }, { "epoch": 1.100370666472854, "grad_norm": 0.595958948135376, "learning_rate": 4.854694603043175e-05, "loss": 0.1394, "step": 30280 }, { "epoch": 1.1007340649756523, 
"grad_norm": 1.1626547574996948, "learning_rate": 4.854520214281642e-05, "loss": 0.1193, "step": 30290 }, { "epoch": 1.1010974634784505, "grad_norm": 1.2703717947006226, "learning_rate": 4.8543457240722104e-05, "loss": 0.1949, "step": 30300 }, { "epoch": 1.1014608619812487, "grad_norm": 1.7159488201141357, "learning_rate": 4.854171132422399e-05, "loss": 0.1524, "step": 30310 }, { "epoch": 1.101824260484047, "grad_norm": 0.7651236057281494, "learning_rate": 4.85399643933973e-05, "loss": 0.1992, "step": 30320 }, { "epoch": 1.102187658986845, "grad_norm": 0.7985833287239075, "learning_rate": 4.8538216448317286e-05, "loss": 0.1363, "step": 30330 }, { "epoch": 1.102551057489643, "grad_norm": 1.2583733797073364, "learning_rate": 4.853646748905928e-05, "loss": 0.1215, "step": 30340 }, { "epoch": 1.1029144559924413, "grad_norm": 1.1982141733169556, "learning_rate": 4.853471751569864e-05, "loss": 0.1656, "step": 30350 }, { "epoch": 1.1032778544952395, "grad_norm": 2.399423599243164, "learning_rate": 4.853296652831075e-05, "loss": 0.1164, "step": 30360 }, { "epoch": 1.1036412529980377, "grad_norm": 1.5785446166992188, "learning_rate": 4.853121452697107e-05, "loss": 0.1682, "step": 30370 }, { "epoch": 1.1040046515008357, "grad_norm": 1.3818514347076416, "learning_rate": 4.852946151175508e-05, "loss": 0.5349, "step": 30380 }, { "epoch": 1.104368050003634, "grad_norm": 1.6894676685333252, "learning_rate": 4.8527707482738305e-05, "loss": 0.1314, "step": 30390 }, { "epoch": 1.1047314485064321, "grad_norm": 1.1517245769500732, "learning_rate": 4.852595243999633e-05, "loss": 0.1515, "step": 30400 }, { "epoch": 1.1050948470092303, "grad_norm": 1.2149289846420288, "learning_rate": 4.852419638360477e-05, "loss": 0.2572, "step": 30410 }, { "epoch": 1.1054582455120285, "grad_norm": 0.8241190314292908, "learning_rate": 4.852243931363929e-05, "loss": 0.2045, "step": 30420 }, { "epoch": 1.1058216440148267, "grad_norm": 0.8909230828285217, "learning_rate": 4.852068123017559e-05, "loss": 
0.1281, "step": 30430 }, { "epoch": 1.1061850425176247, "grad_norm": 0.7718971967697144, "learning_rate": 4.8518922133289424e-05, "loss": 0.1207, "step": 30440 }, { "epoch": 1.106548441020423, "grad_norm": 3.393324136734009, "learning_rate": 4.8517162023056575e-05, "loss": 0.4812, "step": 30450 }, { "epoch": 1.1069118395232211, "grad_norm": 1.5000587701797485, "learning_rate": 4.85154008995529e-05, "loss": 0.1246, "step": 30460 }, { "epoch": 1.1072752380260193, "grad_norm": 1.3177014589309692, "learning_rate": 4.8513638762854264e-05, "loss": 0.2352, "step": 30470 }, { "epoch": 1.1076386365288176, "grad_norm": 0.8771611452102661, "learning_rate": 4.8511875613036596e-05, "loss": 1.4369, "step": 30480 }, { "epoch": 1.1080020350316158, "grad_norm": 0.46630170941352844, "learning_rate": 4.8510111450175865e-05, "loss": 0.1184, "step": 30490 }, { "epoch": 1.1083654335344137, "grad_norm": 1.582541823387146, "learning_rate": 4.850834627434808e-05, "loss": 0.154, "step": 30500 }, { "epoch": 1.108728832037212, "grad_norm": 0.9425756335258484, "learning_rate": 4.850658008562929e-05, "loss": 0.1632, "step": 30510 }, { "epoch": 1.1090922305400102, "grad_norm": 0.6453799605369568, "learning_rate": 4.8504812884095616e-05, "loss": 0.2024, "step": 30520 }, { "epoch": 1.1094556290428084, "grad_norm": 0.8643505573272705, "learning_rate": 4.850304466982317e-05, "loss": 0.1373, "step": 30530 }, { "epoch": 1.1098190275456066, "grad_norm": 0.6463938355445862, "learning_rate": 4.850127544288816e-05, "loss": 0.1308, "step": 30540 }, { "epoch": 1.1101824260484048, "grad_norm": 1.2465693950653076, "learning_rate": 4.8499505203366816e-05, "loss": 0.1628, "step": 30550 }, { "epoch": 1.1105458245512028, "grad_norm": 1.085317850112915, "learning_rate": 4.84977339513354e-05, "loss": 0.1533, "step": 30560 }, { "epoch": 1.110909223054001, "grad_norm": 0.5834909081459045, "learning_rate": 4.849596168687022e-05, "loss": 0.1811, "step": 30570 }, { "epoch": 1.1112726215567992, "grad_norm": 
1.407309889793396, "learning_rate": 4.849418841004766e-05, "loss": 0.1621, "step": 30580 }, { "epoch": 1.1116360200595974, "grad_norm": 1.0903669595718384, "learning_rate": 4.8492414120944116e-05, "loss": 0.271, "step": 30590 }, { "epoch": 1.1119994185623956, "grad_norm": 1.6495404243469238, "learning_rate": 4.8490638819636036e-05, "loss": 0.1602, "step": 30600 }, { "epoch": 1.1119994185623956, "eval_loss": 0.3786245882511139, "eval_runtime": 180.2563, "eval_samples_per_second": 41.13, "eval_steps_per_second": 5.143, "eval_wer": 0.17622125002269137, "step": 30600 }, { "epoch": 1.1123628170651938, "grad_norm": 1.6046833992004395, "learning_rate": 4.8488862506199905e-05, "loss": 0.142, "step": 30610 }, { "epoch": 1.1127262155679918, "grad_norm": 0.7779229879379272, "learning_rate": 4.848708518071226e-05, "loss": 0.1556, "step": 30620 }, { "epoch": 1.11308961407079, "grad_norm": 6.0123677253723145, "learning_rate": 4.848530684324969e-05, "loss": 0.1379, "step": 30630 }, { "epoch": 1.1134530125735882, "grad_norm": 1.1593163013458252, "learning_rate": 4.8483527493888796e-05, "loss": 0.1091, "step": 30640 }, { "epoch": 1.1138164110763864, "grad_norm": 1.1061301231384277, "learning_rate": 4.848174713270627e-05, "loss": 0.1521, "step": 30650 }, { "epoch": 1.1141798095791846, "grad_norm": 2.006169080734253, "learning_rate": 4.8479965759778804e-05, "loss": 0.1188, "step": 30660 }, { "epoch": 1.1145432080819826, "grad_norm": 0.632653534412384, "learning_rate": 4.8478183375183154e-05, "loss": 0.2039, "step": 30670 }, { "epoch": 1.1149066065847808, "grad_norm": 2.2631378173828125, "learning_rate": 4.847639997899611e-05, "loss": 0.1324, "step": 30680 }, { "epoch": 1.115270005087579, "grad_norm": 0.7694458365440369, "learning_rate": 4.847461557129454e-05, "loss": 0.1043, "step": 30690 }, { "epoch": 1.1156334035903772, "grad_norm": 1.5386550426483154, "learning_rate": 4.847283015215529e-05, "loss": 0.1605, "step": 30700 }, { "epoch": 1.1159968020931754, "grad_norm": 
0.9068945646286011, "learning_rate": 4.847104372165531e-05, "loss": 0.1178, "step": 30710 }, { "epoch": 1.1163602005959736, "grad_norm": 1.4700278043746948, "learning_rate": 4.8469256279871564e-05, "loss": 0.1458, "step": 30720 }, { "epoch": 1.1167235990987716, "grad_norm": 1.125613808631897, "learning_rate": 4.846746782688108e-05, "loss": 0.1212, "step": 30730 }, { "epoch": 1.1170869976015698, "grad_norm": 1.081297516822815, "learning_rate": 4.846567836276089e-05, "loss": 0.1218, "step": 30740 }, { "epoch": 1.117450396104368, "grad_norm": 0.6549712419509888, "learning_rate": 4.846388788758812e-05, "loss": 0.1684, "step": 30750 }, { "epoch": 1.1178137946071662, "grad_norm": 0.7256012558937073, "learning_rate": 4.84620964014399e-05, "loss": 0.1425, "step": 30760 }, { "epoch": 1.1181771931099644, "grad_norm": 0.6661650538444519, "learning_rate": 4.846030390439343e-05, "loss": 0.2043, "step": 30770 }, { "epoch": 1.1185405916127626, "grad_norm": 2.5043599605560303, "learning_rate": 4.845851039652594e-05, "loss": 0.1337, "step": 30780 }, { "epoch": 1.1189039901155606, "grad_norm": 1.7362638711929321, "learning_rate": 4.84567158779147e-05, "loss": 0.1146, "step": 30790 }, { "epoch": 1.1192673886183588, "grad_norm": 2.156850576400757, "learning_rate": 4.845492034863703e-05, "loss": 0.1402, "step": 30800 }, { "epoch": 1.119630787121157, "grad_norm": 6.733970642089844, "learning_rate": 4.8453123808770295e-05, "loss": 0.137, "step": 30810 }, { "epoch": 1.1199941856239553, "grad_norm": 1.2163270711898804, "learning_rate": 4.84513262583919e-05, "loss": 0.2038, "step": 30820 }, { "epoch": 1.1203575841267535, "grad_norm": 1.0911026000976562, "learning_rate": 4.84495276975793e-05, "loss": 0.1247, "step": 30830 }, { "epoch": 1.1207209826295517, "grad_norm": 8.4699125289917, "learning_rate": 4.844772812640998e-05, "loss": 0.1883, "step": 30840 }, { "epoch": 1.1210843811323496, "grad_norm": 1.9448401927947998, "learning_rate": 4.8445927544961486e-05, "loss": 0.1259, "step": 30850 }, 
{ "epoch": 1.1214477796351479, "grad_norm": 1.2070740461349487, "learning_rate": 4.844412595331139e-05, "loss": 0.1478, "step": 30860 }, { "epoch": 1.121811178137946, "grad_norm": 0.5514017939567566, "learning_rate": 4.844232335153733e-05, "loss": 0.2209, "step": 30870 }, { "epoch": 1.1221745766407443, "grad_norm": 0.6462703943252563, "learning_rate": 4.844051973971696e-05, "loss": 0.1182, "step": 30880 }, { "epoch": 1.1225379751435425, "grad_norm": 0.9222347140312195, "learning_rate": 4.8438715117927995e-05, "loss": 0.1079, "step": 30890 }, { "epoch": 1.1229013736463407, "grad_norm": 1.1663174629211426, "learning_rate": 4.8436909486248196e-05, "loss": 0.1516, "step": 30900 }, { "epoch": 1.1232647721491387, "grad_norm": 0.8301449418067932, "learning_rate": 4.8435102844755356e-05, "loss": 0.1204, "step": 30910 }, { "epoch": 1.1236281706519369, "grad_norm": 0.8328074216842651, "learning_rate": 4.8433295193527305e-05, "loss": 0.1432, "step": 30920 }, { "epoch": 1.123991569154735, "grad_norm": 1.0741894245147705, "learning_rate": 4.843148653264195e-05, "loss": 0.1286, "step": 30930 }, { "epoch": 1.1243549676575333, "grad_norm": 1.5792789459228516, "learning_rate": 4.842967686217721e-05, "loss": 0.1235, "step": 30940 }, { "epoch": 1.1247183661603315, "grad_norm": 10.680551528930664, "learning_rate": 4.8427866182211056e-05, "loss": 0.2624, "step": 30950 }, { "epoch": 1.1250817646631295, "grad_norm": 0.753760576248169, "learning_rate": 4.8426054492821503e-05, "loss": 0.1185, "step": 30960 }, { "epoch": 1.1254451631659277, "grad_norm": 0.681735098361969, "learning_rate": 4.8424241794086614e-05, "loss": 0.1249, "step": 30970 }, { "epoch": 1.125808561668726, "grad_norm": 1.0460690259933472, "learning_rate": 4.842242808608449e-05, "loss": 0.1437, "step": 30980 }, { "epoch": 1.126171960171524, "grad_norm": 0.8870137929916382, "learning_rate": 4.8420613368893275e-05, "loss": 0.1321, "step": 30990 }, { "epoch": 1.1265353586743223, "grad_norm": 1.765331506729126, "learning_rate": 
4.841879764259116e-05, "loss": 0.1722, "step": 31000 }, { "epoch": 1.1268987571771205, "grad_norm": 2.9972617626190186, "learning_rate": 4.841698090725638e-05, "loss": 0.095, "step": 31010 }, { "epoch": 1.1272621556799187, "grad_norm": 0.7516260743141174, "learning_rate": 4.841516316296722e-05, "loss": 0.2073, "step": 31020 }, { "epoch": 1.1276255541827167, "grad_norm": 7.545155048370361, "learning_rate": 4.841334440980197e-05, "loss": 0.2066, "step": 31030 }, { "epoch": 1.127988952685515, "grad_norm": 0.7127543687820435, "learning_rate": 4.841152464783903e-05, "loss": 0.1483, "step": 31040 }, { "epoch": 1.1283523511883131, "grad_norm": 0.8178777694702148, "learning_rate": 4.8409703877156786e-05, "loss": 0.122, "step": 31050 }, { "epoch": 1.1287157496911113, "grad_norm": 2.2176194190979004, "learning_rate": 4.84078820978337e-05, "loss": 0.1074, "step": 31060 }, { "epoch": 1.1290791481939095, "grad_norm": 0.5790374279022217, "learning_rate": 4.8406059309948246e-05, "loss": 0.2401, "step": 31070 }, { "epoch": 1.1294425466967075, "grad_norm": 1.335080623626709, "learning_rate": 4.840423551357899e-05, "loss": 0.1412, "step": 31080 }, { "epoch": 1.1298059451995057, "grad_norm": 2.9304592609405518, "learning_rate": 4.840241070880449e-05, "loss": 0.1137, "step": 31090 }, { "epoch": 1.130169343702304, "grad_norm": 0.6828371286392212, "learning_rate": 4.840058489570338e-05, "loss": 0.162, "step": 31100 }, { "epoch": 1.1305327422051021, "grad_norm": 0.7623898983001709, "learning_rate": 4.8398758074354334e-05, "loss": 0.1164, "step": 31110 }, { "epoch": 1.1308961407079003, "grad_norm": 0.44123783707618713, "learning_rate": 4.8396930244836045e-05, "loss": 0.1378, "step": 31120 }, { "epoch": 1.1312595392106983, "grad_norm": 0.880264937877655, "learning_rate": 4.839510140722728e-05, "loss": 0.1168, "step": 31130 }, { "epoch": 1.1316229377134965, "grad_norm": 0.9946479797363281, "learning_rate": 4.839327156160684e-05, "loss": 0.1099, "step": 31140 }, { "epoch": 
1.1319863362162947, "grad_norm": 1.5061123371124268, "learning_rate": 4.8391440708053565e-05, "loss": 0.1167, "step": 31150 }, { "epoch": 1.132349734719093, "grad_norm": 3.687218427658081, "learning_rate": 4.838960884664633e-05, "loss": 0.1141, "step": 31160 }, { "epoch": 1.1327131332218912, "grad_norm": 1.015309453010559, "learning_rate": 4.838777597746408e-05, "loss": 0.1851, "step": 31170 }, { "epoch": 1.1330765317246894, "grad_norm": 0.9765021204948425, "learning_rate": 4.838594210058577e-05, "loss": 0.1276, "step": 31180 }, { "epoch": 1.1334399302274876, "grad_norm": 0.986419677734375, "learning_rate": 4.838410721609041e-05, "loss": 0.107, "step": 31190 }, { "epoch": 1.1338033287302856, "grad_norm": 0.850581169128418, "learning_rate": 4.838227132405709e-05, "loss": 0.1441, "step": 31200 }, { "epoch": 1.1338033287302856, "eval_loss": 0.3806535005569458, "eval_runtime": 180.0975, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.17877176103254852, "step": 31200 }, { "epoch": 1.1341667272330838, "grad_norm": 0.5492041110992432, "learning_rate": 4.8380434424564885e-05, "loss": 0.1644, "step": 31210 }, { "epoch": 1.134530125735882, "grad_norm": 0.6244884729385376, "learning_rate": 4.837859651769295e-05, "loss": 0.1754, "step": 31220 }, { "epoch": 1.1348935242386802, "grad_norm": 0.7327109575271606, "learning_rate": 4.837675760352047e-05, "loss": 0.1182, "step": 31230 }, { "epoch": 1.1352569227414784, "grad_norm": 1.8642997741699219, "learning_rate": 4.837491768212669e-05, "loss": 0.1262, "step": 31240 }, { "epoch": 1.1356203212442764, "grad_norm": 0.7738135457038879, "learning_rate": 4.837307675359086e-05, "loss": 0.1733, "step": 31250 }, { "epoch": 1.1359837197470746, "grad_norm": 1.2241661548614502, "learning_rate": 4.837123481799232e-05, "loss": 0.158, "step": 31260 }, { "epoch": 1.1363471182498728, "grad_norm": 0.5471898317337036, "learning_rate": 4.836939187541043e-05, "loss": 0.1745, "step": 31270 }, { "epoch": 
1.136710516752671, "grad_norm": 1.110005497932434, "learning_rate": 4.836754792592459e-05, "loss": 0.14, "step": 31280 }, { "epoch": 1.1370739152554692, "grad_norm": 18.33467674255371, "learning_rate": 4.836570296961425e-05, "loss": 0.4864, "step": 31290 }, { "epoch": 1.1374373137582674, "grad_norm": 2.1457314491271973, "learning_rate": 4.836385700655891e-05, "loss": 0.1431, "step": 31300 }, { "epoch": 1.1378007122610656, "grad_norm": 1.3444671630859375, "learning_rate": 4.8362010036838096e-05, "loss": 0.1287, "step": 31310 }, { "epoch": 1.1381641107638636, "grad_norm": 1.0178183317184448, "learning_rate": 4.8360162060531395e-05, "loss": 0.1984, "step": 31320 }, { "epoch": 1.1385275092666618, "grad_norm": 1.013101577758789, "learning_rate": 4.835831307771842e-05, "loss": 0.1354, "step": 31330 }, { "epoch": 1.13889090776946, "grad_norm": 1.1397134065628052, "learning_rate": 4.8356463088478855e-05, "loss": 0.1149, "step": 31340 }, { "epoch": 1.1392543062722582, "grad_norm": 0.6582014560699463, "learning_rate": 4.835461209289239e-05, "loss": 0.1581, "step": 31350 }, { "epoch": 1.1396177047750564, "grad_norm": 6.679111480712891, "learning_rate": 4.835276009103878e-05, "loss": 0.1136, "step": 31360 }, { "epoch": 1.1399811032778544, "grad_norm": 1.710073709487915, "learning_rate": 4.835090708299784e-05, "loss": 0.1523, "step": 31370 }, { "epoch": 1.1403445017806526, "grad_norm": 0.8167402148246765, "learning_rate": 4.834905306884939e-05, "loss": 0.1333, "step": 31380 }, { "epoch": 1.1407079002834508, "grad_norm": 1.0377804040908813, "learning_rate": 4.834719804867332e-05, "loss": 0.1484, "step": 31390 }, { "epoch": 1.141071298786249, "grad_norm": 0.6715871691703796, "learning_rate": 4.8345342022549556e-05, "loss": 0.133, "step": 31400 }, { "epoch": 1.1414346972890472, "grad_norm": 0.8593924641609192, "learning_rate": 4.834348499055807e-05, "loss": 0.1381, "step": 31410 }, { "epoch": 1.1417980957918452, "grad_norm": 1.4667985439300537, "learning_rate": 
4.834162695277887e-05, "loss": 0.2026, "step": 31420 }, { "epoch": 1.1421614942946434, "grad_norm": 1.1011070013046265, "learning_rate": 4.8339767909292014e-05, "loss": 0.1324, "step": 31430 }, { "epoch": 1.1425248927974416, "grad_norm": 0.6192152500152588, "learning_rate": 4.83379078601776e-05, "loss": 0.116, "step": 31440 }, { "epoch": 1.1428882913002398, "grad_norm": 0.7945598363876343, "learning_rate": 4.8336046805515775e-05, "loss": 0.1652, "step": 31450 }, { "epoch": 1.143251689803038, "grad_norm": 0.9201329350471497, "learning_rate": 4.833418474538672e-05, "loss": 0.1279, "step": 31460 }, { "epoch": 1.1436150883058362, "grad_norm": 0.7190477252006531, "learning_rate": 4.833232167987067e-05, "loss": 0.1603, "step": 31470 }, { "epoch": 1.1439784868086345, "grad_norm": 0.92894047498703, "learning_rate": 4.83304576090479e-05, "loss": 0.1268, "step": 31480 }, { "epoch": 1.1443418853114324, "grad_norm": 0.7764700055122375, "learning_rate": 4.8328592532998716e-05, "loss": 0.1307, "step": 31490 }, { "epoch": 1.1447052838142306, "grad_norm": 1.3679301738739014, "learning_rate": 4.832672645180348e-05, "loss": 0.1734, "step": 31500 }, { "epoch": 1.1450686823170289, "grad_norm": 2.3595213890075684, "learning_rate": 4.832485936554261e-05, "loss": 0.1335, "step": 31510 }, { "epoch": 1.145432080819827, "grad_norm": 0.610569953918457, "learning_rate": 4.832299127429653e-05, "loss": 0.249, "step": 31520 }, { "epoch": 1.1457954793226253, "grad_norm": 1.4595023393630981, "learning_rate": 4.832112217814575e-05, "loss": 0.1103, "step": 31530 }, { "epoch": 1.1461588778254233, "grad_norm": 5.723475933074951, "learning_rate": 4.831925207717077e-05, "loss": 0.126, "step": 31540 }, { "epoch": 1.1465222763282215, "grad_norm": 1.8982267379760742, "learning_rate": 4.8317380971452205e-05, "loss": 0.1422, "step": 31550 }, { "epoch": 1.1468856748310197, "grad_norm": 0.8732501268386841, "learning_rate": 4.831550886107066e-05, "loss": 0.1364, "step": 31560 }, { "epoch": 1.1472490733338179, 
"grad_norm": 1.0855740308761597, "learning_rate": 4.831363574610679e-05, "loss": 0.1728, "step": 31570 }, { "epoch": 1.147612471836616, "grad_norm": 1.5332953929901123, "learning_rate": 4.8311761626641304e-05, "loss": 0.1336, "step": 31580 }, { "epoch": 1.1479758703394143, "grad_norm": 0.5183860063552856, "learning_rate": 4.8309886502754954e-05, "loss": 0.6253, "step": 31590 }, { "epoch": 1.1483392688422125, "grad_norm": 0.7912465929985046, "learning_rate": 4.830801037452853e-05, "loss": 0.1644, "step": 31600 }, { "epoch": 1.1487026673450105, "grad_norm": 1.5750758647918701, "learning_rate": 4.8306133242042875e-05, "loss": 0.1787, "step": 31610 }, { "epoch": 1.1490660658478087, "grad_norm": 0.5864933133125305, "learning_rate": 4.830425510537886e-05, "loss": 0.1751, "step": 31620 }, { "epoch": 1.1494294643506069, "grad_norm": 0.9228208661079407, "learning_rate": 4.830237596461741e-05, "loss": 0.8842, "step": 31630 }, { "epoch": 1.149792862853405, "grad_norm": 1.0034486055374146, "learning_rate": 4.8300495819839486e-05, "loss": 0.1047, "step": 31640 }, { "epoch": 1.1501562613562033, "grad_norm": 1.125537395477295, "learning_rate": 4.82986146711261e-05, "loss": 0.1447, "step": 31650 }, { "epoch": 1.1505196598590013, "grad_norm": 2.6315014362335205, "learning_rate": 4.829673251855831e-05, "loss": 0.116, "step": 31660 }, { "epoch": 1.1508830583617995, "grad_norm": 0.8134027719497681, "learning_rate": 4.82948493622172e-05, "loss": 0.1889, "step": 31670 }, { "epoch": 1.1512464568645977, "grad_norm": 1.003691554069519, "learning_rate": 4.8292965202183916e-05, "loss": 0.1477, "step": 31680 }, { "epoch": 1.151609855367396, "grad_norm": 1.7551583051681519, "learning_rate": 4.829108003853964e-05, "loss": 0.1156, "step": 31690 }, { "epoch": 1.1519732538701941, "grad_norm": 1.5970351696014404, "learning_rate": 4.8289193871365594e-05, "loss": 0.3698, "step": 31700 }, { "epoch": 1.152336652372992, "grad_norm": 4.339359760284424, "learning_rate": 4.828730670074305e-05, "loss": 
0.1259, "step": 31710 }, { "epoch": 1.1527000508757903, "grad_norm": 0.76881343126297, "learning_rate": 4.828541852675331e-05, "loss": 0.3217, "step": 31720 }, { "epoch": 1.1530634493785885, "grad_norm": 3.1044371128082275, "learning_rate": 4.828352934947774e-05, "loss": 0.13, "step": 31730 }, { "epoch": 1.1534268478813867, "grad_norm": 0.5273496508598328, "learning_rate": 4.828163916899774e-05, "loss": 0.1197, "step": 31740 }, { "epoch": 1.153790246384185, "grad_norm": 0.8816530108451843, "learning_rate": 4.827974798539473e-05, "loss": 0.1633, "step": 31750 }, { "epoch": 1.1541536448869831, "grad_norm": 1.220786452293396, "learning_rate": 4.827785579875022e-05, "loss": 0.1293, "step": 31760 }, { "epoch": 1.1545170433897813, "grad_norm": 2.700749397277832, "learning_rate": 4.827596260914572e-05, "loss": 0.1427, "step": 31770 }, { "epoch": 1.1548804418925793, "grad_norm": 1.6649949550628662, "learning_rate": 4.827406841666281e-05, "loss": 3.8743, "step": 31780 }, { "epoch": 1.1552438403953775, "grad_norm": 1.2381266355514526, "learning_rate": 4.827217322138311e-05, "loss": 0.1322, "step": 31790 }, { "epoch": 1.1556072388981757, "grad_norm": 0.6668787598609924, "learning_rate": 4.8270277023388255e-05, "loss": 0.1566, "step": 31800 }, { "epoch": 1.1556072388981757, "eval_loss": 0.35771968960762024, "eval_runtime": 257.3302, "eval_samples_per_second": 28.811, "eval_steps_per_second": 3.602, "eval_wer": 0.1810772051482201, "step": 31800 }, { "epoch": 1.155970637400974, "grad_norm": 1.1917558908462524, "learning_rate": 4.826837982275996e-05, "loss": 0.1274, "step": 31810 }, { "epoch": 1.1563340359037722, "grad_norm": 0.5347509980201721, "learning_rate": 4.8266481619579973e-05, "loss": 0.1872, "step": 31820 }, { "epoch": 1.1566974344065701, "grad_norm": 1.023681402206421, "learning_rate": 4.8264582413930076e-05, "loss": 0.1505, "step": 31830 }, { "epoch": 1.1570608329093683, "grad_norm": 1.00868821144104, "learning_rate": 4.82626822058921e-05, "loss": 0.1364, "step": 
31840 }, { "epoch": 1.1574242314121665, "grad_norm": 1.4557231664657593, "learning_rate": 4.8260780995547905e-05, "loss": 0.1799, "step": 31850 }, { "epoch": 1.1577876299149648, "grad_norm": 1.1228946447372437, "learning_rate": 4.8258878782979434e-05, "loss": 0.1392, "step": 31860 }, { "epoch": 1.158151028417763, "grad_norm": 0.6818620562553406, "learning_rate": 4.825716593483377e-05, "loss": 0.1876, "step": 31870 }, { "epoch": 1.1585144269205612, "grad_norm": 1.4734445810317993, "learning_rate": 4.8255261818264976e-05, "loss": 0.1502, "step": 31880 }, { "epoch": 1.1588778254233594, "grad_norm": 0.8915801048278809, "learning_rate": 4.825335669970969e-05, "loss": 0.1207, "step": 31890 }, { "epoch": 1.1592412239261574, "grad_norm": 0.324372798204422, "learning_rate": 4.825145057925e-05, "loss": 0.1674, "step": 31900 }, { "epoch": 1.1596046224289556, "grad_norm": 1.1831437349319458, "learning_rate": 4.824954345696803e-05, "loss": 0.1192, "step": 31910 }, { "epoch": 1.1599680209317538, "grad_norm": 0.5911235809326172, "learning_rate": 4.824763533294596e-05, "loss": 0.1652, "step": 31920 }, { "epoch": 1.160331419434552, "grad_norm": 2.4116334915161133, "learning_rate": 4.8245726207265997e-05, "loss": 0.1297, "step": 31930 }, { "epoch": 1.1606948179373502, "grad_norm": 0.5179087519645691, "learning_rate": 4.8243816080010404e-05, "loss": 0.1066, "step": 31940 }, { "epoch": 1.1610582164401482, "grad_norm": 0.6537795066833496, "learning_rate": 4.824190495126148e-05, "loss": 0.1409, "step": 31950 }, { "epoch": 1.1614216149429464, "grad_norm": 1.0357365608215332, "learning_rate": 4.823999282110155e-05, "loss": 0.1146, "step": 31960 }, { "epoch": 1.1617850134457446, "grad_norm": 0.4709915220737457, "learning_rate": 4.823807968961303e-05, "loss": 0.1726, "step": 31970 }, { "epoch": 1.1621484119485428, "grad_norm": 0.9699262380599976, "learning_rate": 4.823616555687833e-05, "loss": 0.125, "step": 31980 }, { "epoch": 1.162511810451341, "grad_norm": 1.2052152156829834, 
"learning_rate": 4.8234250422979946e-05, "loss": 0.1164, "step": 31990 }, { "epoch": 1.1628752089541392, "grad_norm": 1.1892735958099365, "learning_rate": 4.823233428800037e-05, "loss": 0.1412, "step": 32000 }, { "epoch": 1.1632386074569372, "grad_norm": 0.7427589893341064, "learning_rate": 4.8230417152022165e-05, "loss": 0.1315, "step": 32010 }, { "epoch": 1.1636020059597354, "grad_norm": 0.7582072019577026, "learning_rate": 4.8228499015127945e-05, "loss": 0.1783, "step": 32020 }, { "epoch": 1.1639654044625336, "grad_norm": 1.1409790515899658, "learning_rate": 4.8226579877400345e-05, "loss": 0.1303, "step": 32030 }, { "epoch": 1.1643288029653318, "grad_norm": 1.1382596492767334, "learning_rate": 4.822465973892206e-05, "loss": 0.1426, "step": 32040 }, { "epoch": 1.16469220146813, "grad_norm": 1.27096688747406, "learning_rate": 4.822273859977583e-05, "loss": 0.1505, "step": 32050 }, { "epoch": 1.1650555999709282, "grad_norm": 1.5508397817611694, "learning_rate": 4.822081646004441e-05, "loss": 0.1366, "step": 32060 }, { "epoch": 1.1654189984737262, "grad_norm": 0.2970573604106903, "learning_rate": 4.821889331981063e-05, "loss": 0.1505, "step": 32070 }, { "epoch": 1.1657823969765244, "grad_norm": 0.9228662848472595, "learning_rate": 4.821696917915736e-05, "loss": 0.1112, "step": 32080 }, { "epoch": 1.1661457954793226, "grad_norm": 0.770660936832428, "learning_rate": 4.821504403816748e-05, "loss": 0.1284, "step": 32090 }, { "epoch": 1.1665091939821208, "grad_norm": 0.8875694274902344, "learning_rate": 4.8213117896923954e-05, "loss": 0.1421, "step": 32100 }, { "epoch": 1.166872592484919, "grad_norm": 0.9725656509399414, "learning_rate": 4.821119075550978e-05, "loss": 0.1269, "step": 32110 }, { "epoch": 1.167235990987717, "grad_norm": 0.5882539749145508, "learning_rate": 4.820926261400797e-05, "loss": 0.159, "step": 32120 }, { "epoch": 1.1675993894905152, "grad_norm": 0.9551408886909485, "learning_rate": 4.820733347250162e-05, "loss": 0.1788, "step": 32130 }, { "epoch": 
1.1679627879933134, "grad_norm": 0.929642915725708, "learning_rate": 4.820540333107384e-05, "loss": 0.1329, "step": 32140 }, { "epoch": 1.1683261864961116, "grad_norm": 1.707643747329712, "learning_rate": 4.8203472189807795e-05, "loss": 0.1317, "step": 32150 }, { "epoch": 1.1686895849989098, "grad_norm": 1.401150107383728, "learning_rate": 4.82015400487867e-05, "loss": 0.126, "step": 32160 }, { "epoch": 1.169052983501708, "grad_norm": 0.7058550715446472, "learning_rate": 4.8199606908093785e-05, "loss": 0.1811, "step": 32170 }, { "epoch": 1.1694163820045063, "grad_norm": 1.2024914026260376, "learning_rate": 4.8197672767812366e-05, "loss": 0.1524, "step": 32180 }, { "epoch": 1.1697797805073042, "grad_norm": 0.8119955062866211, "learning_rate": 4.819573762802575e-05, "loss": 0.1281, "step": 32190 }, { "epoch": 1.1701431790101025, "grad_norm": 0.8531884551048279, "learning_rate": 4.8193801488817336e-05, "loss": 0.1362, "step": 32200 }, { "epoch": 1.1705065775129007, "grad_norm": 0.7170140743255615, "learning_rate": 4.819186435027054e-05, "loss": 0.1276, "step": 32210 }, { "epoch": 1.1708699760156989, "grad_norm": 1.7031468152999878, "learning_rate": 4.8189926212468825e-05, "loss": 0.1369, "step": 32220 }, { "epoch": 1.171233374518497, "grad_norm": 1.639916181564331, "learning_rate": 4.81879870754957e-05, "loss": 0.1318, "step": 32230 }, { "epoch": 1.171596773021295, "grad_norm": 1.0148886442184448, "learning_rate": 4.8186046939434716e-05, "loss": 0.119, "step": 32240 }, { "epoch": 1.1719601715240933, "grad_norm": 1.4640549421310425, "learning_rate": 4.818410580436947e-05, "loss": 0.1603, "step": 32250 }, { "epoch": 1.1723235700268915, "grad_norm": 1.0362626314163208, "learning_rate": 4.818216367038358e-05, "loss": 0.1318, "step": 32260 }, { "epoch": 1.1726869685296897, "grad_norm": 0.507990837097168, "learning_rate": 4.818022053756076e-05, "loss": 0.1681, "step": 32270 }, { "epoch": 1.1730503670324879, "grad_norm": 0.7118284106254578, "learning_rate": 
4.81782764059847e-05, "loss": 0.1129, "step": 32280 }, { "epoch": 1.173413765535286, "grad_norm": 2.637918472290039, "learning_rate": 4.8176331275739175e-05, "loss": 0.1866, "step": 32290 }, { "epoch": 1.173777164038084, "grad_norm": 1.5417594909667969, "learning_rate": 4.817438514690801e-05, "loss": 0.1382, "step": 32300 }, { "epoch": 1.1741405625408823, "grad_norm": 1.4842432737350464, "learning_rate": 4.817243801957503e-05, "loss": 0.1381, "step": 32310 }, { "epoch": 1.1745039610436805, "grad_norm": 2.0502350330352783, "learning_rate": 4.817048989382415e-05, "loss": 0.1515, "step": 32320 }, { "epoch": 1.1748673595464787, "grad_norm": 1.8963838815689087, "learning_rate": 4.81685407697393e-05, "loss": 0.1453, "step": 32330 }, { "epoch": 1.175230758049277, "grad_norm": 0.6867222785949707, "learning_rate": 4.8166590647404466e-05, "loss": 0.1566, "step": 32340 }, { "epoch": 1.175594156552075, "grad_norm": 1.4324911832809448, "learning_rate": 4.8164639526903665e-05, "loss": 0.1261, "step": 32350 }, { "epoch": 1.175957555054873, "grad_norm": 4.706410884857178, "learning_rate": 4.8162687408320963e-05, "loss": 0.1168, "step": 32360 }, { "epoch": 1.1763209535576713, "grad_norm": 0.6849080324172974, "learning_rate": 4.8160734291740476e-05, "loss": 0.1587, "step": 32370 }, { "epoch": 1.1766843520604695, "grad_norm": 3.180955171585083, "learning_rate": 4.815878017724636e-05, "loss": 0.1312, "step": 32380 }, { "epoch": 1.1770477505632677, "grad_norm": 0.5583860278129578, "learning_rate": 4.81568250649228e-05, "loss": 0.1385, "step": 32390 }, { "epoch": 1.177411149066066, "grad_norm": 0.8250964283943176, "learning_rate": 4.8154868954854036e-05, "loss": 0.1393, "step": 32400 }, { "epoch": 1.177411149066066, "eval_loss": 0.38513997197151184, "eval_runtime": 179.4965, "eval_samples_per_second": 41.304, "eval_steps_per_second": 5.164, "eval_wer": 0.18090475066712655, "step": 32400 }, { "epoch": 1.177774547568864, "grad_norm": 2.3377466201782227, "learning_rate": 
4.815291184712437e-05, "loss": 0.1197, "step": 32410 }, { "epoch": 1.1781379460716621, "grad_norm": 0.7508591413497925, "learning_rate": 4.81509537418181e-05, "loss": 0.1786, "step": 32420 }, { "epoch": 1.1785013445744603, "grad_norm": 0.8103131651878357, "learning_rate": 4.81489946390196e-05, "loss": 0.1617, "step": 32430 }, { "epoch": 1.1788647430772585, "grad_norm": 1.2582241296768188, "learning_rate": 4.814703453881329e-05, "loss": 0.1326, "step": 32440 }, { "epoch": 1.1792281415800567, "grad_norm": 1.110107660293579, "learning_rate": 4.8145073441283613e-05, "loss": 0.1504, "step": 32450 }, { "epoch": 1.179591540082855, "grad_norm": 0.9912093281745911, "learning_rate": 4.814311134651509e-05, "loss": 0.133, "step": 32460 }, { "epoch": 1.1799549385856531, "grad_norm": 1.1711434125900269, "learning_rate": 4.814114825459223e-05, "loss": 0.1328, "step": 32470 }, { "epoch": 1.1803183370884511, "grad_norm": 3.884737491607666, "learning_rate": 4.813918416559963e-05, "loss": 0.1225, "step": 32480 }, { "epoch": 1.1806817355912493, "grad_norm": 0.9459224939346313, "learning_rate": 4.8137219079621906e-05, "loss": 0.1507, "step": 32490 }, { "epoch": 1.1810451340940475, "grad_norm": 1.7159967422485352, "learning_rate": 4.813525299674374e-05, "loss": 0.1823, "step": 32500 }, { "epoch": 1.1814085325968458, "grad_norm": 1.3824647665023804, "learning_rate": 4.8133285917049844e-05, "loss": 0.1393, "step": 32510 }, { "epoch": 1.181771931099644, "grad_norm": 214.12107849121094, "learning_rate": 4.813131784062496e-05, "loss": 4.0762, "step": 32520 }, { "epoch": 1.182135329602442, "grad_norm": 2.5384116172790527, "learning_rate": 4.812934876755389e-05, "loss": 0.1379, "step": 32530 }, { "epoch": 1.1824987281052401, "grad_norm": 1.4254207611083984, "learning_rate": 4.812737869792148e-05, "loss": 0.1461, "step": 32540 }, { "epoch": 1.1828621266080384, "grad_norm": 1.571662187576294, "learning_rate": 4.812540763181261e-05, "loss": 0.178, "step": 32550 }, { "epoch": 1.1832255251108366, 
"grad_norm": 5.712926864624023, "learning_rate": 4.8123435569312206e-05, "loss": 0.1071, "step": 32560 }, { "epoch": 1.1835889236136348, "grad_norm": 0.8147953152656555, "learning_rate": 4.812146251050523e-05, "loss": 0.1211, "step": 32570 }, { "epoch": 1.183952322116433, "grad_norm": 1.1877583265304565, "learning_rate": 4.8119488455476714e-05, "loss": 0.1668, "step": 32580 }, { "epoch": 1.184315720619231, "grad_norm": 0.7466074824333191, "learning_rate": 4.8117513404311686e-05, "loss": 0.098, "step": 32590 }, { "epoch": 1.1846791191220292, "grad_norm": 1.6904805898666382, "learning_rate": 4.8115537357095265e-05, "loss": 0.1626, "step": 32600 }, { "epoch": 1.1850425176248274, "grad_norm": 0.879503607749939, "learning_rate": 4.811356031391259e-05, "loss": 0.1129, "step": 32610 }, { "epoch": 1.1854059161276256, "grad_norm": 2.447317600250244, "learning_rate": 4.811158227484883e-05, "loss": 0.1255, "step": 32620 }, { "epoch": 1.1857693146304238, "grad_norm": 0.9513424038887024, "learning_rate": 4.810960323998922e-05, "loss": 0.1347, "step": 32630 }, { "epoch": 1.186132713133222, "grad_norm": 0.46179428696632385, "learning_rate": 4.810762320941903e-05, "loss": 0.1002, "step": 32640 }, { "epoch": 1.18649611163602, "grad_norm": 0.7595782279968262, "learning_rate": 4.8105642183223585e-05, "loss": 0.1585, "step": 32650 }, { "epoch": 1.1868595101388182, "grad_norm": 1.8892844915390015, "learning_rate": 4.8103660161488216e-05, "loss": 0.1475, "step": 32660 }, { "epoch": 1.1872229086416164, "grad_norm": 3.290606737136841, "learning_rate": 4.810167714429834e-05, "loss": 0.1603, "step": 32670 }, { "epoch": 1.1875863071444146, "grad_norm": 1.3222955465316772, "learning_rate": 4.809969313173939e-05, "loss": 0.1251, "step": 32680 }, { "epoch": 1.1879497056472128, "grad_norm": 1.8568757772445679, "learning_rate": 4.809770812389686e-05, "loss": 0.1517, "step": 32690 }, { "epoch": 1.1883131041500108, "grad_norm": 1.3318365812301636, "learning_rate": 4.8095722120856255e-05, "loss": 
0.1778, "step": 32700 }, { "epoch": 1.188676502652809, "grad_norm": 1.513069748878479, "learning_rate": 4.8093735122703164e-05, "loss": 0.1325, "step": 32710 }, { "epoch": 1.1890399011556072, "grad_norm": 0.37486693263053894, "learning_rate": 4.809174712952319e-05, "loss": 0.1482, "step": 32720 }, { "epoch": 1.1894032996584054, "grad_norm": 3.7855522632598877, "learning_rate": 4.8089758141402e-05, "loss": 0.1237, "step": 32730 }, { "epoch": 1.1897666981612036, "grad_norm": 0.6902849674224854, "learning_rate": 4.8087768158425285e-05, "loss": 0.1099, "step": 32740 }, { "epoch": 1.1901300966640018, "grad_norm": 0.6842343211174011, "learning_rate": 4.808577718067878e-05, "loss": 0.17, "step": 32750 }, { "epoch": 1.1904934951668, "grad_norm": 0.9745518565177917, "learning_rate": 4.808378520824829e-05, "loss": 0.1446, "step": 32760 }, { "epoch": 1.190856893669598, "grad_norm": 1.468474268913269, "learning_rate": 4.808179224121962e-05, "loss": 0.1563, "step": 32770 }, { "epoch": 1.1912202921723962, "grad_norm": 1.6509790420532227, "learning_rate": 4.807979827967864e-05, "loss": 0.1416, "step": 32780 }, { "epoch": 1.1915836906751944, "grad_norm": 0.9928446412086487, "learning_rate": 4.8077803323711277e-05, "loss": 1.6808, "step": 32790 }, { "epoch": 1.1919470891779926, "grad_norm": 3.463270425796509, "learning_rate": 4.807580737340348e-05, "loss": 0.1462, "step": 32800 }, { "epoch": 1.1923104876807908, "grad_norm": 1.0357753038406372, "learning_rate": 4.807381042884125e-05, "loss": 0.1475, "step": 32810 }, { "epoch": 1.1926738861835888, "grad_norm": 0.6824864745140076, "learning_rate": 4.807181249011062e-05, "loss": 0.1884, "step": 32820 }, { "epoch": 1.193037284686387, "grad_norm": 8.779791831970215, "learning_rate": 4.8069813557297685e-05, "loss": 0.128, "step": 32830 }, { "epoch": 1.1934006831891852, "grad_norm": 1.07723867893219, "learning_rate": 4.806781363048856e-05, "loss": 0.1433, "step": 32840 }, { "epoch": 1.1937640816919834, "grad_norm": 1.9113037586212158, 
"learning_rate": 4.806581270976942e-05, "loss": 0.1575, "step": 32850 }, { "epoch": 1.1941274801947817, "grad_norm": 1.2443821430206299, "learning_rate": 4.806381079522648e-05, "loss": 0.1585, "step": 32860 }, { "epoch": 1.1944908786975799, "grad_norm": 0.46389827132225037, "learning_rate": 4.8061807886946e-05, "loss": 0.2578, "step": 32870 }, { "epoch": 1.1948542772003778, "grad_norm": 0.9189543128013611, "learning_rate": 4.8059803985014274e-05, "loss": 0.125, "step": 32880 }, { "epoch": 1.195217675703176, "grad_norm": 0.8623115420341492, "learning_rate": 4.805779908951763e-05, "loss": 0.1094, "step": 32890 }, { "epoch": 1.1955810742059743, "grad_norm": 0.5328871607780457, "learning_rate": 4.805579320054247e-05, "loss": 0.142, "step": 32900 }, { "epoch": 1.1959444727087725, "grad_norm": 0.8360912799835205, "learning_rate": 4.805378631817522e-05, "loss": 0.1465, "step": 32910 }, { "epoch": 1.1963078712115707, "grad_norm": 0.4089026153087616, "learning_rate": 4.805177844250234e-05, "loss": 0.1779, "step": 32920 }, { "epoch": 1.1966712697143689, "grad_norm": 1.4934437274932861, "learning_rate": 4.8049769573610336e-05, "loss": 0.1577, "step": 32930 }, { "epoch": 1.1970346682171669, "grad_norm": 0.991147518157959, "learning_rate": 4.8047759711585784e-05, "loss": 0.1302, "step": 32940 }, { "epoch": 1.197398066719965, "grad_norm": 4.548572540283203, "learning_rate": 4.804574885651526e-05, "loss": 0.1184, "step": 32950 }, { "epoch": 1.1977614652227633, "grad_norm": 1.7906454801559448, "learning_rate": 4.8043737008485424e-05, "loss": 0.138, "step": 32960 }, { "epoch": 1.1981248637255615, "grad_norm": 0.4827491044998169, "learning_rate": 4.804172416758294e-05, "loss": 0.1573, "step": 32970 }, { "epoch": 1.1984882622283597, "grad_norm": 0.8055851459503174, "learning_rate": 4.803971033389455e-05, "loss": 0.1203, "step": 32980 }, { "epoch": 1.1988516607311577, "grad_norm": 0.7492426633834839, "learning_rate": 4.8037695507507016e-05, "loss": 0.1158, "step": 32990 }, { "epoch": 
1.1992150592339559, "grad_norm": 0.8737430572509766, "learning_rate": 4.8035679688507154e-05, "loss": 0.1672, "step": 33000 }, { "epoch": 1.1992150592339559, "eval_loss": 0.36745160818099976, "eval_runtime": 180.2517, "eval_samples_per_second": 41.131, "eval_steps_per_second": 5.143, "eval_wer": 0.17665692450124348, "step": 33000 }, { "epoch": 1.199578457736754, "grad_norm": 0.9123022556304932, "learning_rate": 4.803366287698182e-05, "loss": 0.1182, "step": 33010 }, { "epoch": 1.1999418562395523, "grad_norm": 0.5147042870521545, "learning_rate": 4.803164507301789e-05, "loss": 0.1293, "step": 33020 }, { "epoch": 1.2003052547423505, "grad_norm": 2.508376359939575, "learning_rate": 4.8029626276702336e-05, "loss": 0.1518, "step": 33030 }, { "epoch": 1.2006686532451487, "grad_norm": 1.3006081581115723, "learning_rate": 4.802760648812213e-05, "loss": 0.1503, "step": 33040 }, { "epoch": 1.201032051747947, "grad_norm": 1.490337610244751, "learning_rate": 4.802558570736427e-05, "loss": 0.1589, "step": 33050 }, { "epoch": 1.201395450250745, "grad_norm": 0.6895734667778015, "learning_rate": 4.802356393451587e-05, "loss": 0.137, "step": 33060 }, { "epoch": 1.201758848753543, "grad_norm": 0.45895853638648987, "learning_rate": 4.8021541169664006e-05, "loss": 0.2112, "step": 33070 }, { "epoch": 1.2021222472563413, "grad_norm": 1.6609526872634888, "learning_rate": 4.801951741289585e-05, "loss": 0.1392, "step": 33080 }, { "epoch": 1.2024856457591395, "grad_norm": 1.1131823062896729, "learning_rate": 4.801749266429858e-05, "loss": 0.124, "step": 33090 }, { "epoch": 1.2028490442619377, "grad_norm": 0.547478973865509, "learning_rate": 4.8015466923959465e-05, "loss": 0.166, "step": 33100 }, { "epoch": 1.2032124427647357, "grad_norm": 0.778753936290741, "learning_rate": 4.801344019196576e-05, "loss": 0.1414, "step": 33110 }, { "epoch": 1.203575841267534, "grad_norm": 1.1527098417282104, "learning_rate": 4.801141246840481e-05, "loss": 0.1719, "step": 33120 }, { "epoch": 
1.2039392397703321, "grad_norm": 0.9628286361694336, "learning_rate": 4.800938375336395e-05, "loss": 0.1168, "step": 33130 }, { "epoch": 1.2043026382731303, "grad_norm": 1.7359286546707153, "learning_rate": 4.8007354046930624e-05, "loss": 0.1145, "step": 33140 }, { "epoch": 1.2046660367759285, "grad_norm": 0.8443882465362549, "learning_rate": 4.8005323349192276e-05, "loss": 0.138, "step": 33150 }, { "epoch": 1.2050294352787267, "grad_norm": 1.166198968887329, "learning_rate": 4.8003291660236396e-05, "loss": 0.164, "step": 33160 }, { "epoch": 1.2053928337815247, "grad_norm": 0.42992278933525085, "learning_rate": 4.800125898015052e-05, "loss": 0.1786, "step": 33170 }, { "epoch": 1.205756232284323, "grad_norm": 0.7348678112030029, "learning_rate": 4.799922530902223e-05, "loss": 0.1175, "step": 33180 }, { "epoch": 1.2061196307871211, "grad_norm": 1.4282450675964355, "learning_rate": 4.799719064693917e-05, "loss": 0.1397, "step": 33190 }, { "epoch": 1.2064830292899194, "grad_norm": 0.9985376596450806, "learning_rate": 4.7995154993988974e-05, "loss": 0.1382, "step": 33200 }, { "epoch": 1.2068464277927176, "grad_norm": 0.7168998718261719, "learning_rate": 4.799311835025937e-05, "loss": 0.1123, "step": 33210 }, { "epoch": 1.2072098262955158, "grad_norm": 0.521123468875885, "learning_rate": 4.799108071583811e-05, "loss": 0.1753, "step": 33220 }, { "epoch": 1.2075732247983137, "grad_norm": 1.0951159000396729, "learning_rate": 4.7989042090812976e-05, "loss": 0.1182, "step": 33230 }, { "epoch": 1.207936623301112, "grad_norm": 1.108727216720581, "learning_rate": 4.798700247527182e-05, "loss": 1.4247, "step": 33240 }, { "epoch": 1.2083000218039102, "grad_norm": 0.4534373879432678, "learning_rate": 4.7984961869302516e-05, "loss": 0.1715, "step": 33250 }, { "epoch": 1.2086634203067084, "grad_norm": 0.6849185824394226, "learning_rate": 4.798292027299298e-05, "loss": 0.1367, "step": 33260 }, { "epoch": 1.2090268188095066, "grad_norm": 0.8563576340675354, "learning_rate": 
4.7980877686431195e-05, "loss": 0.2058, "step": 33270 }, { "epoch": 1.2093902173123046, "grad_norm": 0.5488440990447998, "learning_rate": 4.797883410970514e-05, "loss": 0.1246, "step": 33280 }, { "epoch": 1.2097536158151028, "grad_norm": 0.5783109068870544, "learning_rate": 4.7976789542902895e-05, "loss": 0.1135, "step": 33290 }, { "epoch": 1.210117014317901, "grad_norm": 2.218514919281006, "learning_rate": 4.7974743986112536e-05, "loss": 0.8269, "step": 33300 }, { "epoch": 1.2104804128206992, "grad_norm": 1.6320664882659912, "learning_rate": 4.79726974394222e-05, "loss": 0.1185, "step": 33310 }, { "epoch": 1.2108438113234974, "grad_norm": 1.287618637084961, "learning_rate": 4.797064990292007e-05, "loss": 0.1815, "step": 33320 }, { "epoch": 1.2112072098262956, "grad_norm": 2.3232581615448, "learning_rate": 4.796860137669437e-05, "loss": 0.1285, "step": 33330 }, { "epoch": 1.2115706083290938, "grad_norm": 1.2804290056228638, "learning_rate": 4.796655186083335e-05, "loss": 0.1339, "step": 33340 }, { "epoch": 1.2119340068318918, "grad_norm": 0.6492500901222229, "learning_rate": 4.796450135542534e-05, "loss": 0.1278, "step": 33350 }, { "epoch": 1.21229740533469, "grad_norm": 1.7094756364822388, "learning_rate": 4.796244986055867e-05, "loss": 0.1337, "step": 33360 }, { "epoch": 1.2126608038374882, "grad_norm": 1.5763776302337646, "learning_rate": 4.796039737632173e-05, "loss": 0.2283, "step": 33370 }, { "epoch": 1.2130242023402864, "grad_norm": 0.631926417350769, "learning_rate": 4.795834390280296e-05, "loss": 0.2165, "step": 33380 }, { "epoch": 1.2133876008430846, "grad_norm": 1.4329982995986938, "learning_rate": 4.795628944009084e-05, "loss": 0.1255, "step": 33390 }, { "epoch": 1.2137509993458826, "grad_norm": 0.5400133728981018, "learning_rate": 4.795423398827389e-05, "loss": 0.1361, "step": 33400 }, { "epoch": 1.2141143978486808, "grad_norm": 0.8651421070098877, "learning_rate": 4.795217754744067e-05, "loss": 0.1336, "step": 33410 }, { "epoch": 1.214477796351479, 
"grad_norm": 0.32640397548675537, "learning_rate": 4.795012011767977e-05, "loss": 0.208, "step": 33420 }, { "epoch": 1.2148411948542772, "grad_norm": 2.425781726837158, "learning_rate": 4.794806169907987e-05, "loss": 0.1107, "step": 33430 }, { "epoch": 1.2152045933570754, "grad_norm": 1.9098165035247803, "learning_rate": 4.794600229172963e-05, "loss": 0.1087, "step": 33440 }, { "epoch": 1.2155679918598736, "grad_norm": 1.4842039346694946, "learning_rate": 4.794394189571779e-05, "loss": 0.141, "step": 33450 }, { "epoch": 1.2159313903626716, "grad_norm": 1.6379314661026, "learning_rate": 4.794188051113313e-05, "loss": 0.1382, "step": 33460 }, { "epoch": 1.2162947888654698, "grad_norm": 1.6831467151641846, "learning_rate": 4.7939818138064474e-05, "loss": 0.1564, "step": 33470 }, { "epoch": 1.216658187368268, "grad_norm": 0.4303675889968872, "learning_rate": 4.793775477660067e-05, "loss": 0.1153, "step": 33480 }, { "epoch": 1.2170215858710662, "grad_norm": 0.5871365070343018, "learning_rate": 4.7935690426830624e-05, "loss": 0.1122, "step": 33490 }, { "epoch": 1.2173849843738644, "grad_norm": 0.7488551735877991, "learning_rate": 4.7933625088843287e-05, "loss": 1.8561, "step": 33500 }, { "epoch": 1.2177483828766626, "grad_norm": 1.4515953063964844, "learning_rate": 4.793155876272764e-05, "loss": 0.127, "step": 33510 }, { "epoch": 1.2181117813794606, "grad_norm": 0.9288650155067444, "learning_rate": 4.7929491448572716e-05, "loss": 0.2149, "step": 33520 }, { "epoch": 1.2184751798822588, "grad_norm": 1.544545292854309, "learning_rate": 4.792742314646759e-05, "loss": 0.6921, "step": 33530 }, { "epoch": 1.218838578385057, "grad_norm": 1.1275858879089355, "learning_rate": 4.792535385650138e-05, "loss": 0.1592, "step": 33540 }, { "epoch": 1.2192019768878553, "grad_norm": 0.7861330509185791, "learning_rate": 4.7923283578763236e-05, "loss": 0.156, "step": 33550 }, { "epoch": 1.2195653753906535, "grad_norm": 1.7547698020935059, "learning_rate": 4.792121231334237e-05, "loss": 
0.135, "step": 33560 }, { "epoch": 1.2199287738934514, "grad_norm": 0.9989791512489319, "learning_rate": 4.7919140060328014e-05, "loss": 0.2015, "step": 33570 }, { "epoch": 1.2202921723962497, "grad_norm": 0.8089576959609985, "learning_rate": 4.791706681980945e-05, "loss": 2.7874, "step": 33580 }, { "epoch": 1.2206555708990479, "grad_norm": 1.2729178667068481, "learning_rate": 4.791499259187603e-05, "loss": 0.1749, "step": 33590 }, { "epoch": 1.221018969401846, "grad_norm": 1.6203336715698242, "learning_rate": 4.7912917376617106e-05, "loss": 0.1524, "step": 33600 }, { "epoch": 1.221018969401846, "eval_loss": 0.3562403917312622, "eval_runtime": 180.3906, "eval_samples_per_second": 41.1, "eval_steps_per_second": 5.139, "eval_wer": 0.1815582623849547, "step": 33600 }, { "epoch": 1.2213823679046443, "grad_norm": 1.8868520259857178, "learning_rate": 4.7910841174122104e-05, "loss": 0.1514, "step": 33610 }, { "epoch": 1.2217457664074425, "grad_norm": 1.3601691722869873, "learning_rate": 4.7908763984480465e-05, "loss": 0.1675, "step": 33620 }, { "epoch": 1.2221091649102407, "grad_norm": 1.2268040180206299, "learning_rate": 4.790668580778169e-05, "loss": 0.1363, "step": 33630 }, { "epoch": 1.2224725634130387, "grad_norm": 1.918747901916504, "learning_rate": 4.790460664411534e-05, "loss": 0.1397, "step": 33640 }, { "epoch": 1.2228359619158369, "grad_norm": 0.6259877681732178, "learning_rate": 4.790252649357098e-05, "loss": 0.1555, "step": 33650 }, { "epoch": 1.223199360418635, "grad_norm": 2.5940511226654053, "learning_rate": 4.7900445356238235e-05, "loss": 0.1508, "step": 33660 }, { "epoch": 1.2235627589214333, "grad_norm": 1.1692243814468384, "learning_rate": 4.7898363232206785e-05, "loss": 0.1642, "step": 33670 }, { "epoch": 1.2239261574242315, "grad_norm": 1.459763526916504, "learning_rate": 4.789628012156633e-05, "loss": 0.1325, "step": 33680 }, { "epoch": 1.2242895559270295, "grad_norm": 0.4898362159729004, "learning_rate": 4.789419602440663e-05, "loss": 0.1604, 
"step": 33690 }, { "epoch": 1.2246529544298277, "grad_norm": 1.6771429777145386, "learning_rate": 4.7892110940817495e-05, "loss": 0.217, "step": 33700 }, { "epoch": 1.225016352932626, "grad_norm": 1.0040748119354248, "learning_rate": 4.789002487088874e-05, "loss": 0.1428, "step": 33710 }, { "epoch": 1.225379751435424, "grad_norm": 0.5210689306259155, "learning_rate": 4.788793781471025e-05, "loss": 0.1777, "step": 33720 }, { "epoch": 1.2257431499382223, "grad_norm": 2.0783729553222656, "learning_rate": 4.788584977237196e-05, "loss": 0.1373, "step": 33730 }, { "epoch": 1.2261065484410205, "grad_norm": 0.8238822221755981, "learning_rate": 4.788376074396384e-05, "loss": 0.1246, "step": 33740 }, { "epoch": 1.2264699469438185, "grad_norm": 1.1031908988952637, "learning_rate": 4.7881670729575875e-05, "loss": 0.1488, "step": 33750 }, { "epoch": 1.2268333454466167, "grad_norm": 1.3136149644851685, "learning_rate": 4.787957972929814e-05, "loss": 0.1382, "step": 33760 }, { "epoch": 1.227196743949415, "grad_norm": 0.9418723583221436, "learning_rate": 4.7877487743220726e-05, "loss": 0.1531, "step": 33770 }, { "epoch": 1.2275601424522131, "grad_norm": 1.3498002290725708, "learning_rate": 4.7875394771433755e-05, "loss": 0.1345, "step": 33780 }, { "epoch": 1.2279235409550113, "grad_norm": 1.0489355325698853, "learning_rate": 4.7873300814027415e-05, "loss": 0.1522, "step": 33790 }, { "epoch": 1.2282869394578095, "grad_norm": 1.1034955978393555, "learning_rate": 4.7871205871091926e-05, "loss": 0.1721, "step": 33800 }, { "epoch": 1.2286503379606075, "grad_norm": 1.1162317991256714, "learning_rate": 4.786910994271756e-05, "loss": 0.1774, "step": 33810 }, { "epoch": 1.2290137364634057, "grad_norm": 0.6511724591255188, "learning_rate": 4.786701302899461e-05, "loss": 0.1491, "step": 33820 }, { "epoch": 1.229377134966204, "grad_norm": 0.730034589767456, "learning_rate": 4.786491513001343e-05, "loss": 0.1304, "step": 33830 }, { "epoch": 1.2297405334690021, "grad_norm": 0.3531613051891327, 
"learning_rate": 4.786281624586441e-05, "loss": 0.1022, "step": 33840 }, { "epoch": 1.2301039319718003, "grad_norm": 0.8404261469841003, "learning_rate": 4.786071637663798e-05, "loss": 0.1366, "step": 33850 }, { "epoch": 1.2304673304745983, "grad_norm": 1.0911661386489868, "learning_rate": 4.785861552242462e-05, "loss": 0.1132, "step": 33860 }, { "epoch": 1.2308307289773965, "grad_norm": 0.9053283333778381, "learning_rate": 4.785651368331485e-05, "loss": 0.1393, "step": 33870 }, { "epoch": 1.2311941274801947, "grad_norm": 1.065520167350769, "learning_rate": 4.7854410859399236e-05, "loss": 0.1277, "step": 33880 }, { "epoch": 1.231557525982993, "grad_norm": 0.3727855384349823, "learning_rate": 4.785230705076837e-05, "loss": 0.1213, "step": 33890 }, { "epoch": 1.2319209244857912, "grad_norm": 1.7203010320663452, "learning_rate": 4.78502022575129e-05, "loss": 0.1735, "step": 33900 }, { "epoch": 1.2322843229885894, "grad_norm": 0.7186889052391052, "learning_rate": 4.7848096479723516e-05, "loss": 0.1195, "step": 33910 }, { "epoch": 1.2326477214913876, "grad_norm": 1.0675809383392334, "learning_rate": 4.784598971749095e-05, "loss": 0.1699, "step": 33920 }, { "epoch": 1.2330111199941856, "grad_norm": 1.6882377862930298, "learning_rate": 4.784388197090597e-05, "loss": 0.1275, "step": 33930 }, { "epoch": 1.2333745184969838, "grad_norm": 0.4500318765640259, "learning_rate": 4.78417732400594e-05, "loss": 0.1248, "step": 33940 }, { "epoch": 1.233737916999782, "grad_norm": 1.0862751007080078, "learning_rate": 4.783966352504209e-05, "loss": 0.1585, "step": 33950 }, { "epoch": 1.2341013155025802, "grad_norm": 0.9130736589431763, "learning_rate": 4.7837552825944943e-05, "loss": 0.1489, "step": 33960 }, { "epoch": 1.2344647140053784, "grad_norm": 0.47646433115005493, "learning_rate": 4.783544114285891e-05, "loss": 0.135, "step": 33970 }, { "epoch": 1.2348281125081764, "grad_norm": 0.7090937495231628, "learning_rate": 4.783332847587495e-05, "loss": 0.1231, "step": 33980 }, { "epoch": 
1.2351915110109746, "grad_norm": 2.1009280681610107, "learning_rate": 4.7831214825084117e-05, "loss": 0.1239, "step": 33990 }, { "epoch": 1.2355549095137728, "grad_norm": 0.6040928363800049, "learning_rate": 4.782910019057747e-05, "loss": 0.1757, "step": 34000 }, { "epoch": 1.235918308016571, "grad_norm": 3.8224098682403564, "learning_rate": 4.782698457244612e-05, "loss": 0.1201, "step": 34010 }, { "epoch": 1.2362817065193692, "grad_norm": 0.4506910741329193, "learning_rate": 4.782486797078122e-05, "loss": 0.1381, "step": 34020 }, { "epoch": 1.2366451050221674, "grad_norm": 1.0445079803466797, "learning_rate": 4.782275038567398e-05, "loss": 0.1386, "step": 34030 }, { "epoch": 1.2370085035249654, "grad_norm": 1.1579469442367554, "learning_rate": 4.7820631817215625e-05, "loss": 0.1221, "step": 34040 }, { "epoch": 1.2373719020277636, "grad_norm": 1.023468017578125, "learning_rate": 4.781851226549743e-05, "loss": 0.1524, "step": 34050 }, { "epoch": 1.2377353005305618, "grad_norm": 1.0542868375778198, "learning_rate": 4.781639173061074e-05, "loss": 0.1268, "step": 34060 }, { "epoch": 1.23809869903336, "grad_norm": 0.7573347687721252, "learning_rate": 4.7814270212646915e-05, "loss": 0.2058, "step": 34070 }, { "epoch": 1.2384620975361582, "grad_norm": 1.2218323945999146, "learning_rate": 4.781214771169736e-05, "loss": 0.1141, "step": 34080 }, { "epoch": 1.2388254960389564, "grad_norm": 0.7725077867507935, "learning_rate": 4.781002422785352e-05, "loss": 0.1221, "step": 34090 }, { "epoch": 1.2391888945417544, "grad_norm": 2.2234578132629395, "learning_rate": 4.78078997612069e-05, "loss": 1.9034, "step": 34100 }, { "epoch": 1.2395522930445526, "grad_norm": 1.260764718055725, "learning_rate": 4.780577431184902e-05, "loss": 0.1205, "step": 34110 }, { "epoch": 1.2399156915473508, "grad_norm": 0.5173097252845764, "learning_rate": 4.780364787987148e-05, "loss": 0.2101, "step": 34120 }, { "epoch": 1.240279090050149, "grad_norm": 0.9755317568778992, "learning_rate": 
4.780152046536588e-05, "loss": 0.1041, "step": 34130 }, { "epoch": 1.2406424885529472, "grad_norm": 1.4319573640823364, "learning_rate": 4.77993920684239e-05, "loss": 0.1382, "step": 34140 }, { "epoch": 1.2410058870557452, "grad_norm": 0.8623887896537781, "learning_rate": 4.7797262689137224e-05, "loss": 0.1646, "step": 34150 }, { "epoch": 1.2413692855585434, "grad_norm": 1.1775789260864258, "learning_rate": 4.779513232759762e-05, "loss": 0.124, "step": 34160 }, { "epoch": 1.2417326840613416, "grad_norm": 23.601593017578125, "learning_rate": 4.779300098389687e-05, "loss": 0.304, "step": 34170 }, { "epoch": 1.2420960825641398, "grad_norm": 0.9336787462234497, "learning_rate": 4.77908686581268e-05, "loss": 0.1376, "step": 34180 }, { "epoch": 1.242459481066938, "grad_norm": 0.7417952418327332, "learning_rate": 4.77887353503793e-05, "loss": 0.1208, "step": 34190 }, { "epoch": 1.2428228795697362, "grad_norm": 1.48567795753479, "learning_rate": 4.778660106074626e-05, "loss": 0.1198, "step": 34200 }, { "epoch": 1.2428228795697362, "eval_loss": 0.3608033359050751, "eval_runtime": 180.8757, "eval_samples_per_second": 40.989, "eval_steps_per_second": 5.125, "eval_wer": 0.18227530996423838, "step": 34200 }, { "epoch": 1.2431862780725345, "grad_norm": 4.077025890350342, "learning_rate": 4.778446578931967e-05, "loss": 0.13, "step": 34210 }, { "epoch": 1.2435496765753324, "grad_norm": 1.267830729484558, "learning_rate": 4.7782329536191504e-05, "loss": 0.183, "step": 34220 }, { "epoch": 1.2439130750781306, "grad_norm": 0.9263830780982971, "learning_rate": 4.778019230145383e-05, "loss": 0.135, "step": 34230 }, { "epoch": 1.2442764735809289, "grad_norm": 1.3920031785964966, "learning_rate": 4.777805408519872e-05, "loss": 0.1485, "step": 34240 }, { "epoch": 1.244639872083727, "grad_norm": 1.263641357421875, "learning_rate": 4.7775914887518306e-05, "loss": 0.1503, "step": 34250 }, { "epoch": 1.2450032705865253, "grad_norm": 1.514445185661316, "learning_rate": 4.777377470850475e-05, 
"loss": 0.1604, "step": 34260 }, { "epoch": 1.2453666690893233, "grad_norm": 0.9733619093894958, "learning_rate": 4.7771633548250266e-05, "loss": 0.1674, "step": 34270 }, { "epoch": 1.2457300675921215, "grad_norm": 1.6468124389648438, "learning_rate": 4.776949140684712e-05, "loss": 0.1229, "step": 34280 }, { "epoch": 1.2460934660949197, "grad_norm": 0.9954056739807129, "learning_rate": 4.77673482843876e-05, "loss": 0.1237, "step": 34290 }, { "epoch": 1.2464568645977179, "grad_norm": 1.7785327434539795, "learning_rate": 4.776520418096406e-05, "loss": 0.1784, "step": 34300 }, { "epoch": 1.246820263100516, "grad_norm": 1.952333688735962, "learning_rate": 4.776305909666886e-05, "loss": 0.1355, "step": 34310 }, { "epoch": 1.2471836616033143, "grad_norm": 0.7019221782684326, "learning_rate": 4.7760913031594445e-05, "loss": 0.1856, "step": 34320 }, { "epoch": 1.2475470601061123, "grad_norm": 2.3900887966156006, "learning_rate": 4.775876598583327e-05, "loss": 0.1279, "step": 34330 }, { "epoch": 1.2479104586089105, "grad_norm": 2.4521565437316895, "learning_rate": 4.7756617959477834e-05, "loss": 0.1384, "step": 34340 }, { "epoch": 1.2482738571117087, "grad_norm": 1.043819546699524, "learning_rate": 4.7754468952620704e-05, "loss": 0.1485, "step": 34350 }, { "epoch": 1.248637255614507, "grad_norm": 2.2905571460723877, "learning_rate": 4.775231896535446e-05, "loss": 0.1342, "step": 34360 }, { "epoch": 1.249000654117305, "grad_norm": 1.3930597305297852, "learning_rate": 4.7750167997771756e-05, "loss": 0.1989, "step": 34370 }, { "epoch": 1.2493640526201033, "grad_norm": 1.1254252195358276, "learning_rate": 4.7748016049965255e-05, "loss": 0.113, "step": 34380 }, { "epoch": 1.2497274511229013, "grad_norm": 0.8257030248641968, "learning_rate": 4.774586312202768e-05, "loss": 0.1212, "step": 34390 }, { "epoch": 1.2500908496256995, "grad_norm": 1.2986866235733032, "learning_rate": 4.774370921405179e-05, "loss": 0.1446, "step": 34400 }, { "epoch": 1.2504542481284977, "grad_norm": 
2.2006325721740723, "learning_rate": 4.77415543261304e-05, "loss": 0.1567, "step": 34410 }, { "epoch": 1.250817646631296, "grad_norm": 0.6778092384338379, "learning_rate": 4.7739398458356335e-05, "loss": 0.1768, "step": 34420 }, { "epoch": 1.2511810451340941, "grad_norm": 1.152696132659912, "learning_rate": 4.773724161082251e-05, "loss": 0.1024, "step": 34430 }, { "epoch": 1.251544443636892, "grad_norm": 2.375783681869507, "learning_rate": 4.7735083783621835e-05, "loss": 0.2642, "step": 34440 }, { "epoch": 1.2519078421396903, "grad_norm": 1.3765895366668701, "learning_rate": 4.77329249768473e-05, "loss": 0.1861, "step": 34450 }, { "epoch": 1.2522712406424885, "grad_norm": 1.7743607759475708, "learning_rate": 4.773076519059191e-05, "loss": 0.1429, "step": 34460 }, { "epoch": 1.2526346391452867, "grad_norm": 1.4731152057647705, "learning_rate": 4.772860442494872e-05, "loss": 0.1508, "step": 34470 }, { "epoch": 1.252998037648085, "grad_norm": 1.5612653493881226, "learning_rate": 4.7726442680010836e-05, "loss": 0.1291, "step": 34480 }, { "epoch": 1.2533614361508831, "grad_norm": 1.9972872734069824, "learning_rate": 4.77242799558714e-05, "loss": 0.1154, "step": 34490 }, { "epoch": 1.2537248346536813, "grad_norm": 0.7144235372543335, "learning_rate": 4.772211625262359e-05, "loss": 0.1793, "step": 34500 }, { "epoch": 1.2540882331564793, "grad_norm": 10.059864044189453, "learning_rate": 4.7719951570360636e-05, "loss": 0.1346, "step": 34510 }, { "epoch": 1.2544516316592775, "grad_norm": 1.0801091194152832, "learning_rate": 4.771778590917581e-05, "loss": 0.1918, "step": 34520 }, { "epoch": 1.2548150301620757, "grad_norm": 2.0628061294555664, "learning_rate": 4.771561926916242e-05, "loss": 0.1477, "step": 34530 }, { "epoch": 1.255178428664874, "grad_norm": 2.5143215656280518, "learning_rate": 4.771345165041381e-05, "loss": 0.1226, "step": 34540 }, { "epoch": 1.2555418271676722, "grad_norm": 1.197352409362793, "learning_rate": 4.7711283053023394e-05, "loss": 0.152, "step": 
34550 }, { "epoch": 1.2559052256704701, "grad_norm": 0.9427943825721741, "learning_rate": 4.7709113477084595e-05, "loss": 0.1359, "step": 34560 }, { "epoch": 1.2562686241732683, "grad_norm": 1.0930500030517578, "learning_rate": 4.770694292269089e-05, "loss": 0.1659, "step": 34570 }, { "epoch": 1.2566320226760666, "grad_norm": 0.7914316654205322, "learning_rate": 4.770477138993581e-05, "loss": 0.1224, "step": 34580 }, { "epoch": 1.2569954211788648, "grad_norm": 0.6064370274543762, "learning_rate": 4.770259887891292e-05, "loss": 0.1153, "step": 34590 }, { "epoch": 1.257358819681663, "grad_norm": 0.8653318285942078, "learning_rate": 4.770042538971581e-05, "loss": 0.1715, "step": 34600 }, { "epoch": 1.257722218184461, "grad_norm": 0.5470715761184692, "learning_rate": 4.7698250922438145e-05, "loss": 0.1447, "step": 34610 }, { "epoch": 1.2580856166872594, "grad_norm": 0.8058337569236755, "learning_rate": 4.769607547717361e-05, "loss": 0.1742, "step": 34620 }, { "epoch": 1.2584490151900574, "grad_norm": 2.5231611728668213, "learning_rate": 4.7693899054015926e-05, "loss": 1.9069, "step": 34630 }, { "epoch": 1.2588124136928556, "grad_norm": 0.603464663028717, "learning_rate": 4.7691721653058886e-05, "loss": 0.1244, "step": 34640 }, { "epoch": 1.2591758121956538, "grad_norm": 0.7844828963279724, "learning_rate": 4.76895432743963e-05, "loss": 0.3787, "step": 34650 }, { "epoch": 1.259539210698452, "grad_norm": 0.7887173295021057, "learning_rate": 4.7687363918122016e-05, "loss": 0.1268, "step": 34660 }, { "epoch": 1.2599026092012502, "grad_norm": 0.669452965259552, "learning_rate": 4.768518358432994e-05, "loss": 0.1572, "step": 34670 }, { "epoch": 1.2602660077040482, "grad_norm": 1.193303108215332, "learning_rate": 4.768300227311403e-05, "loss": 0.5754, "step": 34680 }, { "epoch": 1.2606294062068464, "grad_norm": 0.8210042715072632, "learning_rate": 4.7680819984568246e-05, "loss": 0.1372, "step": 34690 }, { "epoch": 1.2609928047096446, "grad_norm": 2.98244309425354, 
"learning_rate": 4.767863671878663e-05, "loss": 0.2028, "step": 34700 }, { "epoch": 1.2613562032124428, "grad_norm": 1.3739604949951172, "learning_rate": 4.767645247586325e-05, "loss": 0.164, "step": 34710 }, { "epoch": 1.261719601715241, "grad_norm": 0.6770296096801758, "learning_rate": 4.7674267255892226e-05, "loss": 0.2234, "step": 34720 }, { "epoch": 1.262083000218039, "grad_norm": 0.5827689170837402, "learning_rate": 4.767208105896769e-05, "loss": 0.1405, "step": 34730 }, { "epoch": 1.2624463987208372, "grad_norm": 0.7818326354026794, "learning_rate": 4.766989388518385e-05, "loss": 0.1194, "step": 34740 }, { "epoch": 1.2628097972236354, "grad_norm": 0.8514626026153564, "learning_rate": 4.7667705734634946e-05, "loss": 0.1529, "step": 34750 }, { "epoch": 1.2631731957264336, "grad_norm": 0.7973842024803162, "learning_rate": 4.766551660741525e-05, "loss": 0.1224, "step": 34760 }, { "epoch": 1.2635365942292318, "grad_norm": 1.019089937210083, "learning_rate": 4.766332650361909e-05, "loss": 0.1768, "step": 34770 }, { "epoch": 1.26389999273203, "grad_norm": 1.0458087921142578, "learning_rate": 4.766113542334082e-05, "loss": 0.1382, "step": 34780 }, { "epoch": 1.2642633912348282, "grad_norm": 1.0272470712661743, "learning_rate": 4.765894336667486e-05, "loss": 0.1272, "step": 34790 }, { "epoch": 1.2646267897376262, "grad_norm": 2.0589025020599365, "learning_rate": 4.765675033371565e-05, "loss": 0.1682, "step": 34800 }, { "epoch": 1.2646267897376262, "eval_loss": 0.3476085662841797, "eval_runtime": 181.248, "eval_samples_per_second": 40.905, "eval_steps_per_second": 5.115, "eval_wer": 0.17651169967505945, "step": 34800 }, { "epoch": 1.2649901882404244, "grad_norm": 1.1303410530090332, "learning_rate": 4.7654556324557685e-05, "loss": 0.1348, "step": 34810 }, { "epoch": 1.2653535867432226, "grad_norm": 0.799231231212616, "learning_rate": 4.765236133929549e-05, "loss": 0.1645, "step": 34820 }, { "epoch": 1.2657169852460208, "grad_norm": 1.2402738332748413, "learning_rate": 
4.765016537802364e-05, "loss": 0.1235, "step": 34830 }, { "epoch": 1.266080383748819, "grad_norm": 4.433220386505127, "learning_rate": 4.7647968440836753e-05, "loss": 0.1706, "step": 34840 }, { "epoch": 1.266443782251617, "grad_norm": 0.8201845288276672, "learning_rate": 4.764577052782949e-05, "loss": 0.1704, "step": 34850 }, { "epoch": 1.2668071807544152, "grad_norm": 1.2809802293777466, "learning_rate": 4.764357163909655e-05, "loss": 0.1368, "step": 34860 }, { "epoch": 1.2671705792572134, "grad_norm": 0.7995765209197998, "learning_rate": 4.7641371774732676e-05, "loss": 0.1773, "step": 34870 }, { "epoch": 1.2675339777600116, "grad_norm": 1.1338168382644653, "learning_rate": 4.763917093483264e-05, "loss": 0.138, "step": 34880 }, { "epoch": 1.2678973762628098, "grad_norm": 0.85684734582901, "learning_rate": 4.763696911949129e-05, "loss": 0.1387, "step": 34890 }, { "epoch": 1.2682607747656078, "grad_norm": 1.212156057357788, "learning_rate": 4.763476632880348e-05, "loss": 0.1377, "step": 34900 }, { "epoch": 1.2686241732684063, "grad_norm": 2.2248573303222656, "learning_rate": 4.7632562562864125e-05, "loss": 0.1295, "step": 34910 }, { "epoch": 1.2689875717712042, "grad_norm": 1.2567734718322754, "learning_rate": 4.763035782176818e-05, "loss": 0.2109, "step": 34920 }, { "epoch": 1.2693509702740025, "grad_norm": 0.9226292967796326, "learning_rate": 4.7628152105610624e-05, "loss": 0.1315, "step": 34930 }, { "epoch": 1.2697143687768007, "grad_norm": 0.9735257029533386, "learning_rate": 4.762594541448651e-05, "loss": 0.1139, "step": 34940 }, { "epoch": 1.2700777672795989, "grad_norm": 3.8411102294921875, "learning_rate": 4.7623737748490914e-05, "loss": 0.5175, "step": 34950 }, { "epoch": 1.270441165782397, "grad_norm": 1.0780479907989502, "learning_rate": 4.762152910771895e-05, "loss": 0.1226, "step": 34960 }, { "epoch": 1.270804564285195, "grad_norm": 0.7885404229164124, "learning_rate": 4.761931949226579e-05, "loss": 0.1821, "step": 34970 }, { "epoch": 
1.2711679627879933, "grad_norm": 3.387125015258789, "learning_rate": 4.761710890222663e-05, "loss": 0.1608, "step": 34980 }, { "epoch": 1.2715313612907915, "grad_norm": 0.9549399614334106, "learning_rate": 4.761489733769672e-05, "loss": 0.1071, "step": 34990 }, { "epoch": 1.2718947597935897, "grad_norm": 1.3453798294067383, "learning_rate": 4.761268479877134e-05, "loss": 0.1466, "step": 35000 }, { "epoch": 1.2722581582963879, "grad_norm": 0.9733071327209473, "learning_rate": 4.761047128554584e-05, "loss": 0.1646, "step": 35010 }, { "epoch": 1.2726215567991859, "grad_norm": 0.5874946713447571, "learning_rate": 4.760825679811557e-05, "loss": 0.2291, "step": 35020 }, { "epoch": 1.272984955301984, "grad_norm": 1.9506993293762207, "learning_rate": 4.7606041336575965e-05, "loss": 0.2256, "step": 35030 }, { "epoch": 1.2733483538047823, "grad_norm": 1.2343640327453613, "learning_rate": 4.760382490102247e-05, "loss": 0.133, "step": 35040 }, { "epoch": 1.2737117523075805, "grad_norm": 0.38314980268478394, "learning_rate": 4.7601607491550574e-05, "loss": 0.1682, "step": 35050 }, { "epoch": 1.2740751508103787, "grad_norm": 0.928424596786499, "learning_rate": 4.7599389108255846e-05, "loss": 0.0998, "step": 35060 }, { "epoch": 1.274438549313177, "grad_norm": 1.730793833732605, "learning_rate": 4.7597169751233833e-05, "loss": 0.1682, "step": 35070 }, { "epoch": 1.2748019478159751, "grad_norm": 1.0121824741363525, "learning_rate": 4.7594949420580184e-05, "loss": 0.1093, "step": 35080 }, { "epoch": 1.275165346318773, "grad_norm": 1.4093934297561646, "learning_rate": 4.759272811639055e-05, "loss": 0.156, "step": 35090 }, { "epoch": 1.2755287448215713, "grad_norm": 0.9377339482307434, "learning_rate": 4.759050583876066e-05, "loss": 0.1577, "step": 35100 }, { "epoch": 1.2758921433243695, "grad_norm": 1.6535552740097046, "learning_rate": 4.7588282587786246e-05, "loss": 0.1117, "step": 35110 }, { "epoch": 1.2762555418271677, "grad_norm": 0.7115573287010193, "learning_rate": 
4.75860583635631e-05, "loss": 0.1443, "step": 35120 }, { "epoch": 1.276618940329966, "grad_norm": 1.6995899677276611, "learning_rate": 4.7583833166187065e-05, "loss": 0.126, "step": 35130 }, { "epoch": 1.276982338832764, "grad_norm": 1.9350817203521729, "learning_rate": 4.7581606995754005e-05, "loss": 0.1224, "step": 35140 }, { "epoch": 1.2773457373355621, "grad_norm": 1.8662594556808472, "learning_rate": 4.757937985235985e-05, "loss": 0.1735, "step": 35150 }, { "epoch": 1.2777091358383603, "grad_norm": 3.6977062225341797, "learning_rate": 4.7577151736100554e-05, "loss": 0.162, "step": 35160 }, { "epoch": 1.2780725343411585, "grad_norm": 0.5185838341712952, "learning_rate": 4.757492264707213e-05, "loss": 0.1593, "step": 35170 }, { "epoch": 1.2784359328439567, "grad_norm": 0.6665944457054138, "learning_rate": 4.7572692585370596e-05, "loss": 0.1144, "step": 35180 }, { "epoch": 1.2787993313467547, "grad_norm": 1.1617207527160645, "learning_rate": 4.757046155109206e-05, "loss": 0.1162, "step": 35190 }, { "epoch": 1.2791627298495531, "grad_norm": 1.3124502897262573, "learning_rate": 4.756822954433264e-05, "loss": 0.1441, "step": 35200 }, { "epoch": 1.2795261283523511, "grad_norm": 1.4907313585281372, "learning_rate": 4.756599656518851e-05, "loss": 0.1275, "step": 35210 }, { "epoch": 1.2798895268551493, "grad_norm": 1.0033677816390991, "learning_rate": 4.756376261375587e-05, "loss": 0.1795, "step": 35220 }, { "epoch": 1.2802529253579475, "grad_norm": 0.9439616799354553, "learning_rate": 4.756152769013099e-05, "loss": 0.1158, "step": 35230 }, { "epoch": 1.2806163238607458, "grad_norm": 2.641103506088257, "learning_rate": 4.755929179441016e-05, "loss": 0.1102, "step": 35240 }, { "epoch": 1.280979722363544, "grad_norm": 1.1864644289016724, "learning_rate": 4.7557054926689694e-05, "loss": 0.136, "step": 35250 }, { "epoch": 1.281343120866342, "grad_norm": 0.7801216244697571, "learning_rate": 4.755481708706601e-05, "loss": 0.1436, "step": 35260 }, { "epoch": 
1.2817065193691402, "grad_norm": 0.5771633386611938, "learning_rate": 4.7552578275635494e-05, "loss": 0.1687, "step": 35270 }, { "epoch": 1.2820699178719384, "grad_norm": 1.247269630432129, "learning_rate": 4.755033849249463e-05, "loss": 0.1168, "step": 35280 }, { "epoch": 1.2824333163747366, "grad_norm": 1.3845196962356567, "learning_rate": 4.7548097737739905e-05, "loss": 0.1122, "step": 35290 }, { "epoch": 1.2827967148775348, "grad_norm": 1.6793280839920044, "learning_rate": 4.754585601146788e-05, "loss": 0.168, "step": 35300 }, { "epoch": 1.2831601133803328, "grad_norm": 0.8970069289207458, "learning_rate": 4.754361331377514e-05, "loss": 0.1172, "step": 35310 }, { "epoch": 1.283523511883131, "grad_norm": 0.6038461923599243, "learning_rate": 4.7541369644758315e-05, "loss": 0.1962, "step": 35320 }, { "epoch": 1.2838869103859292, "grad_norm": 0.9473531246185303, "learning_rate": 4.753912500451407e-05, "loss": 0.1288, "step": 35330 }, { "epoch": 1.2842503088887274, "grad_norm": 1.5339337587356567, "learning_rate": 4.753687939313912e-05, "loss": 0.1644, "step": 35340 }, { "epoch": 1.2846137073915256, "grad_norm": 0.9454206228256226, "learning_rate": 4.753463281073023e-05, "loss": 0.1319, "step": 35350 }, { "epoch": 1.2849771058943238, "grad_norm": 4.068907260894775, "learning_rate": 4.753238525738419e-05, "loss": 0.1241, "step": 35360 }, { "epoch": 1.285340504397122, "grad_norm": 1.1616491079330444, "learning_rate": 4.753013673319784e-05, "loss": 0.2266, "step": 35370 }, { "epoch": 1.28570390289992, "grad_norm": 1.232142448425293, "learning_rate": 4.7527887238268065e-05, "loss": 0.1438, "step": 35380 }, { "epoch": 1.2860673014027182, "grad_norm": 0.5053390860557556, "learning_rate": 4.7525636772691775e-05, "loss": 0.1214, "step": 35390 }, { "epoch": 1.2864306999055164, "grad_norm": 0.9241679310798645, "learning_rate": 4.752338533656594e-05, "loss": 0.3857, "step": 35400 }, { "epoch": 1.2864306999055164, "eval_loss": 0.33814677596092224, "eval_runtime": 180.0316, 
"eval_samples_per_second": 41.182, "eval_steps_per_second": 5.149, "eval_wer": 0.1815401092816817, "step": 35400 }, { "epoch": 1.2867940984083146, "grad_norm": 1.1576555967330933, "learning_rate": 4.7521132929987575e-05, "loss": 0.1429, "step": 35410 }, { "epoch": 1.2871574969111128, "grad_norm": 0.4765828549861908, "learning_rate": 4.751887955305372e-05, "loss": 0.1976, "step": 35420 }, { "epoch": 1.2875208954139108, "grad_norm": 0.8831065893173218, "learning_rate": 4.751662520586148e-05, "loss": 0.1543, "step": 35430 }, { "epoch": 1.287884293916709, "grad_norm": 1.9074327945709229, "learning_rate": 4.751436988850796e-05, "loss": 0.133, "step": 35440 }, { "epoch": 1.2882476924195072, "grad_norm": 1.6553431749343872, "learning_rate": 4.7512113601090356e-05, "loss": 0.1731, "step": 35450 }, { "epoch": 1.2886110909223054, "grad_norm": 1.2409085035324097, "learning_rate": 4.750985634370587e-05, "loss": 0.119, "step": 35460 }, { "epoch": 1.2889744894251036, "grad_norm": 4.575315475463867, "learning_rate": 4.7507598116451763e-05, "loss": 0.2287, "step": 35470 }, { "epoch": 1.2893378879279016, "grad_norm": 1.1517298221588135, "learning_rate": 4.7505338919425334e-05, "loss": 0.1263, "step": 35480 }, { "epoch": 1.2897012864307, "grad_norm": 0.8862209320068359, "learning_rate": 4.7503078752723935e-05, "loss": 0.9246, "step": 35490 }, { "epoch": 1.290064684933498, "grad_norm": 2.2056024074554443, "learning_rate": 4.750081761644493e-05, "loss": 0.1316, "step": 35500 }, { "epoch": 1.2904280834362962, "grad_norm": 1.9249043464660645, "learning_rate": 4.749855551068576e-05, "loss": 0.1341, "step": 35510 }, { "epoch": 1.2907914819390944, "grad_norm": 0.4050438404083252, "learning_rate": 4.749629243554387e-05, "loss": 0.1876, "step": 35520 }, { "epoch": 1.2911548804418926, "grad_norm": 0.8166261911392212, "learning_rate": 4.74940283911168e-05, "loss": 0.1141, "step": 35530 }, { "epoch": 1.2915182789446908, "grad_norm": 0.4988127648830414, "learning_rate": 4.749176337750206e-05, 
"loss": 0.1548, "step": 35540 }, { "epoch": 1.2918816774474888, "grad_norm": 1.80185067653656, "learning_rate": 4.748949739479728e-05, "loss": 0.1658, "step": 35550 }, { "epoch": 1.292245075950287, "grad_norm": 0.9549736380577087, "learning_rate": 4.748723044310006e-05, "loss": 0.1373, "step": 35560 }, { "epoch": 1.2926084744530852, "grad_norm": 1.3219162225723267, "learning_rate": 4.74849625225081e-05, "loss": 0.2101, "step": 35570 }, { "epoch": 1.2929718729558835, "grad_norm": 1.4803717136383057, "learning_rate": 4.74826936331191e-05, "loss": 0.121, "step": 35580 }, { "epoch": 1.2933352714586817, "grad_norm": 2.6438705921173096, "learning_rate": 4.7480423775030834e-05, "loss": 0.13, "step": 35590 }, { "epoch": 1.2936986699614796, "grad_norm": 2.5969061851501465, "learning_rate": 4.7478152948341094e-05, "loss": 0.165, "step": 35600 }, { "epoch": 1.2940620684642778, "grad_norm": 0.6594710946083069, "learning_rate": 4.74758811531477e-05, "loss": 0.3067, "step": 35610 }, { "epoch": 1.294425466967076, "grad_norm": 1.1262328624725342, "learning_rate": 4.747360838954858e-05, "loss": 0.1782, "step": 35620 }, { "epoch": 1.2947888654698743, "grad_norm": 0.9950854182243347, "learning_rate": 4.747133465764163e-05, "loss": 0.1254, "step": 35630 }, { "epoch": 1.2951522639726725, "grad_norm": 0.7515049576759338, "learning_rate": 4.746905995752482e-05, "loss": 0.144, "step": 35640 }, { "epoch": 1.2955156624754707, "grad_norm": 0.9141899347305298, "learning_rate": 4.746678428929616e-05, "loss": 0.1263, "step": 35650 }, { "epoch": 1.2958790609782689, "grad_norm": 1.5138301849365234, "learning_rate": 4.74645076530537e-05, "loss": 0.143, "step": 35660 }, { "epoch": 1.2962424594810669, "grad_norm": 0.6234374046325684, "learning_rate": 4.746223004889554e-05, "loss": 0.1492, "step": 35670 }, { "epoch": 1.296605857983865, "grad_norm": 0.6530427932739258, "learning_rate": 4.745995147691981e-05, "loss": 0.1579, "step": 35680 }, { "epoch": 1.2969692564866633, "grad_norm": 
0.9193394780158997, "learning_rate": 4.745767193722468e-05, "loss": 0.141, "step": 35690 }, { "epoch": 1.2973326549894615, "grad_norm": 0.8602085709571838, "learning_rate": 4.745539142990837e-05, "loss": 0.1302, "step": 35700 }, { "epoch": 1.2976960534922597, "grad_norm": 0.7672144174575806, "learning_rate": 4.745310995506914e-05, "loss": 0.1632, "step": 35710 }, { "epoch": 1.2980594519950577, "grad_norm": 0.728992760181427, "learning_rate": 4.74508275128053e-05, "loss": 0.161, "step": 35720 }, { "epoch": 1.2984228504978559, "grad_norm": 0.923298716545105, "learning_rate": 4.7448544103215164e-05, "loss": 0.125, "step": 35730 }, { "epoch": 1.298786249000654, "grad_norm": 0.6956040859222412, "learning_rate": 4.744625972639715e-05, "loss": 0.1071, "step": 35740 }, { "epoch": 1.2991496475034523, "grad_norm": 0.7756535410881042, "learning_rate": 4.7443974382449664e-05, "loss": 0.1452, "step": 35750 }, { "epoch": 1.2995130460062505, "grad_norm": 1.3024572134017944, "learning_rate": 4.7441688071471174e-05, "loss": 0.1328, "step": 35760 }, { "epoch": 1.2998764445090485, "grad_norm": 1.8605810403823853, "learning_rate": 4.7439400793560196e-05, "loss": 0.2063, "step": 35770 }, { "epoch": 1.300239843011847, "grad_norm": 1.6558598279953003, "learning_rate": 4.743711254881528e-05, "loss": 0.1282, "step": 35780 }, { "epoch": 1.300603241514645, "grad_norm": 0.8223969340324402, "learning_rate": 4.743482333733501e-05, "loss": 0.1348, "step": 35790 }, { "epoch": 1.300966640017443, "grad_norm": 1.5702069997787476, "learning_rate": 4.743253315921803e-05, "loss": 0.1656, "step": 35800 }, { "epoch": 1.3013300385202413, "grad_norm": 0.6384185552597046, "learning_rate": 4.743024201456301e-05, "loss": 0.1369, "step": 35810 }, { "epoch": 1.3016934370230395, "grad_norm": 1.1003926992416382, "learning_rate": 4.7427949903468667e-05, "loss": 0.1606, "step": 35820 }, { "epoch": 1.3020568355258377, "grad_norm": 1.1869399547576904, "learning_rate": 4.742565682603376e-05, "loss": 0.131, "step": 
35830 }, { "epoch": 1.3024202340286357, "grad_norm": 1.0088342428207397, "learning_rate": 4.7423362782357096e-05, "loss": 0.1183, "step": 35840 }, { "epoch": 1.302783632531434, "grad_norm": 0.8730582594871521, "learning_rate": 4.7421067772537506e-05, "loss": 0.1228, "step": 35850 }, { "epoch": 1.3031470310342321, "grad_norm": 1.3678339719772339, "learning_rate": 4.7418771796673886e-05, "loss": 0.119, "step": 35860 }, { "epoch": 1.3035104295370303, "grad_norm": 0.5349250435829163, "learning_rate": 4.7416474854865154e-05, "loss": 0.1571, "step": 35870 }, { "epoch": 1.3038738280398285, "grad_norm": 1.4240535497665405, "learning_rate": 4.741417694721028e-05, "loss": 0.1221, "step": 35880 }, { "epoch": 1.3042372265426265, "grad_norm": 2.4048521518707275, "learning_rate": 4.741187807380827e-05, "loss": 0.1265, "step": 35890 }, { "epoch": 1.3046006250454247, "grad_norm": 0.9620640277862549, "learning_rate": 4.740957823475818e-05, "loss": 0.1601, "step": 35900 }, { "epoch": 1.304964023548223, "grad_norm": 2.351884603500366, "learning_rate": 4.740727743015909e-05, "loss": 0.1134, "step": 35910 }, { "epoch": 1.3053274220510211, "grad_norm": 0.6285625100135803, "learning_rate": 4.7404975660110146e-05, "loss": 0.4726, "step": 35920 }, { "epoch": 1.3056908205538194, "grad_norm": 0.6645105481147766, "learning_rate": 4.740267292471051e-05, "loss": 0.1163, "step": 35930 }, { "epoch": 1.3060542190566176, "grad_norm": 1.6493772268295288, "learning_rate": 4.7400369224059415e-05, "loss": 1.652, "step": 35940 }, { "epoch": 1.3064176175594158, "grad_norm": 0.6978940367698669, "learning_rate": 4.739806455825611e-05, "loss": 0.1534, "step": 35950 }, { "epoch": 1.3067810160622138, "grad_norm": 3.241497039794922, "learning_rate": 4.739575892739989e-05, "loss": 0.1196, "step": 35960 }, { "epoch": 1.307144414565012, "grad_norm": 1.1746867895126343, "learning_rate": 4.739345233159011e-05, "loss": 0.1576, "step": 35970 }, { "epoch": 1.3075078130678102, "grad_norm": 0.9227213859558105, 
"learning_rate": 4.7391144770926144e-05, "loss": 0.5262, "step": 35980 }, { "epoch": 1.3078712115706084, "grad_norm": 2.3250370025634766, "learning_rate": 4.738883624550741e-05, "loss": 0.1231, "step": 35990 }, { "epoch": 1.3082346100734066, "grad_norm": 1.344184398651123, "learning_rate": 4.738652675543339e-05, "loss": 0.1508, "step": 36000 }, { "epoch": 1.3082346100734066, "eval_loss": 0.3749592900276184, "eval_runtime": 180.4784, "eval_samples_per_second": 41.08, "eval_steps_per_second": 5.136, "eval_wer": 0.17836331620890591, "step": 36000 }, { "epoch": 1.3085980085762046, "grad_norm": 2.1917757987976074, "learning_rate": 4.738421630080358e-05, "loss": 0.1232, "step": 36010 }, { "epoch": 1.3089614070790028, "grad_norm": 0.7760763764381409, "learning_rate": 4.738190488171753e-05, "loss": 0.1965, "step": 36020 }, { "epoch": 1.309324805581801, "grad_norm": 1.6439956426620483, "learning_rate": 4.737959249827484e-05, "loss": 0.13, "step": 36030 }, { "epoch": 1.3096882040845992, "grad_norm": 1.612452507019043, "learning_rate": 4.7377279150575137e-05, "loss": 0.1335, "step": 36040 }, { "epoch": 1.3100516025873974, "grad_norm": 0.9884024858474731, "learning_rate": 4.737496483871809e-05, "loss": 1.4829, "step": 36050 }, { "epoch": 1.3104150010901954, "grad_norm": 0.8376805782318115, "learning_rate": 4.737264956280342e-05, "loss": 0.1502, "step": 36060 }, { "epoch": 1.3107783995929938, "grad_norm": 0.7544919848442078, "learning_rate": 4.7370333322930884e-05, "loss": 0.1799, "step": 36070 }, { "epoch": 1.3111417980957918, "grad_norm": 1.0415360927581787, "learning_rate": 4.736801611920028e-05, "loss": 0.133, "step": 36080 }, { "epoch": 1.31150519659859, "grad_norm": 1.3284482955932617, "learning_rate": 4.736569795171144e-05, "loss": 0.6017, "step": 36090 }, { "epoch": 1.3118685951013882, "grad_norm": 0.766444742679596, "learning_rate": 4.736337882056425e-05, "loss": 0.1629, "step": 36100 }, { "epoch": 1.3122319936041864, "grad_norm": 1.8423712253570557, "learning_rate": 
4.7361058725858645e-05, "loss": 0.1417, "step": 36110 }, { "epoch": 1.3125953921069846, "grad_norm": 0.7671094536781311, "learning_rate": 4.735873766769458e-05, "loss": 0.1498, "step": 36120 }, { "epoch": 1.3129587906097826, "grad_norm": 0.7939559817314148, "learning_rate": 4.735641564617206e-05, "loss": 0.1101, "step": 36130 }, { "epoch": 1.3133221891125808, "grad_norm": 1.1651771068572998, "learning_rate": 4.735409266139113e-05, "loss": 0.1478, "step": 36140 }, { "epoch": 1.313685587615379, "grad_norm": 0.3433835506439209, "learning_rate": 4.735176871345188e-05, "loss": 0.1775, "step": 36150 }, { "epoch": 1.3140489861181772, "grad_norm": 1.2903847694396973, "learning_rate": 4.734944380245445e-05, "loss": 0.129, "step": 36160 }, { "epoch": 1.3144123846209754, "grad_norm": 1.4051779508590698, "learning_rate": 4.734711792849901e-05, "loss": 0.2257, "step": 36170 }, { "epoch": 1.3147757831237734, "grad_norm": 0.7781183123588562, "learning_rate": 4.734479109168577e-05, "loss": 0.1072, "step": 36180 }, { "epoch": 1.3151391816265716, "grad_norm": 3.805746078491211, "learning_rate": 4.734246329211498e-05, "loss": 0.1327, "step": 36190 }, { "epoch": 1.3155025801293698, "grad_norm": 1.0254390239715576, "learning_rate": 4.734013452988694e-05, "loss": 0.1479, "step": 36200 }, { "epoch": 1.315865978632168, "grad_norm": 1.2095835208892822, "learning_rate": 4.7337804805101994e-05, "loss": 0.1286, "step": 36210 }, { "epoch": 1.3162293771349662, "grad_norm": 0.7073403596878052, "learning_rate": 4.733547411786052e-05, "loss": 0.1385, "step": 36220 }, { "epoch": 1.3165927756377644, "grad_norm": 6.6172709465026855, "learning_rate": 4.7333142468262924e-05, "loss": 0.1202, "step": 36230 }, { "epoch": 1.3169561741405627, "grad_norm": 0.9846429228782654, "learning_rate": 4.733080985640969e-05, "loss": 0.1202, "step": 36240 }, { "epoch": 1.3173195726433606, "grad_norm": 1.7326525449752808, "learning_rate": 4.7328476282401313e-05, "loss": 0.1269, "step": 36250 }, { "epoch": 
1.3176829711461588, "grad_norm": 1.7362529039382935, "learning_rate": 4.7326141746338334e-05, "loss": 0.139, "step": 36260 }, { "epoch": 1.318046369648957, "grad_norm": 0.5070465207099915, "learning_rate": 4.732380624832135e-05, "loss": 0.1963, "step": 36270 }, { "epoch": 1.3184097681517553, "grad_norm": 2.0635170936584473, "learning_rate": 4.7321469788450976e-05, "loss": 0.1251, "step": 36280 }, { "epoch": 1.3187731666545535, "grad_norm": 17.00756072998047, "learning_rate": 4.731913236682789e-05, "loss": 0.4956, "step": 36290 }, { "epoch": 1.3191365651573514, "grad_norm": 0.4699925184249878, "learning_rate": 4.7316793983552806e-05, "loss": 0.1397, "step": 36300 }, { "epoch": 1.3194999636601497, "grad_norm": 1.5378074645996094, "learning_rate": 4.731445463872647e-05, "loss": 0.1444, "step": 36310 }, { "epoch": 1.3198633621629479, "grad_norm": 0.6688012480735779, "learning_rate": 4.731211433244967e-05, "loss": 0.2393, "step": 36320 }, { "epoch": 1.320226760665746, "grad_norm": 1.1277016401290894, "learning_rate": 4.7309773064823274e-05, "loss": 0.6115, "step": 36330 }, { "epoch": 1.3205901591685443, "grad_norm": 0.7446697354316711, "learning_rate": 4.7307430835948114e-05, "loss": 0.1687, "step": 36340 }, { "epoch": 1.3209535576713423, "grad_norm": 1.6629223823547363, "learning_rate": 4.730508764592514e-05, "loss": 0.1681, "step": 36350 }, { "epoch": 1.3213169561741407, "grad_norm": 1.4008903503417969, "learning_rate": 4.73027434948553e-05, "loss": 0.1343, "step": 36360 }, { "epoch": 1.3216803546769387, "grad_norm": 0.6811515092849731, "learning_rate": 4.7300398382839586e-05, "loss": 0.186, "step": 36370 }, { "epoch": 1.3220437531797369, "grad_norm": 0.822640597820282, "learning_rate": 4.7298052309979055e-05, "loss": 0.1336, "step": 36380 }, { "epoch": 1.322407151682535, "grad_norm": 1.145392894744873, "learning_rate": 4.729570527637479e-05, "loss": 0.1267, "step": 36390 }, { "epoch": 1.3227705501853333, "grad_norm": 1.6381548643112183, "learning_rate": 
4.729335728212792e-05, "loss": 0.1213, "step": 36400 }, { "epoch": 1.3231339486881315, "grad_norm": 1.882562518119812, "learning_rate": 4.729100832733959e-05, "loss": 0.1384, "step": 36410 }, { "epoch": 1.3234973471909295, "grad_norm": 0.7675313949584961, "learning_rate": 4.728865841211103e-05, "loss": 0.1554, "step": 36420 }, { "epoch": 1.3238607456937277, "grad_norm": 0.900806725025177, "learning_rate": 4.728630753654349e-05, "loss": 0.1174, "step": 36430 }, { "epoch": 1.324224144196526, "grad_norm": 1.4791582822799683, "learning_rate": 4.7283955700738235e-05, "loss": 0.1449, "step": 36440 }, { "epoch": 1.324587542699324, "grad_norm": 0.5582447052001953, "learning_rate": 4.728160290479663e-05, "loss": 0.1754, "step": 36450 }, { "epoch": 1.3249509412021223, "grad_norm": 2.113154411315918, "learning_rate": 4.727924914882002e-05, "loss": 0.1091, "step": 36460 }, { "epoch": 1.3253143397049203, "grad_norm": 0.9747204184532166, "learning_rate": 4.727689443290985e-05, "loss": 0.144, "step": 36470 }, { "epoch": 1.3256777382077185, "grad_norm": 0.6118887662887573, "learning_rate": 4.727453875716755e-05, "loss": 0.1144, "step": 36480 }, { "epoch": 1.3260411367105167, "grad_norm": 1.146438717842102, "learning_rate": 4.727218212169464e-05, "loss": 0.1138, "step": 36490 }, { "epoch": 1.326404535213315, "grad_norm": 1.2453789710998535, "learning_rate": 4.7269824526592636e-05, "loss": 0.1642, "step": 36500 }, { "epoch": 1.3267679337161131, "grad_norm": 2.001384973526001, "learning_rate": 4.726746597196313e-05, "loss": 0.1317, "step": 36510 }, { "epoch": 1.3271313322189113, "grad_norm": 0.4389583170413971, "learning_rate": 4.726510645790775e-05, "loss": 0.1477, "step": 36520 }, { "epoch": 1.3274947307217095, "grad_norm": 0.5064995884895325, "learning_rate": 4.726274598452815e-05, "loss": 0.1208, "step": 36530 }, { "epoch": 1.3278581292245075, "grad_norm": 1.937470555305481, "learning_rate": 4.726038455192603e-05, "loss": 0.1425, "step": 36540 }, { "epoch": 1.3282215277273057, 
"grad_norm": 0.7836539149284363, "learning_rate": 4.725802216020315e-05, "loss": 0.1942, "step": 36550 }, { "epoch": 1.328584926230104, "grad_norm": 0.7476559281349182, "learning_rate": 4.725565880946129e-05, "loss": 0.1269, "step": 36560 }, { "epoch": 1.3289483247329021, "grad_norm": 1.584917426109314, "learning_rate": 4.725329449980227e-05, "loss": 0.1939, "step": 36570 }, { "epoch": 1.3293117232357003, "grad_norm": 1.6503409147262573, "learning_rate": 4.7250929231327975e-05, "loss": 0.1281, "step": 36580 }, { "epoch": 1.3296751217384983, "grad_norm": 1.2938586473464966, "learning_rate": 4.72485630041403e-05, "loss": 0.1505, "step": 36590 }, { "epoch": 1.3300385202412965, "grad_norm": 0.7093682289123535, "learning_rate": 4.724619581834121e-05, "loss": 0.1517, "step": 36600 }, { "epoch": 1.3300385202412965, "eval_loss": 0.3578657805919647, "eval_runtime": 181.1919, "eval_samples_per_second": 40.918, "eval_steps_per_second": 5.116, "eval_wer": 0.17275400729754753, "step": 36600 }, { "epoch": 1.3304019187440947, "grad_norm": 2.326840877532959, "learning_rate": 4.72438276740327e-05, "loss": 0.1333, "step": 36610 }, { "epoch": 1.330765317246893, "grad_norm": 1.0283209085464478, "learning_rate": 4.7241458571316794e-05, "loss": 0.177, "step": 36620 }, { "epoch": 1.3311287157496912, "grad_norm": 1.416473388671875, "learning_rate": 4.7239325559518525e-05, "loss": 3.9838, "step": 36630 }, { "epoch": 1.3314921142524891, "grad_norm": 0.40842917561531067, "learning_rate": 4.7236954636109833e-05, "loss": 0.1116, "step": 36640 }, { "epoch": 1.3318555127552876, "grad_norm": 2.1062543392181396, "learning_rate": 4.7234582754589886e-05, "loss": 0.121, "step": 36650 }, { "epoch": 1.3322189112580856, "grad_norm": 0.6768646836280823, "learning_rate": 4.723220991506088e-05, "loss": 0.1271, "step": 36660 }, { "epoch": 1.3325823097608838, "grad_norm": 1.1778359413146973, "learning_rate": 4.7229836117625044e-05, "loss": 0.1289, "step": 36670 }, { "epoch": 1.332945708263682, "grad_norm": 
2.1446762084960938, "learning_rate": 4.7227461362384664e-05, "loss": 0.163, "step": 36680 }, { "epoch": 1.3333091067664802, "grad_norm": 0.9764724373817444, "learning_rate": 4.7225085649442063e-05, "loss": 0.1131, "step": 36690 }, { "epoch": 1.3336725052692784, "grad_norm": 0.6134273409843445, "learning_rate": 4.72227089788996e-05, "loss": 0.1269, "step": 36700 }, { "epoch": 1.3340359037720764, "grad_norm": 0.8482096195220947, "learning_rate": 4.722033135085967e-05, "loss": 0.1211, "step": 36710 }, { "epoch": 1.3343993022748746, "grad_norm": 1.1198707818984985, "learning_rate": 4.7217952765424734e-05, "loss": 0.1681, "step": 36720 }, { "epoch": 1.3347627007776728, "grad_norm": 1.5771534442901611, "learning_rate": 4.721557322269725e-05, "loss": 0.7935, "step": 36730 }, { "epoch": 1.335126099280471, "grad_norm": 1.8235740661621094, "learning_rate": 4.721319272277977e-05, "loss": 0.1249, "step": 36740 }, { "epoch": 1.3354894977832692, "grad_norm": 2.7422354221343994, "learning_rate": 4.7210811265774845e-05, "loss": 0.1637, "step": 36750 }, { "epoch": 1.3358528962860672, "grad_norm": 3.130943775177002, "learning_rate": 4.720842885178509e-05, "loss": 0.1437, "step": 36760 }, { "epoch": 1.3362162947888654, "grad_norm": 0.48167362809181213, "learning_rate": 4.720604548091316e-05, "loss": 0.2081, "step": 36770 }, { "epoch": 1.3365796932916636, "grad_norm": 1.3520551919937134, "learning_rate": 4.720366115326174e-05, "loss": 0.1566, "step": 36780 }, { "epoch": 1.3369430917944618, "grad_norm": 0.9920271635055542, "learning_rate": 4.720127586893355e-05, "loss": 0.0995, "step": 36790 }, { "epoch": 1.33730649029726, "grad_norm": 0.6343932747840881, "learning_rate": 4.7198889628031376e-05, "loss": 0.6462, "step": 36800 }, { "epoch": 1.3376698888000582, "grad_norm": 0.8678078651428223, "learning_rate": 4.719650243065804e-05, "loss": 0.1224, "step": 36810 }, { "epoch": 1.3380332873028564, "grad_norm": 1.1642274856567383, "learning_rate": 4.719411427691639e-05, "loss": 0.1438, 
"step": 36820 }, { "epoch": 1.3383966858056544, "grad_norm": 0.6214116811752319, "learning_rate": 4.719172516690932e-05, "loss": 0.1172, "step": 36830 }, { "epoch": 1.3387600843084526, "grad_norm": 0.9811148643493652, "learning_rate": 4.7189335100739764e-05, "loss": 0.1229, "step": 36840 }, { "epoch": 1.3391234828112508, "grad_norm": 0.6965753436088562, "learning_rate": 4.718694407851072e-05, "loss": 0.1432, "step": 36850 }, { "epoch": 1.339486881314049, "grad_norm": 2.3423101902008057, "learning_rate": 4.718455210032519e-05, "loss": 0.1578, "step": 36860 }, { "epoch": 1.3398502798168472, "grad_norm": 1.1584868431091309, "learning_rate": 4.718215916628625e-05, "loss": 0.1792, "step": 36870 }, { "epoch": 1.3402136783196452, "grad_norm": 1.1057560443878174, "learning_rate": 4.717976527649698e-05, "loss": 0.129, "step": 36880 }, { "epoch": 1.3405770768224434, "grad_norm": 1.6027841567993164, "learning_rate": 4.7177370431060554e-05, "loss": 0.0984, "step": 36890 }, { "epoch": 1.3409404753252416, "grad_norm": 1.4970412254333496, "learning_rate": 4.717497463008014e-05, "loss": 0.1764, "step": 36900 }, { "epoch": 1.3413038738280398, "grad_norm": 7.802013397216797, "learning_rate": 4.717257787365897e-05, "loss": 0.1511, "step": 36910 }, { "epoch": 1.341667272330838, "grad_norm": 0.6998898983001709, "learning_rate": 4.717018016190031e-05, "loss": 0.1874, "step": 36920 }, { "epoch": 1.342030670833636, "grad_norm": 1.1323654651641846, "learning_rate": 4.716778149490747e-05, "loss": 0.0968, "step": 36930 }, { "epoch": 1.3423940693364345, "grad_norm": 0.9213439226150513, "learning_rate": 4.716538187278379e-05, "loss": 0.11, "step": 36940 }, { "epoch": 1.3427574678392324, "grad_norm": 3.241694688796997, "learning_rate": 4.7162981295632676e-05, "loss": 0.1695, "step": 36950 }, { "epoch": 1.3431208663420307, "grad_norm": 2.2264153957366943, "learning_rate": 4.716057976355755e-05, "loss": 0.1568, "step": 36960 }, { "epoch": 1.3434842648448289, "grad_norm": 2.439816474914551, 
"learning_rate": 4.715817727666189e-05, "loss": 0.2052, "step": 36970 }, { "epoch": 1.343847663347627, "grad_norm": 0.8145691752433777, "learning_rate": 4.715577383504921e-05, "loss": 0.1501, "step": 36980 }, { "epoch": 1.3442110618504253, "grad_norm": 0.8044644594192505, "learning_rate": 4.7153369438823074e-05, "loss": 0.1088, "step": 36990 }, { "epoch": 1.3445744603532233, "grad_norm": 1.2818701267242432, "learning_rate": 4.715096408808707e-05, "loss": 0.1492, "step": 37000 }, { "epoch": 1.3449378588560215, "grad_norm": 3.879460096359253, "learning_rate": 4.714855778294482e-05, "loss": 0.1407, "step": 37010 }, { "epoch": 1.3453012573588197, "grad_norm": 0.7606347799301147, "learning_rate": 4.714615052350004e-05, "loss": 0.2055, "step": 37020 }, { "epoch": 1.3456646558616179, "grad_norm": 14.105179786682129, "learning_rate": 4.714374230985642e-05, "loss": 0.2572, "step": 37030 }, { "epoch": 1.346028054364416, "grad_norm": 0.9659761786460876, "learning_rate": 4.714133314211774e-05, "loss": 0.1126, "step": 37040 }, { "epoch": 1.346391452867214, "grad_norm": 0.8018509745597839, "learning_rate": 4.7138923020387785e-05, "loss": 0.1731, "step": 37050 }, { "epoch": 1.3467548513700123, "grad_norm": 1.450352668762207, "learning_rate": 4.7136511944770414e-05, "loss": 0.139, "step": 37060 }, { "epoch": 1.3471182498728105, "grad_norm": 0.4030288755893707, "learning_rate": 4.71340999153695e-05, "loss": 0.1317, "step": 37070 }, { "epoch": 1.3474816483756087, "grad_norm": 1.5737247467041016, "learning_rate": 4.713168693228898e-05, "loss": 0.1183, "step": 37080 }, { "epoch": 1.347845046878407, "grad_norm": 0.9841533303260803, "learning_rate": 4.712927299563281e-05, "loss": 0.126, "step": 37090 }, { "epoch": 1.348208445381205, "grad_norm": 0.9880457520484924, "learning_rate": 4.7126858105505004e-05, "loss": 0.1518, "step": 37100 }, { "epoch": 1.3485718438840033, "grad_norm": 1.250982403755188, "learning_rate": 4.7124442262009605e-05, "loss": 0.1241, "step": 37110 }, { "epoch": 
1.3489352423868013, "grad_norm": 0.8015254139900208, "learning_rate": 4.712202546525071e-05, "loss": 0.1294, "step": 37120 }, { "epoch": 1.3492986408895995, "grad_norm": 1.3923901319503784, "learning_rate": 4.711960771533245e-05, "loss": 0.1273, "step": 37130 }, { "epoch": 1.3496620393923977, "grad_norm": 1.7166532278060913, "learning_rate": 4.7117189012359e-05, "loss": 0.1309, "step": 37140 }, { "epoch": 1.350025437895196, "grad_norm": 1.0079472064971924, "learning_rate": 4.711476935643456e-05, "loss": 0.1275, "step": 37150 }, { "epoch": 1.3503888363979941, "grad_norm": 0.7986971139907837, "learning_rate": 4.71123487476634e-05, "loss": 0.1422, "step": 37160 }, { "epoch": 1.350752234900792, "grad_norm": 0.3768475353717804, "learning_rate": 4.71099271861498e-05, "loss": 0.1943, "step": 37170 }, { "epoch": 1.3511156334035903, "grad_norm": 1.1428521871566772, "learning_rate": 4.7107504671998115e-05, "loss": 0.1335, "step": 37180 }, { "epoch": 1.3514790319063885, "grad_norm": 0.627876341342926, "learning_rate": 4.7105081205312715e-05, "loss": 0.1301, "step": 37190 }, { "epoch": 1.3518424304091867, "grad_norm": 1.823798656463623, "learning_rate": 4.710265678619801e-05, "loss": 0.1732, "step": 37200 }, { "epoch": 1.3518424304091867, "eval_loss": 0.3695838451385498, "eval_runtime": 181.0815, "eval_samples_per_second": 40.943, "eval_steps_per_second": 5.119, "eval_wer": 0.17419717900775136, "step": 37200 }, { "epoch": 1.352205828911985, "grad_norm": 1.1491807699203491, "learning_rate": 4.710023141475846e-05, "loss": 0.1267, "step": 37210 }, { "epoch": 1.352569227414783, "grad_norm": 0.9230825901031494, "learning_rate": 4.709780509109858e-05, "loss": 0.1339, "step": 37220 }, { "epoch": 1.3529326259175813, "grad_norm": 1.4637092351913452, "learning_rate": 4.7095377815322893e-05, "loss": 0.1199, "step": 37230 }, { "epoch": 1.3532960244203793, "grad_norm": 2.766608476638794, "learning_rate": 4.7092949587536e-05, "loss": 0.1153, "step": 37240 }, { "epoch": 1.3536594229231775, 
"grad_norm": 0.6508689522743225, "learning_rate": 4.7090520407842516e-05, "loss": 0.1263, "step": 37250 }, { "epoch": 1.3540228214259757, "grad_norm": 1.2959270477294922, "learning_rate": 4.70880902763471e-05, "loss": 0.207, "step": 37260 }, { "epoch": 1.354386219928774, "grad_norm": 0.4432971477508545, "learning_rate": 4.708565919315447e-05, "loss": 0.189, "step": 37270 }, { "epoch": 1.3547496184315722, "grad_norm": 1.1359493732452393, "learning_rate": 4.708322715836936e-05, "loss": 0.1161, "step": 37280 }, { "epoch": 1.3551130169343701, "grad_norm": 1.1669936180114746, "learning_rate": 4.708079417209657e-05, "loss": 0.1298, "step": 37290 }, { "epoch": 1.3554764154371683, "grad_norm": 1.0905638933181763, "learning_rate": 4.707836023444092e-05, "loss": 0.1763, "step": 37300 }, { "epoch": 1.3558398139399666, "grad_norm": 1.1597601175308228, "learning_rate": 4.707592534550729e-05, "loss": 0.1346, "step": 37310 }, { "epoch": 1.3562032124427648, "grad_norm": 0.3999848961830139, "learning_rate": 4.707348950540057e-05, "loss": 0.1614, "step": 37320 }, { "epoch": 1.356566610945563, "grad_norm": 1.9580241441726685, "learning_rate": 4.7071052714225736e-05, "loss": 0.134, "step": 37330 }, { "epoch": 1.356930009448361, "grad_norm": 0.7617779970169067, "learning_rate": 4.7068614972087764e-05, "loss": 0.1372, "step": 37340 }, { "epoch": 1.3572934079511592, "grad_norm": 1.103390097618103, "learning_rate": 4.706617627909169e-05, "loss": 0.1162, "step": 37350 }, { "epoch": 1.3576568064539574, "grad_norm": 1.1971250772476196, "learning_rate": 4.70637366353426e-05, "loss": 0.1358, "step": 37360 }, { "epoch": 1.3580202049567556, "grad_norm": 0.47730955481529236, "learning_rate": 4.70612960409456e-05, "loss": 0.1518, "step": 37370 }, { "epoch": 1.3583836034595538, "grad_norm": 1.2747211456298828, "learning_rate": 4.705885449600584e-05, "loss": 0.1264, "step": 37380 }, { "epoch": 1.358747001962352, "grad_norm": 1.5373166799545288, "learning_rate": 4.705641200062854e-05, "loss": 0.1362, 
"step": 37390 }, { "epoch": 1.3591104004651502, "grad_norm": 7.401641368865967, "learning_rate": 4.705396855491891e-05, "loss": 0.1524, "step": 37400 }, { "epoch": 1.3594737989679482, "grad_norm": 1.0285519361495972, "learning_rate": 4.705152415898225e-05, "loss": 0.1343, "step": 37410 }, { "epoch": 1.3598371974707464, "grad_norm": 0.9249128699302673, "learning_rate": 4.704907881292387e-05, "loss": 0.1857, "step": 37420 }, { "epoch": 1.3602005959735446, "grad_norm": 1.0541716814041138, "learning_rate": 4.7046632516849135e-05, "loss": 0.1519, "step": 37430 }, { "epoch": 1.3605639944763428, "grad_norm": 0.9401641488075256, "learning_rate": 4.704418527086345e-05, "loss": 0.1239, "step": 37440 }, { "epoch": 1.360927392979141, "grad_norm": 1.179436445236206, "learning_rate": 4.7041737075072254e-05, "loss": 0.1345, "step": 37450 }, { "epoch": 1.361290791481939, "grad_norm": 0.5075955390930176, "learning_rate": 4.703928792958103e-05, "loss": 0.1119, "step": 37460 }, { "epoch": 1.3616541899847372, "grad_norm": 1.7815593481063843, "learning_rate": 4.7036837834495306e-05, "loss": 0.1378, "step": 37470 }, { "epoch": 1.3620175884875354, "grad_norm": 1.0368989706039429, "learning_rate": 4.7034386789920646e-05, "loss": 0.1193, "step": 37480 }, { "epoch": 1.3623809869903336, "grad_norm": 0.9185715913772583, "learning_rate": 4.703193479596266e-05, "loss": 0.1539, "step": 37490 }, { "epoch": 1.3627443854931318, "grad_norm": 1.1198723316192627, "learning_rate": 4.7029481852726996e-05, "loss": 0.1483, "step": 37500 }, { "epoch": 1.3631077839959298, "grad_norm": 0.8500091433525085, "learning_rate": 4.702702796031934e-05, "loss": 0.1742, "step": 37510 }, { "epoch": 1.3634711824987282, "grad_norm": 1.0554280281066895, "learning_rate": 4.7024573118845414e-05, "loss": 0.1406, "step": 37520 }, { "epoch": 1.3638345810015262, "grad_norm": 1.524234414100647, "learning_rate": 4.702211732841101e-05, "loss": 0.1242, "step": 37530 }, { "epoch": 1.3641979795043244, "grad_norm": 1.3234226703643799, 
"learning_rate": 4.701966058912191e-05, "loss": 0.1114, "step": 37540 }, { "epoch": 1.3645613780071226, "grad_norm": 4.683910846710205, "learning_rate": 4.701720290108399e-05, "loss": 0.1377, "step": 37550 }, { "epoch": 1.3649247765099208, "grad_norm": 1.4473618268966675, "learning_rate": 4.701474426440313e-05, "loss": 0.1136, "step": 37560 }, { "epoch": 1.365288175012719, "grad_norm": 1.2548261880874634, "learning_rate": 4.701228467918527e-05, "loss": 0.1376, "step": 37570 }, { "epoch": 1.365651573515517, "grad_norm": 0.5335317850112915, "learning_rate": 4.7009824145536385e-05, "loss": 0.1103, "step": 37580 }, { "epoch": 1.3660149720183152, "grad_norm": 2.0894274711608887, "learning_rate": 4.700736266356249e-05, "loss": 0.2852, "step": 37590 }, { "epoch": 1.3663783705211134, "grad_norm": 4.296121120452881, "learning_rate": 4.700490023336963e-05, "loss": 0.1475, "step": 37600 }, { "epoch": 1.3667417690239116, "grad_norm": 3.073425054550171, "learning_rate": 4.700243685506393e-05, "loss": 0.1254, "step": 37610 }, { "epoch": 1.3671051675267099, "grad_norm": 0.5121023058891296, "learning_rate": 4.69999725287515e-05, "loss": 0.1737, "step": 37620 }, { "epoch": 1.3674685660295078, "grad_norm": 0.8064444661140442, "learning_rate": 4.699750725453853e-05, "loss": 0.1045, "step": 37630 }, { "epoch": 1.367831964532306, "grad_norm": 0.5956308841705322, "learning_rate": 4.699504103253124e-05, "loss": 0.1059, "step": 37640 }, { "epoch": 1.3681953630351043, "grad_norm": 1.8260743618011475, "learning_rate": 4.699257386283589e-05, "loss": 0.1677, "step": 37650 }, { "epoch": 1.3685587615379025, "grad_norm": 2.3063583374023438, "learning_rate": 4.699010574555879e-05, "loss": 0.1406, "step": 37660 }, { "epoch": 1.3689221600407007, "grad_norm": 0.5565524697303772, "learning_rate": 4.698763668080627e-05, "loss": 0.1556, "step": 37670 }, { "epoch": 1.3692855585434989, "grad_norm": 1.148147702217102, "learning_rate": 4.698516666868471e-05, "loss": 0.124, "step": 37680 }, { "epoch": 
1.369648957046297, "grad_norm": 1.5487512350082397, "learning_rate": 4.698269570930055e-05, "loss": 0.1478, "step": 37690 }, { "epoch": 1.370012355549095, "grad_norm": 1.8273712396621704, "learning_rate": 4.698022380276024e-05, "loss": 0.1591, "step": 37700 }, { "epoch": 1.3703757540518933, "grad_norm": 1.7402414083480835, "learning_rate": 4.6977750949170294e-05, "loss": 0.1075, "step": 37710 }, { "epoch": 1.3707391525546915, "grad_norm": 0.7992825508117676, "learning_rate": 4.697527714863726e-05, "loss": 0.1653, "step": 37720 }, { "epoch": 1.3711025510574897, "grad_norm": 5.170393943786621, "learning_rate": 4.697280240126772e-05, "loss": 0.1563, "step": 37730 }, { "epoch": 1.371465949560288, "grad_norm": 1.3735640048980713, "learning_rate": 4.697032670716831e-05, "loss": 0.1231, "step": 37740 }, { "epoch": 1.3718293480630859, "grad_norm": 1.8720015287399292, "learning_rate": 4.696785006644569e-05, "loss": 1.526, "step": 37750 }, { "epoch": 1.372192746565884, "grad_norm": 1.9550750255584717, "learning_rate": 4.696537247920657e-05, "loss": 0.1503, "step": 37760 }, { "epoch": 1.3725561450686823, "grad_norm": 0.5374103784561157, "learning_rate": 4.6962893945557704e-05, "loss": 0.1949, "step": 37770 }, { "epoch": 1.3729195435714805, "grad_norm": 1.097432255744934, "learning_rate": 4.6960414465605876e-05, "loss": 0.1157, "step": 37780 }, { "epoch": 1.3732829420742787, "grad_norm": 0.40494269132614136, "learning_rate": 4.695793403945793e-05, "loss": 0.126, "step": 37790 }, { "epoch": 1.3736463405770767, "grad_norm": 1.9734747409820557, "learning_rate": 4.695545266722073e-05, "loss": 0.1457, "step": 37800 }, { "epoch": 1.3736463405770767, "eval_loss": 0.3492252230644226, "eval_runtime": 181.0909, "eval_samples_per_second": 40.941, "eval_steps_per_second": 5.119, "eval_wer": 0.18146749686858968, "step": 37800 }, { "epoch": 1.3740097390798751, "grad_norm": 19.56266212463379, "learning_rate": 4.6952970349001204e-05, "loss": 0.421, "step": 37810 }, { "epoch": 
1.374373137582673, "grad_norm": 0.8468944430351257, "learning_rate": 4.695048708490628e-05, "loss": 0.2035, "step": 37820 }, { "epoch": 1.3747365360854713, "grad_norm": 0.6463280320167542, "learning_rate": 4.6948002875042976e-05, "loss": 0.1323, "step": 37830 }, { "epoch": 1.3750999345882695, "grad_norm": 1.1540967226028442, "learning_rate": 4.694551771951831e-05, "loss": 1.2856, "step": 37840 }, { "epoch": 1.3754633330910677, "grad_norm": 1.295023798942566, "learning_rate": 4.6943031618439374e-05, "loss": 0.7144, "step": 37850 }, { "epoch": 1.375826731593866, "grad_norm": 1.5403015613555908, "learning_rate": 4.694054457191328e-05, "loss": 0.1429, "step": 37860 }, { "epoch": 1.376190130096664, "grad_norm": 1.434574842453003, "learning_rate": 4.693805658004718e-05, "loss": 0.1686, "step": 37870 }, { "epoch": 1.3765535285994621, "grad_norm": 0.6648684740066528, "learning_rate": 4.693556764294829e-05, "loss": 0.1098, "step": 37880 }, { "epoch": 1.3769169271022603, "grad_norm": 0.7901143431663513, "learning_rate": 4.6933077760723824e-05, "loss": 0.115, "step": 37890 }, { "epoch": 1.3772803256050585, "grad_norm": 0.9131706953048706, "learning_rate": 4.693058693348108e-05, "loss": 0.1266, "step": 37900 }, { "epoch": 1.3776437241078567, "grad_norm": 1.2103451490402222, "learning_rate": 4.692809516132738e-05, "loss": 0.1205, "step": 37910 }, { "epoch": 1.3780071226106547, "grad_norm": 0.42282989621162415, "learning_rate": 4.6925602444370075e-05, "loss": 0.1753, "step": 37920 }, { "epoch": 1.3783705211134532, "grad_norm": 0.51373291015625, "learning_rate": 4.692310878271658e-05, "loss": 0.1635, "step": 37930 }, { "epoch": 1.3787339196162511, "grad_norm": 0.7226901650428772, "learning_rate": 4.692061417647431e-05, "loss": 0.1226, "step": 37940 }, { "epoch": 1.3790973181190493, "grad_norm": 5.131813049316406, "learning_rate": 4.6918118625750784e-05, "loss": 0.1682, "step": 37950 }, { "epoch": 1.3794607166218475, "grad_norm": 1.30665922164917, "learning_rate": 
4.6915622130653506e-05, "loss": 0.128, "step": 37960 }, { "epoch": 1.3798241151246458, "grad_norm": 0.3638138175010681, "learning_rate": 4.691312469129006e-05, "loss": 0.1852, "step": 37970 }, { "epoch": 1.380187513627444, "grad_norm": 1.5417994260787964, "learning_rate": 4.691062630776802e-05, "loss": 0.119, "step": 37980 }, { "epoch": 1.380550912130242, "grad_norm": 1.4151712656021118, "learning_rate": 4.6908126980195055e-05, "loss": 0.1079, "step": 37990 }, { "epoch": 1.3809143106330402, "grad_norm": 1.137370228767395, "learning_rate": 4.6905626708678855e-05, "loss": 0.2152, "step": 38000 }, { "epoch": 1.3812777091358384, "grad_norm": 1.0430890321731567, "learning_rate": 4.690312549332714e-05, "loss": 0.1243, "step": 38010 }, { "epoch": 1.3816411076386366, "grad_norm": 1.260365605354309, "learning_rate": 4.690062333424767e-05, "loss": 0.1755, "step": 38020 }, { "epoch": 1.3820045061414348, "grad_norm": 0.8367292284965515, "learning_rate": 4.689812023154827e-05, "loss": 1.1881, "step": 38030 }, { "epoch": 1.3823679046442328, "grad_norm": 1.2440451383590698, "learning_rate": 4.6895616185336775e-05, "loss": 0.1292, "step": 38040 }, { "epoch": 1.382731303147031, "grad_norm": 2.23522686958313, "learning_rate": 4.6893111195721094e-05, "loss": 0.1491, "step": 38050 }, { "epoch": 1.3830947016498292, "grad_norm": 1.0018017292022705, "learning_rate": 4.6890605262809145e-05, "loss": 0.112, "step": 38060 }, { "epoch": 1.3834581001526274, "grad_norm": 0.6445533037185669, "learning_rate": 4.68880983867089e-05, "loss": 0.1597, "step": 38070 }, { "epoch": 1.3838214986554256, "grad_norm": 1.5876944065093994, "learning_rate": 4.6885590567528375e-05, "loss": 0.1341, "step": 38080 }, { "epoch": 1.3841848971582236, "grad_norm": 1.5150282382965088, "learning_rate": 4.6883081805375616e-05, "loss": 0.1167, "step": 38090 }, { "epoch": 1.384548295661022, "grad_norm": 1.7657722234725952, "learning_rate": 4.688057210035873e-05, "loss": 0.1608, "step": 38100 }, { "epoch": 1.38491169416382, 
"grad_norm": 1.027761459350586, "learning_rate": 4.687806145258584e-05, "loss": 0.1566, "step": 38110 }, { "epoch": 1.3852750926666182, "grad_norm": 0.3475823998451233, "learning_rate": 4.6875549862165126e-05, "loss": 0.1502, "step": 38120 }, { "epoch": 1.3856384911694164, "grad_norm": 0.7863835692405701, "learning_rate": 4.687303732920481e-05, "loss": 0.1761, "step": 38130 }, { "epoch": 1.3860018896722146, "grad_norm": 2.0150928497314453, "learning_rate": 4.687052385381313e-05, "loss": 0.1417, "step": 38140 }, { "epoch": 1.3863652881750128, "grad_norm": 0.6676269769668579, "learning_rate": 4.6868009436098386e-05, "loss": 0.1307, "step": 38150 }, { "epoch": 1.3867286866778108, "grad_norm": 1.6957210302352905, "learning_rate": 4.6865494076168934e-05, "loss": 0.1385, "step": 38160 }, { "epoch": 1.387092085180609, "grad_norm": 0.8534975051879883, "learning_rate": 4.686297777413313e-05, "loss": 0.1568, "step": 38170 }, { "epoch": 1.3874554836834072, "grad_norm": 0.7309104800224304, "learning_rate": 4.6860460530099416e-05, "loss": 0.5466, "step": 38180 }, { "epoch": 1.3878188821862054, "grad_norm": 1.2103863954544067, "learning_rate": 4.6857942344176225e-05, "loss": 0.1227, "step": 38190 }, { "epoch": 1.3881822806890036, "grad_norm": 0.7991679906845093, "learning_rate": 4.685542321647207e-05, "loss": 0.1603, "step": 38200 }, { "epoch": 1.3885456791918016, "grad_norm": 1.146906852722168, "learning_rate": 4.685290314709549e-05, "loss": 0.1268, "step": 38210 }, { "epoch": 1.3889090776946, "grad_norm": 1.020175576210022, "learning_rate": 4.685038213615508e-05, "loss": 0.1517, "step": 38220 }, { "epoch": 1.389272476197398, "grad_norm": 1.1214244365692139, "learning_rate": 4.684786018375944e-05, "loss": 0.1185, "step": 38230 }, { "epoch": 1.3896358747001962, "grad_norm": 0.830916166305542, "learning_rate": 4.6845337290017235e-05, "loss": 0.1281, "step": 38240 }, { "epoch": 1.3899992732029944, "grad_norm": 0.5939742922782898, "learning_rate": 4.684281345503718e-05, "loss": 
0.1599, "step": 38250 }, { "epoch": 1.3903626717057926, "grad_norm": 4.498940467834473, "learning_rate": 4.6840288678928003e-05, "loss": 0.1159, "step": 38260 }, { "epoch": 1.3907260702085908, "grad_norm": 0.6612393856048584, "learning_rate": 4.6837762961798495e-05, "loss": 0.1842, "step": 38270 }, { "epoch": 1.3910894687113888, "grad_norm": 2.456289529800415, "learning_rate": 4.683523630375748e-05, "loss": 0.1558, "step": 38280 }, { "epoch": 1.391452867214187, "grad_norm": 0.5414180159568787, "learning_rate": 4.683270870491383e-05, "loss": 0.1347, "step": 38290 }, { "epoch": 1.3918162657169852, "grad_norm": 2.1812076568603516, "learning_rate": 4.683018016537644e-05, "loss": 0.1558, "step": 38300 }, { "epoch": 1.3921796642197835, "grad_norm": 1.2050772905349731, "learning_rate": 4.682765068525425e-05, "loss": 0.1591, "step": 38310 }, { "epoch": 1.3925430627225817, "grad_norm": 1.050423264503479, "learning_rate": 4.6825120264656266e-05, "loss": 0.1578, "step": 38320 }, { "epoch": 1.3929064612253796, "grad_norm": 2.753676652908325, "learning_rate": 4.68225889036915e-05, "loss": 0.1373, "step": 38330 }, { "epoch": 1.3932698597281779, "grad_norm": 2.3123908042907715, "learning_rate": 4.682005660246902e-05, "loss": 0.1198, "step": 38340 }, { "epoch": 1.393633258230976, "grad_norm": 0.6317697167396545, "learning_rate": 4.681752336109794e-05, "loss": 0.157, "step": 38350 }, { "epoch": 1.3939966567337743, "grad_norm": 1.788620948791504, "learning_rate": 4.681498917968741e-05, "loss": 0.1424, "step": 38360 }, { "epoch": 1.3943600552365725, "grad_norm": 1.064799189567566, "learning_rate": 4.68124540583466e-05, "loss": 0.1762, "step": 38370 }, { "epoch": 1.3947234537393705, "grad_norm": 1.3951762914657593, "learning_rate": 4.6809917997184764e-05, "loss": 0.1198, "step": 38380 }, { "epoch": 1.3950868522421689, "grad_norm": 1.0863114595413208, "learning_rate": 4.6807380996311154e-05, "loss": 0.1393, "step": 38390 }, { "epoch": 1.3954502507449669, "grad_norm": 1.141787052154541, 
"learning_rate": 4.6804843055835105e-05, "loss": 0.1603, "step": 38400 }, { "epoch": 1.3954502507449669, "eval_loss": 0.3523618280887604, "eval_runtime": 180.8149, "eval_samples_per_second": 41.003, "eval_steps_per_second": 5.127, "eval_wer": 0.17399749487174831, "step": 38400 }, { "epoch": 1.395813649247765, "grad_norm": 1.5692111253738403, "learning_rate": 4.6802304175865936e-05, "loss": 0.1328, "step": 38410 }, { "epoch": 1.3961770477505633, "grad_norm": 0.4182591140270233, "learning_rate": 4.679976435651305e-05, "loss": 0.1562, "step": 38420 }, { "epoch": 1.3965404462533615, "grad_norm": 0.6963622570037842, "learning_rate": 4.67972235978859e-05, "loss": 0.1131, "step": 38430 }, { "epoch": 1.3969038447561597, "grad_norm": 1.0345783233642578, "learning_rate": 4.679468190009392e-05, "loss": 0.1231, "step": 38440 }, { "epoch": 1.3972672432589577, "grad_norm": 1.6084190607070923, "learning_rate": 4.679213926324665e-05, "loss": 0.133, "step": 38450 }, { "epoch": 1.3976306417617559, "grad_norm": 1.2635602951049805, "learning_rate": 4.678959568745364e-05, "loss": 0.1344, "step": 38460 }, { "epoch": 1.397994040264554, "grad_norm": 0.6128044724464417, "learning_rate": 4.678705117282447e-05, "loss": 0.1639, "step": 38470 }, { "epoch": 1.3983574387673523, "grad_norm": 1.128151297569275, "learning_rate": 4.6784505719468795e-05, "loss": 0.1342, "step": 38480 }, { "epoch": 1.3987208372701505, "grad_norm": 1.6067559719085693, "learning_rate": 4.678195932749627e-05, "loss": 0.1344, "step": 38490 }, { "epoch": 1.3990842357729485, "grad_norm": 0.4303024113178253, "learning_rate": 4.677941199701662e-05, "loss": 0.1519, "step": 38500 }, { "epoch": 1.399447634275747, "grad_norm": 3.09531307220459, "learning_rate": 4.6776863728139596e-05, "loss": 0.1197, "step": 38510 }, { "epoch": 1.399811032778545, "grad_norm": 1.2062981128692627, "learning_rate": 4.6774314520975e-05, "loss": 0.1488, "step": 38520 }, { "epoch": 1.4001744312813431, "grad_norm": 0.7981544733047485, "learning_rate": 
4.6771764375632664e-05, "loss": 0.1155, "step": 38530 }, { "epoch": 1.4005378297841413, "grad_norm": 0.6589852571487427, "learning_rate": 4.676921329222247e-05, "loss": 0.1463, "step": 38540 }, { "epoch": 1.4009012282869395, "grad_norm": 1.496664047241211, "learning_rate": 4.676666127085433e-05, "loss": 0.1811, "step": 38550 }, { "epoch": 1.4012646267897377, "grad_norm": 0.7335402965545654, "learning_rate": 4.676410831163819e-05, "loss": 0.1364, "step": 38560 }, { "epoch": 1.4016280252925357, "grad_norm": 0.8753761053085327, "learning_rate": 4.676155441468407e-05, "loss": 0.1954, "step": 38570 }, { "epoch": 1.401991423795334, "grad_norm": 1.4288660287857056, "learning_rate": 4.6758999580101994e-05, "loss": 0.1347, "step": 38580 }, { "epoch": 1.4023548222981321, "grad_norm": 1.1383757591247559, "learning_rate": 4.675644380800205e-05, "loss": 0.1604, "step": 38590 }, { "epoch": 1.4027182208009303, "grad_norm": 1.4642599821090698, "learning_rate": 4.6753887098494344e-05, "loss": 0.1878, "step": 38600 }, { "epoch": 1.4030816193037285, "grad_norm": 0.9396153688430786, "learning_rate": 4.675132945168905e-05, "loss": 0.1201, "step": 38610 }, { "epoch": 1.4034450178065265, "grad_norm": 0.835436999797821, "learning_rate": 4.674877086769636e-05, "loss": 0.4225, "step": 38620 }, { "epoch": 1.4038084163093247, "grad_norm": 0.9934596419334412, "learning_rate": 4.674621134662651e-05, "loss": 0.1145, "step": 38630 }, { "epoch": 1.404171814812123, "grad_norm": 1.5066030025482178, "learning_rate": 4.674365088858979e-05, "loss": 0.101, "step": 38640 }, { "epoch": 1.4045352133149211, "grad_norm": 2.4759950637817383, "learning_rate": 4.674108949369652e-05, "loss": 0.1225, "step": 38650 }, { "epoch": 1.4048986118177194, "grad_norm": 2.4329168796539307, "learning_rate": 4.6738527162057054e-05, "loss": 0.1469, "step": 38660 }, { "epoch": 1.4052620103205173, "grad_norm": 0.7068483829498291, "learning_rate": 4.67359638937818e-05, "loss": 0.237, "step": 38670 }, { "epoch": 
1.4056254088233158, "grad_norm": 2.3423826694488525, "learning_rate": 4.6733399688981207e-05, "loss": 0.1095, "step": 38680 }, { "epoch": 1.4059888073261138, "grad_norm": 0.7500453591346741, "learning_rate": 4.673083454776575e-05, "loss": 0.1337, "step": 38690 }, { "epoch": 1.406352205828912, "grad_norm": 2.1220805644989014, "learning_rate": 4.6728268470245937e-05, "loss": 0.1489, "step": 38700 }, { "epoch": 1.4067156043317102, "grad_norm": 3.195551633834839, "learning_rate": 4.672570145653234e-05, "loss": 0.1456, "step": 38710 }, { "epoch": 1.4070790028345084, "grad_norm": 0.8845533728599548, "learning_rate": 4.672313350673558e-05, "loss": 0.1529, "step": 38720 }, { "epoch": 1.4074424013373066, "grad_norm": 1.278830885887146, "learning_rate": 4.6720564620966294e-05, "loss": 0.1209, "step": 38730 }, { "epoch": 1.4078057998401046, "grad_norm": 1.8450745344161987, "learning_rate": 4.671799479933515e-05, "loss": 0.0863, "step": 38740 }, { "epoch": 1.4081691983429028, "grad_norm": 0.5718597173690796, "learning_rate": 4.6715424041952894e-05, "loss": 0.1597, "step": 38750 }, { "epoch": 1.408532596845701, "grad_norm": 3.594273090362549, "learning_rate": 4.671285234893027e-05, "loss": 0.1373, "step": 38760 }, { "epoch": 1.4088959953484992, "grad_norm": 1.3270690441131592, "learning_rate": 4.671027972037809e-05, "loss": 0.1727, "step": 38770 }, { "epoch": 1.4092593938512974, "grad_norm": 88.81269073486328, "learning_rate": 4.670770615640721e-05, "loss": 1.6965, "step": 38780 }, { "epoch": 1.4096227923540954, "grad_norm": 1.1233614683151245, "learning_rate": 4.670513165712851e-05, "loss": 0.1316, "step": 38790 }, { "epoch": 1.4099861908568938, "grad_norm": 0.49995678663253784, "learning_rate": 4.6702556222652905e-05, "loss": 0.1492, "step": 38800 }, { "epoch": 1.4103495893596918, "grad_norm": 0.7330392599105835, "learning_rate": 4.669997985309138e-05, "loss": 0.1065, "step": 38810 }, { "epoch": 1.41071298786249, "grad_norm": 0.49762871861457825, "learning_rate": 
4.6697402548554925e-05, "loss": 0.167, "step": 38820 }, { "epoch": 1.4110763863652882, "grad_norm": 3.198273181915283, "learning_rate": 4.6694824309154596e-05, "loss": 0.5685, "step": 38830 }, { "epoch": 1.4114397848680864, "grad_norm": 0.7750107645988464, "learning_rate": 4.6692245135001476e-05, "loss": 0.1291, "step": 38840 }, { "epoch": 1.4118031833708846, "grad_norm": 0.6449529528617859, "learning_rate": 4.66896650262067e-05, "loss": 0.1522, "step": 38850 }, { "epoch": 1.4121665818736826, "grad_norm": 0.7553302049636841, "learning_rate": 4.668708398288142e-05, "loss": 0.1089, "step": 38860 }, { "epoch": 1.4125299803764808, "grad_norm": 0.3948783576488495, "learning_rate": 4.6684502005136864e-05, "loss": 0.1421, "step": 38870 }, { "epoch": 1.412893378879279, "grad_norm": 0.7775730490684509, "learning_rate": 4.668191909308426e-05, "loss": 0.1014, "step": 38880 }, { "epoch": 1.4132567773820772, "grad_norm": 0.3911081850528717, "learning_rate": 4.667933524683492e-05, "loss": 0.1504, "step": 38890 }, { "epoch": 1.4136201758848754, "grad_norm": 0.48814857006073, "learning_rate": 4.667675046650015e-05, "loss": 0.138, "step": 38900 }, { "epoch": 1.4139835743876734, "grad_norm": 2.613859176635742, "learning_rate": 4.667416475219133e-05, "loss": 0.1158, "step": 38910 }, { "epoch": 1.4143469728904716, "grad_norm": 0.9073649048805237, "learning_rate": 4.667157810401987e-05, "loss": 0.1753, "step": 38920 }, { "epoch": 1.4147103713932698, "grad_norm": 1.4347561597824097, "learning_rate": 4.666899052209722e-05, "loss": 0.1397, "step": 38930 }, { "epoch": 1.415073769896068, "grad_norm": 1.014145851135254, "learning_rate": 4.666640200653486e-05, "loss": 0.1425, "step": 38940 }, { "epoch": 1.4154371683988662, "grad_norm": 0.5404003858566284, "learning_rate": 4.6663812557444334e-05, "loss": 0.1918, "step": 38950 }, { "epoch": 1.4158005669016644, "grad_norm": 0.7507174015045166, "learning_rate": 4.66612221749372e-05, "loss": 0.1492, "step": 38960 }, { "epoch": 1.4161639654044627, 
"grad_norm": 0.47643178701400757, "learning_rate": 4.665863085912508e-05, "loss": 0.136, "step": 38970 }, { "epoch": 1.4165273639072606, "grad_norm": 1.3153865337371826, "learning_rate": 4.66560386101196e-05, "loss": 0.1421, "step": 38980 }, { "epoch": 1.4168907624100588, "grad_norm": 0.7111690640449524, "learning_rate": 4.665344542803248e-05, "loss": 0.1184, "step": 38990 }, { "epoch": 1.417254160912857, "grad_norm": 5.245561599731445, "learning_rate": 4.665085131297544e-05, "loss": 0.1343, "step": 39000 }, { "epoch": 1.417254160912857, "eval_loss": 0.34118154644966125, "eval_runtime": 180.7794, "eval_samples_per_second": 41.011, "eval_steps_per_second": 5.128, "eval_wer": 0.17092962041861057, "step": 39000 }, { "epoch": 1.4176175594156553, "grad_norm": 1.7938792705535889, "learning_rate": 4.664825626506025e-05, "loss": 0.1703, "step": 39010 }, { "epoch": 1.4179809579184535, "grad_norm": 0.7494391202926636, "learning_rate": 4.664566028439873e-05, "loss": 0.1644, "step": 39020 }, { "epoch": 1.4183443564212515, "grad_norm": 0.7234100699424744, "learning_rate": 4.664306337110272e-05, "loss": 0.1115, "step": 39030 }, { "epoch": 1.4187077549240497, "grad_norm": 0.6583457589149475, "learning_rate": 4.6640465525284114e-05, "loss": 0.1448, "step": 39040 }, { "epoch": 1.4190711534268479, "grad_norm": 1.7157262563705444, "learning_rate": 4.663786674705484e-05, "loss": 0.176, "step": 39050 }, { "epoch": 1.419434551929646, "grad_norm": 1.9635696411132812, "learning_rate": 4.663526703652688e-05, "loss": 0.1453, "step": 39060 }, { "epoch": 1.4197979504324443, "grad_norm": 0.5495097041130066, "learning_rate": 4.663266639381224e-05, "loss": 0.1114, "step": 39070 }, { "epoch": 1.4201613489352423, "grad_norm": 1.3244194984436035, "learning_rate": 4.663006481902298e-05, "loss": 0.146, "step": 39080 }, { "epoch": 1.4205247474380407, "grad_norm": 1.1683903932571411, "learning_rate": 4.662746231227119e-05, "loss": 0.1351, "step": 39090 }, { "epoch": 1.4208881459408387, "grad_norm": 
2.392890214920044, "learning_rate": 4.662485887366899e-05, "loss": 0.2378, "step": 39100 }, { "epoch": 1.4212515444436369, "grad_norm": 1.7971110343933105, "learning_rate": 4.662225450332856e-05, "loss": 0.1168, "step": 39110 }, { "epoch": 1.421614942946435, "grad_norm": 1.0140278339385986, "learning_rate": 4.6619649201362124e-05, "loss": 0.1332, "step": 39120 }, { "epoch": 1.4219783414492333, "grad_norm": 3.1083390712738037, "learning_rate": 4.661704296788193e-05, "loss": 0.1483, "step": 39130 }, { "epoch": 1.4223417399520315, "grad_norm": 0.6209553480148315, "learning_rate": 4.661443580300026e-05, "loss": 0.1212, "step": 39140 }, { "epoch": 1.4227051384548295, "grad_norm": 0.6733147501945496, "learning_rate": 4.661182770682946e-05, "loss": 0.1343, "step": 39150 }, { "epoch": 1.4230685369576277, "grad_norm": 1.1502153873443604, "learning_rate": 4.660921867948189e-05, "loss": 0.1508, "step": 39160 }, { "epoch": 1.423431935460426, "grad_norm": 0.33853545784950256, "learning_rate": 4.660660872106999e-05, "loss": 0.1661, "step": 39170 }, { "epoch": 1.423795333963224, "grad_norm": 1.0470768213272095, "learning_rate": 4.660399783170618e-05, "loss": 0.1231, "step": 39180 }, { "epoch": 1.4241587324660223, "grad_norm": 0.5467321872711182, "learning_rate": 4.660138601150298e-05, "loss": 0.1166, "step": 39190 }, { "epoch": 1.4245221309688203, "grad_norm": 1.3816486597061157, "learning_rate": 4.659877326057291e-05, "loss": 0.1177, "step": 39200 }, { "epoch": 1.4248855294716185, "grad_norm": 1.0260194540023804, "learning_rate": 4.659615957902855e-05, "loss": 0.153, "step": 39210 }, { "epoch": 1.4252489279744167, "grad_norm": 0.7443385124206543, "learning_rate": 4.6593544966982524e-05, "loss": 0.1735, "step": 39220 }, { "epoch": 1.425612326477215, "grad_norm": 1.1526659727096558, "learning_rate": 4.659092942454746e-05, "loss": 0.1491, "step": 39230 }, { "epoch": 1.4259757249800131, "grad_norm": 0.8841147422790527, "learning_rate": 4.658831295183608e-05, "loss": 0.1611, "step": 
39240 }, { "epoch": 1.4263391234828113, "grad_norm": 1.596132516860962, "learning_rate": 4.65856955489611e-05, "loss": 0.1543, "step": 39250 }, { "epoch": 1.4267025219856095, "grad_norm": 2.1355278491973877, "learning_rate": 4.65830772160353e-05, "loss": 0.1489, "step": 39260 }, { "epoch": 1.4270659204884075, "grad_norm": 0.9080690145492554, "learning_rate": 4.6580457953171496e-05, "loss": 0.28, "step": 39270 }, { "epoch": 1.4274293189912057, "grad_norm": 0.9187225699424744, "learning_rate": 4.6577837760482546e-05, "loss": 0.1243, "step": 39280 }, { "epoch": 1.427792717494004, "grad_norm": 1.0374051332473755, "learning_rate": 4.6575216638081335e-05, "loss": 0.1088, "step": 39290 }, { "epoch": 1.4281561159968021, "grad_norm": 0.5795188546180725, "learning_rate": 4.657259458608081e-05, "loss": 0.1288, "step": 39300 }, { "epoch": 1.4285195144996004, "grad_norm": 1.0621544122695923, "learning_rate": 4.656997160459394e-05, "loss": 0.1311, "step": 39310 }, { "epoch": 1.4288829130023983, "grad_norm": 0.35324281454086304, "learning_rate": 4.656734769373373e-05, "loss": 0.1635, "step": 39320 }, { "epoch": 1.4292463115051965, "grad_norm": 1.4020544290542603, "learning_rate": 4.656472285361326e-05, "loss": 0.1281, "step": 39330 }, { "epoch": 1.4296097100079947, "grad_norm": 0.9644222259521484, "learning_rate": 4.65620970843456e-05, "loss": 0.111, "step": 39340 }, { "epoch": 1.429973108510793, "grad_norm": 3.3897273540496826, "learning_rate": 4.65594703860439e-05, "loss": 0.2219, "step": 39350 }, { "epoch": 1.4303365070135912, "grad_norm": 1.1418486833572388, "learning_rate": 4.655684275882132e-05, "loss": 0.1347, "step": 39360 }, { "epoch": 1.4306999055163891, "grad_norm": 0.7159132361412048, "learning_rate": 4.655421420279109e-05, "loss": 0.2765, "step": 39370 }, { "epoch": 1.4310633040191876, "grad_norm": 1.4189454317092896, "learning_rate": 4.655158471806647e-05, "loss": 0.1247, "step": 39380 }, { "epoch": 1.4314267025219856, "grad_norm": 1.0472137928009033, 
"learning_rate": 4.6548954304760725e-05, "loss": 0.1193, "step": 39390 }, { "epoch": 1.4317901010247838, "grad_norm": 1.6883853673934937, "learning_rate": 4.654632296298723e-05, "loss": 0.1336, "step": 39400 }, { "epoch": 1.432153499527582, "grad_norm": 3.4792749881744385, "learning_rate": 4.654369069285933e-05, "loss": 0.1224, "step": 39410 }, { "epoch": 1.4325168980303802, "grad_norm": 0.9245648384094238, "learning_rate": 4.654105749449046e-05, "loss": 0.1684, "step": 39420 }, { "epoch": 1.4328802965331784, "grad_norm": 0.7134508490562439, "learning_rate": 4.653842336799406e-05, "loss": 0.1268, "step": 39430 }, { "epoch": 1.4332436950359764, "grad_norm": 0.8069209456443787, "learning_rate": 4.6535788313483624e-05, "loss": 0.1224, "step": 39440 }, { "epoch": 1.4336070935387746, "grad_norm": 1.5594000816345215, "learning_rate": 4.6533152331072706e-05, "loss": 0.178, "step": 39450 }, { "epoch": 1.4339704920415728, "grad_norm": 0.6994547843933105, "learning_rate": 4.653051542087486e-05, "loss": 0.1184, "step": 39460 }, { "epoch": 1.434333890544371, "grad_norm": 1.0693833827972412, "learning_rate": 4.6527877583003714e-05, "loss": 0.1696, "step": 39470 }, { "epoch": 1.4346972890471692, "grad_norm": 1.625401496887207, "learning_rate": 4.652523881757292e-05, "loss": 0.9774, "step": 39480 }, { "epoch": 1.4350606875499672, "grad_norm": 1.0620099306106567, "learning_rate": 4.652259912469618e-05, "loss": 0.141, "step": 39490 }, { "epoch": 1.4354240860527654, "grad_norm": 0.8238838315010071, "learning_rate": 4.6519958504487206e-05, "loss": 0.2848, "step": 39500 }, { "epoch": 1.4357874845555636, "grad_norm": 4.339720726013184, "learning_rate": 4.6517316957059796e-05, "loss": 0.1307, "step": 39510 }, { "epoch": 1.4361508830583618, "grad_norm": 1.6212254762649536, "learning_rate": 4.6514674482527754e-05, "loss": 0.1486, "step": 39520 }, { "epoch": 1.43651428156116, "grad_norm": 1.0148829221725464, "learning_rate": 4.651203108100494e-05, "loss": 0.1216, "step": 39530 }, { 
"epoch": 1.4368776800639582, "grad_norm": 0.8317530155181885, "learning_rate": 4.650938675260525e-05, "loss": 0.1058, "step": 39540 }, { "epoch": 1.4372410785667564, "grad_norm": 1.1663634777069092, "learning_rate": 4.6506741497442614e-05, "loss": 0.2442, "step": 39550 }, { "epoch": 1.4376044770695544, "grad_norm": 1.4678232669830322, "learning_rate": 4.6504095315631006e-05, "loss": 0.1353, "step": 39560 }, { "epoch": 1.4379678755723526, "grad_norm": 0.8588351011276245, "learning_rate": 4.6501448207284446e-05, "loss": 0.157, "step": 39570 }, { "epoch": 1.4383312740751508, "grad_norm": 0.7000893950462341, "learning_rate": 4.6498800172516985e-05, "loss": 0.1191, "step": 39580 }, { "epoch": 1.438694672577949, "grad_norm": 1.0854928493499756, "learning_rate": 4.649615121144271e-05, "loss": 0.1258, "step": 39590 }, { "epoch": 1.4390580710807472, "grad_norm": 1.003110647201538, "learning_rate": 4.649350132417577e-05, "loss": 0.1264, "step": 39600 }, { "epoch": 1.4390580710807472, "eval_loss": 0.34889447689056396, "eval_runtime": 180.6213, "eval_samples_per_second": 41.047, "eval_steps_per_second": 5.132, "eval_wer": 0.17173743351425927, "step": 39600 }, { "epoch": 1.4394214695835452, "grad_norm": 0.8560311198234558, "learning_rate": 4.649085051083033e-05, "loss": 0.129, "step": 39610 }, { "epoch": 1.4397848680863434, "grad_norm": 1.6158629655838013, "learning_rate": 4.6488198771520605e-05, "loss": 0.1359, "step": 39620 }, { "epoch": 1.4401482665891416, "grad_norm": 0.6946542263031006, "learning_rate": 4.6485546106360856e-05, "loss": 0.1195, "step": 39630 }, { "epoch": 1.4405116650919398, "grad_norm": 0.6091057062149048, "learning_rate": 4.648289251546536e-05, "loss": 0.1321, "step": 39640 }, { "epoch": 1.440875063594738, "grad_norm": 0.37464994192123413, "learning_rate": 4.648023799894847e-05, "loss": 0.1339, "step": 39650 }, { "epoch": 1.441238462097536, "grad_norm": 0.5378652215003967, "learning_rate": 4.647758255692456e-05, "loss": 0.1127, "step": 39660 }, { "epoch": 
1.4416018606003345, "grad_norm": 1.4586265087127686, "learning_rate": 4.647492618950802e-05, "loss": 0.2027, "step": 39670 }, { "epoch": 1.4419652591031324, "grad_norm": 0.6388387084007263, "learning_rate": 4.647226889681333e-05, "loss": 0.1197, "step": 39680 }, { "epoch": 1.4423286576059307, "grad_norm": 0.7849758267402649, "learning_rate": 4.646961067895496e-05, "loss": 0.1165, "step": 39690 }, { "epoch": 1.4426920561087289, "grad_norm": 1.16459059715271, "learning_rate": 4.6466951536047464e-05, "loss": 0.1799, "step": 39700 }, { "epoch": 1.443055454611527, "grad_norm": 3.2792208194732666, "learning_rate": 4.64642914682054e-05, "loss": 0.1191, "step": 39710 }, { "epoch": 1.4434188531143253, "grad_norm": 0.3408263921737671, "learning_rate": 4.64616304755434e-05, "loss": 0.1971, "step": 39720 }, { "epoch": 1.4437822516171233, "grad_norm": 0.45033156871795654, "learning_rate": 4.645896855817609e-05, "loss": 0.1441, "step": 39730 }, { "epoch": 1.4441456501199215, "grad_norm": 2.284130096435547, "learning_rate": 4.645630571621817e-05, "loss": 0.122, "step": 39740 }, { "epoch": 1.4445090486227197, "grad_norm": 3.047889232635498, "learning_rate": 4.645364194978439e-05, "loss": 0.172, "step": 39750 }, { "epoch": 1.4448724471255179, "grad_norm": 1.2850980758666992, "learning_rate": 4.645097725898951e-05, "loss": 0.1088, "step": 39760 }, { "epoch": 1.445235845628316, "grad_norm": 0.8555011749267578, "learning_rate": 4.644831164394834e-05, "loss": 0.1516, "step": 39770 }, { "epoch": 1.445599244131114, "grad_norm": 0.9414917230606079, "learning_rate": 4.644564510477574e-05, "loss": 0.1104, "step": 39780 }, { "epoch": 1.4459626426339123, "grad_norm": 1.658109188079834, "learning_rate": 4.644297764158659e-05, "loss": 0.1361, "step": 39790 }, { "epoch": 1.4463260411367105, "grad_norm": 1.9984872341156006, "learning_rate": 4.644030925449583e-05, "loss": 0.5671, "step": 39800 }, { "epoch": 1.4466894396395087, "grad_norm": 2.086899995803833, "learning_rate": 
4.6437639943618424e-05, "loss": 0.124, "step": 39810 }, { "epoch": 1.447052838142307, "grad_norm": 0.39096391201019287, "learning_rate": 4.64349697090694e-05, "loss": 0.2021, "step": 39820 }, { "epoch": 1.447416236645105, "grad_norm": 0.9561779499053955, "learning_rate": 4.643229855096378e-05, "loss": 0.1371, "step": 39830 }, { "epoch": 1.4477796351479033, "grad_norm": 1.6168954372406006, "learning_rate": 4.6429626469416685e-05, "loss": 0.1149, "step": 39840 }, { "epoch": 1.4481430336507013, "grad_norm": 1.4393991231918335, "learning_rate": 4.642695346454323e-05, "loss": 0.1472, "step": 39850 }, { "epoch": 1.4485064321534995, "grad_norm": 1.9806978702545166, "learning_rate": 4.642427953645859e-05, "loss": 0.1443, "step": 39860 }, { "epoch": 1.4488698306562977, "grad_norm": 1.012040615081787, "learning_rate": 4.642160468527797e-05, "loss": 0.155, "step": 39870 }, { "epoch": 1.449233229159096, "grad_norm": 0.620448112487793, "learning_rate": 4.641892891111662e-05, "loss": 0.12, "step": 39880 }, { "epoch": 1.4495966276618941, "grad_norm": 0.5192741751670837, "learning_rate": 4.6416252214089834e-05, "loss": 0.1096, "step": 39890 }, { "epoch": 1.449960026164692, "grad_norm": 1.4234672784805298, "learning_rate": 4.641357459431294e-05, "loss": 0.1548, "step": 39900 }, { "epoch": 1.4503234246674903, "grad_norm": 1.2218151092529297, "learning_rate": 4.641089605190131e-05, "loss": 0.1452, "step": 39910 }, { "epoch": 1.4506868231702885, "grad_norm": 0.5271123051643372, "learning_rate": 4.6408216586970344e-05, "loss": 0.1238, "step": 39920 }, { "epoch": 1.4510502216730867, "grad_norm": 1.0670936107635498, "learning_rate": 4.640553619963549e-05, "loss": 0.7119, "step": 39930 }, { "epoch": 1.451413620175885, "grad_norm": 2.9407644271850586, "learning_rate": 4.6402854890012256e-05, "loss": 0.1246, "step": 39940 }, { "epoch": 1.451777018678683, "grad_norm": 0.8619846701622009, "learning_rate": 4.6400172658216144e-05, "loss": 0.1524, "step": 39950 }, { "epoch": 1.4521404171814813, 
"grad_norm": 1.110069751739502, "learning_rate": 4.639748950436275e-05, "loss": 0.1147, "step": 39960 }, { "epoch": 1.4525038156842793, "grad_norm": 0.6605796813964844, "learning_rate": 4.639480542856764e-05, "loss": 0.1266, "step": 39970 }, { "epoch": 1.4528672141870775, "grad_norm": 0.8681196570396423, "learning_rate": 4.639212043094651e-05, "loss": 0.1168, "step": 39980 }, { "epoch": 1.4532306126898757, "grad_norm": 0.7025002241134644, "learning_rate": 4.6389434511615015e-05, "loss": 0.1117, "step": 39990 }, { "epoch": 1.453594011192674, "grad_norm": 1.203703761100769, "learning_rate": 4.6386747670688897e-05, "loss": 0.1524, "step": 40000 }, { "epoch": 1.4539574096954722, "grad_norm": 2.274060010910034, "learning_rate": 4.638405990828391e-05, "loss": 0.113, "step": 40010 }, { "epoch": 1.4543208081982701, "grad_norm": 1.424842357635498, "learning_rate": 4.638137122451587e-05, "loss": 0.1986, "step": 40020 }, { "epoch": 1.4546842067010683, "grad_norm": 2.1440541744232178, "learning_rate": 4.637868161950062e-05, "loss": 0.1406, "step": 40030 }, { "epoch": 1.4550476052038666, "grad_norm": 0.9488077759742737, "learning_rate": 4.6375991093354035e-05, "loss": 0.1827, "step": 40040 }, { "epoch": 1.4554110037066648, "grad_norm": 7.7812724113464355, "learning_rate": 4.637329964619206e-05, "loss": 0.2488, "step": 40050 }, { "epoch": 1.455774402209463, "grad_norm": 1.2816716432571411, "learning_rate": 4.6370607278130646e-05, "loss": 0.1125, "step": 40060 }, { "epoch": 1.456137800712261, "grad_norm": 0.6140567660331726, "learning_rate": 4.63679139892858e-05, "loss": 0.1526, "step": 40070 }, { "epoch": 1.4565011992150592, "grad_norm": 1.3745895624160767, "learning_rate": 4.636521977977357e-05, "loss": 0.1406, "step": 40080 }, { "epoch": 1.4568645977178574, "grad_norm": 0.934893786907196, "learning_rate": 4.636252464971004e-05, "loss": 0.1236, "step": 40090 }, { "epoch": 1.4572279962206556, "grad_norm": 4.1601738929748535, "learning_rate": 4.635982859921132e-05, "loss": 
0.1256, "step": 40100 }, { "epoch": 1.4575913947234538, "grad_norm": 1.3297815322875977, "learning_rate": 4.635713162839359e-05, "loss": 0.1076, "step": 40110 }, { "epoch": 1.457954793226252, "grad_norm": 1.6878186464309692, "learning_rate": 4.6354433737373055e-05, "loss": 0.2212, "step": 40120 }, { "epoch": 1.4583181917290502, "grad_norm": 1.2743428945541382, "learning_rate": 4.6351734926265946e-05, "loss": 0.105, "step": 40130 }, { "epoch": 1.4586815902318482, "grad_norm": 1.5052075386047363, "learning_rate": 4.634903519518854e-05, "loss": 0.234, "step": 40140 }, { "epoch": 1.4590449887346464, "grad_norm": 1.7959517240524292, "learning_rate": 4.634633454425718e-05, "loss": 0.1572, "step": 40150 }, { "epoch": 1.4594083872374446, "grad_norm": 0.578747034072876, "learning_rate": 4.63436329735882e-05, "loss": 0.1111, "step": 40160 }, { "epoch": 1.4597717857402428, "grad_norm": 0.3820185959339142, "learning_rate": 4.634093048329803e-05, "loss": 0.1589, "step": 40170 }, { "epoch": 1.460135184243041, "grad_norm": 1.621700406074524, "learning_rate": 4.633822707350309e-05, "loss": 0.1401, "step": 40180 }, { "epoch": 1.460498582745839, "grad_norm": 1.2941464185714722, "learning_rate": 4.633552274431987e-05, "loss": 0.1678, "step": 40190 }, { "epoch": 1.4608619812486372, "grad_norm": 0.9888546466827393, "learning_rate": 4.633281749586488e-05, "loss": 0.1649, "step": 40200 }, { "epoch": 1.4608619812486372, "eval_loss": 0.3407399654388428, "eval_runtime": 180.8259, "eval_samples_per_second": 41.001, "eval_steps_per_second": 5.126, "eval_wer": 0.17137437144879916, "step": 40200 }, { "epoch": 1.4612253797514354, "grad_norm": 0.6030024290084839, "learning_rate": 4.633011132825469e-05, "loss": 0.1086, "step": 40210 }, { "epoch": 1.4615887782542336, "grad_norm": 2.6498842239379883, "learning_rate": 4.63274042416059e-05, "loss": 0.1341, "step": 40220 }, { "epoch": 1.4619521767570318, "grad_norm": 1.0614917278289795, "learning_rate": 4.632469623603514e-05, "loss": 0.1197, "step": 
40230 }, { "epoch": 1.4623155752598298, "grad_norm": 1.7546344995498657, "learning_rate": 4.63219873116591e-05, "loss": 0.1518, "step": 40240 }, { "epoch": 1.4626789737626282, "grad_norm": 2.726959705352783, "learning_rate": 4.631927746859448e-05, "loss": 0.1514, "step": 40250 }, { "epoch": 1.4630423722654262, "grad_norm": 1.0468662977218628, "learning_rate": 4.6316566706958055e-05, "loss": 0.1309, "step": 40260 }, { "epoch": 1.4634057707682244, "grad_norm": 0.7446948885917664, "learning_rate": 4.631385502686661e-05, "loss": 0.176, "step": 40270 }, { "epoch": 1.4637691692710226, "grad_norm": 2.804288148880005, "learning_rate": 4.6311142428436996e-05, "loss": 0.103, "step": 40280 }, { "epoch": 1.4641325677738208, "grad_norm": 1.1131904125213623, "learning_rate": 4.630842891178607e-05, "loss": 0.1168, "step": 40290 }, { "epoch": 1.464495966276619, "grad_norm": 1.084128975868225, "learning_rate": 4.6305714477030766e-05, "loss": 0.5731, "step": 40300 }, { "epoch": 1.464859364779417, "grad_norm": 2.9369328022003174, "learning_rate": 4.630299912428803e-05, "loss": 0.12, "step": 40310 }, { "epoch": 1.4652227632822152, "grad_norm": 0.449259489774704, "learning_rate": 4.630028285367485e-05, "loss": 0.1396, "step": 40320 }, { "epoch": 1.4655861617850134, "grad_norm": 0.6570121049880981, "learning_rate": 4.6297565665308276e-05, "loss": 0.1143, "step": 40330 }, { "epoch": 1.4659495602878116, "grad_norm": 0.9117491841316223, "learning_rate": 4.629484755930537e-05, "loss": 0.0996, "step": 40340 }, { "epoch": 1.4663129587906099, "grad_norm": 1.197102665901184, "learning_rate": 4.629212853578325e-05, "loss": 0.1522, "step": 40350 }, { "epoch": 1.4666763572934078, "grad_norm": 2.8265323638916016, "learning_rate": 4.6289408594859075e-05, "loss": 0.1239, "step": 40360 }, { "epoch": 1.467039755796206, "grad_norm": 0.4153755307197571, "learning_rate": 4.628668773665002e-05, "loss": 0.1448, "step": 40370 }, { "epoch": 1.4674031542990043, "grad_norm": 0.9625080823898315, "learning_rate": 
4.628396596127335e-05, "loss": 0.1249, "step": 40380 }, { "epoch": 1.4677665528018025, "grad_norm": 3.2815330028533936, "learning_rate": 4.62812432688463e-05, "loss": 0.1238, "step": 40390 }, { "epoch": 1.4681299513046007, "grad_norm": 0.5435966849327087, "learning_rate": 4.627851965948619e-05, "loss": 0.149, "step": 40400 }, { "epoch": 1.4684933498073989, "grad_norm": 1.5212714672088623, "learning_rate": 4.6275795133310383e-05, "loss": 0.1458, "step": 40410 }, { "epoch": 1.468856748310197, "grad_norm": 0.9295603632926941, "learning_rate": 4.627306969043627e-05, "loss": 0.2099, "step": 40420 }, { "epoch": 1.469220146812995, "grad_norm": 1.0282838344573975, "learning_rate": 4.627034333098127e-05, "loss": 0.1408, "step": 40430 }, { "epoch": 1.4695835453157933, "grad_norm": 1.5155305862426758, "learning_rate": 4.6267616055062855e-05, "loss": 0.2335, "step": 40440 }, { "epoch": 1.4699469438185915, "grad_norm": 0.6780584454536438, "learning_rate": 4.626488786279854e-05, "loss": 0.1611, "step": 40450 }, { "epoch": 1.4703103423213897, "grad_norm": 1.1376898288726807, "learning_rate": 4.626215875430586e-05, "loss": 0.1282, "step": 40460 }, { "epoch": 1.470673740824188, "grad_norm": 9.535052299499512, "learning_rate": 4.6259428729702414e-05, "loss": 0.3186, "step": 40470 }, { "epoch": 1.4710371393269859, "grad_norm": 1.4366358518600464, "learning_rate": 4.625669778910582e-05, "loss": 0.1202, "step": 40480 }, { "epoch": 1.471400537829784, "grad_norm": 0.9380308985710144, "learning_rate": 4.625396593263376e-05, "loss": 0.1125, "step": 40490 }, { "epoch": 1.4717639363325823, "grad_norm": 1.2832533121109009, "learning_rate": 4.6251233160403916e-05, "loss": 2.9509, "step": 40500 }, { "epoch": 1.4721273348353805, "grad_norm": 3.405505895614624, "learning_rate": 4.624849947253406e-05, "loss": 0.1369, "step": 40510 }, { "epoch": 1.4724907333381787, "grad_norm": 0.7077997326850891, "learning_rate": 4.624576486914196e-05, "loss": 0.1233, "step": 40520 }, { "epoch": 
1.4728541318409767, "grad_norm": 0.7341346740722656, "learning_rate": 4.624302935034545e-05, "loss": 0.1146, "step": 40530 }, { "epoch": 1.4732175303437751, "grad_norm": 0.9452338218688965, "learning_rate": 4.6240292916262376e-05, "loss": 0.1696, "step": 40540 }, { "epoch": 1.473580928846573, "grad_norm": 0.8162540197372437, "learning_rate": 4.623755556701066e-05, "loss": 0.138, "step": 40550 }, { "epoch": 1.4739443273493713, "grad_norm": 0.551977276802063, "learning_rate": 4.623481730270824e-05, "loss": 0.2382, "step": 40560 }, { "epoch": 1.4743077258521695, "grad_norm": 0.6273486018180847, "learning_rate": 4.623207812347309e-05, "loss": 0.1902, "step": 40570 }, { "epoch": 1.4746711243549677, "grad_norm": 0.5531787872314453, "learning_rate": 4.622933802942324e-05, "loss": 0.1103, "step": 40580 }, { "epoch": 1.475034522857766, "grad_norm": 0.4982399344444275, "learning_rate": 4.622659702067675e-05, "loss": 0.1081, "step": 40590 }, { "epoch": 1.475397921360564, "grad_norm": 0.9091404676437378, "learning_rate": 4.622385509735172e-05, "loss": 0.3927, "step": 40600 }, { "epoch": 1.4757613198633621, "grad_norm": 0.6481756567955017, "learning_rate": 4.622111225956629e-05, "loss": 0.1186, "step": 40610 }, { "epoch": 1.4761247183661603, "grad_norm": 1.1302157640457153, "learning_rate": 4.621836850743864e-05, "loss": 0.1667, "step": 40620 }, { "epoch": 1.4764881168689585, "grad_norm": 2.197112560272217, "learning_rate": 4.6215623841086974e-05, "loss": 0.1226, "step": 40630 }, { "epoch": 1.4768515153717567, "grad_norm": 3.960108995437622, "learning_rate": 4.621287826062957e-05, "loss": 0.1343, "step": 40640 }, { "epoch": 1.4772149138745547, "grad_norm": 1.0748779773712158, "learning_rate": 4.6210131766184714e-05, "loss": 0.1492, "step": 40650 }, { "epoch": 1.477578312377353, "grad_norm": 1.8779007196426392, "learning_rate": 4.620738435787075e-05, "loss": 0.1446, "step": 40660 }, { "epoch": 1.4779417108801511, "grad_norm": 0.5000749230384827, "learning_rate": 
4.620463603580605e-05, "loss": 0.1762, "step": 40670 }, { "epoch": 1.4783051093829493, "grad_norm": 0.6756991147994995, "learning_rate": 4.620188680010903e-05, "loss": 0.1375, "step": 40680 }, { "epoch": 1.4786685078857476, "grad_norm": 0.7340139746665955, "learning_rate": 4.619941170692398e-05, "loss": 2.0738, "step": 40690 }, { "epoch": 1.4790319063885458, "grad_norm": 4.0170722007751465, "learning_rate": 4.6196660735651925e-05, "loss": 0.157, "step": 40700 }, { "epoch": 1.479395304891344, "grad_norm": 0.8254412412643433, "learning_rate": 4.619390885109118e-05, "loss": 0.1385, "step": 40710 }, { "epoch": 1.479758703394142, "grad_norm": 0.7477695345878601, "learning_rate": 4.619115605336031e-05, "loss": 0.1772, "step": 40720 }, { "epoch": 1.4801221018969402, "grad_norm": 0.9684391617774963, "learning_rate": 4.618840234257792e-05, "loss": 3.1891, "step": 40730 }, { "epoch": 1.4804855003997384, "grad_norm": 0.9600037336349487, "learning_rate": 4.6185647718862655e-05, "loss": 0.1289, "step": 40740 }, { "epoch": 1.4808488989025366, "grad_norm": 1.848919153213501, "learning_rate": 4.6182892182333226e-05, "loss": 0.1588, "step": 40750 }, { "epoch": 1.4812122974053348, "grad_norm": 1.707576036453247, "learning_rate": 4.6180135733108335e-05, "loss": 0.1578, "step": 40760 }, { "epoch": 1.4815756959081328, "grad_norm": 0.9908716678619385, "learning_rate": 4.617737837130675e-05, "loss": 0.1763, "step": 40770 }, { "epoch": 1.481939094410931, "grad_norm": 1.638818383216858, "learning_rate": 4.617462009704728e-05, "loss": 0.1323, "step": 40780 }, { "epoch": 1.4823024929137292, "grad_norm": 2.1605606079101562, "learning_rate": 4.6171860910448774e-05, "loss": 0.123, "step": 40790 }, { "epoch": 1.4826658914165274, "grad_norm": 0.7089453935623169, "learning_rate": 4.6169100811630106e-05, "loss": 0.1489, "step": 40800 }, { "epoch": 1.4826658914165274, "eval_loss": 0.3524834215641022, "eval_runtime": 180.6874, "eval_samples_per_second": 41.032, "eval_steps_per_second": 5.13, 
"eval_wer": 0.17247263419681594, "step": 40800 }, { "epoch": 1.4830292899193256, "grad_norm": 0.8966375589370728, "learning_rate": 4.616633980071021e-05, "loss": 0.1114, "step": 40810 }, { "epoch": 1.4833926884221236, "grad_norm": 0.8344945907592773, "learning_rate": 4.616357787780804e-05, "loss": 0.1765, "step": 40820 }, { "epoch": 1.483756086924922, "grad_norm": 1.2400190830230713, "learning_rate": 4.616081504304259e-05, "loss": 0.1805, "step": 40830 }, { "epoch": 1.48411948542772, "grad_norm": 0.9686151742935181, "learning_rate": 4.615805129653292e-05, "loss": 0.1259, "step": 40840 }, { "epoch": 1.4844828839305182, "grad_norm": 1.00034499168396, "learning_rate": 4.615528663839811e-05, "loss": 0.1632, "step": 40850 }, { "epoch": 1.4848462824333164, "grad_norm": 3.2312328815460205, "learning_rate": 4.6152521068757256e-05, "loss": 0.116, "step": 40860 }, { "epoch": 1.4852096809361146, "grad_norm": 0.4837055206298828, "learning_rate": 4.6149754587729535e-05, "loss": 0.1534, "step": 40870 }, { "epoch": 1.4855730794389128, "grad_norm": 8.643519401550293, "learning_rate": 4.614698719543413e-05, "loss": 0.1122, "step": 40880 }, { "epoch": 1.4859364779417108, "grad_norm": 0.9113799333572388, "learning_rate": 4.61442188919903e-05, "loss": 0.1046, "step": 40890 }, { "epoch": 1.486299876444509, "grad_norm": 0.7763462662696838, "learning_rate": 4.61414496775173e-05, "loss": 0.1515, "step": 40900 }, { "epoch": 1.4866632749473072, "grad_norm": 1.2019357681274414, "learning_rate": 4.6138679552134464e-05, "loss": 0.1372, "step": 40910 }, { "epoch": 1.4870266734501054, "grad_norm": 1.1948570013046265, "learning_rate": 4.6135908515961136e-05, "loss": 0.2073, "step": 40920 }, { "epoch": 1.4873900719529036, "grad_norm": 1.3027549982070923, "learning_rate": 4.6133136569116706e-05, "loss": 0.12, "step": 40930 }, { "epoch": 1.4877534704557016, "grad_norm": 1.4980496168136597, "learning_rate": 4.613036371172062e-05, "loss": 2.4225, "step": 40940 }, { "epoch": 1.4881168689584998, 
"grad_norm": 0.7265346050262451, "learning_rate": 4.612758994389234e-05, "loss": 0.1631, "step": 40950 }, { "epoch": 1.488480267461298, "grad_norm": 0.6485431790351868, "learning_rate": 4.612481526575138e-05, "loss": 0.1175, "step": 40960 }, { "epoch": 1.4888436659640962, "grad_norm": 0.9532496333122253, "learning_rate": 4.612203967741729e-05, "loss": 0.2368, "step": 40970 }, { "epoch": 1.4892070644668944, "grad_norm": 3.3696892261505127, "learning_rate": 4.6119263179009676e-05, "loss": 0.1388, "step": 40980 }, { "epoch": 1.4895704629696926, "grad_norm": 0.7628744840621948, "learning_rate": 4.611648577064814e-05, "loss": 0.1475, "step": 40990 }, { "epoch": 1.4899338614724909, "grad_norm": 1.4854507446289062, "learning_rate": 4.611370745245237e-05, "loss": 0.1717, "step": 41000 }, { "epoch": 1.4902972599752888, "grad_norm": 1.2280082702636719, "learning_rate": 4.6110928224542074e-05, "loss": 0.1389, "step": 41010 }, { "epoch": 1.490660658478087, "grad_norm": 0.5658448934555054, "learning_rate": 4.6108148087036984e-05, "loss": 0.1625, "step": 41020 }, { "epoch": 1.4910240569808852, "grad_norm": 1.1708754301071167, "learning_rate": 4.6105367040056903e-05, "loss": 0.1283, "step": 41030 }, { "epoch": 1.4913874554836835, "grad_norm": 1.175658106803894, "learning_rate": 4.610258508372165e-05, "loss": 0.1197, "step": 41040 }, { "epoch": 1.4917508539864817, "grad_norm": 1.0719672441482544, "learning_rate": 4.609980221815109e-05, "loss": 0.1361, "step": 41050 }, { "epoch": 1.4921142524892796, "grad_norm": 0.7982541918754578, "learning_rate": 4.6097018443465114e-05, "loss": 0.1302, "step": 41060 }, { "epoch": 1.4924776509920779, "grad_norm": 0.360454797744751, "learning_rate": 4.609423375978369e-05, "loss": 0.2231, "step": 41070 }, { "epoch": 1.492841049494876, "grad_norm": 0.64405757188797, "learning_rate": 4.609144816722678e-05, "loss": 0.1212, "step": 41080 }, { "epoch": 1.4932044479976743, "grad_norm": 0.7874402403831482, "learning_rate": 4.608866166591441e-05, "loss": 
3.1348, "step": 41090 }, { "epoch": 1.4935678465004725, "grad_norm": 1.059163212776184, "learning_rate": 4.608587425596665e-05, "loss": 0.1464, "step": 41100 }, { "epoch": 1.4939312450032705, "grad_norm": 1.5717148780822754, "learning_rate": 4.608308593750359e-05, "loss": 0.1104, "step": 41110 }, { "epoch": 1.4942946435060689, "grad_norm": 0.6417020559310913, "learning_rate": 4.6080296710645365e-05, "loss": 0.1573, "step": 41120 }, { "epoch": 1.4946580420088669, "grad_norm": 0.8871016502380371, "learning_rate": 4.607750657551216e-05, "loss": 0.1087, "step": 41130 }, { "epoch": 1.495021440511665, "grad_norm": 2.3125686645507812, "learning_rate": 4.6074715532224196e-05, "loss": 0.1379, "step": 41140 }, { "epoch": 1.4953848390144633, "grad_norm": 2.087214708328247, "learning_rate": 4.607192358090172e-05, "loss": 0.1311, "step": 41150 }, { "epoch": 1.4957482375172615, "grad_norm": 3.1915369033813477, "learning_rate": 4.6069130721665035e-05, "loss": 0.1231, "step": 41160 }, { "epoch": 1.4961116360200597, "grad_norm": 0.4626937508583069, "learning_rate": 4.606633695463447e-05, "loss": 0.1544, "step": 41170 }, { "epoch": 1.4964750345228577, "grad_norm": 161.15541076660156, "learning_rate": 4.6063542279930395e-05, "loss": 3.413, "step": 41180 }, { "epoch": 1.496838433025656, "grad_norm": 0.9905474185943604, "learning_rate": 4.606074669767323e-05, "loss": 0.1285, "step": 41190 }, { "epoch": 1.497201831528454, "grad_norm": 0.5389920473098755, "learning_rate": 4.6057950207983426e-05, "loss": 0.1184, "step": 41200 }, { "epoch": 1.4975652300312523, "grad_norm": 2.7976090908050537, "learning_rate": 4.605515281098147e-05, "loss": 0.2461, "step": 41210 }, { "epoch": 1.4979286285340505, "grad_norm": 0.4971259534358978, "learning_rate": 4.60523545067879e-05, "loss": 0.2034, "step": 41220 }, { "epoch": 1.4982920270368485, "grad_norm": 1.5046378374099731, "learning_rate": 4.6049555295523274e-05, "loss": 0.1342, "step": 41230 }, { "epoch": 1.4986554255396467, "grad_norm": 
1.337195634841919, "learning_rate": 4.60467551773082e-05, "loss": 0.1198, "step": 41240 }, { "epoch": 1.499018824042445, "grad_norm": 1.2729612588882446, "learning_rate": 4.6043954152263336e-05, "loss": 0.1312, "step": 41250 }, { "epoch": 1.4993822225452431, "grad_norm": 0.9693030714988708, "learning_rate": 4.6041152220509365e-05, "loss": 0.1554, "step": 41260 }, { "epoch": 1.4997456210480413, "grad_norm": 0.48035889863967896, "learning_rate": 4.6038349382167e-05, "loss": 0.1611, "step": 41270 }, { "epoch": 1.5001090195508393, "grad_norm": 1.015608787536621, "learning_rate": 4.603554563735702e-05, "loss": 0.1322, "step": 41280 }, { "epoch": 1.5004724180536377, "grad_norm": 1.900895595550537, "learning_rate": 4.603274098620023e-05, "loss": 0.1819, "step": 41290 }, { "epoch": 1.5008358165564357, "grad_norm": 1.780765414237976, "learning_rate": 4.602993542881745e-05, "loss": 0.1396, "step": 41300 }, { "epoch": 1.501199215059234, "grad_norm": 3.2523162364959717, "learning_rate": 4.602712896532959e-05, "loss": 0.1333, "step": 41310 }, { "epoch": 1.5015626135620321, "grad_norm": 0.344933420419693, "learning_rate": 4.6024321595857554e-05, "loss": 0.1417, "step": 41320 }, { "epoch": 1.5019260120648303, "grad_norm": 0.7336893081665039, "learning_rate": 4.6021513320522304e-05, "loss": 0.1551, "step": 41330 }, { "epoch": 1.5022894105676285, "grad_norm": 0.9252750873565674, "learning_rate": 4.601870413944484e-05, "loss": 0.1049, "step": 41340 }, { "epoch": 1.5026528090704265, "grad_norm": 2.0064470767974854, "learning_rate": 4.60158940527462e-05, "loss": 0.1593, "step": 41350 }, { "epoch": 1.503016207573225, "grad_norm": 1.2280207872390747, "learning_rate": 4.601308306054746e-05, "loss": 0.1276, "step": 41360 }, { "epoch": 1.503379606076023, "grad_norm": 0.7326213717460632, "learning_rate": 4.601027116296974e-05, "loss": 0.1853, "step": 41370 }, { "epoch": 1.5037430045788212, "grad_norm": 2.208380937576294, "learning_rate": 4.600745836013418e-05, "loss": 0.1343, "step": 41380 
}, { "epoch": 1.5041064030816194, "grad_norm": 0.7113050818443298, "learning_rate": 4.6004644652161996e-05, "loss": 0.198, "step": 41390 }, { "epoch": 1.5044698015844173, "grad_norm": 1.8392037153244019, "learning_rate": 4.60018300391744e-05, "loss": 0.1283, "step": 41400 }, { "epoch": 1.5044698015844173, "eval_loss": 0.35653889179229736, "eval_runtime": 180.3372, "eval_samples_per_second": 41.112, "eval_steps_per_second": 5.14, "eval_wer": 0.16764390872619675, "step": 41400 }, { "epoch": 1.5048332000872158, "grad_norm": 1.0530060529708862, "learning_rate": 4.5999014521292674e-05, "loss": 0.1072, "step": 41410 }, { "epoch": 1.5051965985900138, "grad_norm": 1.0648863315582275, "learning_rate": 4.599619809863813e-05, "loss": 0.1939, "step": 41420 }, { "epoch": 1.505559997092812, "grad_norm": 1.4178556203842163, "learning_rate": 4.599338077133212e-05, "loss": 0.1325, "step": 41430 }, { "epoch": 1.5059233955956102, "grad_norm": 0.6156584024429321, "learning_rate": 4.5990562539496015e-05, "loss": 0.1337, "step": 41440 }, { "epoch": 1.5062867940984082, "grad_norm": 0.9399839639663696, "learning_rate": 4.598774340325126e-05, "loss": 0.1697, "step": 41450 }, { "epoch": 1.5066501926012066, "grad_norm": 0.9702737927436829, "learning_rate": 4.598492336271931e-05, "loss": 0.0978, "step": 41460 }, { "epoch": 1.5070135911040046, "grad_norm": 0.8199527263641357, "learning_rate": 4.598210241802169e-05, "loss": 0.185, "step": 41470 }, { "epoch": 1.5073769896068028, "grad_norm": 1.122827172279358, "learning_rate": 4.597928056927993e-05, "loss": 0.1234, "step": 41480 }, { "epoch": 1.507740388109601, "grad_norm": 1.9142221212387085, "learning_rate": 4.5976457816615606e-05, "loss": 0.1346, "step": 41490 }, { "epoch": 1.5081037866123992, "grad_norm": 1.0756717920303345, "learning_rate": 4.5973634160150345e-05, "loss": 0.1431, "step": 41500 }, { "epoch": 1.5084671851151974, "grad_norm": 1.6231876611709595, "learning_rate": 4.5970809600005826e-05, "loss": 0.1608, "step": 41510 }, { 
"epoch": 1.5088305836179954, "grad_norm": 0.3704961836338043, "learning_rate": 4.596798413630373e-05, "loss": 0.1501, "step": 41520 }, { "epoch": 1.5091939821207938, "grad_norm": 0.7752798199653625, "learning_rate": 4.59651577691658e-05, "loss": 0.1344, "step": 41530 }, { "epoch": 1.5095573806235918, "grad_norm": 2.622103214263916, "learning_rate": 4.596233049871382e-05, "loss": 0.1232, "step": 41540 }, { "epoch": 1.50992077912639, "grad_norm": 0.4142579436302185, "learning_rate": 4.595950232506961e-05, "loss": 0.1227, "step": 41550 }, { "epoch": 1.5102841776291882, "grad_norm": 0.9995001554489136, "learning_rate": 4.5956673248355e-05, "loss": 0.1143, "step": 41560 }, { "epoch": 1.5106475761319862, "grad_norm": 2.1356821060180664, "learning_rate": 4.595384326869191e-05, "loss": 0.1969, "step": 41570 }, { "epoch": 1.5110109746347846, "grad_norm": 0.9950689673423767, "learning_rate": 4.5951012386202274e-05, "loss": 0.1362, "step": 41580 }, { "epoch": 1.5113743731375826, "grad_norm": 0.6441085934638977, "learning_rate": 4.5948180601008054e-05, "loss": 0.1557, "step": 41590 }, { "epoch": 1.5117377716403808, "grad_norm": 2.1033713817596436, "learning_rate": 4.594534791323127e-05, "loss": 0.1718, "step": 41600 }, { "epoch": 1.512101170143179, "grad_norm": 1.3968003988265991, "learning_rate": 4.5942514322993965e-05, "loss": 0.2915, "step": 41610 }, { "epoch": 1.5124645686459772, "grad_norm": 0.7833322882652283, "learning_rate": 4.593967983041823e-05, "loss": 0.1379, "step": 41620 }, { "epoch": 1.5128279671487754, "grad_norm": 1.0050405263900757, "learning_rate": 4.5936844435626196e-05, "loss": 0.1307, "step": 41630 }, { "epoch": 1.5131913656515734, "grad_norm": 1.9530189037322998, "learning_rate": 4.593400813874003e-05, "loss": 0.139, "step": 41640 }, { "epoch": 1.5135547641543718, "grad_norm": 0.45743170380592346, "learning_rate": 4.593117093988194e-05, "loss": 0.1422, "step": 41650 }, { "epoch": 1.5139181626571698, "grad_norm": 1.310746431350708, "learning_rate": 
4.592833283917416e-05, "loss": 1.58, "step": 41660 }, { "epoch": 1.514281561159968, "grad_norm": 0.6696259379386902, "learning_rate": 4.592549383673898e-05, "loss": 0.1466, "step": 41670 }, { "epoch": 1.5146449596627662, "grad_norm": 1.0350476503372192, "learning_rate": 4.5922653932698734e-05, "loss": 0.1114, "step": 41680 }, { "epoch": 1.5150083581655642, "grad_norm": 1.5413391590118408, "learning_rate": 4.591981312717577e-05, "loss": 0.1225, "step": 41690 }, { "epoch": 1.5153717566683627, "grad_norm": 0.8129068613052368, "learning_rate": 4.5916971420292485e-05, "loss": 0.1951, "step": 41700 }, { "epoch": 1.5157351551711606, "grad_norm": 1.1114506721496582, "learning_rate": 4.591412881217133e-05, "loss": 0.1227, "step": 41710 }, { "epoch": 1.5160985536739588, "grad_norm": 0.5106993317604065, "learning_rate": 4.5911285302934775e-05, "loss": 0.1985, "step": 41720 }, { "epoch": 1.516461952176757, "grad_norm": 1.2125110626220703, "learning_rate": 4.590844089270534e-05, "loss": 0.1233, "step": 41730 }, { "epoch": 1.516825350679555, "grad_norm": 1.3580394983291626, "learning_rate": 4.590559558160558e-05, "loss": 0.1227, "step": 41740 }, { "epoch": 1.5171887491823535, "grad_norm": 0.4338432252407074, "learning_rate": 4.590274936975809e-05, "loss": 0.1462, "step": 41750 }, { "epoch": 1.5175521476851515, "grad_norm": 0.9010568857192993, "learning_rate": 4.58999022572855e-05, "loss": 0.1372, "step": 41760 }, { "epoch": 1.5179155461879497, "grad_norm": 0.737705647945404, "learning_rate": 4.589705424431048e-05, "loss": 0.1538, "step": 41770 }, { "epoch": 1.5182789446907479, "grad_norm": 1.0285004377365112, "learning_rate": 4.589420533095575e-05, "loss": 0.1101, "step": 41780 }, { "epoch": 1.518642343193546, "grad_norm": 0.5717383027076721, "learning_rate": 4.589135551734405e-05, "loss": 0.1157, "step": 41790 }, { "epoch": 1.5190057416963443, "grad_norm": 1.1417220830917358, "learning_rate": 4.588850480359818e-05, "loss": 0.1359, "step": 41800 }, { "epoch": 1.5193691401991423, 
"grad_norm": 2.673459768295288, "learning_rate": 4.588565318984095e-05, "loss": 0.1238, "step": 41810 }, { "epoch": 1.5197325387019407, "grad_norm": 1.1211605072021484, "learning_rate": 4.588280067619524e-05, "loss": 0.1642, "step": 41820 }, { "epoch": 1.5200959372047387, "grad_norm": 2.358137369155884, "learning_rate": 4.587994726278395e-05, "loss": 0.1234, "step": 41830 }, { "epoch": 1.5204593357075369, "grad_norm": 0.8301489949226379, "learning_rate": 4.587709294973002e-05, "loss": 0.1274, "step": 41840 }, { "epoch": 1.520822734210335, "grad_norm": 2.1138226985931396, "learning_rate": 4.587423773715644e-05, "loss": 0.1326, "step": 41850 }, { "epoch": 1.521186132713133, "grad_norm": 0.7757201194763184, "learning_rate": 4.587138162518623e-05, "loss": 0.1183, "step": 41860 }, { "epoch": 1.5215495312159315, "grad_norm": 0.7807698249816895, "learning_rate": 4.586852461394243e-05, "loss": 0.1485, "step": 41870 }, { "epoch": 1.5219129297187295, "grad_norm": 2.2938053607940674, "learning_rate": 4.586566670354817e-05, "loss": 0.1152, "step": 41880 }, { "epoch": 1.5222763282215277, "grad_norm": 1.2340235710144043, "learning_rate": 4.5862807894126566e-05, "loss": 0.1766, "step": 41890 }, { "epoch": 1.522639726724326, "grad_norm": 0.9382178783416748, "learning_rate": 4.5859948185800806e-05, "loss": 0.1273, "step": 41900 }, { "epoch": 1.523003125227124, "grad_norm": 4.5072526931762695, "learning_rate": 4.58570875786941e-05, "loss": 0.1333, "step": 41910 }, { "epoch": 1.5233665237299223, "grad_norm": 0.41228216886520386, "learning_rate": 4.5854226072929696e-05, "loss": 0.1766, "step": 41920 }, { "epoch": 1.5237299222327203, "grad_norm": 0.869669497013092, "learning_rate": 4.5851363668630886e-05, "loss": 0.1271, "step": 41930 }, { "epoch": 1.5240933207355187, "grad_norm": 1.169318675994873, "learning_rate": 4.584850036592101e-05, "loss": 0.083, "step": 41940 }, { "epoch": 1.5244567192383167, "grad_norm": 3.336904287338257, "learning_rate": 4.5845636164923426e-05, "loss": 
0.1357, "step": 41950 }, { "epoch": 1.524820117741115, "grad_norm": 1.167758584022522, "learning_rate": 4.584277106576156e-05, "loss": 0.1162, "step": 41960 }, { "epoch": 1.5251835162439131, "grad_norm": 0.9635423421859741, "learning_rate": 4.5839905068558835e-05, "loss": 0.2177, "step": 41970 }, { "epoch": 1.5255469147467111, "grad_norm": 1.3818042278289795, "learning_rate": 4.583703817343876e-05, "loss": 0.1246, "step": 41980 }, { "epoch": 1.5259103132495095, "grad_norm": 1.1299431324005127, "learning_rate": 4.583417038052484e-05, "loss": 0.1359, "step": 41990 }, { "epoch": 1.5262737117523075, "grad_norm": 2.181351661682129, "learning_rate": 4.583130168994065e-05, "loss": 0.1706, "step": 42000 }, { "epoch": 1.5262737117523075, "eval_loss": 0.3528802692890167, "eval_runtime": 181.0519, "eval_samples_per_second": 40.95, "eval_steps_per_second": 5.12, "eval_wer": 0.17613956105796286, "step": 42000 }, { "epoch": 1.5266371102551057, "grad_norm": 1.0958346128463745, "learning_rate": 4.582843210180979e-05, "loss": 0.1187, "step": 42010 }, { "epoch": 1.527000508757904, "grad_norm": 0.463438481092453, "learning_rate": 4.58255616162559e-05, "loss": 0.1539, "step": 42020 }, { "epoch": 1.527363907260702, "grad_norm": 0.5655350685119629, "learning_rate": 4.5822690233402656e-05, "loss": 0.1503, "step": 42030 }, { "epoch": 1.5277273057635004, "grad_norm": 1.5692224502563477, "learning_rate": 4.5819817953373764e-05, "loss": 0.1219, "step": 42040 }, { "epoch": 1.5280907042662983, "grad_norm": 0.48884958028793335, "learning_rate": 4.5816944776293016e-05, "loss": 0.1455, "step": 42050 }, { "epoch": 1.5284541027690965, "grad_norm": 0.8623284697532654, "learning_rate": 4.5814070702284175e-05, "loss": 0.1498, "step": 42060 }, { "epoch": 1.5288175012718948, "grad_norm": 0.5985013246536255, "learning_rate": 4.581119573147108e-05, "loss": 0.4594, "step": 42070 }, { "epoch": 1.529180899774693, "grad_norm": 0.9812720417976379, "learning_rate": 4.580831986397761e-05, "loss": 0.1234, "step": 
42080 }, { "epoch": 1.5295442982774912, "grad_norm": 0.5680709481239319, "learning_rate": 4.5805443099927666e-05, "loss": 0.1061, "step": 42090 }, { "epoch": 1.5299076967802892, "grad_norm": 0.6387588977813721, "learning_rate": 4.5802565439445225e-05, "loss": 0.1436, "step": 42100 }, { "epoch": 1.5302710952830876, "grad_norm": 1.1865098476409912, "learning_rate": 4.5799686882654236e-05, "loss": 0.1155, "step": 42110 }, { "epoch": 1.5306344937858856, "grad_norm": 0.7588171362876892, "learning_rate": 4.579680742967875e-05, "loss": 0.1799, "step": 42120 }, { "epoch": 1.5309978922886838, "grad_norm": 0.9183505773544312, "learning_rate": 4.579392708064283e-05, "loss": 0.1133, "step": 42130 }, { "epoch": 1.531361290791482, "grad_norm": 1.1988872289657593, "learning_rate": 4.5791045835670575e-05, "loss": 0.1107, "step": 42140 }, { "epoch": 1.53172468929428, "grad_norm": 0.6209965944290161, "learning_rate": 4.578816369488613e-05, "loss": 0.1518, "step": 42150 }, { "epoch": 1.5320880877970784, "grad_norm": 1.3487142324447632, "learning_rate": 4.5785280658413674e-05, "loss": 0.1126, "step": 42160 }, { "epoch": 1.5324514862998764, "grad_norm": 0.6516602039337158, "learning_rate": 4.578239672637743e-05, "loss": 0.1498, "step": 42170 }, { "epoch": 1.5328148848026746, "grad_norm": 2.4193315505981445, "learning_rate": 4.577951189890166e-05, "loss": 0.1408, "step": 42180 }, { "epoch": 1.5331782833054728, "grad_norm": 0.6747106313705444, "learning_rate": 4.577662617611065e-05, "loss": 0.1226, "step": 42190 }, { "epoch": 1.533541681808271, "grad_norm": 3.124244451522827, "learning_rate": 4.5773739558128744e-05, "loss": 0.1512, "step": 42200 }, { "epoch": 1.5339050803110692, "grad_norm": 0.8625807762145996, "learning_rate": 4.5770852045080314e-05, "loss": 0.1187, "step": 42210 }, { "epoch": 1.5342684788138672, "grad_norm": 0.9007976651191711, "learning_rate": 4.576796363708977e-05, "loss": 0.2001, "step": 42220 }, { "epoch": 1.5346318773166656, "grad_norm": 0.7381039261817932, 
"learning_rate": 4.576507433428157e-05, "loss": 0.1063, "step": 42230 }, { "epoch": 1.5349952758194636, "grad_norm": 0.9550501704216003, "learning_rate": 4.57621841367802e-05, "loss": 0.1448, "step": 42240 }, { "epoch": 1.5353586743222618, "grad_norm": 0.5087346434593201, "learning_rate": 4.5759293044710175e-05, "loss": 0.1665, "step": 42250 }, { "epoch": 1.53572207282506, "grad_norm": 0.4684658646583557, "learning_rate": 4.575640105819609e-05, "loss": 0.1089, "step": 42260 }, { "epoch": 1.536085471327858, "grad_norm": 0.6353893876075745, "learning_rate": 4.575350817736252e-05, "loss": 0.2437, "step": 42270 }, { "epoch": 1.5364488698306564, "grad_norm": 0.7524349689483643, "learning_rate": 4.575061440233414e-05, "loss": 0.1858, "step": 42280 }, { "epoch": 1.5368122683334544, "grad_norm": 0.9425112009048462, "learning_rate": 4.57477197332356e-05, "loss": 0.0948, "step": 42290 }, { "epoch": 1.5371756668362526, "grad_norm": 1.419872522354126, "learning_rate": 4.574482417019165e-05, "loss": 0.1272, "step": 42300 }, { "epoch": 1.5375390653390508, "grad_norm": 0.6511875987052917, "learning_rate": 4.574192771332703e-05, "loss": 0.176, "step": 42310 }, { "epoch": 1.5379024638418488, "grad_norm": 1.2612382173538208, "learning_rate": 4.573903036276655e-05, "loss": 0.1681, "step": 42320 }, { "epoch": 1.5382658623446472, "grad_norm": 0.828471839427948, "learning_rate": 4.573613211863504e-05, "loss": 0.1218, "step": 42330 }, { "epoch": 1.5386292608474452, "grad_norm": 0.7098140716552734, "learning_rate": 4.573323298105737e-05, "loss": 0.1264, "step": 42340 }, { "epoch": 1.5389926593502434, "grad_norm": 0.612920343875885, "learning_rate": 4.573033295015847e-05, "loss": 0.1457, "step": 42350 }, { "epoch": 1.5393560578530416, "grad_norm": 2.700010299682617, "learning_rate": 4.572743202606328e-05, "loss": 0.1416, "step": 42360 }, { "epoch": 1.5397194563558398, "grad_norm": 0.4544985890388489, "learning_rate": 4.5724530208896784e-05, "loss": 0.2174, "step": 42370 }, { "epoch": 
1.540082854858638, "grad_norm": 1.7702118158340454, "learning_rate": 4.5721627498784025e-05, "loss": 0.7935, "step": 42380 }, { "epoch": 1.540446253361436, "grad_norm": 2.3855764865875244, "learning_rate": 4.571872389585007e-05, "loss": 0.1142, "step": 42390 }, { "epoch": 1.5408096518642345, "grad_norm": 1.9382286071777344, "learning_rate": 4.5715819400220004e-05, "loss": 0.1349, "step": 42400 }, { "epoch": 1.5411730503670324, "grad_norm": 1.8577841520309448, "learning_rate": 4.5712914012019003e-05, "loss": 0.1154, "step": 42410 }, { "epoch": 1.5415364488698307, "grad_norm": 1.4880726337432861, "learning_rate": 4.571000773137223e-05, "loss": 0.1402, "step": 42420 }, { "epoch": 1.5418998473726289, "grad_norm": 0.6903501152992249, "learning_rate": 4.570710055840491e-05, "loss": 0.1137, "step": 42430 }, { "epoch": 1.5422632458754268, "grad_norm": 1.4438791275024414, "learning_rate": 4.57041924932423e-05, "loss": 0.1285, "step": 42440 }, { "epoch": 1.5426266443782253, "grad_norm": 0.41870322823524475, "learning_rate": 4.57012835360097e-05, "loss": 0.1366, "step": 42450 }, { "epoch": 1.5429900428810233, "grad_norm": 0.9365738034248352, "learning_rate": 4.569837368683245e-05, "loss": 0.1051, "step": 42460 }, { "epoch": 1.5433534413838215, "grad_norm": 1.940673828125, "learning_rate": 4.569546294583593e-05, "loss": 0.157, "step": 42470 }, { "epoch": 1.5437168398866197, "grad_norm": 1.1944515705108643, "learning_rate": 4.5692551313145536e-05, "loss": 1.5159, "step": 42480 }, { "epoch": 1.5440802383894179, "grad_norm": 0.6140870451927185, "learning_rate": 4.568963878888673e-05, "loss": 0.0986, "step": 42490 }, { "epoch": 1.544443636892216, "grad_norm": 1.8208271265029907, "learning_rate": 4.5686725373185016e-05, "loss": 0.1519, "step": 42500 }, { "epoch": 1.544807035395014, "grad_norm": 1.2457455396652222, "learning_rate": 4.56838110661659e-05, "loss": 0.1634, "step": 42510 }, { "epoch": 1.5451704338978125, "grad_norm": 0.5140019655227661, "learning_rate": 
4.568089586795496e-05, "loss": 0.1628, "step": 42520 }, { "epoch": 1.5455338324006105, "grad_norm": 0.8539334535598755, "learning_rate": 4.5677979778677796e-05, "loss": 0.1243, "step": 42530 }, { "epoch": 1.5458972309034087, "grad_norm": 1.2581802606582642, "learning_rate": 4.567506279846006e-05, "loss": 0.1715, "step": 42540 }, { "epoch": 1.546260629406207, "grad_norm": 1.8808507919311523, "learning_rate": 4.567214492742743e-05, "loss": 0.1415, "step": 42550 }, { "epoch": 1.5466240279090049, "grad_norm": 1.948970079421997, "learning_rate": 4.566922616570562e-05, "loss": 0.1147, "step": 42560 }, { "epoch": 1.5469874264118033, "grad_norm": 1.5000864267349243, "learning_rate": 4.566630651342041e-05, "loss": 0.1614, "step": 42570 }, { "epoch": 1.5473508249146013, "grad_norm": 1.5625576972961426, "learning_rate": 4.566338597069757e-05, "loss": 0.1145, "step": 42580 }, { "epoch": 1.5477142234173995, "grad_norm": 1.2443382740020752, "learning_rate": 4.566046453766295e-05, "loss": 0.1203, "step": 42590 }, { "epoch": 1.5480776219201977, "grad_norm": 1.5014569759368896, "learning_rate": 4.5657542214442426e-05, "loss": 0.1459, "step": 42600 }, { "epoch": 1.5480776219201977, "eval_loss": 0.351544588804245, "eval_runtime": 180.3519, "eval_samples_per_second": 41.109, "eval_steps_per_second": 5.14, "eval_wer": 0.17858115344818196, "step": 42600 }, { "epoch": 1.5484410204229957, "grad_norm": 1.0584172010421753, "learning_rate": 4.565461900116191e-05, "loss": 0.1046, "step": 42610 }, { "epoch": 1.5488044189257941, "grad_norm": 0.6157267689704895, "learning_rate": 4.565169489794735e-05, "loss": 0.135, "step": 42620 }, { "epoch": 1.549167817428592, "grad_norm": 0.898263692855835, "learning_rate": 4.564876990492474e-05, "loss": 0.1157, "step": 42630 }, { "epoch": 1.5495312159313903, "grad_norm": 0.6782193779945374, "learning_rate": 4.5645844022220096e-05, "loss": 0.2191, "step": 42640 }, { "epoch": 1.5498946144341885, "grad_norm": 0.6636195182800293, "learning_rate": 
4.5642917249959493e-05, "loss": 0.1709, "step": 42650 }, { "epoch": 1.5502580129369867, "grad_norm": 1.3367676734924316, "learning_rate": 4.563998958826904e-05, "loss": 0.1197, "step": 42660 }, { "epoch": 1.550621411439785, "grad_norm": 0.470985472202301, "learning_rate": 4.563706103727486e-05, "loss": 0.1395, "step": 42670 }, { "epoch": 1.550984809942583, "grad_norm": 1.1232322454452515, "learning_rate": 4.563413159710316e-05, "loss": 0.1139, "step": 42680 }, { "epoch": 1.5513482084453813, "grad_norm": 1.0105756521224976, "learning_rate": 4.563120126788013e-05, "loss": 0.1243, "step": 42690 }, { "epoch": 1.5517116069481793, "grad_norm": 0.785205602645874, "learning_rate": 4.562827004973206e-05, "loss": 0.1588, "step": 42700 }, { "epoch": 1.5520750054509775, "grad_norm": 1.4863699674606323, "learning_rate": 4.5625337942785224e-05, "loss": 0.0913, "step": 42710 }, { "epoch": 1.5524384039537757, "grad_norm": 0.33174383640289307, "learning_rate": 4.562240494716596e-05, "loss": 0.158, "step": 42720 }, { "epoch": 1.5528018024565737, "grad_norm": 0.7735195159912109, "learning_rate": 4.5619471063000644e-05, "loss": 0.1295, "step": 42730 }, { "epoch": 1.5531652009593722, "grad_norm": 3.2964320182800293, "learning_rate": 4.561653629041568e-05, "loss": 0.1144, "step": 42740 }, { "epoch": 1.5535285994621701, "grad_norm": 0.6756449937820435, "learning_rate": 4.5613600629537526e-05, "loss": 0.119, "step": 42750 }, { "epoch": 1.5538919979649684, "grad_norm": 1.7608799934387207, "learning_rate": 4.5610664080492655e-05, "loss": 0.1239, "step": 42760 }, { "epoch": 1.5542553964677666, "grad_norm": 0.8312143087387085, "learning_rate": 4.5607726643407614e-05, "loss": 0.1434, "step": 42770 }, { "epoch": 1.5546187949705648, "grad_norm": 1.3083513975143433, "learning_rate": 4.560478831840894e-05, "loss": 0.125, "step": 42780 }, { "epoch": 1.554982193473363, "grad_norm": 1.4495130777359009, "learning_rate": 4.560184910562326e-05, "loss": 0.1172, "step": 42790 }, { "epoch": 
1.555345591976161, "grad_norm": 0.5549319982528687, "learning_rate": 4.559890900517721e-05, "loss": 1.7985, "step": 42800 }, { "epoch": 1.5557089904789594, "grad_norm": 1.0677647590637207, "learning_rate": 4.5595968017197446e-05, "loss": 0.1485, "step": 42810 }, { "epoch": 1.5560723889817574, "grad_norm": 0.5432078242301941, "learning_rate": 4.559302614181071e-05, "loss": 0.1372, "step": 42820 }, { "epoch": 1.5564357874845556, "grad_norm": 2.0982048511505127, "learning_rate": 4.559008337914375e-05, "loss": 0.1543, "step": 42830 }, { "epoch": 1.5567991859873538, "grad_norm": 2.8568451404571533, "learning_rate": 4.558713972932335e-05, "loss": 0.1271, "step": 42840 }, { "epoch": 1.5571625844901518, "grad_norm": 0.9933029413223267, "learning_rate": 4.558419519247635e-05, "loss": 1.0891, "step": 42850 }, { "epoch": 1.5575259829929502, "grad_norm": 0.6010461449623108, "learning_rate": 4.5581249768729614e-05, "loss": 0.1509, "step": 42860 }, { "epoch": 1.5578893814957482, "grad_norm": 0.6242499351501465, "learning_rate": 4.557830345821006e-05, "loss": 0.1527, "step": 42870 }, { "epoch": 1.5582527799985464, "grad_norm": 0.48831334710121155, "learning_rate": 4.557535626104463e-05, "loss": 0.1451, "step": 42880 }, { "epoch": 1.5586161785013446, "grad_norm": 1.1660668849945068, "learning_rate": 4.55724081773603e-05, "loss": 0.1558, "step": 42890 }, { "epoch": 1.5589795770041426, "grad_norm": 1.067808747291565, "learning_rate": 4.5569459207284106e-05, "loss": 0.1634, "step": 42900 }, { "epoch": 1.559342975506941, "grad_norm": 1.6434768438339233, "learning_rate": 4.556650935094309e-05, "loss": 0.1269, "step": 42910 }, { "epoch": 1.559706374009739, "grad_norm": 0.4303635358810425, "learning_rate": 4.556355860846437e-05, "loss": 0.1536, "step": 42920 }, { "epoch": 1.5600697725125372, "grad_norm": 3.148212194442749, "learning_rate": 4.5560606979975075e-05, "loss": 0.1062, "step": 42930 }, { "epoch": 1.5604331710153354, "grad_norm": 3.3599109649658203, "learning_rate": 
4.5557654465602376e-05, "loss": 0.1158, "step": 42940 }, { "epoch": 1.5607965695181336, "grad_norm": 3.2170286178588867, "learning_rate": 4.5554701065473494e-05, "loss": 0.1491, "step": 42950 }, { "epoch": 1.5611599680209318, "grad_norm": 1.1147798299789429, "learning_rate": 4.555174677971567e-05, "loss": 0.1143, "step": 42960 }, { "epoch": 1.5615233665237298, "grad_norm": 0.4949367046356201, "learning_rate": 4.5548791608456206e-05, "loss": 0.1639, "step": 42970 }, { "epoch": 1.5618867650265282, "grad_norm": 0.7166339755058289, "learning_rate": 4.554583555182244e-05, "loss": 0.137, "step": 42980 }, { "epoch": 1.5622501635293262, "grad_norm": 0.48903581500053406, "learning_rate": 4.55428786099417e-05, "loss": 0.126, "step": 42990 }, { "epoch": 1.5626135620321244, "grad_norm": 0.43728914856910706, "learning_rate": 4.553992078294142e-05, "loss": 0.1371, "step": 43000 }, { "epoch": 1.5629769605349226, "grad_norm": 0.7486665844917297, "learning_rate": 4.5536962070949035e-05, "loss": 0.1233, "step": 43010 }, { "epoch": 1.5633403590377206, "grad_norm": 0.7540434002876282, "learning_rate": 4.5534002474092025e-05, "loss": 0.1356, "step": 43020 }, { "epoch": 1.563703757540519, "grad_norm": 1.2763710021972656, "learning_rate": 4.55310419924979e-05, "loss": 0.1234, "step": 43030 }, { "epoch": 1.564067156043317, "grad_norm": 0.5709404945373535, "learning_rate": 4.552808062629424e-05, "loss": 0.1224, "step": 43040 }, { "epoch": 1.5644305545461152, "grad_norm": 0.5243006348609924, "learning_rate": 4.552511837560862e-05, "loss": 0.1175, "step": 43050 }, { "epoch": 1.5647939530489134, "grad_norm": 1.3225644826889038, "learning_rate": 4.552215524056867e-05, "loss": 0.1408, "step": 43060 }, { "epoch": 1.5651573515517117, "grad_norm": 0.2830749452114105, "learning_rate": 4.551919122130208e-05, "loss": 0.1588, "step": 43070 }, { "epoch": 1.5655207500545099, "grad_norm": 1.7666617631912231, "learning_rate": 4.551622631793654e-05, "loss": 0.109, "step": 43080 }, { "epoch": 
1.5658841485573078, "grad_norm": 0.6468254327774048, "learning_rate": 4.551326053059981e-05, "loss": 0.1199, "step": 43090 }, { "epoch": 1.5662475470601063, "grad_norm": 0.7526164650917053, "learning_rate": 4.551029385941967e-05, "loss": 0.1648, "step": 43100 }, { "epoch": 1.5666109455629043, "grad_norm": 3.8184330463409424, "learning_rate": 4.550732630452394e-05, "loss": 0.1392, "step": 43110 }, { "epoch": 1.5669743440657025, "grad_norm": 0.9396213293075562, "learning_rate": 4.550435786604049e-05, "loss": 0.1659, "step": 43120 }, { "epoch": 1.5673377425685007, "grad_norm": 1.536440372467041, "learning_rate": 4.550168551604358e-05, "loss": 0.1227, "step": 43130 }, { "epoch": 1.5677011410712987, "grad_norm": 1.6777888536453247, "learning_rate": 4.549871539909584e-05, "loss": 0.128, "step": 43140 }, { "epoch": 1.568064539574097, "grad_norm": 21.312944412231445, "learning_rate": 4.5495744398931396e-05, "loss": 0.2651, "step": 43150 }, { "epoch": 1.568427938076895, "grad_norm": 0.8739009499549866, "learning_rate": 4.549277251567824e-05, "loss": 0.12, "step": 43160 }, { "epoch": 1.5687913365796933, "grad_norm": 0.3690776526927948, "learning_rate": 4.548979974946444e-05, "loss": 0.1665, "step": 43170 }, { "epoch": 1.5691547350824915, "grad_norm": 1.3902113437652588, "learning_rate": 4.548682610041807e-05, "loss": 0.1502, "step": 43180 }, { "epoch": 1.5695181335852895, "grad_norm": 0.9234703779220581, "learning_rate": 4.5483851568667244e-05, "loss": 0.1168, "step": 43190 }, { "epoch": 1.569881532088088, "grad_norm": 0.7674643397331238, "learning_rate": 4.5480876154340145e-05, "loss": 0.1404, "step": 43200 }, { "epoch": 1.569881532088088, "eval_loss": 0.3601061701774597, "eval_runtime": 180.5599, "eval_samples_per_second": 41.061, "eval_steps_per_second": 5.134, "eval_wer": 0.16855156388984696, "step": 43200 }, { "epoch": 1.5702449305908859, "grad_norm": 0.7668557167053223, "learning_rate": 4.5477899857564966e-05, "loss": 0.1842, "step": 43210 }, { "epoch": 
1.570608329093684, "grad_norm": 0.7534570693969727, "learning_rate": 4.5474922678469936e-05, "loss": 0.1558, "step": 43220 }, { "epoch": 1.5709717275964823, "grad_norm": 0.9190795421600342, "learning_rate": 4.547194461718334e-05, "loss": 0.1808, "step": 43230 }, { "epoch": 1.5713351260992805, "grad_norm": 0.4574483633041382, "learning_rate": 4.54689656738335e-05, "loss": 0.1146, "step": 43240 }, { "epoch": 1.5716985246020787, "grad_norm": 1.1554951667785645, "learning_rate": 4.5465985848548744e-05, "loss": 0.8771, "step": 43250 }, { "epoch": 1.5720619231048767, "grad_norm": 1.175336480140686, "learning_rate": 4.546300514145748e-05, "loss": 0.1337, "step": 43260 }, { "epoch": 1.5724253216076751, "grad_norm": 0.4004783630371094, "learning_rate": 4.5460023552688136e-05, "loss": 0.1963, "step": 43270 }, { "epoch": 1.572788720110473, "grad_norm": 0.5944772362709045, "learning_rate": 4.5457041082369164e-05, "loss": 0.1223, "step": 43280 }, { "epoch": 1.5731521186132713, "grad_norm": 0.7069734334945679, "learning_rate": 4.545405773062909e-05, "loss": 3.2472, "step": 43290 }, { "epoch": 1.5735155171160695, "grad_norm": 1.0471086502075195, "learning_rate": 4.545107349759644e-05, "loss": 0.1558, "step": 43300 }, { "epoch": 1.5738789156188675, "grad_norm": 0.6987308263778687, "learning_rate": 4.54480883833998e-05, "loss": 0.4641, "step": 43310 }, { "epoch": 1.574242314121666, "grad_norm": 0.599287211894989, "learning_rate": 4.5445102388167785e-05, "loss": 0.1592, "step": 43320 }, { "epoch": 1.574605712624464, "grad_norm": 0.9643434286117554, "learning_rate": 4.544211551202904e-05, "loss": 0.1165, "step": 43330 }, { "epoch": 1.5749691111272621, "grad_norm": 0.5655382871627808, "learning_rate": 4.5439127755112285e-05, "loss": 0.1234, "step": 43340 }, { "epoch": 1.5753325096300603, "grad_norm": 1.7126801013946533, "learning_rate": 4.5436139117546235e-05, "loss": 0.1647, "step": 43350 }, { "epoch": 1.5756959081328585, "grad_norm": 0.6298018097877502, "learning_rate": 
4.543314959945966e-05, "loss": 0.1028, "step": 43360 }, { "epoch": 1.5760593066356567, "grad_norm": 0.5706765651702881, "learning_rate": 4.543015920098137e-05, "loss": 0.5641, "step": 43370 }, { "epoch": 1.5764227051384547, "grad_norm": 0.9098716974258423, "learning_rate": 4.542716792224022e-05, "loss": 0.1233, "step": 43380 }, { "epoch": 1.5767861036412532, "grad_norm": 1.0217915773391724, "learning_rate": 4.5424175763365075e-05, "loss": 0.1306, "step": 43390 }, { "epoch": 1.5771495021440511, "grad_norm": 0.651685893535614, "learning_rate": 4.5421182724484866e-05, "loss": 0.1433, "step": 43400 }, { "epoch": 1.5775129006468493, "grad_norm": 0.6281771659851074, "learning_rate": 4.541818880572856e-05, "loss": 0.1313, "step": 43410 }, { "epoch": 1.5778762991496476, "grad_norm": 1.7486456632614136, "learning_rate": 4.541519400722514e-05, "loss": 0.122, "step": 43420 }, { "epoch": 1.5782396976524455, "grad_norm": 1.2109237909317017, "learning_rate": 4.541219832910364e-05, "loss": 0.1297, "step": 43430 }, { "epoch": 1.578603096155244, "grad_norm": 1.041900634765625, "learning_rate": 4.540920177149315e-05, "loss": 0.1014, "step": 43440 }, { "epoch": 1.578966494658042, "grad_norm": 0.7674359083175659, "learning_rate": 4.540620433452277e-05, "loss": 0.1838, "step": 43450 }, { "epoch": 1.5793298931608402, "grad_norm": 1.9548803567886353, "learning_rate": 4.540320601832165e-05, "loss": 0.1345, "step": 43460 }, { "epoch": 1.5796932916636384, "grad_norm": 0.39995163679122925, "learning_rate": 4.540020682301898e-05, "loss": 0.1305, "step": 43470 }, { "epoch": 1.5800566901664364, "grad_norm": 0.9415978789329529, "learning_rate": 4.539720674874398e-05, "loss": 0.123, "step": 43480 }, { "epoch": 1.5804200886692348, "grad_norm": 0.8457926511764526, "learning_rate": 4.539420579562592e-05, "loss": 0.1145, "step": 43490 }, { "epoch": 1.5807834871720328, "grad_norm": 2.9950082302093506, "learning_rate": 4.539120396379409e-05, "loss": 0.1551, "step": 43500 }, { "epoch": 
1.581146885674831, "grad_norm": 1.8456460237503052, "learning_rate": 4.5388201253377834e-05, "loss": 0.0885, "step": 43510 }, { "epoch": 1.5815102841776292, "grad_norm": 0.4476306736469269, "learning_rate": 4.538519766450653e-05, "loss": 0.1351, "step": 43520 }, { "epoch": 1.5818736826804274, "grad_norm": 0.7363295555114746, "learning_rate": 4.5382193197309584e-05, "loss": 0.1045, "step": 43530 }, { "epoch": 1.5822370811832256, "grad_norm": 2.1484272480010986, "learning_rate": 4.5379187851916463e-05, "loss": 0.1304, "step": 43540 }, { "epoch": 1.5826004796860236, "grad_norm": 0.5627908706665039, "learning_rate": 4.537618162845664e-05, "loss": 0.1454, "step": 43550 }, { "epoch": 1.582963878188822, "grad_norm": 1.4841351509094238, "learning_rate": 4.537317452705964e-05, "loss": 0.1301, "step": 43560 }, { "epoch": 1.58332727669162, "grad_norm": 0.7127716541290283, "learning_rate": 4.537016654785505e-05, "loss": 0.1608, "step": 43570 }, { "epoch": 1.5836906751944182, "grad_norm": 1.0103297233581543, "learning_rate": 4.536715769097246e-05, "loss": 0.1137, "step": 43580 }, { "epoch": 1.5840540736972164, "grad_norm": 0.8980743288993835, "learning_rate": 4.536414795654151e-05, "loss": 0.1213, "step": 43590 }, { "epoch": 1.5844174722000144, "grad_norm": 0.5678355097770691, "learning_rate": 4.536113734469188e-05, "loss": 0.1253, "step": 43600 }, { "epoch": 1.5847808707028128, "grad_norm": 0.6713634729385376, "learning_rate": 4.535812585555328e-05, "loss": 0.1144, "step": 43610 }, { "epoch": 1.5851442692056108, "grad_norm": 0.4925456643104553, "learning_rate": 4.5355113489255484e-05, "loss": 0.1448, "step": 43620 }, { "epoch": 1.585507667708409, "grad_norm": 1.3464380502700806, "learning_rate": 4.5352100245928267e-05, "loss": 0.1213, "step": 43630 }, { "epoch": 1.5858710662112072, "grad_norm": 1.3755130767822266, "learning_rate": 4.5349086125701456e-05, "loss": 0.1277, "step": 43640 }, { "epoch": 1.5862344647140054, "grad_norm": 1.2649788856506348, "learning_rate": 
4.534607112870494e-05, "loss": 0.1379, "step": 43650 }, { "epoch": 1.5865978632168036, "grad_norm": 0.6860102415084839, "learning_rate": 4.53430552550686e-05, "loss": 0.1209, "step": 43660 }, { "epoch": 1.5869612617196016, "grad_norm": 0.9149149656295776, "learning_rate": 4.534003850492239e-05, "loss": 0.158, "step": 43670 }, { "epoch": 1.5873246602224, "grad_norm": 1.1880120038986206, "learning_rate": 4.53370208783963e-05, "loss": 0.1283, "step": 43680 }, { "epoch": 1.587688058725198, "grad_norm": 2.6330199241638184, "learning_rate": 4.533400237562033e-05, "loss": 0.1414, "step": 43690 }, { "epoch": 1.5880514572279962, "grad_norm": 0.7637589573860168, "learning_rate": 4.533098299672455e-05, "loss": 0.1267, "step": 43700 }, { "epoch": 1.5884148557307944, "grad_norm": 1.7144758701324463, "learning_rate": 4.5327962741839044e-05, "loss": 0.1222, "step": 43710 }, { "epoch": 1.5887782542335924, "grad_norm": 1.0269776582717896, "learning_rate": 4.532494161109396e-05, "loss": 0.1862, "step": 43720 }, { "epoch": 1.5891416527363909, "grad_norm": 0.8622583746910095, "learning_rate": 4.532191960461946e-05, "loss": 0.1894, "step": 43730 }, { "epoch": 1.5895050512391888, "grad_norm": 1.0310677289962769, "learning_rate": 4.531889672254575e-05, "loss": 0.1284, "step": 43740 }, { "epoch": 1.589868449741987, "grad_norm": 2.753690242767334, "learning_rate": 4.531587296500306e-05, "loss": 0.1404, "step": 43750 }, { "epoch": 1.5902318482447853, "grad_norm": 0.5997269749641418, "learning_rate": 4.53128483321217e-05, "loss": 0.1119, "step": 43760 }, { "epoch": 1.5905952467475832, "grad_norm": 0.8589096665382385, "learning_rate": 4.5309822824031976e-05, "loss": 0.1319, "step": 43770 }, { "epoch": 1.5909586452503817, "grad_norm": 0.7129044532775879, "learning_rate": 4.530679644086425e-05, "loss": 0.2389, "step": 43780 }, { "epoch": 1.5913220437531796, "grad_norm": 0.6947050094604492, "learning_rate": 4.530376918274892e-05, "loss": 0.115, "step": 43790 }, { "epoch": 1.5916854422559779, 
"grad_norm": 0.9983404278755188, "learning_rate": 4.530074104981641e-05, "loss": 0.1446, "step": 43800 }, { "epoch": 1.5916854422559779, "eval_loss": 0.3569597005844116, "eval_runtime": 180.6536, "eval_samples_per_second": 41.04, "eval_steps_per_second": 5.131, "eval_wer": 0.17362535625465172, "step": 43800 }, { "epoch": 1.592048840758776, "grad_norm": 0.712482750415802, "learning_rate": 4.529771204219721e-05, "loss": 0.1434, "step": 43810 }, { "epoch": 1.5924122392615743, "grad_norm": 0.5298041105270386, "learning_rate": 4.5294682160021806e-05, "loss": 0.1771, "step": 43820 }, { "epoch": 1.5927756377643725, "grad_norm": 1.337560772895813, "learning_rate": 4.529165140342076e-05, "loss": 0.1144, "step": 43830 }, { "epoch": 1.5931390362671705, "grad_norm": 0.5129504203796387, "learning_rate": 4.5288619772524654e-05, "loss": 0.1001, "step": 43840 }, { "epoch": 1.593502434769969, "grad_norm": 0.7407031059265137, "learning_rate": 4.528558726746411e-05, "loss": 0.1302, "step": 43850 }, { "epoch": 1.5938658332727669, "grad_norm": 0.9279839992523193, "learning_rate": 4.5282553888369785e-05, "loss": 0.1452, "step": 43860 }, { "epoch": 1.594229231775565, "grad_norm": 0.5245470404624939, "learning_rate": 4.5279519635372374e-05, "loss": 0.1756, "step": 43870 }, { "epoch": 1.5945926302783633, "grad_norm": 0.6099745631217957, "learning_rate": 4.527648450860262e-05, "loss": 0.2019, "step": 43880 }, { "epoch": 1.5949560287811613, "grad_norm": 0.9615786075592041, "learning_rate": 4.52734485081913e-05, "loss": 0.1252, "step": 43890 }, { "epoch": 1.5953194272839597, "grad_norm": 1.52881920337677, "learning_rate": 4.527041163426921e-05, "loss": 1.8751, "step": 43900 }, { "epoch": 1.5956828257867577, "grad_norm": 0.8344588875770569, "learning_rate": 4.526737388696721e-05, "loss": 0.129, "step": 43910 }, { "epoch": 1.596046224289556, "grad_norm": 0.5732100605964661, "learning_rate": 4.526433526641617e-05, "loss": 0.1475, "step": 43920 }, { "epoch": 1.596409622792354, "grad_norm": 
0.8947811722755432, "learning_rate": 4.526129577274704e-05, "loss": 0.4153, "step": 43930 }, { "epoch": 1.5967730212951523, "grad_norm": 1.6199461221694946, "learning_rate": 4.5258255406090746e-05, "loss": 0.1379, "step": 43940 }, { "epoch": 1.5971364197979505, "grad_norm": 1.3465640544891357, "learning_rate": 4.525521416657832e-05, "loss": 0.1515, "step": 43950 }, { "epoch": 1.5974998183007485, "grad_norm": 1.7875219583511353, "learning_rate": 4.525217205434078e-05, "loss": 0.1119, "step": 43960 }, { "epoch": 1.597863216803547, "grad_norm": 0.5457040071487427, "learning_rate": 4.52491290695092e-05, "loss": 0.1499, "step": 43970 }, { "epoch": 1.598226615306345, "grad_norm": 1.2962692975997925, "learning_rate": 4.52460852122147e-05, "loss": 0.1311, "step": 43980 }, { "epoch": 1.5985900138091431, "grad_norm": 0.679913341999054, "learning_rate": 4.5243040482588426e-05, "loss": 0.1298, "step": 43990 }, { "epoch": 1.5989534123119413, "grad_norm": 1.5390740633010864, "learning_rate": 4.523999488076156e-05, "loss": 0.1483, "step": 44000 }, { "epoch": 1.5993168108147393, "grad_norm": 3.566751003265381, "learning_rate": 4.523694840686532e-05, "loss": 0.1303, "step": 44010 }, { "epoch": 1.5996802093175377, "grad_norm": 0.7023512125015259, "learning_rate": 4.5233901061030984e-05, "loss": 0.1305, "step": 44020 }, { "epoch": 1.6000436078203357, "grad_norm": 1.47295343875885, "learning_rate": 4.523085284338985e-05, "loss": 0.1173, "step": 44030 }, { "epoch": 1.600407006323134, "grad_norm": 0.7622318863868713, "learning_rate": 4.522780375407324e-05, "loss": 0.1494, "step": 44040 }, { "epoch": 1.6007704048259321, "grad_norm": 2.0168585777282715, "learning_rate": 4.522475379321254e-05, "loss": 0.1575, "step": 44050 }, { "epoch": 1.6011338033287301, "grad_norm": 0.9191824793815613, "learning_rate": 4.522170296093916e-05, "loss": 0.1111, "step": 44060 }, { "epoch": 1.6014972018315285, "grad_norm": 0.5007340908050537, "learning_rate": 4.521865125738455e-05, "loss": 0.193, "step": 
44070 }, { "epoch": 1.6018606003343265, "grad_norm": 0.8389549851417542, "learning_rate": 4.5215598682680186e-05, "loss": 0.1227, "step": 44080 }, { "epoch": 1.6022239988371247, "grad_norm": 0.7387205362319946, "learning_rate": 4.521254523695761e-05, "loss": 0.2035, "step": 44090 }, { "epoch": 1.602587397339923, "grad_norm": 1.1978685855865479, "learning_rate": 4.520949092034837e-05, "loss": 0.1739, "step": 44100 }, { "epoch": 1.6029507958427212, "grad_norm": 1.9989899396896362, "learning_rate": 4.5206435732984085e-05, "loss": 0.1285, "step": 44110 }, { "epoch": 1.6033141943455194, "grad_norm": 0.6451914310455322, "learning_rate": 4.5203379674996365e-05, "loss": 0.1466, "step": 44120 }, { "epoch": 1.6036775928483173, "grad_norm": 0.6689841747283936, "learning_rate": 4.5200322746516904e-05, "loss": 0.113, "step": 44130 }, { "epoch": 1.6040409913511158, "grad_norm": 1.1558260917663574, "learning_rate": 4.519726494767741e-05, "loss": 0.1005, "step": 44140 }, { "epoch": 1.6044043898539138, "grad_norm": 13.844839096069336, "learning_rate": 4.519420627860963e-05, "loss": 0.1279, "step": 44150 }, { "epoch": 1.604767788356712, "grad_norm": 0.6856222152709961, "learning_rate": 4.519114673944536e-05, "loss": 0.1147, "step": 44160 }, { "epoch": 1.6051311868595102, "grad_norm": 0.7829769253730774, "learning_rate": 4.5188086330316405e-05, "loss": 0.1336, "step": 44170 }, { "epoch": 1.6054945853623082, "grad_norm": 1.3698971271514893, "learning_rate": 4.518502505135465e-05, "loss": 0.1158, "step": 44180 }, { "epoch": 1.6058579838651066, "grad_norm": 1.3197015523910522, "learning_rate": 4.5181962902691975e-05, "loss": 0.1293, "step": 44190 }, { "epoch": 1.6062213823679046, "grad_norm": 0.8092926740646362, "learning_rate": 4.517889988446033e-05, "loss": 0.1466, "step": 44200 }, { "epoch": 1.6065847808707028, "grad_norm": 2.015113115310669, "learning_rate": 4.5175835996791684e-05, "loss": 0.1228, "step": 44210 }, { "epoch": 1.606948179373501, "grad_norm": 1.2220087051391602, 
"learning_rate": 4.5172771239818056e-05, "loss": 0.199, "step": 44220 }, { "epoch": 1.6073115778762992, "grad_norm": 0.5432813167572021, "learning_rate": 4.516970561367149e-05, "loss": 0.1453, "step": 44230 }, { "epoch": 1.6076749763790974, "grad_norm": 0.6337705850601196, "learning_rate": 4.516663911848407e-05, "loss": 0.1257, "step": 44240 }, { "epoch": 1.6080383748818954, "grad_norm": 0.6741940379142761, "learning_rate": 4.5163571754387915e-05, "loss": 0.1062, "step": 44250 }, { "epoch": 1.6084017733846938, "grad_norm": 2.3033409118652344, "learning_rate": 4.516050352151521e-05, "loss": 0.1452, "step": 44260 }, { "epoch": 1.6087651718874918, "grad_norm": 0.4420888125896454, "learning_rate": 4.515743441999814e-05, "loss": 0.1358, "step": 44270 }, { "epoch": 1.60912857039029, "grad_norm": 1.5571812391281128, "learning_rate": 4.515436444996893e-05, "loss": 0.1102, "step": 44280 }, { "epoch": 1.6094919688930882, "grad_norm": 1.084507703781128, "learning_rate": 4.5151293611559865e-05, "loss": 0.1099, "step": 44290 }, { "epoch": 1.6098553673958862, "grad_norm": 0.7025009989738464, "learning_rate": 4.514822190490327e-05, "loss": 0.2296, "step": 44300 }, { "epoch": 1.6102187658986846, "grad_norm": 2.125432252883911, "learning_rate": 4.514514933013147e-05, "loss": 0.1189, "step": 44310 }, { "epoch": 1.6105821644014826, "grad_norm": 0.47693368792533875, "learning_rate": 4.5142075887376856e-05, "loss": 0.1488, "step": 44320 }, { "epoch": 1.6109455629042808, "grad_norm": 0.7935511469841003, "learning_rate": 4.5139001576771865e-05, "loss": 1.8833, "step": 44330 }, { "epoch": 1.611308961407079, "grad_norm": 0.6441402435302734, "learning_rate": 4.513592639844896e-05, "loss": 0.1173, "step": 44340 }, { "epoch": 1.611672359909877, "grad_norm": 1.3646268844604492, "learning_rate": 4.513285035254062e-05, "loss": 0.1171, "step": 44350 }, { "epoch": 1.6120357584126754, "grad_norm": 1.0334749221801758, "learning_rate": 4.512977343917939e-05, "loss": 0.1069, "step": 44360 }, { 
"epoch": 1.6123991569154734, "grad_norm": 0.3879293203353882, "learning_rate": 4.5126695658497856e-05, "loss": 0.1244, "step": 44370 }, { "epoch": 1.6127625554182716, "grad_norm": 0.6635248064994812, "learning_rate": 4.5123617010628606e-05, "loss": 0.1102, "step": 44380 }, { "epoch": 1.6131259539210698, "grad_norm": 0.8040985465049744, "learning_rate": 4.51205374957043e-05, "loss": 0.1455, "step": 44390 }, { "epoch": 1.613489352423868, "grad_norm": 0.5279836654663086, "learning_rate": 4.511745711385763e-05, "loss": 0.1547, "step": 44400 }, { "epoch": 1.613489352423868, "eval_loss": 0.34678882360458374, "eval_runtime": 179.7828, "eval_samples_per_second": 41.239, "eval_steps_per_second": 5.156, "eval_wer": 0.17216403144117487, "step": 44400 }, { "epoch": 1.6138527509266662, "grad_norm": 1.1375586986541748, "learning_rate": 4.51143758652213e-05, "loss": 0.14, "step": 44410 }, { "epoch": 1.6142161494294642, "grad_norm": 1.5960606336593628, "learning_rate": 4.511129374992809e-05, "loss": 0.1336, "step": 44420 }, { "epoch": 1.6145795479322627, "grad_norm": 0.5347716808319092, "learning_rate": 4.5108210768110785e-05, "loss": 0.1083, "step": 44430 }, { "epoch": 1.6149429464350606, "grad_norm": 1.816926121711731, "learning_rate": 4.510512691990222e-05, "loss": 0.1122, "step": 44440 }, { "epoch": 1.6153063449378589, "grad_norm": 1.2517473697662354, "learning_rate": 4.510204220543528e-05, "loss": 0.144, "step": 44450 }, { "epoch": 1.615669743440657, "grad_norm": 1.0830953121185303, "learning_rate": 4.509895662484286e-05, "loss": 0.1851, "step": 44460 }, { "epoch": 1.616033141943455, "grad_norm": 0.45219525694847107, "learning_rate": 4.50958701782579e-05, "loss": 0.1589, "step": 44470 }, { "epoch": 1.6163965404462535, "grad_norm": 0.940949559211731, "learning_rate": 4.509278286581341e-05, "loss": 0.113, "step": 44480 }, { "epoch": 1.6167599389490515, "grad_norm": 0.7262178659439087, "learning_rate": 4.5089694687642394e-05, "loss": 0.1294, "step": 44490 }, { "epoch": 
1.6171233374518497, "grad_norm": 0.8851106762886047, "learning_rate": 4.508660564387791e-05, "loss": 0.1563, "step": 44500 }, { "epoch": 1.6174867359546479, "grad_norm": 1.4259148836135864, "learning_rate": 4.508351573465306e-05, "loss": 0.1298, "step": 44510 }, { "epoch": 1.617850134457446, "grad_norm": 1.7158180475234985, "learning_rate": 4.508042496010098e-05, "loss": 0.197, "step": 44520 }, { "epoch": 1.6182135329602443, "grad_norm": 1.1961179971694946, "learning_rate": 4.507733332035482e-05, "loss": 2.6746, "step": 44530 }, { "epoch": 1.6185769314630423, "grad_norm": 1.0735702514648438, "learning_rate": 4.507424081554782e-05, "loss": 0.1132, "step": 44540 }, { "epoch": 1.6189403299658407, "grad_norm": 0.8479132056236267, "learning_rate": 4.507114744581319e-05, "loss": 0.1411, "step": 44550 }, { "epoch": 1.6193037284686387, "grad_norm": 0.804205596446991, "learning_rate": 4.506805321128424e-05, "loss": 0.1301, "step": 44560 }, { "epoch": 1.6196671269714369, "grad_norm": 0.4933542013168335, "learning_rate": 4.506495811209428e-05, "loss": 0.1765, "step": 44570 }, { "epoch": 1.620030525474235, "grad_norm": 1.0244536399841309, "learning_rate": 4.506186214837666e-05, "loss": 0.131, "step": 44580 }, { "epoch": 1.620393923977033, "grad_norm": 0.4374043941497803, "learning_rate": 4.5058765320264784e-05, "loss": 0.102, "step": 44590 }, { "epoch": 1.6207573224798315, "grad_norm": 0.5329868197441101, "learning_rate": 4.505566762789208e-05, "loss": 0.1168, "step": 44600 }, { "epoch": 1.6211207209826295, "grad_norm": 0.9576613306999207, "learning_rate": 4.5052569071392014e-05, "loss": 0.0948, "step": 44610 }, { "epoch": 1.6214841194854277, "grad_norm": 0.3620557188987732, "learning_rate": 4.50494696508981e-05, "loss": 0.1693, "step": 44620 }, { "epoch": 1.621847517988226, "grad_norm": 119.01215362548828, "learning_rate": 4.504636936654387e-05, "loss": 2.0014, "step": 44630 }, { "epoch": 1.622210916491024, "grad_norm": 0.491005003452301, "learning_rate": 
4.504326821846291e-05, "loss": 0.0958, "step": 44640 }, { "epoch": 1.6225743149938223, "grad_norm": 0.8035761713981628, "learning_rate": 4.504016620678883e-05, "loss": 0.6652, "step": 44650 }, { "epoch": 1.6229377134966203, "grad_norm": 1.4501937627792358, "learning_rate": 4.5037063331655305e-05, "loss": 0.1282, "step": 44660 }, { "epoch": 1.6233011119994185, "grad_norm": 0.3285962641239166, "learning_rate": 4.503395959319601e-05, "loss": 0.6589, "step": 44670 }, { "epoch": 1.6236645105022167, "grad_norm": 1.3429205417633057, "learning_rate": 4.5030854991544666e-05, "loss": 0.1224, "step": 44680 }, { "epoch": 1.624027909005015, "grad_norm": 0.6868845224380493, "learning_rate": 4.502774952683506e-05, "loss": 0.1229, "step": 44690 }, { "epoch": 1.6243913075078131, "grad_norm": 0.7645006775856018, "learning_rate": 4.502464319920099e-05, "loss": 0.1587, "step": 44700 }, { "epoch": 1.6247547060106111, "grad_norm": 1.2401680946350098, "learning_rate": 4.502153600877628e-05, "loss": 0.1274, "step": 44710 }, { "epoch": 1.6251181045134095, "grad_norm": 0.4394826292991638, "learning_rate": 4.501842795569483e-05, "loss": 0.1434, "step": 44720 }, { "epoch": 1.6254815030162075, "grad_norm": 0.5105617046356201, "learning_rate": 4.5015319040090545e-05, "loss": 0.1089, "step": 44730 }, { "epoch": 1.6258449015190057, "grad_norm": 1.5043278932571411, "learning_rate": 4.5012209262097365e-05, "loss": 0.1391, "step": 44740 }, { "epoch": 1.626208300021804, "grad_norm": 0.8561335802078247, "learning_rate": 4.5009098621849296e-05, "loss": 0.2735, "step": 44750 }, { "epoch": 1.626571698524602, "grad_norm": 1.71244478225708, "learning_rate": 4.500598711948037e-05, "loss": 0.1855, "step": 44760 }, { "epoch": 1.6269350970274004, "grad_norm": 0.6392226815223694, "learning_rate": 4.500287475512463e-05, "loss": 0.1675, "step": 44770 }, { "epoch": 1.6272984955301983, "grad_norm": 0.9670777916908264, "learning_rate": 4.4999761528916194e-05, "loss": 0.1201, "step": 44780 }, { "epoch": 
1.6276618940329965, "grad_norm": 0.6879392862319946, "learning_rate": 4.4996647440989195e-05, "loss": 0.15, "step": 44790 }, { "epoch": 1.6280252925357948, "grad_norm": 1.038004994392395, "learning_rate": 4.49935324914778e-05, "loss": 0.148, "step": 44800 }, { "epoch": 1.628388691038593, "grad_norm": 1.1731406450271606, "learning_rate": 4.499041668051624e-05, "loss": 0.1225, "step": 44810 }, { "epoch": 1.6287520895413912, "grad_norm": 1.0449947118759155, "learning_rate": 4.498730000823873e-05, "loss": 0.1348, "step": 44820 }, { "epoch": 1.6291154880441892, "grad_norm": 0.7107880115509033, "learning_rate": 4.498418247477959e-05, "loss": 0.1185, "step": 44830 }, { "epoch": 1.6294788865469876, "grad_norm": 0.9275081157684326, "learning_rate": 4.498106408027313e-05, "loss": 0.1405, "step": 44840 }, { "epoch": 1.6298422850497856, "grad_norm": 1.5348129272460938, "learning_rate": 4.497794482485371e-05, "loss": 0.1401, "step": 44850 }, { "epoch": 1.6302056835525838, "grad_norm": 1.6144418716430664, "learning_rate": 4.497482470865574e-05, "loss": 0.1191, "step": 44860 }, { "epoch": 1.630569082055382, "grad_norm": 1.1674468517303467, "learning_rate": 4.497170373181363e-05, "loss": 0.7629, "step": 44870 }, { "epoch": 1.63093248055818, "grad_norm": 0.9818703532218933, "learning_rate": 4.496858189446187e-05, "loss": 0.1275, "step": 44880 }, { "epoch": 1.6312958790609784, "grad_norm": 8.3660249710083, "learning_rate": 4.496545919673496e-05, "loss": 0.1166, "step": 44890 }, { "epoch": 1.6316592775637764, "grad_norm": 0.7371792793273926, "learning_rate": 4.496233563876746e-05, "loss": 0.1559, "step": 44900 }, { "epoch": 1.6320226760665746, "grad_norm": 0.9537221789360046, "learning_rate": 4.4959211220693945e-05, "loss": 0.125, "step": 44910 }, { "epoch": 1.6323860745693728, "grad_norm": 0.9887855648994446, "learning_rate": 4.495608594264902e-05, "loss": 0.1406, "step": 44920 }, { "epoch": 1.632749473072171, "grad_norm": 0.7480888962745667, "learning_rate": 4.495295980476737e-05, 
"loss": 0.1227, "step": 44930 }, { "epoch": 1.6331128715749692, "grad_norm": 0.784050703048706, "learning_rate": 4.494983280718367e-05, "loss": 0.1068, "step": 44940 }, { "epoch": 1.6334762700777672, "grad_norm": 2.8426759243011475, "learning_rate": 4.494670495003265e-05, "loss": 0.1327, "step": 44950 }, { "epoch": 1.6338396685805654, "grad_norm": 3.448587417602539, "learning_rate": 4.494357623344909e-05, "loss": 0.142, "step": 44960 }, { "epoch": 1.6342030670833636, "grad_norm": 0.669575572013855, "learning_rate": 4.4940446657567784e-05, "loss": 0.1398, "step": 44970 }, { "epoch": 1.6345664655861618, "grad_norm": 1.1868761777877808, "learning_rate": 4.493731622252358e-05, "loss": 0.117, "step": 44980 }, { "epoch": 1.63492986408896, "grad_norm": 0.8725171685218811, "learning_rate": 4.4934184928451364e-05, "loss": 0.0914, "step": 44990 }, { "epoch": 1.635293262591758, "grad_norm": 1.300013780593872, "learning_rate": 4.493105277548605e-05, "loss": 0.1761, "step": 45000 }, { "epoch": 1.635293262591758, "eval_loss": 0.3556674122810364, "eval_runtime": 180.1231, "eval_samples_per_second": 41.161, "eval_steps_per_second": 5.146, "eval_wer": 0.17496868589685408, "step": 45000 }, { "epoch": 1.6356566610945564, "grad_norm": 2.436525583267212, "learning_rate": 4.4927919763762574e-05, "loss": 0.1164, "step": 45010 }, { "epoch": 1.6360200595973544, "grad_norm": 1.3236192464828491, "learning_rate": 4.492478589341594e-05, "loss": 0.1496, "step": 45020 }, { "epoch": 1.6363834581001526, "grad_norm": 0.6237584948539734, "learning_rate": 4.4921651164581185e-05, "loss": 0.1449, "step": 45030 }, { "epoch": 1.6367468566029508, "grad_norm": 0.5335447192192078, "learning_rate": 4.491851557739336e-05, "loss": 0.1349, "step": 45040 }, { "epoch": 1.6371102551057488, "grad_norm": 1.4458340406417847, "learning_rate": 4.491537913198757e-05, "loss": 0.1994, "step": 45050 }, { "epoch": 1.6374736536085472, "grad_norm": 1.4140558242797852, "learning_rate": 4.4912241828498944e-05, "loss": 0.1432, 
"step": 45060 }, { "epoch": 1.6378370521113452, "grad_norm": 1.127317190170288, "learning_rate": 4.4909103667062666e-05, "loss": 0.1932, "step": 45070 }, { "epoch": 1.6382004506141434, "grad_norm": 3.4496073722839355, "learning_rate": 4.490596464781395e-05, "loss": 0.1057, "step": 45080 }, { "epoch": 1.6385638491169416, "grad_norm": 0.663720428943634, "learning_rate": 4.490282477088805e-05, "loss": 0.1081, "step": 45090 }, { "epoch": 1.6389272476197398, "grad_norm": 0.8442180156707764, "learning_rate": 4.4899684036420244e-05, "loss": 0.1354, "step": 45100 }, { "epoch": 1.639290646122538, "grad_norm": 1.3163623809814453, "learning_rate": 4.489654244454585e-05, "loss": 0.1247, "step": 45110 }, { "epoch": 1.639654044625336, "grad_norm": 0.639021635055542, "learning_rate": 4.489339999540023e-05, "loss": 0.1598, "step": 45120 }, { "epoch": 1.6400174431281345, "grad_norm": 0.549207329750061, "learning_rate": 4.489025668911879e-05, "loss": 0.1157, "step": 45130 }, { "epoch": 1.6403808416309325, "grad_norm": 3.4274439811706543, "learning_rate": 4.488711252583696e-05, "loss": 0.1635, "step": 45140 }, { "epoch": 1.6407442401337307, "grad_norm": 3.244072914123535, "learning_rate": 4.488396750569022e-05, "loss": 0.1216, "step": 45150 }, { "epoch": 1.6411076386365289, "grad_norm": 1.9557846784591675, "learning_rate": 4.4880821628814054e-05, "loss": 0.1197, "step": 45160 }, { "epoch": 1.6414710371393268, "grad_norm": 0.8653383851051331, "learning_rate": 4.487767489534402e-05, "loss": 0.1929, "step": 45170 }, { "epoch": 1.6418344356421253, "grad_norm": 0.9569295048713684, "learning_rate": 4.4874527305415706e-05, "loss": 0.1064, "step": 45180 }, { "epoch": 1.6421978341449233, "grad_norm": 0.9595149159431458, "learning_rate": 4.487137885916473e-05, "loss": 0.1087, "step": 45190 }, { "epoch": 1.6425612326477215, "grad_norm": 1.06610906124115, "learning_rate": 4.486822955672673e-05, "loss": 0.1126, "step": 45200 }, { "epoch": 1.6429246311505197, "grad_norm": 0.5096926689147949, 
"learning_rate": 4.4865079398237407e-05, "loss": 0.1175, "step": 45210 }, { "epoch": 1.6432880296533179, "grad_norm": 0.6575452089309692, "learning_rate": 4.48619283838325e-05, "loss": 0.2033, "step": 45220 }, { "epoch": 1.643651428156116, "grad_norm": 1.5489494800567627, "learning_rate": 4.485877651364777e-05, "loss": 0.1283, "step": 45230 }, { "epoch": 1.644014826658914, "grad_norm": 1.2202279567718506, "learning_rate": 4.485562378781901e-05, "loss": 0.129, "step": 45240 }, { "epoch": 1.6443782251617123, "grad_norm": 2.960289716720581, "learning_rate": 4.485247020648208e-05, "loss": 0.1547, "step": 45250 }, { "epoch": 1.6447416236645105, "grad_norm": 1.178314447402954, "learning_rate": 4.4849315769772835e-05, "loss": 0.1424, "step": 45260 }, { "epoch": 1.6451050221673087, "grad_norm": 0.47237566113471985, "learning_rate": 4.484616047782719e-05, "loss": 0.1003, "step": 45270 }, { "epoch": 1.645468420670107, "grad_norm": 0.7487808465957642, "learning_rate": 4.484300433078112e-05, "loss": 1.3485, "step": 45280 }, { "epoch": 1.6458318191729049, "grad_norm": 1.5242539644241333, "learning_rate": 4.483984732877059e-05, "loss": 0.1301, "step": 45290 }, { "epoch": 1.6461952176757033, "grad_norm": 1.1392406225204468, "learning_rate": 4.4836689471931624e-05, "loss": 0.1385, "step": 45300 }, { "epoch": 1.6465586161785013, "grad_norm": 0.856468677520752, "learning_rate": 4.483353076040029e-05, "loss": 0.1029, "step": 45310 }, { "epoch": 1.6469220146812995, "grad_norm": 0.414461225271225, "learning_rate": 4.483037119431268e-05, "loss": 0.1697, "step": 45320 }, { "epoch": 1.6472854131840977, "grad_norm": 1.7020654678344727, "learning_rate": 4.482721077380494e-05, "loss": 0.1102, "step": 45330 }, { "epoch": 1.6476488116868957, "grad_norm": 0.9631456136703491, "learning_rate": 4.482404949901323e-05, "loss": 0.1193, "step": 45340 }, { "epoch": 1.6480122101896941, "grad_norm": 0.5286620855331421, "learning_rate": 4.482088737007376e-05, "loss": 0.628, "step": 45350 }, { "epoch": 
1.6483756086924921, "grad_norm": 1.0761183500289917, "learning_rate": 4.481772438712277e-05, "loss": 0.1131, "step": 45360 }, { "epoch": 1.6487390071952903, "grad_norm": 0.46266233921051025, "learning_rate": 4.481456055029656e-05, "loss": 0.1641, "step": 45370 }, { "epoch": 1.6491024056980885, "grad_norm": 1.1350431442260742, "learning_rate": 4.481139585973142e-05, "loss": 0.122, "step": 45380 }, { "epoch": 1.6494658042008867, "grad_norm": 4.3756632804870605, "learning_rate": 4.4808230315563735e-05, "loss": 0.1424, "step": 45390 }, { "epoch": 1.649829202703685, "grad_norm": 1.386616826057434, "learning_rate": 4.480506391792988e-05, "loss": 0.1579, "step": 45400 }, { "epoch": 1.650192601206483, "grad_norm": 0.6531800031661987, "learning_rate": 4.480189666696629e-05, "loss": 0.1275, "step": 45410 }, { "epoch": 1.6505559997092814, "grad_norm": 1.8200130462646484, "learning_rate": 4.479872856280942e-05, "loss": 0.1943, "step": 45420 }, { "epoch": 1.6509193982120793, "grad_norm": 0.6366170048713684, "learning_rate": 4.47955596055958e-05, "loss": 0.1172, "step": 45430 }, { "epoch": 1.6512827967148775, "grad_norm": 0.8036410808563232, "learning_rate": 4.479238979546193e-05, "loss": 0.1181, "step": 45440 }, { "epoch": 1.6516461952176757, "grad_norm": 0.7934151291847229, "learning_rate": 4.47892191325444e-05, "loss": 0.1319, "step": 45450 }, { "epoch": 1.6520095937204737, "grad_norm": 1.5175780057907104, "learning_rate": 4.4786047616979845e-05, "loss": 0.1328, "step": 45460 }, { "epoch": 1.6523729922232722, "grad_norm": 31.52168083190918, "learning_rate": 4.478287524890489e-05, "loss": 0.4917, "step": 45470 }, { "epoch": 1.6527363907260701, "grad_norm": 0.8360010981559753, "learning_rate": 4.477970202845623e-05, "loss": 0.1071, "step": 45480 }, { "epoch": 1.6530997892288684, "grad_norm": 0.5976376533508301, "learning_rate": 4.4776527955770586e-05, "loss": 0.1098, "step": 45490 }, { "epoch": 1.6534631877316666, "grad_norm": 0.779091477394104, "learning_rate": 
4.4773353030984715e-05, "loss": 0.1621, "step": 45500 }, { "epoch": 1.6538265862344648, "grad_norm": 0.7147294282913208, "learning_rate": 4.477017725423542e-05, "loss": 0.2085, "step": 45510 }, { "epoch": 1.654189984737263, "grad_norm": 1.0562430620193481, "learning_rate": 4.4767000625659525e-05, "loss": 0.1489, "step": 45520 }, { "epoch": 1.654553383240061, "grad_norm": 0.6119662523269653, "learning_rate": 4.4763823145393906e-05, "loss": 0.0975, "step": 45530 }, { "epoch": 1.6549167817428592, "grad_norm": 2.1033360958099365, "learning_rate": 4.476064481357547e-05, "loss": 0.1151, "step": 45540 }, { "epoch": 1.6552801802456574, "grad_norm": 0.5644105672836304, "learning_rate": 4.4757465630341154e-05, "loss": 0.1479, "step": 45550 }, { "epoch": 1.6556435787484556, "grad_norm": 1.2466843128204346, "learning_rate": 4.475428559582794e-05, "loss": 0.1047, "step": 45560 }, { "epoch": 1.6560069772512538, "grad_norm": 0.6210132241249084, "learning_rate": 4.475110471017285e-05, "loss": 0.1703, "step": 45570 }, { "epoch": 1.6563703757540518, "grad_norm": 0.5731077194213867, "learning_rate": 4.474792297351293e-05, "loss": 0.1154, "step": 45580 }, { "epoch": 1.6567337742568502, "grad_norm": 1.2748225927352905, "learning_rate": 4.474474038598527e-05, "loss": 0.1106, "step": 45590 }, { "epoch": 1.6570971727596482, "grad_norm": 1.0353822708129883, "learning_rate": 4.4741556947727e-05, "loss": 0.1453, "step": 45600 }, { "epoch": 1.6570971727596482, "eval_loss": 0.35429847240448, "eval_runtime": 180.0899, "eval_samples_per_second": 41.168, "eval_steps_per_second": 5.147, "eval_wer": 0.16677255976909253, "step": 45600 }, { "epoch": 1.6574605712624464, "grad_norm": 0.8197756409645081, "learning_rate": 4.4738372658875286e-05, "loss": 0.6064, "step": 45610 }, { "epoch": 1.6578239697652446, "grad_norm": 1.9029946327209473, "learning_rate": 4.473518751956732e-05, "loss": 0.122, "step": 45620 }, { "epoch": 1.6581873682680426, "grad_norm": 0.897566020488739, "learning_rate": 
4.473200152994035e-05, "loss": 0.5106, "step": 45630 }, { "epoch": 1.658550766770841, "grad_norm": 0.448548823595047, "learning_rate": 4.472881469013163e-05, "loss": 0.1316, "step": 45640 }, { "epoch": 1.658914165273639, "grad_norm": 0.9315693974494934, "learning_rate": 4.472562700027849e-05, "loss": 0.1616, "step": 45650 }, { "epoch": 1.6592775637764372, "grad_norm": 0.6731955409049988, "learning_rate": 4.4722438460518255e-05, "loss": 0.1138, "step": 45660 }, { "epoch": 1.6596409622792354, "grad_norm": 0.949320375919342, "learning_rate": 4.4719249070988325e-05, "loss": 0.1464, "step": 45670 }, { "epoch": 1.6600043607820336, "grad_norm": 1.0242235660552979, "learning_rate": 4.471605883182611e-05, "loss": 0.1135, "step": 45680 }, { "epoch": 1.6603677592848318, "grad_norm": 2.5394222736358643, "learning_rate": 4.471318689025813e-05, "loss": 1.5802, "step": 45690 }, { "epoch": 1.6607311577876298, "grad_norm": 0.5729508996009827, "learning_rate": 4.4709995037173305e-05, "loss": 0.44, "step": 45700 }, { "epoch": 1.6610945562904282, "grad_norm": 1.372788906097412, "learning_rate": 4.470680233485492e-05, "loss": 0.1286, "step": 45710 }, { "epoch": 1.6614579547932262, "grad_norm": 1.5759491920471191, "learning_rate": 4.470360878344055e-05, "loss": 0.1221, "step": 45720 }, { "epoch": 1.6618213532960244, "grad_norm": 1.0494245290756226, "learning_rate": 4.470041438306778e-05, "loss": 0.126, "step": 45730 }, { "epoch": 1.6621847517988226, "grad_norm": 0.469928115606308, "learning_rate": 4.469721913387424e-05, "loss": 0.1295, "step": 45740 }, { "epoch": 1.6625481503016206, "grad_norm": 0.9547176361083984, "learning_rate": 4.469402303599761e-05, "loss": 0.1724, "step": 45750 }, { "epoch": 1.662911548804419, "grad_norm": 0.5945098400115967, "learning_rate": 4.469082608957561e-05, "loss": 0.1282, "step": 45760 }, { "epoch": 1.663274947307217, "grad_norm": 0.8782799243927002, "learning_rate": 4.468762829474597e-05, "loss": 0.1594, "step": 45770 }, { "epoch": 1.6636383458100152, 
"grad_norm": 0.8542808294296265, "learning_rate": 4.4684429651646476e-05, "loss": 0.1147, "step": 45780 }, { "epoch": 1.6640017443128134, "grad_norm": 1.189684271812439, "learning_rate": 4.4681230160414946e-05, "loss": 0.1348, "step": 45790 }, { "epoch": 1.6643651428156117, "grad_norm": 0.9197025895118713, "learning_rate": 4.467802982118923e-05, "loss": 0.1528, "step": 45800 }, { "epoch": 1.6647285413184099, "grad_norm": 0.5935563445091248, "learning_rate": 4.4674828634107226e-05, "loss": 0.1356, "step": 45810 }, { "epoch": 1.6650919398212078, "grad_norm": 0.7441408038139343, "learning_rate": 4.467162659930686e-05, "loss": 0.1553, "step": 45820 }, { "epoch": 1.665455338324006, "grad_norm": 0.5700821280479431, "learning_rate": 4.466842371692609e-05, "loss": 0.1206, "step": 45830 }, { "epoch": 1.6658187368268043, "grad_norm": 1.0566598176956177, "learning_rate": 4.466521998710292e-05, "loss": 0.1137, "step": 45840 }, { "epoch": 1.6661821353296025, "grad_norm": 0.8243798613548279, "learning_rate": 4.4662015409975406e-05, "loss": 0.1531, "step": 45850 }, { "epoch": 1.6665455338324007, "grad_norm": 1.1144201755523682, "learning_rate": 4.465880998568159e-05, "loss": 0.1122, "step": 45860 }, { "epoch": 1.6669089323351987, "grad_norm": 0.8346664309501648, "learning_rate": 4.46556037143596e-05, "loss": 0.1365, "step": 45870 }, { "epoch": 1.667272330837997, "grad_norm": 1.140259027481079, "learning_rate": 4.46523965961476e-05, "loss": 0.1105, "step": 45880 }, { "epoch": 1.667635729340795, "grad_norm": 1.7616723775863647, "learning_rate": 4.464918863118374e-05, "loss": 0.1092, "step": 45890 }, { "epoch": 1.6679991278435933, "grad_norm": 0.5135784149169922, "learning_rate": 4.464597981960625e-05, "loss": 0.1502, "step": 45900 }, { "epoch": 1.6683625263463915, "grad_norm": 1.542801022529602, "learning_rate": 4.464277016155339e-05, "loss": 0.125, "step": 45910 }, { "epoch": 1.6687259248491895, "grad_norm": 0.31144529581069946, "learning_rate": 4.463955965716346e-05, "loss": 
0.21, "step": 45920 }, { "epoch": 1.669089323351988, "grad_norm": 0.642985463142395, "learning_rate": 4.463634830657478e-05, "loss": 0.1213, "step": 45930 }, { "epoch": 1.6694527218547859, "grad_norm": 2.417689800262451, "learning_rate": 4.4633136109925716e-05, "loss": 0.1244, "step": 45940 }, { "epoch": 1.669816120357584, "grad_norm": 0.5426376461982727, "learning_rate": 4.462992306735467e-05, "loss": 0.1789, "step": 45950 }, { "epoch": 1.6701795188603823, "grad_norm": 0.5924781560897827, "learning_rate": 4.4626709179000094e-05, "loss": 0.118, "step": 45960 }, { "epoch": 1.6705429173631805, "grad_norm": 0.5799354314804077, "learning_rate": 4.4623494445000435e-05, "loss": 0.1714, "step": 45970 }, { "epoch": 1.6709063158659787, "grad_norm": 0.6282142400741577, "learning_rate": 4.462027886549423e-05, "loss": 0.1099, "step": 45980 }, { "epoch": 1.6712697143687767, "grad_norm": 1.1201330423355103, "learning_rate": 4.461706244062002e-05, "loss": 0.1281, "step": 45990 }, { "epoch": 1.6716331128715751, "grad_norm": 1.1922492980957031, "learning_rate": 4.461384517051638e-05, "loss": 0.1245, "step": 46000 }, { "epoch": 1.671996511374373, "grad_norm": 1.0188281536102295, "learning_rate": 4.461062705532194e-05, "loss": 0.1715, "step": 46010 }, { "epoch": 1.6723599098771713, "grad_norm": 0.5861912369728088, "learning_rate": 4.4607408095175364e-05, "loss": 0.1391, "step": 46020 }, { "epoch": 1.6727233083799695, "grad_norm": 0.6984696388244629, "learning_rate": 4.4604188290215324e-05, "loss": 0.116, "step": 46030 }, { "epoch": 1.6730867068827675, "grad_norm": 0.5184624791145325, "learning_rate": 4.460096764058057e-05, "loss": 0.1173, "step": 46040 }, { "epoch": 1.673450105385566, "grad_norm": 0.39695462584495544, "learning_rate": 4.4597746146409856e-05, "loss": 0.1325, "step": 46050 }, { "epoch": 1.673813503888364, "grad_norm": 0.687271237373352, "learning_rate": 4.459452380784199e-05, "loss": 0.1123, "step": 46060 }, { "epoch": 1.6741769023911621, "grad_norm": 
0.8372097015380859, "learning_rate": 4.459130062501582e-05, "loss": 0.1895, "step": 46070 }, { "epoch": 1.6745403008939603, "grad_norm": 1.8692165613174438, "learning_rate": 4.4588076598070206e-05, "loss": 0.1213, "step": 46080 }, { "epoch": 1.6749036993967585, "grad_norm": 12.919623374938965, "learning_rate": 4.458485172714406e-05, "loss": 0.115, "step": 46090 }, { "epoch": 1.6752670978995567, "grad_norm": 0.6733956933021545, "learning_rate": 4.458162601237634e-05, "loss": 0.1473, "step": 46100 }, { "epoch": 1.6756304964023547, "grad_norm": 0.8653566241264343, "learning_rate": 4.457839945390603e-05, "loss": 0.0995, "step": 46110 }, { "epoch": 1.675993894905153, "grad_norm": 0.536120593547821, "learning_rate": 4.4575172051872145e-05, "loss": 0.1494, "step": 46120 }, { "epoch": 1.6763572934079511, "grad_norm": 3.844902753829956, "learning_rate": 4.4571943806413743e-05, "loss": 0.1086, "step": 46130 }, { "epoch": 1.6767206919107494, "grad_norm": 2.0951857566833496, "learning_rate": 4.4568714717669926e-05, "loss": 0.111, "step": 46140 }, { "epoch": 1.6770840904135476, "grad_norm": 2.954204559326172, "learning_rate": 4.456548478577981e-05, "loss": 0.1481, "step": 46150 }, { "epoch": 1.6774474889163455, "grad_norm": 0.7243287563323975, "learning_rate": 4.456225401088258e-05, "loss": 0.1192, "step": 46160 }, { "epoch": 1.677810887419144, "grad_norm": 1.103082299232483, "learning_rate": 4.455902239311741e-05, "loss": 0.156, "step": 46170 }, { "epoch": 1.678174285921942, "grad_norm": 1.2734848260879517, "learning_rate": 4.455578993262357e-05, "loss": 0.1154, "step": 46180 }, { "epoch": 1.6785376844247402, "grad_norm": 0.9912572503089905, "learning_rate": 4.455255662954032e-05, "loss": 0.1228, "step": 46190 }, { "epoch": 1.6789010829275384, "grad_norm": 0.8736640214920044, "learning_rate": 4.454932248400697e-05, "loss": 0.12, "step": 46200 }, { "epoch": 1.6789010829275384, "eval_loss": 0.3570244014263153, "eval_runtime": 179.9492, "eval_samples_per_second": 41.201, 
"eval_steps_per_second": 5.151, "eval_wer": 0.17022164939096338, "step": 46200 }, { "epoch": 1.6792644814303364, "grad_norm": 3.907130241394043, "learning_rate": 4.454608749616287e-05, "loss": 0.1108, "step": 46210 }, { "epoch": 1.6796278799331348, "grad_norm": 0.3899100720882416, "learning_rate": 4.4542851666147404e-05, "loss": 0.1699, "step": 46220 }, { "epoch": 1.6799912784359328, "grad_norm": 1.6752989292144775, "learning_rate": 4.45396149941e-05, "loss": 0.124, "step": 46230 }, { "epoch": 1.680354676938731, "grad_norm": 5.3016886711120605, "learning_rate": 4.453637748016011e-05, "loss": 0.1239, "step": 46240 }, { "epoch": 1.6807180754415292, "grad_norm": 2.723459482192993, "learning_rate": 4.453313912446722e-05, "loss": 0.1245, "step": 46250 }, { "epoch": 1.6810814739443274, "grad_norm": 2.0152530670166016, "learning_rate": 4.4529899927160854e-05, "loss": 0.1056, "step": 46260 }, { "epoch": 1.6814448724471256, "grad_norm": 0.7301231622695923, "learning_rate": 4.452665988838059e-05, "loss": 0.1899, "step": 46270 }, { "epoch": 1.6818082709499236, "grad_norm": 0.7544482350349426, "learning_rate": 4.4523419008266045e-05, "loss": 0.1233, "step": 46280 }, { "epoch": 1.682171669452722, "grad_norm": 0.9912691712379456, "learning_rate": 4.4520177286956824e-05, "loss": 0.1263, "step": 46290 }, { "epoch": 1.68253506795552, "grad_norm": 2.0335001945495605, "learning_rate": 4.451693472459262e-05, "loss": 0.1328, "step": 46300 }, { "epoch": 1.6828984664583182, "grad_norm": 0.5679906606674194, "learning_rate": 4.451369132131314e-05, "loss": 0.0828, "step": 46310 }, { "epoch": 1.6832618649611164, "grad_norm": 1.3262155055999756, "learning_rate": 4.451044707725814e-05, "loss": 0.1426, "step": 46320 }, { "epoch": 1.6836252634639144, "grad_norm": 1.1101963520050049, "learning_rate": 4.4507201992567386e-05, "loss": 0.1385, "step": 46330 }, { "epoch": 1.6839886619667128, "grad_norm": 0.8079712390899658, "learning_rate": 4.4503956067380704e-05, "loss": 0.1278, "step": 46340 }, { 
"epoch": 1.6843520604695108, "grad_norm": 0.79506516456604, "learning_rate": 4.450070930183795e-05, "loss": 0.1281, "step": 46350 }, { "epoch": 1.684715458972309, "grad_norm": 1.1767312288284302, "learning_rate": 4.4497461696079024e-05, "loss": 0.1299, "step": 46360 }, { "epoch": 1.6850788574751072, "grad_norm": 0.7996610403060913, "learning_rate": 4.449421325024384e-05, "loss": 0.156, "step": 46370 }, { "epoch": 1.6854422559779054, "grad_norm": 0.638761579990387, "learning_rate": 4.449096396447237e-05, "loss": 0.1197, "step": 46380 }, { "epoch": 1.6858056544807036, "grad_norm": 3.2339584827423096, "learning_rate": 4.448771383890461e-05, "loss": 0.0992, "step": 46390 }, { "epoch": 1.6861690529835016, "grad_norm": 1.0168710947036743, "learning_rate": 4.448446287368059e-05, "loss": 0.1471, "step": 46400 }, { "epoch": 1.6865324514862998, "grad_norm": 0.7783123850822449, "learning_rate": 4.44812110689404e-05, "loss": 0.1012, "step": 46410 }, { "epoch": 1.686895849989098, "grad_norm": 0.7757607698440552, "learning_rate": 4.447795842482414e-05, "loss": 0.1355, "step": 46420 }, { "epoch": 1.6872592484918962, "grad_norm": 1.9442954063415527, "learning_rate": 4.447470494147195e-05, "loss": 0.0953, "step": 46430 }, { "epoch": 1.6876226469946944, "grad_norm": 1.0810720920562744, "learning_rate": 4.447145061902401e-05, "loss": 0.1037, "step": 46440 }, { "epoch": 1.6879860454974924, "grad_norm": 5.812492847442627, "learning_rate": 4.4468195457620556e-05, "loss": 0.1376, "step": 46450 }, { "epoch": 1.6883494440002909, "grad_norm": 2.3588967323303223, "learning_rate": 4.4464939457401825e-05, "loss": 0.1164, "step": 46460 }, { "epoch": 1.6887128425030888, "grad_norm": 1.6339848041534424, "learning_rate": 4.4461682618508106e-05, "loss": 0.1604, "step": 46470 }, { "epoch": 1.689076241005887, "grad_norm": 1.6590059995651245, "learning_rate": 4.445842494107973e-05, "loss": 0.1529, "step": 46480 }, { "epoch": 1.6894396395086853, "grad_norm": 0.8776388764381409, "learning_rate": 
4.445516642525705e-05, "loss": 0.1165, "step": 46490 }, { "epoch": 1.6898030380114832, "grad_norm": 2.1173806190490723, "learning_rate": 4.4451907071180474e-05, "loss": 0.1431, "step": 46500 }, { "epoch": 1.6901664365142817, "grad_norm": 1.3882853984832764, "learning_rate": 4.444864687899043e-05, "loss": 0.134, "step": 46510 }, { "epoch": 1.6905298350170797, "grad_norm": 0.5224485993385315, "learning_rate": 4.4445385848827395e-05, "loss": 0.1586, "step": 46520 }, { "epoch": 1.6908932335198779, "grad_norm": 1.3461922407150269, "learning_rate": 4.444212398083187e-05, "loss": 0.119, "step": 46530 }, { "epoch": 1.691256632022676, "grad_norm": 0.7005299925804138, "learning_rate": 4.4438861275144395e-05, "loss": 0.1046, "step": 46540 }, { "epoch": 1.6916200305254743, "grad_norm": 2.497610092163086, "learning_rate": 4.4435597731905554e-05, "loss": 0.1164, "step": 46550 }, { "epoch": 1.6919834290282725, "grad_norm": 4.168522357940674, "learning_rate": 4.443233335125596e-05, "loss": 0.1342, "step": 46560 }, { "epoch": 1.6923468275310705, "grad_norm": 0.59686678647995, "learning_rate": 4.442906813333626e-05, "loss": 0.1536, "step": 46570 }, { "epoch": 1.692710226033869, "grad_norm": 0.9300062656402588, "learning_rate": 4.442580207828715e-05, "loss": 0.1176, "step": 46580 }, { "epoch": 1.6930736245366669, "grad_norm": 0.8389412760734558, "learning_rate": 4.442253518624934e-05, "loss": 0.1085, "step": 46590 }, { "epoch": 1.693437023039465, "grad_norm": 1.0124256610870361, "learning_rate": 4.441926745736359e-05, "loss": 0.1357, "step": 46600 }, { "epoch": 1.6938004215422633, "grad_norm": 1.5312106609344482, "learning_rate": 4.4415998891770704e-05, "loss": 0.0911, "step": 46610 }, { "epoch": 1.6941638200450613, "grad_norm": 0.7724300622940063, "learning_rate": 4.441272948961151e-05, "loss": 0.1338, "step": 46620 }, { "epoch": 1.6945272185478597, "grad_norm": 0.9552409052848816, "learning_rate": 4.4409459251026864e-05, "loss": 0.1181, "step": 46630 }, { "epoch": 
1.6948906170506577, "grad_norm": 0.7531419992446899, "learning_rate": 4.440618817615768e-05, "loss": 0.1301, "step": 46640 }, { "epoch": 1.695254015553456, "grad_norm": 1.4831467866897583, "learning_rate": 4.44029162651449e-05, "loss": 0.1734, "step": 46650 }, { "epoch": 1.695617414056254, "grad_norm": 1.008574366569519, "learning_rate": 4.43996435181295e-05, "loss": 0.1205, "step": 46660 }, { "epoch": 1.6959808125590523, "grad_norm": 0.7653814554214478, "learning_rate": 4.4396369935252475e-05, "loss": 0.1886, "step": 46670 }, { "epoch": 1.6963442110618505, "grad_norm": 0.668803334236145, "learning_rate": 4.439309551665488e-05, "loss": 0.1066, "step": 46680 }, { "epoch": 1.6967076095646485, "grad_norm": 0.6227422952651978, "learning_rate": 4.438982026247781e-05, "loss": 0.1191, "step": 46690 }, { "epoch": 1.6970710080674467, "grad_norm": 4.826232433319092, "learning_rate": 4.438654417286237e-05, "loss": 0.1594, "step": 46700 }, { "epoch": 1.697434406570245, "grad_norm": 7.682708740234375, "learning_rate": 4.4383267247949714e-05, "loss": 0.1131, "step": 46710 }, { "epoch": 1.6977978050730431, "grad_norm": 1.0136793851852417, "learning_rate": 4.4379989487881046e-05, "loss": 0.1575, "step": 46720 }, { "epoch": 1.6981612035758413, "grad_norm": 2.774850606918335, "learning_rate": 4.437671089279758e-05, "loss": 0.1384, "step": 46730 }, { "epoch": 1.6985246020786393, "grad_norm": 11.532723426818848, "learning_rate": 4.4373431462840584e-05, "loss": 0.1338, "step": 46740 }, { "epoch": 1.6988880005814377, "grad_norm": 1.9600322246551514, "learning_rate": 4.437015119815136e-05, "loss": 0.1311, "step": 46750 }, { "epoch": 1.6992513990842357, "grad_norm": 0.5886809229850769, "learning_rate": 4.436687009887124e-05, "loss": 0.1647, "step": 46760 }, { "epoch": 1.699614797587034, "grad_norm": 2.5982067584991455, "learning_rate": 4.436358816514159e-05, "loss": 0.2078, "step": 46770 }, { "epoch": 1.6999781960898321, "grad_norm": 0.7927113771438599, "learning_rate": 
4.436030539710383e-05, "loss": 0.1017, "step": 46780 }, { "epoch": 1.7003415945926301, "grad_norm": 0.5954931974411011, "learning_rate": 4.435702179489939e-05, "loss": 0.1354, "step": 46790 }, { "epoch": 1.7007049930954286, "grad_norm": 1.10712468624115, "learning_rate": 4.4353737358669755e-05, "loss": 0.1531, "step": 46800 }, { "epoch": 1.7007049930954286, "eval_loss": 0.3568388819694519, "eval_runtime": 179.9769, "eval_samples_per_second": 41.194, "eval_steps_per_second": 5.151, "eval_wer": 0.16614627770617388, "step": 46800 }, { "epoch": 1.7010683915982265, "grad_norm": 2.1790926456451416, "learning_rate": 4.435045208855644e-05, "loss": 0.1336, "step": 46810 }, { "epoch": 1.7014317901010247, "grad_norm": 0.3032105267047882, "learning_rate": 4.434716598470099e-05, "loss": 0.1234, "step": 46820 }, { "epoch": 1.701795188603823, "grad_norm": 0.8836687207221985, "learning_rate": 4.434387904724499e-05, "loss": 0.1007, "step": 46830 }, { "epoch": 1.7021585871066212, "grad_norm": 0.9671638011932373, "learning_rate": 4.4340591276330075e-05, "loss": 0.3498, "step": 46840 }, { "epoch": 1.7025219856094194, "grad_norm": 3.225950002670288, "learning_rate": 4.4337302672097894e-05, "loss": 0.1493, "step": 46850 }, { "epoch": 1.7028853841122173, "grad_norm": 1.0754051208496094, "learning_rate": 4.4334013234690144e-05, "loss": 0.11, "step": 46860 }, { "epoch": 1.7032487826150158, "grad_norm": 0.3170652687549591, "learning_rate": 4.433072296424855e-05, "loss": 0.1543, "step": 46870 }, { "epoch": 1.7036121811178138, "grad_norm": 2.504772424697876, "learning_rate": 4.4327431860914885e-05, "loss": 0.1221, "step": 46880 }, { "epoch": 1.703975579620612, "grad_norm": 3.071819305419922, "learning_rate": 4.4324139924830956e-05, "loss": 2.0837, "step": 46890 }, { "epoch": 1.7043389781234102, "grad_norm": 0.6085606813430786, "learning_rate": 4.4320847156138584e-05, "loss": 0.2395, "step": 46900 }, { "epoch": 1.7047023766262082, "grad_norm": 1.1668941974639893, "learning_rate": 
4.431755355497965e-05, "loss": 1.7169, "step": 46910 }, { "epoch": 1.7050657751290066, "grad_norm": 2.0856447219848633, "learning_rate": 4.431425912149607e-05, "loss": 0.1911, "step": 46920 }, { "epoch": 1.7054291736318046, "grad_norm": 2.468442916870117, "learning_rate": 4.431096385582979e-05, "loss": 0.1408, "step": 46930 }, { "epoch": 1.7057925721346028, "grad_norm": 0.5875902771949768, "learning_rate": 4.430766775812278e-05, "loss": 0.0959, "step": 46940 }, { "epoch": 1.706155970637401, "grad_norm": 0.7292661070823669, "learning_rate": 4.430437082851706e-05, "loss": 0.1416, "step": 46950 }, { "epoch": 1.7065193691401992, "grad_norm": 0.9893856644630432, "learning_rate": 4.43010730671547e-05, "loss": 0.1233, "step": 46960 }, { "epoch": 1.7068827676429974, "grad_norm": 0.5555475950241089, "learning_rate": 4.4297774474177755e-05, "loss": 0.1817, "step": 46970 }, { "epoch": 1.7072461661457954, "grad_norm": 2.520148754119873, "learning_rate": 4.429447504972838e-05, "loss": 0.123, "step": 46980 }, { "epoch": 1.7076095646485938, "grad_norm": 0.4278533160686493, "learning_rate": 4.429117479394873e-05, "loss": 0.1161, "step": 46990 }, { "epoch": 1.7079729631513918, "grad_norm": 1.8297946453094482, "learning_rate": 4.428787370698099e-05, "loss": 0.1347, "step": 47000 }, { "epoch": 1.70833636165419, "grad_norm": 1.1131938695907593, "learning_rate": 4.42845717889674e-05, "loss": 0.113, "step": 47010 }, { "epoch": 1.7086997601569882, "grad_norm": 0.6185646057128906, "learning_rate": 4.428126904005022e-05, "loss": 0.179, "step": 47020 }, { "epoch": 1.7090631586597862, "grad_norm": 0.5343379378318787, "learning_rate": 4.4277965460371775e-05, "loss": 0.1108, "step": 47030 }, { "epoch": 1.7094265571625846, "grad_norm": 1.2087206840515137, "learning_rate": 4.427466105007437e-05, "loss": 0.1308, "step": 47040 }, { "epoch": 1.7097899556653826, "grad_norm": 1.3067313432693481, "learning_rate": 4.4271355809300416e-05, "loss": 0.126, "step": 47050 }, { "epoch": 1.7101533541681808, 
"grad_norm": 3.69439697265625, "learning_rate": 4.42680497381923e-05, "loss": 0.1235, "step": 47060 }, { "epoch": 1.710516752670979, "grad_norm": 1.7959043979644775, "learning_rate": 4.4264742836892484e-05, "loss": 0.1596, "step": 47070 }, { "epoch": 1.710880151173777, "grad_norm": 0.7556483149528503, "learning_rate": 4.4261435105543434e-05, "loss": 0.139, "step": 47080 }, { "epoch": 1.7112435496765754, "grad_norm": 1.0691754817962646, "learning_rate": 4.425812654428768e-05, "loss": 0.1137, "step": 47090 }, { "epoch": 1.7116069481793734, "grad_norm": 0.39985036849975586, "learning_rate": 4.425481715326778e-05, "loss": 0.1147, "step": 47100 }, { "epoch": 1.7119703466821716, "grad_norm": 0.8595932126045227, "learning_rate": 4.425150693262631e-05, "loss": 0.124, "step": 47110 }, { "epoch": 1.7123337451849698, "grad_norm": 1.0184409618377686, "learning_rate": 4.424819588250591e-05, "loss": 0.1196, "step": 47120 }, { "epoch": 1.712697143687768, "grad_norm": 0.9141554236412048, "learning_rate": 4.4244884003049234e-05, "loss": 0.1139, "step": 47130 }, { "epoch": 1.7130605421905662, "grad_norm": 3.891220808029175, "learning_rate": 4.424157129439897e-05, "loss": 0.1123, "step": 47140 }, { "epoch": 1.7134239406933642, "grad_norm": 1.1739959716796875, "learning_rate": 4.4238257756697875e-05, "loss": 0.1472, "step": 47150 }, { "epoch": 1.7137873391961627, "grad_norm": 1.062530517578125, "learning_rate": 4.423494339008869e-05, "loss": 0.1222, "step": 47160 }, { "epoch": 1.7141507376989606, "grad_norm": 0.840980052947998, "learning_rate": 4.423162819471424e-05, "loss": 0.209, "step": 47170 }, { "epoch": 1.7145141362017589, "grad_norm": 1.5960949659347534, "learning_rate": 4.4228312170717353e-05, "loss": 0.2366, "step": 47180 }, { "epoch": 1.714877534704557, "grad_norm": 1.9222055673599243, "learning_rate": 4.4224995318240914e-05, "loss": 0.1195, "step": 47190 }, { "epoch": 1.715240933207355, "grad_norm": 0.9651756286621094, "learning_rate": 4.422167763742783e-05, "loss": 1.1511, 
"step": 47200 }, { "epoch": 1.7156043317101535, "grad_norm": 0.7599210739135742, "learning_rate": 4.421835912842105e-05, "loss": 0.1067, "step": 47210 }, { "epoch": 1.7159677302129515, "grad_norm": 0.9147046804428101, "learning_rate": 4.4215039791363546e-05, "loss": 0.2011, "step": 47220 }, { "epoch": 1.7163311287157497, "grad_norm": 3.473452091217041, "learning_rate": 4.421171962639835e-05, "loss": 0.1297, "step": 47230 }, { "epoch": 1.7166945272185479, "grad_norm": 0.8271628618240356, "learning_rate": 4.420839863366851e-05, "loss": 0.1042, "step": 47240 }, { "epoch": 1.717057925721346, "grad_norm": 1.2343850135803223, "learning_rate": 4.4205076813317115e-05, "loss": 0.1401, "step": 47250 }, { "epoch": 1.7174213242241443, "grad_norm": 2.5408592224121094, "learning_rate": 4.420175416548729e-05, "loss": 0.1331, "step": 47260 }, { "epoch": 1.7177847227269423, "grad_norm": 0.9061927795410156, "learning_rate": 4.41984306903222e-05, "loss": 0.151, "step": 47270 }, { "epoch": 1.7181481212297407, "grad_norm": 1.4675298929214478, "learning_rate": 4.419510638796505e-05, "loss": 0.133, "step": 47280 }, { "epoch": 1.7185115197325387, "grad_norm": 0.5093546509742737, "learning_rate": 4.4191781258559044e-05, "loss": 0.1091, "step": 47290 }, { "epoch": 1.718874918235337, "grad_norm": 1.7886688709259033, "learning_rate": 4.418845530224748e-05, "loss": 0.1364, "step": 47300 }, { "epoch": 1.719238316738135, "grad_norm": 0.9077005982398987, "learning_rate": 4.418512851917365e-05, "loss": 0.0886, "step": 47310 }, { "epoch": 1.719601715240933, "grad_norm": 0.9992018938064575, "learning_rate": 4.418180090948088e-05, "loss": 0.204, "step": 47320 }, { "epoch": 1.7199651137437315, "grad_norm": 3.1602091789245605, "learning_rate": 4.417847247331257e-05, "loss": 0.1099, "step": 47330 }, { "epoch": 1.7203285122465295, "grad_norm": 0.867824912071228, "learning_rate": 4.417514321081212e-05, "loss": 0.1015, "step": 47340 }, { "epoch": 1.7206919107493277, "grad_norm": 0.8763206005096436, 
"learning_rate": 4.4171813122122966e-05, "loss": 0.1669, "step": 47350 }, { "epoch": 1.721055309252126, "grad_norm": 0.579663097858429, "learning_rate": 4.4168482207388604e-05, "loss": 0.1216, "step": 47360 }, { "epoch": 1.721418707754924, "grad_norm": 0.9902794361114502, "learning_rate": 4.416515046675255e-05, "loss": 0.1507, "step": 47370 }, { "epoch": 1.7217821062577223, "grad_norm": 1.082514762878418, "learning_rate": 4.4161817900358334e-05, "loss": 0.1218, "step": 47380 }, { "epoch": 1.7221455047605203, "grad_norm": 1.6784402132034302, "learning_rate": 4.415848450834958e-05, "loss": 0.1112, "step": 47390 }, { "epoch": 1.7225089032633185, "grad_norm": 1.4955846071243286, "learning_rate": 4.415515029086989e-05, "loss": 0.1539, "step": 47400 }, { "epoch": 1.7225089032633185, "eval_loss": 0.3519718050956726, "eval_runtime": 180.5197, "eval_samples_per_second": 41.07, "eval_steps_per_second": 5.135, "eval_wer": 0.1726087824713635, "step": 47400 }, { "epoch": 1.7228723017661167, "grad_norm": 1.609122395515442, "learning_rate": 4.415181524806293e-05, "loss": 0.1218, "step": 47410 }, { "epoch": 1.723235700268915, "grad_norm": 0.5392347574234009, "learning_rate": 4.4148479380072386e-05, "loss": 0.1304, "step": 47420 }, { "epoch": 1.7235990987717131, "grad_norm": 1.874009132385254, "learning_rate": 4.4145142687042e-05, "loss": 0.1251, "step": 47430 }, { "epoch": 1.7239624972745111, "grad_norm": 0.5162834525108337, "learning_rate": 4.4141805169115534e-05, "loss": 0.0922, "step": 47440 }, { "epoch": 1.7243258957773095, "grad_norm": 0.5982137322425842, "learning_rate": 4.41384668264368e-05, "loss": 0.1274, "step": 47450 }, { "epoch": 1.7246892942801075, "grad_norm": 3.50940203666687, "learning_rate": 4.413512765914961e-05, "loss": 0.134, "step": 47460 }, { "epoch": 1.7250526927829057, "grad_norm": 0.49185237288475037, "learning_rate": 4.413178766739786e-05, "loss": 0.1573, "step": 47470 }, { "epoch": 1.725416091285704, "grad_norm": 0.7658770680427551, "learning_rate": 
4.412844685132545e-05, "loss": 0.1231, "step": 47480 }, { "epoch": 1.725779489788502, "grad_norm": 0.7022835612297058, "learning_rate": 4.4125105211076324e-05, "loss": 0.124, "step": 47490 }, { "epoch": 1.7261428882913004, "grad_norm": 0.7009884119033813, "learning_rate": 4.4121762746794456e-05, "loss": 0.1207, "step": 47500 }, { "epoch": 1.7265062867940983, "grad_norm": 1.4415069818496704, "learning_rate": 4.4118419458623875e-05, "loss": 0.1223, "step": 47510 }, { "epoch": 1.7268696852968966, "grad_norm": 0.8234976530075073, "learning_rate": 4.411507534670862e-05, "loss": 0.1524, "step": 47520 }, { "epoch": 1.7272330837996948, "grad_norm": 0.6677774786949158, "learning_rate": 4.411173041119278e-05, "loss": 0.1298, "step": 47530 }, { "epoch": 1.727596482302493, "grad_norm": 9.5011625289917, "learning_rate": 4.410838465222048e-05, "loss": 0.1264, "step": 47540 }, { "epoch": 1.7279598808052912, "grad_norm": 1.5103870630264282, "learning_rate": 4.410503806993587e-05, "loss": 0.1182, "step": 47550 }, { "epoch": 1.7283232793080892, "grad_norm": 1.5079401731491089, "learning_rate": 4.410169066448314e-05, "loss": 0.1295, "step": 47560 }, { "epoch": 1.7286866778108876, "grad_norm": 1.1625335216522217, "learning_rate": 4.4098342436006536e-05, "loss": 0.169, "step": 47570 }, { "epoch": 1.7290500763136856, "grad_norm": 0.8692395687103271, "learning_rate": 4.40949933846503e-05, "loss": 0.147, "step": 47580 }, { "epoch": 1.7294134748164838, "grad_norm": 0.49405890703201294, "learning_rate": 4.409164351055873e-05, "loss": 0.1297, "step": 47590 }, { "epoch": 1.729776873319282, "grad_norm": 2.2081878185272217, "learning_rate": 4.408829281387619e-05, "loss": 0.161, "step": 47600 }, { "epoch": 1.73014027182208, "grad_norm": 2.5445384979248047, "learning_rate": 4.408494129474701e-05, "loss": 0.105, "step": 47610 }, { "epoch": 1.7305036703248784, "grad_norm": 0.4841686487197876, "learning_rate": 4.408158895331562e-05, "loss": 0.1352, "step": 47620 }, { "epoch": 1.7308670688276764, 
"grad_norm": 91.45899963378906, "learning_rate": 4.407823578972646e-05, "loss": 1.4893, "step": 47630 }, { "epoch": 1.7312304673304746, "grad_norm": 1.3897613286972046, "learning_rate": 4.4074881804124e-05, "loss": 0.1109, "step": 47640 }, { "epoch": 1.7315938658332728, "grad_norm": 2.621211528778076, "learning_rate": 4.407152699665275e-05, "loss": 0.1332, "step": 47650 }, { "epoch": 1.7319572643360708, "grad_norm": 0.8939427733421326, "learning_rate": 4.406817136745726e-05, "loss": 0.1101, "step": 47660 }, { "epoch": 1.7323206628388692, "grad_norm": 0.9848506450653076, "learning_rate": 4.4064814916682105e-05, "loss": 0.1855, "step": 47670 }, { "epoch": 1.7326840613416672, "grad_norm": 0.7746869921684265, "learning_rate": 4.406145764447192e-05, "loss": 0.1791, "step": 47680 }, { "epoch": 1.7330474598444654, "grad_norm": 0.6325744390487671, "learning_rate": 4.405809955097133e-05, "loss": 0.1067, "step": 47690 }, { "epoch": 1.7334108583472636, "grad_norm": 0.4917563498020172, "learning_rate": 4.405474063632505e-05, "loss": 0.1405, "step": 47700 }, { "epoch": 1.7337742568500618, "grad_norm": 0.8248608708381653, "learning_rate": 4.405138090067779e-05, "loss": 0.2285, "step": 47710 }, { "epoch": 1.73413765535286, "grad_norm": 1.1260930299758911, "learning_rate": 4.404802034417431e-05, "loss": 0.1606, "step": 47720 }, { "epoch": 1.734501053855658, "grad_norm": 1.282873272895813, "learning_rate": 4.404465896695941e-05, "loss": 0.1251, "step": 47730 }, { "epoch": 1.7348644523584564, "grad_norm": 1.245103120803833, "learning_rate": 4.404129676917791e-05, "loss": 0.1009, "step": 47740 }, { "epoch": 1.7352278508612544, "grad_norm": 0.8262288570404053, "learning_rate": 4.4037933750974686e-05, "loss": 0.2019, "step": 47750 }, { "epoch": 1.7355912493640526, "grad_norm": 0.6815000772476196, "learning_rate": 4.403456991249464e-05, "loss": 0.1395, "step": 47760 }, { "epoch": 1.7359546478668508, "grad_norm": 0.3317665159702301, "learning_rate": 4.403120525388269e-05, "loss": 0.1484, 
"step": 47770 }, { "epoch": 1.7363180463696488, "grad_norm": 3.1392834186553955, "learning_rate": 4.402783977528383e-05, "loss": 0.1404, "step": 47780 }, { "epoch": 1.7366814448724472, "grad_norm": 0.8862357139587402, "learning_rate": 4.4024473476843043e-05, "loss": 0.1012, "step": 47790 }, { "epoch": 1.7370448433752452, "grad_norm": 0.40148264169692993, "learning_rate": 4.402110635870539e-05, "loss": 0.1284, "step": 47800 }, { "epoch": 1.7374082418780434, "grad_norm": 0.6237661838531494, "learning_rate": 4.401773842101594e-05, "loss": 0.1583, "step": 47810 }, { "epoch": 1.7377716403808416, "grad_norm": 0.6144997477531433, "learning_rate": 4.4014369663919805e-05, "loss": 0.2531, "step": 47820 }, { "epoch": 1.7381350388836398, "grad_norm": 0.6017129421234131, "learning_rate": 4.4011000087562135e-05, "loss": 0.1141, "step": 47830 }, { "epoch": 1.738498437386438, "grad_norm": 1.1838932037353516, "learning_rate": 4.400762969208812e-05, "loss": 0.145, "step": 47840 }, { "epoch": 1.738861835889236, "grad_norm": 1.8152750730514526, "learning_rate": 4.400425847764297e-05, "loss": 0.1485, "step": 47850 }, { "epoch": 1.7392252343920345, "grad_norm": 1.8269041776657104, "learning_rate": 4.400088644437193e-05, "loss": 0.1248, "step": 47860 }, { "epoch": 1.7395886328948325, "grad_norm": 0.36247947812080383, "learning_rate": 4.39975135924203e-05, "loss": 0.1293, "step": 47870 }, { "epoch": 1.7399520313976307, "grad_norm": 0.5409418344497681, "learning_rate": 4.399413992193341e-05, "loss": 0.0932, "step": 47880 }, { "epoch": 1.7403154299004289, "grad_norm": 0.8623117804527283, "learning_rate": 4.3990765433056616e-05, "loss": 0.1024, "step": 47890 }, { "epoch": 1.7406788284032269, "grad_norm": 1.7151434421539307, "learning_rate": 4.39873901259353e-05, "loss": 0.1591, "step": 47900 }, { "epoch": 1.7410422269060253, "grad_norm": 0.580970287322998, "learning_rate": 4.39840140007149e-05, "loss": 0.1124, "step": 47910 }, { "epoch": 1.7414056254088233, "grad_norm": 0.7153110504150391, 
"learning_rate": 4.3980637057540884e-05, "loss": 0.2013, "step": 47920 }, { "epoch": 1.7417690239116215, "grad_norm": 0.6043591499328613, "learning_rate": 4.397725929655875e-05, "loss": 1.2071, "step": 47930 }, { "epoch": 1.7421324224144197, "grad_norm": 1.4633545875549316, "learning_rate": 4.397388071791403e-05, "loss": 0.1533, "step": 47940 }, { "epoch": 1.7424958209172177, "grad_norm": 2.011232376098633, "learning_rate": 4.3970501321752314e-05, "loss": 0.1288, "step": 47950 }, { "epoch": 1.742859219420016, "grad_norm": 1.238098382949829, "learning_rate": 4.396712110821918e-05, "loss": 0.1553, "step": 47960 }, { "epoch": 1.743222617922814, "grad_norm": 0.30882275104522705, "learning_rate": 4.3963740077460285e-05, "loss": 0.1547, "step": 47970 }, { "epoch": 1.7435860164256123, "grad_norm": 0.6614134311676025, "learning_rate": 4.39603582296213e-05, "loss": 0.0926, "step": 47980 }, { "epoch": 1.7439494149284105, "grad_norm": 0.8839965462684631, "learning_rate": 4.3956975564847944e-05, "loss": 0.1228, "step": 47990 }, { "epoch": 1.7443128134312087, "grad_norm": 1.4926025867462158, "learning_rate": 4.395359208328597e-05, "loss": 0.1692, "step": 48000 }, { "epoch": 1.7443128134312087, "eval_loss": 0.3166210651397705, "eval_runtime": 179.7213, "eval_samples_per_second": 41.253, "eval_steps_per_second": 5.158, "eval_wer": 0.1702307259425999, "step": 48000 }, { "epoch": 1.744676211934007, "grad_norm": 0.8344828486442566, "learning_rate": 4.395020778508114e-05, "loss": 0.115, "step": 48010 }, { "epoch": 1.7450396104368049, "grad_norm": 0.7698808908462524, "learning_rate": 4.394682267037928e-05, "loss": 0.11, "step": 48020 }, { "epoch": 1.7454030089396033, "grad_norm": 0.7840538024902344, "learning_rate": 4.394343673932625e-05, "loss": 0.1209, "step": 48030 }, { "epoch": 1.7457664074424013, "grad_norm": 0.9926084876060486, "learning_rate": 4.394004999206792e-05, "loss": 0.1711, "step": 48040 }, { "epoch": 1.7461298059451995, "grad_norm": 0.8097075819969177, "learning_rate": 
4.3936662428750234e-05, "loss": 0.1564, "step": 48050 }, { "epoch": 1.7464932044479977, "grad_norm": 1.1143872737884521, "learning_rate": 4.393327404951915e-05, "loss": 0.1254, "step": 48060 }, { "epoch": 1.7468566029507957, "grad_norm": 0.8600324988365173, "learning_rate": 4.392988485452063e-05, "loss": 0.1512, "step": 48070 }, { "epoch": 1.7472200014535941, "grad_norm": 1.2914844751358032, "learning_rate": 4.3926494843900745e-05, "loss": 0.1215, "step": 48080 }, { "epoch": 1.7475833999563921, "grad_norm": 2.0937047004699707, "learning_rate": 4.3923104017805524e-05, "loss": 0.1222, "step": 48090 }, { "epoch": 1.7479467984591903, "grad_norm": 0.5339716076850891, "learning_rate": 4.391971237638108e-05, "loss": 0.1561, "step": 48100 }, { "epoch": 1.7483101969619885, "grad_norm": 2.8374857902526855, "learning_rate": 4.391631991977356e-05, "loss": 0.1204, "step": 48110 }, { "epoch": 1.7486735954647867, "grad_norm": 0.6171733140945435, "learning_rate": 4.39129266481291e-05, "loss": 0.1742, "step": 48120 }, { "epoch": 1.749036993967585, "grad_norm": 1.0918267965316772, "learning_rate": 4.390953256159394e-05, "loss": 0.1062, "step": 48130 }, { "epoch": 1.749400392470383, "grad_norm": 1.273437738418579, "learning_rate": 4.3906137660314296e-05, "loss": 0.1166, "step": 48140 }, { "epoch": 1.7497637909731814, "grad_norm": 0.9305084943771362, "learning_rate": 4.390274194443645e-05, "loss": 0.1264, "step": 48150 }, { "epoch": 1.7501271894759793, "grad_norm": 1.1687837839126587, "learning_rate": 4.389934541410671e-05, "loss": 0.1437, "step": 48160 }, { "epoch": 1.7504905879787775, "grad_norm": 0.5981254577636719, "learning_rate": 4.389594806947142e-05, "loss": 0.155, "step": 48170 }, { "epoch": 1.7508539864815758, "grad_norm": 0.8380612730979919, "learning_rate": 4.389254991067695e-05, "loss": 0.118, "step": 48180 }, { "epoch": 1.7512173849843737, "grad_norm": 0.9481167197227478, "learning_rate": 4.388915093786973e-05, "loss": 0.1164, "step": 48190 }, { "epoch": 
1.7515807834871722, "grad_norm": 0.4648977220058441, "learning_rate": 4.3885751151196206e-05, "loss": 0.139, "step": 48200 }, { "epoch": 1.7519441819899702, "grad_norm": 1.5835154056549072, "learning_rate": 4.388235055080287e-05, "loss": 0.1288, "step": 48210 }, { "epoch": 1.7523075804927684, "grad_norm": 2.212986469268799, "learning_rate": 4.387894913683622e-05, "loss": 0.1271, "step": 48220 }, { "epoch": 1.7526709789955666, "grad_norm": 1.1150215864181519, "learning_rate": 4.3875887168782186e-05, "loss": 3.4347, "step": 48230 }, { "epoch": 1.7530343774983645, "grad_norm": 4.514034271240234, "learning_rate": 4.3872484209430055e-05, "loss": 0.1084, "step": 48240 }, { "epoch": 1.753397776001163, "grad_norm": 1.3234679698944092, "learning_rate": 4.386908043692973e-05, "loss": 0.1258, "step": 48250 }, { "epoch": 1.753761174503961, "grad_norm": 0.579300045967102, "learning_rate": 4.3865675851427856e-05, "loss": 0.1375, "step": 48260 }, { "epoch": 1.7541245730067592, "grad_norm": Infinity, "learning_rate": 4.3862611029481096e-05, "loss": 3.7136, "step": 48270 }, { "epoch": 1.7544879715095574, "grad_norm": 0.5648366808891296, "learning_rate": 4.3859204899680476e-05, "loss": 0.1164, "step": 48280 }, { "epoch": 1.7548513700123556, "grad_norm": 2.9147396087646484, "learning_rate": 4.38557979573038e-05, "loss": 0.1269, "step": 48290 }, { "epoch": 1.7552147685151538, "grad_norm": 1.5536600351333618, "learning_rate": 4.385239020249789e-05, "loss": 0.1275, "step": 48300 }, { "epoch": 1.7555781670179518, "grad_norm": 1.4855754375457764, "learning_rate": 4.384898163540956e-05, "loss": 0.1281, "step": 48310 }, { "epoch": 1.7559415655207502, "grad_norm": 182.5458984375, "learning_rate": 4.384557225618567e-05, "loss": 2.7885, "step": 48320 }, { "epoch": 1.7563049640235482, "grad_norm": 1.1808582544326782, "learning_rate": 4.3842162064973134e-05, "loss": 0.1378, "step": 48330 }, { "epoch": 1.7566683625263464, "grad_norm": 2.970055341720581, "learning_rate": 4.383875106191888e-05, 
"loss": 0.1319, "step": 48340 }, { "epoch": 1.7570317610291446, "grad_norm": 1.0342578887939453, "learning_rate": 4.383533924716986e-05, "loss": 0.1254, "step": 48350 }, { "epoch": 1.7573951595319426, "grad_norm": 2.821300745010376, "learning_rate": 4.383192662087309e-05, "loss": 0.1098, "step": 48360 }, { "epoch": 1.757758558034741, "grad_norm": 4.711688041687012, "learning_rate": 4.382851318317561e-05, "loss": 0.2667, "step": 48370 }, { "epoch": 1.758121956537539, "grad_norm": 0.6478423476219177, "learning_rate": 4.382509893422448e-05, "loss": 0.1041, "step": 48380 }, { "epoch": 1.7584853550403372, "grad_norm": 0.4265103042125702, "learning_rate": 4.382168387416683e-05, "loss": 0.0854, "step": 48390 }, { "epoch": 1.7588487535431354, "grad_norm": 1.3017734289169312, "learning_rate": 4.381826800314979e-05, "loss": 0.1411, "step": 48400 }, { "epoch": 1.7592121520459336, "grad_norm": 2.5378410816192627, "learning_rate": 4.3814851321320524e-05, "loss": 0.1361, "step": 48410 }, { "epoch": 1.7595755505487318, "grad_norm": 1.133738398551941, "learning_rate": 4.381143382882627e-05, "loss": 0.1628, "step": 48420 }, { "epoch": 1.7599389490515298, "grad_norm": 0.851696252822876, "learning_rate": 4.3808015525814254e-05, "loss": 0.1237, "step": 48430 }, { "epoch": 1.7603023475543282, "grad_norm": 0.6289827823638916, "learning_rate": 4.380459641243177e-05, "loss": 0.1126, "step": 48440 }, { "epoch": 1.7606657460571262, "grad_norm": 0.9123024940490723, "learning_rate": 4.380117648882614e-05, "loss": 0.1588, "step": 48450 }, { "epoch": 1.7610291445599244, "grad_norm": 0.9411369562149048, "learning_rate": 4.379775575514471e-05, "loss": 0.1505, "step": 48460 }, { "epoch": 1.7613925430627226, "grad_norm": 0.7461705207824707, "learning_rate": 4.379433421153486e-05, "loss": 0.1404, "step": 48470 }, { "epoch": 1.7617559415655206, "grad_norm": 3.351199150085449, "learning_rate": 4.3790911858144025e-05, "loss": 0.1126, "step": 48480 }, { "epoch": 1.762119340068319, "grad_norm": 
0.8594498634338379, "learning_rate": 4.378748869511965e-05, "loss": 0.0944, "step": 48490 }, { "epoch": 1.762482738571117, "grad_norm": 8.456245422363281, "learning_rate": 4.378406472260924e-05, "loss": 0.1368, "step": 48500 }, { "epoch": 1.7628461370739152, "grad_norm": 2.1628379821777344, "learning_rate": 4.3780639940760306e-05, "loss": 0.1052, "step": 48510 }, { "epoch": 1.7632095355767134, "grad_norm": 0.7315357327461243, "learning_rate": 4.377721434972043e-05, "loss": 0.1247, "step": 48520 }, { "epoch": 1.7635729340795114, "grad_norm": 0.8521216511726379, "learning_rate": 4.377378794963719e-05, "loss": 4.0948, "step": 48530 }, { "epoch": 1.7639363325823099, "grad_norm": 1.6623167991638184, "learning_rate": 4.377036074065823e-05, "loss": 0.1117, "step": 48540 }, { "epoch": 1.7642997310851078, "grad_norm": 0.8901768326759338, "learning_rate": 4.3766932722931206e-05, "loss": 0.1484, "step": 48550 }, { "epoch": 1.764663129587906, "grad_norm": 1.522369623184204, "learning_rate": 4.3763503896603826e-05, "loss": 0.1206, "step": 48560 }, { "epoch": 1.7650265280907043, "grad_norm": 0.5716699957847595, "learning_rate": 4.3760074261823824e-05, "loss": 0.1744, "step": 48570 }, { "epoch": 1.7653899265935025, "grad_norm": 2.5722410678863525, "learning_rate": 4.375664381873896e-05, "loss": 0.1224, "step": 48580 }, { "epoch": 1.7657533250963007, "grad_norm": 2.1870505809783936, "learning_rate": 4.3753212567497065e-05, "loss": 0.1261, "step": 48590 }, { "epoch": 1.7661167235990987, "grad_norm": 0.8871011137962341, "learning_rate": 4.374978050824596e-05, "loss": 0.1399, "step": 48600 }, { "epoch": 1.7661167235990987, "eval_loss": 0.3405693769454956, "eval_runtime": 179.6168, "eval_samples_per_second": 41.277, "eval_steps_per_second": 5.161, "eval_wer": 0.16414035979450686, "step": 48600 }, { "epoch": 1.766480122101897, "grad_norm": 1.1147382259368896, "learning_rate": 4.374634764113352e-05, "loss": 0.1198, "step": 48610 }, { "epoch": 1.766843520604695, "grad_norm": 
0.48618343472480774, "learning_rate": 4.374291396630767e-05, "loss": 0.199, "step": 48620 }, { "epoch": 1.7672069191074933, "grad_norm": 0.9088806509971619, "learning_rate": 4.373947948391633e-05, "loss": 0.1168, "step": 48630 }, { "epoch": 1.7675703176102915, "grad_norm": 0.4064035415649414, "learning_rate": 4.373604419410751e-05, "loss": 0.0935, "step": 48640 }, { "epoch": 1.7679337161130895, "grad_norm": 0.7718721032142639, "learning_rate": 4.373260809702921e-05, "loss": 0.1229, "step": 48650 }, { "epoch": 1.768297114615888, "grad_norm": 0.9764898419380188, "learning_rate": 4.3729171192829465e-05, "loss": 0.1043, "step": 48660 }, { "epoch": 1.7686605131186859, "grad_norm": 1.2039941549301147, "learning_rate": 4.372573348165638e-05, "loss": 0.142, "step": 48670 }, { "epoch": 1.769023911621484, "grad_norm": 0.5939382314682007, "learning_rate": 4.3722294963658064e-05, "loss": 0.1373, "step": 48680 }, { "epoch": 1.7693873101242823, "grad_norm": 1.034637451171875, "learning_rate": 4.3718855638982664e-05, "loss": 0.114, "step": 48690 }, { "epoch": 1.7697507086270805, "grad_norm": 1.1438782215118408, "learning_rate": 4.371541550777838e-05, "loss": 0.123, "step": 48700 }, { "epoch": 1.7701141071298787, "grad_norm": 0.8836175799369812, "learning_rate": 4.3711974570193435e-05, "loss": 0.1159, "step": 48710 }, { "epoch": 1.7704775056326767, "grad_norm": 0.4500117897987366, "learning_rate": 4.370853282637609e-05, "loss": 0.157, "step": 48720 }, { "epoch": 1.7708409041354751, "grad_norm": 0.8643542528152466, "learning_rate": 4.370509027647462e-05, "loss": 0.4179, "step": 48730 }, { "epoch": 1.771204302638273, "grad_norm": 1.6022706031799316, "learning_rate": 4.370164692063737e-05, "loss": 0.1222, "step": 48740 }, { "epoch": 1.7715677011410713, "grad_norm": 2.06821870803833, "learning_rate": 4.3698202759012685e-05, "loss": 0.1353, "step": 48750 }, { "epoch": 1.7719310996438695, "grad_norm": 0.6547145843505859, "learning_rate": 4.369475779174898e-05, "loss": 0.1053, "step": 
48760 }, { "epoch": 1.7722944981466675, "grad_norm": 1.154436707496643, "learning_rate": 4.369131201899468e-05, "loss": 0.1642, "step": 48770 }, { "epoch": 1.772657896649466, "grad_norm": 1.4460755586624146, "learning_rate": 4.3687865440898243e-05, "loss": 0.1194, "step": 48780 }, { "epoch": 1.773021295152264, "grad_norm": 0.8164231777191162, "learning_rate": 4.368441805760818e-05, "loss": 0.1205, "step": 48790 }, { "epoch": 1.7733846936550621, "grad_norm": 3.0235085487365723, "learning_rate": 4.3680969869273016e-05, "loss": 0.2118, "step": 48800 }, { "epoch": 1.7737480921578603, "grad_norm": 1.397639513015747, "learning_rate": 4.367752087604134e-05, "loss": 0.1163, "step": 48810 }, { "epoch": 1.7741114906606583, "grad_norm": 0.5514954328536987, "learning_rate": 4.3674071078061726e-05, "loss": 0.1373, "step": 48820 }, { "epoch": 1.7744748891634567, "grad_norm": 1.3183518648147583, "learning_rate": 4.3670620475482836e-05, "loss": 0.1059, "step": 48830 }, { "epoch": 1.7748382876662547, "grad_norm": 0.6846873164176941, "learning_rate": 4.366716906845335e-05, "loss": 0.1139, "step": 48840 }, { "epoch": 1.775201686169053, "grad_norm": 1.2583421468734741, "learning_rate": 4.366371685712196e-05, "loss": 0.1248, "step": 48850 }, { "epoch": 1.7755650846718511, "grad_norm": 0.7057945728302002, "learning_rate": 4.366026384163742e-05, "loss": 0.1089, "step": 48860 }, { "epoch": 1.7759284831746494, "grad_norm": 1.1777584552764893, "learning_rate": 4.36568100221485e-05, "loss": 0.1842, "step": 48870 }, { "epoch": 1.7762918816774476, "grad_norm": 0.8768916726112366, "learning_rate": 4.3653355398804025e-05, "loss": 0.1095, "step": 48880 }, { "epoch": 1.7766552801802455, "grad_norm": 1.5699349641799927, "learning_rate": 4.364989997175283e-05, "loss": 0.0982, "step": 48890 }, { "epoch": 1.777018678683044, "grad_norm": 1.1270577907562256, "learning_rate": 4.36464437411438e-05, "loss": 0.1329, "step": 48900 }, { "epoch": 1.777382077185842, "grad_norm": 0.545153021812439, 
"learning_rate": 4.3642986707125856e-05, "loss": 0.1169, "step": 48910 }, { "epoch": 1.7777454756886402, "grad_norm": 1.2134042978286743, "learning_rate": 4.363952886984795e-05, "loss": 0.1704, "step": 48920 }, { "epoch": 1.7781088741914384, "grad_norm": 1.079684853553772, "learning_rate": 4.3636070229459055e-05, "loss": 0.1077, "step": 48930 }, { "epoch": 1.7784722726942364, "grad_norm": 0.6559361815452576, "learning_rate": 4.3632610786108205e-05, "loss": 0.1044, "step": 48940 }, { "epoch": 1.7788356711970348, "grad_norm": 0.9042558670043945, "learning_rate": 4.3629150539944454e-05, "loss": 0.8377, "step": 48950 }, { "epoch": 1.7791990696998328, "grad_norm": 2.102360725402832, "learning_rate": 4.362568949111689e-05, "loss": 0.1407, "step": 48960 }, { "epoch": 1.779562468202631, "grad_norm": 2.581956148147583, "learning_rate": 4.3622227639774635e-05, "loss": 0.182, "step": 48970 }, { "epoch": 1.7799258667054292, "grad_norm": 0.9113497138023376, "learning_rate": 4.361876498606685e-05, "loss": 0.0965, "step": 48980 }, { "epoch": 1.7802892652082274, "grad_norm": 2.428302049636841, "learning_rate": 4.361530153014273e-05, "loss": 0.1093, "step": 48990 }, { "epoch": 1.7806526637110256, "grad_norm": 1.9562546014785767, "learning_rate": 4.361183727215149e-05, "loss": 0.1437, "step": 49000 }, { "epoch": 1.7810160622138236, "grad_norm": 0.7445639967918396, "learning_rate": 4.360837221224241e-05, "loss": 0.1037, "step": 49010 }, { "epoch": 1.781379460716622, "grad_norm": 0.9966205954551697, "learning_rate": 4.360490635056478e-05, "loss": 0.1649, "step": 49020 }, { "epoch": 1.78174285921942, "grad_norm": 1.8854800462722778, "learning_rate": 4.360143968726793e-05, "loss": 0.1139, "step": 49030 }, { "epoch": 1.7821062577222182, "grad_norm": 0.5688827037811279, "learning_rate": 4.3597972222501225e-05, "loss": 0.1134, "step": 49040 }, { "epoch": 1.7824696562250164, "grad_norm": 0.7284519076347351, "learning_rate": 4.359450395641408e-05, "loss": 0.1636, "step": 49050 }, { "epoch": 
1.7828330547278144, "grad_norm": 0.7459525465965271, "learning_rate": 4.359103488915591e-05, "loss": 0.1282, "step": 49060 }, { "epoch": 1.7831964532306128, "grad_norm": 0.3692184090614319, "learning_rate": 4.35875650208762e-05, "loss": 0.1491, "step": 49070 }, { "epoch": 1.7835598517334108, "grad_norm": 1.8872483968734741, "learning_rate": 4.358409435172443e-05, "loss": 0.1701, "step": 49080 }, { "epoch": 1.783923250236209, "grad_norm": 2.4084055423736572, "learning_rate": 4.358062288185018e-05, "loss": 0.1038, "step": 49090 }, { "epoch": 1.7842866487390072, "grad_norm": 1.3348972797393799, "learning_rate": 4.357715061140299e-05, "loss": 0.1723, "step": 49100 }, { "epoch": 1.7846500472418052, "grad_norm": 1.2129530906677246, "learning_rate": 4.357367754053248e-05, "loss": 0.1127, "step": 49110 }, { "epoch": 1.7850134457446036, "grad_norm": 0.3813287615776062, "learning_rate": 4.3570203669388285e-05, "loss": 0.1193, "step": 49120 }, { "epoch": 1.7853768442474016, "grad_norm": 0.5162807703018188, "learning_rate": 4.356672899812009e-05, "loss": 0.1023, "step": 49130 }, { "epoch": 1.7857402427501998, "grad_norm": 1.0435444116592407, "learning_rate": 4.356325352687761e-05, "loss": 0.0866, "step": 49140 }, { "epoch": 1.786103641252998, "grad_norm": 1.3136024475097656, "learning_rate": 4.355977725581058e-05, "loss": 0.1726, "step": 49150 }, { "epoch": 1.7864670397557962, "grad_norm": 3.7956295013427734, "learning_rate": 4.355630018506878e-05, "loss": 0.1215, "step": 49160 }, { "epoch": 1.7868304382585944, "grad_norm": 0.8079971075057983, "learning_rate": 4.3552822314802025e-05, "loss": 0.1389, "step": 49170 }, { "epoch": 1.7871938367613924, "grad_norm": 0.7602683901786804, "learning_rate": 4.354934364516018e-05, "loss": 0.1433, "step": 49180 }, { "epoch": 1.7875572352641909, "grad_norm": 1.5899375677108765, "learning_rate": 4.3545864176293104e-05, "loss": 0.1194, "step": 49190 }, { "epoch": 1.7879206337669888, "grad_norm": 57.75679016113281, "learning_rate": 
4.354238390835073e-05, "loss": 0.4817, "step": 49200 }, { "epoch": 1.7879206337669888, "eval_loss": 0.3314359784126282, "eval_runtime": 180.9298, "eval_samples_per_second": 40.977, "eval_steps_per_second": 5.124, "eval_wer": 0.17101130938333908, "step": 49200 }, { "epoch": 1.788284032269787, "grad_norm": 0.689213216304779, "learning_rate": 4.353890284148301e-05, "loss": 0.1062, "step": 49210 }, { "epoch": 1.7886474307725853, "grad_norm": 0.43430793285369873, "learning_rate": 4.3535420975839924e-05, "loss": 0.1184, "step": 49220 }, { "epoch": 1.7890108292753832, "grad_norm": 0.5645721554756165, "learning_rate": 4.353193831157151e-05, "loss": 0.5598, "step": 49230 }, { "epoch": 1.7893742277781817, "grad_norm": 0.9227817058563232, "learning_rate": 4.352845484882779e-05, "loss": 0.1123, "step": 49240 }, { "epoch": 1.7897376262809797, "grad_norm": 1.031924843788147, "learning_rate": 4.35249705877589e-05, "loss": 0.1266, "step": 49250 }, { "epoch": 1.7901010247837779, "grad_norm": 0.7044590711593628, "learning_rate": 4.3521485528514914e-05, "loss": 0.1203, "step": 49260 }, { "epoch": 1.790464423286576, "grad_norm": 0.669763445854187, "learning_rate": 4.3517999671246034e-05, "loss": 0.1206, "step": 49270 }, { "epoch": 1.7908278217893743, "grad_norm": 1.149156928062439, "learning_rate": 4.351451301610243e-05, "loss": 0.1309, "step": 49280 }, { "epoch": 1.7911912202921725, "grad_norm": 0.42814984917640686, "learning_rate": 4.3511025563234334e-05, "loss": 0.6053, "step": 49290 }, { "epoch": 1.7915546187949705, "grad_norm": 1.2521979808807373, "learning_rate": 4.350753731279201e-05, "loss": 0.1397, "step": 49300 }, { "epoch": 1.791918017297769, "grad_norm": 1.2080492973327637, "learning_rate": 4.3504048264925756e-05, "loss": 0.1416, "step": 49310 }, { "epoch": 1.7922814158005669, "grad_norm": 0.479490727186203, "learning_rate": 4.3500558419785897e-05, "loss": 0.1247, "step": 49320 }, { "epoch": 1.792644814303365, "grad_norm": 0.6005672216415405, "learning_rate": 
4.349706777752279e-05, "loss": 0.1049, "step": 49330 }, { "epoch": 1.7930082128061633, "grad_norm": 0.7790218591690063, "learning_rate": 4.349357633828687e-05, "loss": 0.111, "step": 49340 }, { "epoch": 1.7933716113089613, "grad_norm": 0.9393801689147949, "learning_rate": 4.3490084102228523e-05, "loss": 0.1228, "step": 49350 }, { "epoch": 1.7937350098117597, "grad_norm": 6.127364158630371, "learning_rate": 4.348659106949825e-05, "loss": 0.1369, "step": 49360 }, { "epoch": 1.7940984083145577, "grad_norm": 0.746756911277771, "learning_rate": 4.3483097240246546e-05, "loss": 0.1515, "step": 49370 }, { "epoch": 1.794461806817356, "grad_norm": 0.8121843934059143, "learning_rate": 4.347960261462394e-05, "loss": 0.1007, "step": 49380 }, { "epoch": 1.794825205320154, "grad_norm": 1.695778250694275, "learning_rate": 4.347610719278101e-05, "loss": 0.1302, "step": 49390 }, { "epoch": 1.795188603822952, "grad_norm": 0.9138917922973633, "learning_rate": 4.3472610974868354e-05, "loss": 0.1758, "step": 49400 }, { "epoch": 1.7955520023257505, "grad_norm": 0.6245046257972717, "learning_rate": 4.3469113961036625e-05, "loss": 0.1269, "step": 49410 }, { "epoch": 1.7959154008285485, "grad_norm": 0.4156048595905304, "learning_rate": 4.3465616151436484e-05, "loss": 0.1782, "step": 49420 }, { "epoch": 1.7962787993313467, "grad_norm": 0.7024033069610596, "learning_rate": 4.346211754621865e-05, "loss": 0.1817, "step": 49430 }, { "epoch": 1.796642197834145, "grad_norm": 1.7370237112045288, "learning_rate": 4.345861814553385e-05, "loss": 0.1453, "step": 49440 }, { "epoch": 1.7970055963369431, "grad_norm": 2.4251365661621094, "learning_rate": 4.3455117949532875e-05, "loss": 1.928, "step": 49450 }, { "epoch": 1.7973689948397413, "grad_norm": 0.7227508425712585, "learning_rate": 4.3451616958366524e-05, "loss": 0.1225, "step": 49460 }, { "epoch": 1.7977323933425393, "grad_norm": 0.27948006987571716, "learning_rate": 4.344811517218566e-05, "loss": 0.1819, "step": 49470 }, { "epoch": 
1.7980957918453377, "grad_norm": 0.583686888217926, "learning_rate": 4.344461259114116e-05, "loss": 0.1155, "step": 49480 }, { "epoch": 1.7984591903481357, "grad_norm": 0.7126079797744751, "learning_rate": 4.344110921538391e-05, "loss": 0.1466, "step": 49490 }, { "epoch": 1.798822588850934, "grad_norm": 1.089173674583435, "learning_rate": 4.343760504506488e-05, "loss": 0.142, "step": 49500 }, { "epoch": 1.7991859873537321, "grad_norm": 0.950932502746582, "learning_rate": 4.343410008033506e-05, "loss": 0.1036, "step": 49510 }, { "epoch": 1.7995493858565301, "grad_norm": 0.6006519794464111, "learning_rate": 4.343059432134545e-05, "loss": 0.2374, "step": 49520 }, { "epoch": 1.7999127843593286, "grad_norm": 2.3363699913024902, "learning_rate": 4.342708776824711e-05, "loss": 0.1084, "step": 49530 }, { "epoch": 1.8002761828621265, "grad_norm": 0.5308919548988342, "learning_rate": 4.342358042119111e-05, "loss": 0.1194, "step": 49540 }, { "epoch": 1.8006395813649247, "grad_norm": 0.5925958752632141, "learning_rate": 4.3420072280328594e-05, "loss": 0.1374, "step": 49550 }, { "epoch": 1.801002979867723, "grad_norm": 0.9156503081321716, "learning_rate": 4.34165633458107e-05, "loss": 0.1201, "step": 49560 }, { "epoch": 1.8013663783705212, "grad_norm": 1.0838543176651, "learning_rate": 4.341305361778862e-05, "loss": 0.1442, "step": 49570 }, { "epoch": 1.8017297768733194, "grad_norm": 0.5390272736549377, "learning_rate": 4.340954309641357e-05, "loss": 0.1318, "step": 49580 }, { "epoch": 1.8020931753761174, "grad_norm": 1.546189785003662, "learning_rate": 4.340603178183681e-05, "loss": 0.1106, "step": 49590 }, { "epoch": 1.8024565738789158, "grad_norm": 0.6207401752471924, "learning_rate": 4.340251967420963e-05, "loss": 0.1309, "step": 49600 }, { "epoch": 1.8028199723817138, "grad_norm": 0.7735322713851929, "learning_rate": 4.339900677368335e-05, "loss": 0.1116, "step": 49610 }, { "epoch": 1.803183370884512, "grad_norm": 0.30109134316444397, "learning_rate": 
4.3395493080409335e-05, "loss": 0.1424, "step": 49620 }, { "epoch": 1.8035467693873102, "grad_norm": 0.9261472225189209, "learning_rate": 4.339197859453897e-05, "loss": 0.1243, "step": 49630 }, { "epoch": 1.8039101678901082, "grad_norm": 1.4092273712158203, "learning_rate": 4.3388463316223696e-05, "loss": 0.0978, "step": 49640 }, { "epoch": 1.8042735663929066, "grad_norm": 1.4663946628570557, "learning_rate": 4.338494724561496e-05, "loss": 0.1159, "step": 49650 }, { "epoch": 1.8046369648957046, "grad_norm": 0.9966018795967102, "learning_rate": 4.338143038286425e-05, "loss": 0.1606, "step": 49660 }, { "epoch": 1.8050003633985028, "grad_norm": 1.0136394500732422, "learning_rate": 4.3377912728123124e-05, "loss": 0.147, "step": 49670 }, { "epoch": 1.805363761901301, "grad_norm": 0.647540807723999, "learning_rate": 4.337439428154312e-05, "loss": 3.8271, "step": 49680 }, { "epoch": 1.805727160404099, "grad_norm": 0.9579293131828308, "learning_rate": 4.3370875043275835e-05, "loss": 0.1021, "step": 49690 }, { "epoch": 1.8060905589068974, "grad_norm": 0.9937068819999695, "learning_rate": 4.3367355013472924e-05, "loss": 0.1295, "step": 49700 }, { "epoch": 1.8064539574096954, "grad_norm": 0.7757039070129395, "learning_rate": 4.3363834192286026e-05, "loss": 0.1347, "step": 49710 }, { "epoch": 1.8068173559124936, "grad_norm": 0.4056714177131653, "learning_rate": 4.336031257986685e-05, "loss": 0.1225, "step": 49720 }, { "epoch": 1.8071807544152918, "grad_norm": 0.7865206003189087, "learning_rate": 4.335679017636714e-05, "loss": 0.1149, "step": 49730 }, { "epoch": 1.80754415291809, "grad_norm": 0.8593509793281555, "learning_rate": 4.335326698193864e-05, "loss": 0.2783, "step": 49740 }, { "epoch": 1.8079075514208882, "grad_norm": 1.0015538930892944, "learning_rate": 4.334974299673318e-05, "loss": 0.1143, "step": 49750 }, { "epoch": 1.8082709499236862, "grad_norm": 0.6025313138961792, "learning_rate": 4.334621822090258e-05, "loss": 0.0951, "step": 49760 }, { "epoch": 
1.8086343484264846, "grad_norm": 0.5842722058296204, "learning_rate": 4.33426926545987e-05, "loss": 0.1468, "step": 49770 }, { "epoch": 1.8089977469292826, "grad_norm": 0.5118249654769897, "learning_rate": 4.333916629797348e-05, "loss": 0.1095, "step": 49780 }, { "epoch": 1.8093611454320808, "grad_norm": 2.118723154067993, "learning_rate": 4.333563915117882e-05, "loss": 0.1106, "step": 49790 }, { "epoch": 1.809724543934879, "grad_norm": 0.7443241477012634, "learning_rate": 4.3332111214366714e-05, "loss": 0.1103, "step": 49800 }, { "epoch": 1.809724543934879, "eval_loss": 0.3337153196334839, "eval_runtime": 180.2527, "eval_samples_per_second": 41.131, "eval_steps_per_second": 5.143, "eval_wer": 0.1598834570769873, "step": 49800 }, { "epoch": 1.810087942437677, "grad_norm": 1.2676368951797485, "learning_rate": 4.332858248768916e-05, "loss": 0.1016, "step": 49810 }, { "epoch": 1.8104513409404754, "grad_norm": 2.088113307952881, "learning_rate": 4.3325052971298195e-05, "loss": 0.1931, "step": 49820 }, { "epoch": 1.8108147394432734, "grad_norm": 0.6147329807281494, "learning_rate": 4.332152266534591e-05, "loss": 0.0989, "step": 49830 }, { "epoch": 1.8111781379460716, "grad_norm": 1.3880411386489868, "learning_rate": 4.3317991569984384e-05, "loss": 0.1297, "step": 49840 }, { "epoch": 1.8115415364488698, "grad_norm": 1.794396996498108, "learning_rate": 4.331445968536579e-05, "loss": 0.1191, "step": 49850 }, { "epoch": 1.811904934951668, "grad_norm": 1.7588627338409424, "learning_rate": 4.331092701164229e-05, "loss": 0.1594, "step": 49860 }, { "epoch": 1.8122683334544663, "grad_norm": 0.9384113550186157, "learning_rate": 4.330739354896609e-05, "loss": 0.1634, "step": 49870 }, { "epoch": 1.8126317319572642, "grad_norm": 0.5903241634368896, "learning_rate": 4.330385929748945e-05, "loss": 0.1436, "step": 49880 }, { "epoch": 1.8129951304600627, "grad_norm": 0.6767405271530151, "learning_rate": 4.330032425736462e-05, "loss": 0.1139, "step": 49890 }, { "epoch": 
1.8133585289628606, "grad_norm": 0.8860883116722107, "learning_rate": 4.329678842874395e-05, "loss": 0.1279, "step": 49900 }, { "epoch": 1.8137219274656589, "grad_norm": 58.24811553955078, "learning_rate": 4.3293251811779755e-05, "loss": 0.5721, "step": 49910 }, { "epoch": 1.814085325968457, "grad_norm": 0.965320348739624, "learning_rate": 4.328971440662443e-05, "loss": 0.1297, "step": 49920 }, { "epoch": 1.814448724471255, "grad_norm": 0.5799686908721924, "learning_rate": 4.328617621343039e-05, "loss": 0.1166, "step": 49930 }, { "epoch": 1.8148121229740535, "grad_norm": 2.6377480030059814, "learning_rate": 4.3282637232350074e-05, "loss": 0.0991, "step": 49940 }, { "epoch": 1.8151755214768515, "grad_norm": 0.9475420117378235, "learning_rate": 4.327909746353597e-05, "loss": 0.119, "step": 49950 }, { "epoch": 1.8155389199796497, "grad_norm": 0.36662977933883667, "learning_rate": 4.32755569071406e-05, "loss": 0.1224, "step": 49960 }, { "epoch": 1.8159023184824479, "grad_norm": 1.361423373222351, "learning_rate": 4.3272015563316506e-05, "loss": 0.1463, "step": 49970 }, { "epoch": 1.8162657169852459, "grad_norm": 1.3705862760543823, "learning_rate": 4.326847343221627e-05, "loss": 0.9571, "step": 49980 }, { "epoch": 1.8166291154880443, "grad_norm": 1.0312581062316895, "learning_rate": 4.326493051399251e-05, "loss": 0.1055, "step": 49990 }, { "epoch": 1.8169925139908423, "grad_norm": 1.8485617637634277, "learning_rate": 4.326138680879789e-05, "loss": 0.1521, "step": 50000 }, { "epoch": 1.8173559124936405, "grad_norm": 0.8536475300788879, "learning_rate": 4.325784231678508e-05, "loss": 0.1067, "step": 50010 }, { "epoch": 1.8177193109964387, "grad_norm": 0.8633929491043091, "learning_rate": 4.325429703810681e-05, "loss": 0.1326, "step": 50020 }, { "epoch": 1.818082709499237, "grad_norm": 0.9228955507278442, "learning_rate": 4.325075097291582e-05, "loss": 0.1123, "step": 50030 }, { "epoch": 1.818446108002035, "grad_norm": 0.6627784967422485, "learning_rate": 
4.324720412136491e-05, "loss": 0.1049, "step": 50040 }, { "epoch": 1.818809506504833, "grad_norm": 2.006086826324463, "learning_rate": 4.324365648360691e-05, "loss": 0.1699, "step": 50050 }, { "epoch": 1.8191729050076315, "grad_norm": 0.8955428600311279, "learning_rate": 4.3240108059794646e-05, "loss": 0.1357, "step": 50060 }, { "epoch": 1.8195363035104295, "grad_norm": 164.89707946777344, "learning_rate": 4.3236558850081036e-05, "loss": 3.7378, "step": 50070 }, { "epoch": 1.8198997020132277, "grad_norm": 1.0500569343566895, "learning_rate": 4.3233008854618994e-05, "loss": 0.3275, "step": 50080 }, { "epoch": 1.820263100516026, "grad_norm": 23.444902420043945, "learning_rate": 4.3229458073561466e-05, "loss": 0.1081, "step": 50090 }, { "epoch": 1.820626499018824, "grad_norm": 2.446769952774048, "learning_rate": 4.322590650706145e-05, "loss": 0.1738, "step": 50100 }, { "epoch": 1.8209898975216223, "grad_norm": 4.417498588562012, "learning_rate": 4.322235415527198e-05, "loss": 0.1196, "step": 50110 }, { "epoch": 1.8213532960244203, "grad_norm": 0.8139522671699524, "learning_rate": 4.321880101834609e-05, "loss": 0.1339, "step": 50120 }, { "epoch": 1.8217166945272185, "grad_norm": 0.6750831007957458, "learning_rate": 4.32152470964369e-05, "loss": 0.1169, "step": 50130 }, { "epoch": 1.8220800930300167, "grad_norm": 0.6926230192184448, "learning_rate": 4.3211692389697514e-05, "loss": 0.1192, "step": 50140 }, { "epoch": 1.822443491532815, "grad_norm": 0.5890200138092041, "learning_rate": 4.3208136898281106e-05, "loss": 0.1431, "step": 50150 }, { "epoch": 1.8228068900356131, "grad_norm": 1.0174696445465088, "learning_rate": 4.3204580622340865e-05, "loss": 0.7309, "step": 50160 }, { "epoch": 1.8231702885384111, "grad_norm": 0.9049250483512878, "learning_rate": 4.320102356203001e-05, "loss": 0.1451, "step": 50170 }, { "epoch": 1.8235336870412096, "grad_norm": 0.7444465160369873, "learning_rate": 4.3197465717501815e-05, "loss": 0.1111, "step": 50180 }, { "epoch": 
1.8238970855440075, "grad_norm": 1.0209647417068481, "learning_rate": 4.319390708890957e-05, "loss": 0.0911, "step": 50190 }, { "epoch": 1.8242604840468057, "grad_norm": 0.7396380305290222, "learning_rate": 4.31903476764066e-05, "loss": 0.1367, "step": 50200 }, { "epoch": 1.824623882549604, "grad_norm": 0.7910483479499817, "learning_rate": 4.318678748014626e-05, "loss": 0.0978, "step": 50210 }, { "epoch": 1.824987281052402, "grad_norm": 0.5519534349441528, "learning_rate": 4.318322650028197e-05, "loss": 0.1502, "step": 50220 }, { "epoch": 1.8253506795552004, "grad_norm": 0.7698003053665161, "learning_rate": 4.317966473696714e-05, "loss": 0.1257, "step": 50230 }, { "epoch": 1.8257140780579983, "grad_norm": 0.6308223605155945, "learning_rate": 4.3176102190355246e-05, "loss": 0.1048, "step": 50240 }, { "epoch": 1.8260774765607966, "grad_norm": 0.7210573554039001, "learning_rate": 4.317253886059978e-05, "loss": 2.1908, "step": 50250 }, { "epoch": 1.8264408750635948, "grad_norm": 1.0156967639923096, "learning_rate": 4.316897474785426e-05, "loss": 0.1612, "step": 50260 }, { "epoch": 1.8268042735663927, "grad_norm": 1.366592288017273, "learning_rate": 4.3165409852272276e-05, "loss": 0.1524, "step": 50270 }, { "epoch": 1.8271676720691912, "grad_norm": 1.1312644481658936, "learning_rate": 4.3161844174007406e-05, "loss": 0.1118, "step": 50280 }, { "epoch": 1.8275310705719892, "grad_norm": 0.3259322941303253, "learning_rate": 4.3158277713213295e-05, "loss": 0.1043, "step": 50290 }, { "epoch": 1.8278944690747874, "grad_norm": 0.8572397828102112, "learning_rate": 4.31547104700436e-05, "loss": 0.1346, "step": 50300 }, { "epoch": 1.8282578675775856, "grad_norm": 1.2105157375335693, "learning_rate": 4.3151142444652035e-05, "loss": 0.1135, "step": 50310 }, { "epoch": 1.8286212660803838, "grad_norm": 0.6126898527145386, "learning_rate": 4.314757363719232e-05, "loss": 0.1719, "step": 50320 }, { "epoch": 1.828984664583182, "grad_norm": 0.7111977338790894, "learning_rate": 
4.314400404781822e-05, "loss": 0.1089, "step": 50330 }, { "epoch": 1.82934806308598, "grad_norm": 1.016365647315979, "learning_rate": 4.314043367668355e-05, "loss": 0.0869, "step": 50340 }, { "epoch": 1.8297114615887784, "grad_norm": 1.1696245670318604, "learning_rate": 4.3136862523942136e-05, "loss": 0.1241, "step": 50350 }, { "epoch": 1.8300748600915764, "grad_norm": 1.2339487075805664, "learning_rate": 4.313329058974784e-05, "loss": 0.1202, "step": 50360 }, { "epoch": 1.8304382585943746, "grad_norm": 1.6147994995117188, "learning_rate": 4.312971787425458e-05, "loss": 0.1502, "step": 50370 }, { "epoch": 1.8308016570971728, "grad_norm": 0.8028876781463623, "learning_rate": 4.312614437761628e-05, "loss": 0.1153, "step": 50380 }, { "epoch": 1.8311650555999708, "grad_norm": 0.6366049647331238, "learning_rate": 4.312257009998691e-05, "loss": 0.1367, "step": 50390 }, { "epoch": 1.8315284541027692, "grad_norm": 1.6343673467636108, "learning_rate": 4.311899504152047e-05, "loss": 0.1502, "step": 50400 }, { "epoch": 1.8315284541027692, "eval_loss": 0.34016337990760803, "eval_runtime": 180.2837, "eval_samples_per_second": 41.124, "eval_steps_per_second": 5.142, "eval_wer": 0.16422204875923538, "step": 50400 }, { "epoch": 1.8318918526055672, "grad_norm": 0.5833923816680908, "learning_rate": 4.311541920237101e-05, "loss": 0.1262, "step": 50410 }, { "epoch": 1.8322552511083654, "grad_norm": 0.5683671832084656, "learning_rate": 4.311184258269258e-05, "loss": 0.1626, "step": 50420 }, { "epoch": 1.8326186496111636, "grad_norm": 2.4269814491271973, "learning_rate": 4.3108265182639304e-05, "loss": 0.1293, "step": 50430 }, { "epoch": 1.8329820481139618, "grad_norm": 0.8372895121574402, "learning_rate": 4.310468700236532e-05, "loss": 0.1004, "step": 50440 }, { "epoch": 1.83334544661676, "grad_norm": 0.7744470238685608, "learning_rate": 4.3101108042024776e-05, "loss": 0.126, "step": 50450 }, { "epoch": 1.833708845119558, "grad_norm": 1.1404407024383545, "learning_rate": 
4.3097528301771895e-05, "loss": 0.1267, "step": 50460 }, { "epoch": 1.8340722436223564, "grad_norm": 0.4898841381072998, "learning_rate": 4.30939477817609e-05, "loss": 0.1353, "step": 50470 }, { "epoch": 1.8344356421251544, "grad_norm": 0.949220597743988, "learning_rate": 4.3090366482146085e-05, "loss": 0.1188, "step": 50480 }, { "epoch": 1.8347990406279526, "grad_norm": 0.2803521156311035, "learning_rate": 4.308678440308175e-05, "loss": 0.0978, "step": 50490 }, { "epoch": 1.8351624391307508, "grad_norm": 1.3130167722702026, "learning_rate": 4.308320154472221e-05, "loss": 0.1228, "step": 50500 }, { "epoch": 1.8355258376335488, "grad_norm": 1.2615669965744019, "learning_rate": 4.307961790722187e-05, "loss": 0.1146, "step": 50510 }, { "epoch": 1.8358892361363472, "grad_norm": 1.296720266342163, "learning_rate": 4.307603349073512e-05, "loss": 0.1497, "step": 50520 }, { "epoch": 1.8362526346391452, "grad_norm": 0.7248766422271729, "learning_rate": 4.30724482954164e-05, "loss": 2.0, "step": 50530 }, { "epoch": 1.8366160331419434, "grad_norm": 0.9493942260742188, "learning_rate": 4.306886232142018e-05, "loss": 0.1092, "step": 50540 }, { "epoch": 1.8369794316447416, "grad_norm": 0.8784381151199341, "learning_rate": 4.306527556890097e-05, "loss": 0.1332, "step": 50550 }, { "epoch": 1.8373428301475396, "grad_norm": 1.1002815961837769, "learning_rate": 4.306168803801332e-05, "loss": 0.1166, "step": 50560 }, { "epoch": 1.837706228650338, "grad_norm": 1.0042141675949097, "learning_rate": 4.3058099728911795e-05, "loss": 0.1494, "step": 50570 }, { "epoch": 1.838069627153136, "grad_norm": 0.8974900841712952, "learning_rate": 4.305451064175101e-05, "loss": 0.1281, "step": 50580 }, { "epoch": 1.8384330256559342, "grad_norm": 6.133358001708984, "learning_rate": 4.3050920776685587e-05, "loss": 0.1136, "step": 50590 }, { "epoch": 1.8387964241587325, "grad_norm": 1.828365445137024, "learning_rate": 4.304733013387022e-05, "loss": 0.1634, "step": 50600 }, { "epoch": 1.8391598226615307, 
"grad_norm": 1.1756354570388794, "learning_rate": 4.3043738713459605e-05, "loss": 0.1039, "step": 50610 }, { "epoch": 1.8395232211643289, "grad_norm": 0.3747727870941162, "learning_rate": 4.304014651560849e-05, "loss": 0.1966, "step": 50620 }, { "epoch": 1.8398866196671269, "grad_norm": 2.3143324851989746, "learning_rate": 4.303655354047166e-05, "loss": 0.1294, "step": 50630 }, { "epoch": 1.8402500181699253, "grad_norm": 0.408648818731308, "learning_rate": 4.3032959788203894e-05, "loss": 0.1097, "step": 50640 }, { "epoch": 1.8406134166727233, "grad_norm": 2.8158206939697266, "learning_rate": 4.3029365258960065e-05, "loss": 0.1418, "step": 50650 }, { "epoch": 1.8409768151755215, "grad_norm": 1.1815099716186523, "learning_rate": 4.3025769952895036e-05, "loss": 0.1053, "step": 50660 }, { "epoch": 1.8413402136783197, "grad_norm": 0.5927807688713074, "learning_rate": 4.3022173870163705e-05, "loss": 0.1731, "step": 50670 }, { "epoch": 1.8417036121811177, "grad_norm": 0.945854663848877, "learning_rate": 4.301857701092103e-05, "loss": 0.1355, "step": 50680 }, { "epoch": 1.842067010683916, "grad_norm": 1.1901466846466064, "learning_rate": 4.301497937532199e-05, "loss": 0.1237, "step": 50690 }, { "epoch": 1.842430409186714, "grad_norm": 2.328880548477173, "learning_rate": 4.301138096352158e-05, "loss": 0.184, "step": 50700 }, { "epoch": 1.8427938076895123, "grad_norm": 1.652759075164795, "learning_rate": 4.3007781775674846e-05, "loss": 0.1085, "step": 50710 }, { "epoch": 1.8431572061923105, "grad_norm": 0.7879970669746399, "learning_rate": 4.3004181811936874e-05, "loss": 0.1291, "step": 50720 }, { "epoch": 1.8435206046951087, "grad_norm": 0.9201347827911377, "learning_rate": 4.3000581072462765e-05, "loss": 0.1069, "step": 50730 }, { "epoch": 1.843884003197907, "grad_norm": 0.4384756088256836, "learning_rate": 4.299697955740766e-05, "loss": 0.0966, "step": 50740 }, { "epoch": 1.844247401700705, "grad_norm": 1.022146463394165, "learning_rate": 4.299337726692674e-05, "loss": 
0.4218, "step": 50750 }, { "epoch": 1.8446108002035033, "grad_norm": 1.298736810684204, "learning_rate": 4.298977420117521e-05, "loss": 0.1266, "step": 50760 }, { "epoch": 1.8449741987063013, "grad_norm": 0.3361910283565521, "learning_rate": 4.2986170360308324e-05, "loss": 0.1357, "step": 50770 }, { "epoch": 1.8453375972090995, "grad_norm": 1.3413841724395752, "learning_rate": 4.298256574448135e-05, "loss": 0.1116, "step": 50780 }, { "epoch": 1.8457009957118977, "grad_norm": 1.324555516242981, "learning_rate": 4.2978960353849606e-05, "loss": 0.1171, "step": 50790 }, { "epoch": 1.8460643942146957, "grad_norm": 2.6282169818878174, "learning_rate": 4.297535418856843e-05, "loss": 0.1341, "step": 50800 }, { "epoch": 1.8464277927174941, "grad_norm": 1.0654820203781128, "learning_rate": 4.297174724879319e-05, "loss": 0.1358, "step": 50810 }, { "epoch": 1.8467911912202921, "grad_norm": 0.45108261704444885, "learning_rate": 4.296813953467931e-05, "loss": 0.21, "step": 50820 }, { "epoch": 1.8471545897230903, "grad_norm": 0.4607618749141693, "learning_rate": 4.296453104638222e-05, "loss": 0.1004, "step": 50830 }, { "epoch": 1.8475179882258885, "grad_norm": 1.834341049194336, "learning_rate": 4.296092178405741e-05, "loss": 0.1092, "step": 50840 }, { "epoch": 1.8478813867286865, "grad_norm": 2.6476006507873535, "learning_rate": 4.295731174786039e-05, "loss": 0.1255, "step": 50850 }, { "epoch": 1.848244785231485, "grad_norm": 0.6069791316986084, "learning_rate": 4.2953700937946696e-05, "loss": 0.1166, "step": 50860 }, { "epoch": 1.848608183734283, "grad_norm": 0.9340389966964722, "learning_rate": 4.2950089354471915e-05, "loss": 0.1548, "step": 50870 }, { "epoch": 1.8489715822370811, "grad_norm": 1.0438172817230225, "learning_rate": 4.294647699759163e-05, "loss": 0.1996, "step": 50880 }, { "epoch": 1.8493349807398793, "grad_norm": 0.9623711109161377, "learning_rate": 4.294286386746152e-05, "loss": 0.11, "step": 50890 }, { "epoch": 1.8496983792426775, "grad_norm": 
0.7084048986434937, "learning_rate": 4.2939249964237246e-05, "loss": 0.1411, "step": 50900 }, { "epoch": 1.8500617777454758, "grad_norm": 2.6798365116119385, "learning_rate": 4.293563528807453e-05, "loss": 0.1253, "step": 50910 }, { "epoch": 1.8504251762482737, "grad_norm": 1.17062246799469, "learning_rate": 4.2932019839129087e-05, "loss": 0.1676, "step": 50920 }, { "epoch": 1.8507885747510722, "grad_norm": 3.6233065128326416, "learning_rate": 4.2928403617556714e-05, "loss": 0.1128, "step": 50930 }, { "epoch": 1.8511519732538702, "grad_norm": 1.1630785465240479, "learning_rate": 4.2924786623513225e-05, "loss": 0.084, "step": 50940 }, { "epoch": 1.8515153717566684, "grad_norm": 1.654990553855896, "learning_rate": 4.292116885715446e-05, "loss": 0.1527, "step": 50950 }, { "epoch": 1.8518787702594666, "grad_norm": 1.1175593137741089, "learning_rate": 4.291755031863628e-05, "loss": 0.1132, "step": 50960 }, { "epoch": 1.8522421687622646, "grad_norm": 0.46370136737823486, "learning_rate": 4.291393100811462e-05, "loss": 0.1335, "step": 50970 }, { "epoch": 1.852605567265063, "grad_norm": 1.1095346212387085, "learning_rate": 4.2910310925745404e-05, "loss": 0.1024, "step": 50980 }, { "epoch": 1.852968965767861, "grad_norm": 1.7305604219436646, "learning_rate": 4.290669007168462e-05, "loss": 0.0964, "step": 50990 }, { "epoch": 1.8533323642706592, "grad_norm": 0.5683947801589966, "learning_rate": 4.290306844608827e-05, "loss": 0.1324, "step": 51000 }, { "epoch": 1.8533323642706592, "eval_loss": 0.35921338200569153, "eval_runtime": 179.6912, "eval_samples_per_second": 41.26, "eval_steps_per_second": 5.159, "eval_wer": 0.164648646686151, "step": 51000 }, { "epoch": 1.8536957627734574, "grad_norm": 0.417925089597702, "learning_rate": 4.289944604911239e-05, "loss": 0.156, "step": 51010 }, { "epoch": 1.8540591612762556, "grad_norm": 0.3907199501991272, "learning_rate": 4.2895822880913076e-05, "loss": 0.1322, "step": 51020 }, { "epoch": 1.8544225597790538, "grad_norm": 
0.8322422504425049, "learning_rate": 4.2892198941646436e-05, "loss": 0.1278, "step": 51030 }, { "epoch": 1.8547859582818518, "grad_norm": 1.3560541868209839, "learning_rate": 4.2888574231468595e-05, "loss": 0.0933, "step": 51040 }, { "epoch": 1.8551493567846502, "grad_norm": 1.353043556213379, "learning_rate": 4.288494875053573e-05, "loss": 0.1185, "step": 51050 }, { "epoch": 1.8555127552874482, "grad_norm": 7.476738929748535, "learning_rate": 4.2881322499004076e-05, "loss": 0.127, "step": 51060 }, { "epoch": 1.8558761537902464, "grad_norm": 0.5096439719200134, "learning_rate": 4.2877695477029844e-05, "loss": 0.1768, "step": 51070 }, { "epoch": 1.8562395522930446, "grad_norm": 2.3075900077819824, "learning_rate": 4.2874067684769325e-05, "loss": 0.1288, "step": 51080 }, { "epoch": 1.8566029507958426, "grad_norm": 0.615508496761322, "learning_rate": 4.287043912237883e-05, "loss": 0.0903, "step": 51090 }, { "epoch": 1.856966349298641, "grad_norm": 2.022796154022217, "learning_rate": 4.286680979001469e-05, "loss": 0.1372, "step": 51100 }, { "epoch": 1.857329747801439, "grad_norm": 0.8577511310577393, "learning_rate": 4.28631796878333e-05, "loss": 0.1216, "step": 51110 }, { "epoch": 1.8576931463042372, "grad_norm": 0.5876504182815552, "learning_rate": 4.285954881599104e-05, "loss": 0.1672, "step": 51120 }, { "epoch": 1.8580565448070354, "grad_norm": 1.0467904806137085, "learning_rate": 4.2855917174644374e-05, "loss": 0.1175, "step": 51130 }, { "epoch": 1.8584199433098334, "grad_norm": 0.6273336410522461, "learning_rate": 4.285228476394977e-05, "loss": 0.1048, "step": 51140 }, { "epoch": 1.8587833418126318, "grad_norm": 0.7880851030349731, "learning_rate": 4.284865158406372e-05, "loss": 0.1418, "step": 51150 }, { "epoch": 1.8591467403154298, "grad_norm": 1.316256046295166, "learning_rate": 4.284501763514279e-05, "loss": 0.1373, "step": 51160 }, { "epoch": 1.859510138818228, "grad_norm": 0.46383532881736755, "learning_rate": 4.284138291734355e-05, "loss": 0.167, "step": 
51170 }, { "epoch": 1.8598735373210262, "grad_norm": 0.7432321310043335, "learning_rate": 4.283774743082259e-05, "loss": 0.1534, "step": 51180 }, { "epoch": 1.8602369358238244, "grad_norm": 0.673897922039032, "learning_rate": 4.2834111175736555e-05, "loss": 0.1122, "step": 51190 }, { "epoch": 1.8606003343266226, "grad_norm": 0.7635305523872375, "learning_rate": 4.2830474152242136e-05, "loss": 0.1054, "step": 51200 }, { "epoch": 1.8609637328294206, "grad_norm": 1.281503677368164, "learning_rate": 4.282683636049602e-05, "loss": 0.1175, "step": 51210 }, { "epoch": 1.861327131332219, "grad_norm": 1.249510407447815, "learning_rate": 4.282319780065496e-05, "loss": 0.1624, "step": 51220 }, { "epoch": 1.861690529835017, "grad_norm": 1.7470375299453735, "learning_rate": 4.281955847287571e-05, "loss": 0.1173, "step": 51230 }, { "epoch": 1.8620539283378152, "grad_norm": 0.4860547184944153, "learning_rate": 4.2815918377315096e-05, "loss": 0.1109, "step": 51240 }, { "epoch": 1.8624173268406135, "grad_norm": 1.2206679582595825, "learning_rate": 4.281227751412995e-05, "loss": 0.139, "step": 51250 }, { "epoch": 1.8627807253434114, "grad_norm": 5.966470241546631, "learning_rate": 4.2808635883477134e-05, "loss": 0.0989, "step": 51260 }, { "epoch": 1.8631441238462099, "grad_norm": 0.6490101218223572, "learning_rate": 4.280499348551357e-05, "loss": 0.1876, "step": 51270 }, { "epoch": 1.8635075223490078, "grad_norm": 1.0723692178726196, "learning_rate": 4.280135032039618e-05, "loss": 0.1081, "step": 51280 }, { "epoch": 1.863870920851806, "grad_norm": 1.9690381288528442, "learning_rate": 4.2797706388281944e-05, "loss": 0.1112, "step": 51290 }, { "epoch": 1.8642343193546043, "grad_norm": 5.197057723999023, "learning_rate": 4.279406168932787e-05, "loss": 0.1374, "step": 51300 }, { "epoch": 1.8645977178574025, "grad_norm": 1.063194751739502, "learning_rate": 4.279041622369098e-05, "loss": 0.1221, "step": 51310 }, { "epoch": 1.8649611163602007, "grad_norm": 0.9989191889762878, 
"learning_rate": 4.2786769991528356e-05, "loss": 0.1602, "step": 51320 }, { "epoch": 1.8653245148629987, "grad_norm": 1.2675135135650635, "learning_rate": 4.278312299299711e-05, "loss": 0.1038, "step": 51330 }, { "epoch": 1.865687913365797, "grad_norm": 0.7109481692314148, "learning_rate": 4.277947522825435e-05, "loss": 0.1323, "step": 51340 }, { "epoch": 1.866051311868595, "grad_norm": 0.522361695766449, "learning_rate": 4.2775826697457265e-05, "loss": 0.1339, "step": 51350 }, { "epoch": 1.8664147103713933, "grad_norm": 0.6734838485717773, "learning_rate": 4.277217740076306e-05, "loss": 0.0991, "step": 51360 }, { "epoch": 1.8667781088741915, "grad_norm": 0.7802498936653137, "learning_rate": 4.276852733832897e-05, "loss": 0.1614, "step": 51370 }, { "epoch": 1.8671415073769895, "grad_norm": 0.7883875966072083, "learning_rate": 4.276487651031225e-05, "loss": 0.1093, "step": 51380 }, { "epoch": 1.867504905879788, "grad_norm": 2.097398519515991, "learning_rate": 4.27612249168702e-05, "loss": 0.0925, "step": 51390 }, { "epoch": 1.8678683043825859, "grad_norm": 2.9690377712249756, "learning_rate": 4.2757572558160176e-05, "loss": 0.1693, "step": 51400 }, { "epoch": 1.868231702885384, "grad_norm": 0.681706428527832, "learning_rate": 4.275391943433953e-05, "loss": 0.1045, "step": 51410 }, { "epoch": 1.8685951013881823, "grad_norm": 0.32064223289489746, "learning_rate": 4.275026554556566e-05, "loss": 0.1511, "step": 51420 }, { "epoch": 1.8689584998909803, "grad_norm": 0.30609723925590515, "learning_rate": 4.2746610891996006e-05, "loss": 0.1089, "step": 51430 }, { "epoch": 1.8693218983937787, "grad_norm": 0.5510174036026001, "learning_rate": 4.274295547378803e-05, "loss": 0.097, "step": 51440 }, { "epoch": 1.8696852968965767, "grad_norm": 1.0738519430160522, "learning_rate": 4.2739299291099233e-05, "loss": 0.1309, "step": 51450 }, { "epoch": 1.870048695399375, "grad_norm": 0.7475055456161499, "learning_rate": 4.2735642344087144e-05, "loss": 0.2601, "step": 51460 }, { "epoch": 
1.8704120939021731, "grad_norm": 0.5625230669975281, "learning_rate": 4.273198463290934e-05, "loss": 0.1464, "step": 51470 }, { "epoch": 1.8707754924049713, "grad_norm": 1.03018057346344, "learning_rate": 4.2728326157723396e-05, "loss": 0.1346, "step": 51480 }, { "epoch": 1.8711388909077695, "grad_norm": 0.7019144892692566, "learning_rate": 4.272466691868696e-05, "loss": 0.1281, "step": 51490 }, { "epoch": 1.8715022894105675, "grad_norm": 1.6843324899673462, "learning_rate": 4.2721006915957695e-05, "loss": 0.8711, "step": 51500 }, { "epoch": 1.871865687913366, "grad_norm": 1.4415316581726074, "learning_rate": 4.271734614969329e-05, "loss": 0.1193, "step": 51510 }, { "epoch": 1.872229086416164, "grad_norm": 0.5036882162094116, "learning_rate": 4.271368462005148e-05, "loss": 0.1821, "step": 51520 }, { "epoch": 1.8725924849189621, "grad_norm": 80.8698959350586, "learning_rate": 4.2710022327190026e-05, "loss": 1.5666, "step": 51530 }, { "epoch": 1.8729558834217603, "grad_norm": 1.3852354288101196, "learning_rate": 4.2706359271266716e-05, "loss": 0.1154, "step": 51540 }, { "epoch": 1.8733192819245583, "grad_norm": 2.0087506771087646, "learning_rate": 4.270269545243939e-05, "loss": 0.1289, "step": 51550 }, { "epoch": 1.8736826804273568, "grad_norm": 18.74397087097168, "learning_rate": 4.26990308708659e-05, "loss": 0.166, "step": 51560 }, { "epoch": 1.8740460789301547, "grad_norm": 0.5033942461013794, "learning_rate": 4.2695365526704144e-05, "loss": 0.2648, "step": 51570 }, { "epoch": 1.874409477432953, "grad_norm": 0.7396907210350037, "learning_rate": 4.269169942011205e-05, "loss": 0.1235, "step": 51580 }, { "epoch": 1.8747728759357511, "grad_norm": 1.0756418704986572, "learning_rate": 4.2688032551247574e-05, "loss": 0.1113, "step": 51590 }, { "epoch": 1.8751362744385494, "grad_norm": 0.8277359008789062, "learning_rate": 4.268436492026871e-05, "loss": 0.1556, "step": 51600 }, { "epoch": 1.8751362744385494, "eval_loss": 0.3514460623264313, "eval_runtime": 180.5008, 
"eval_samples_per_second": 41.075, "eval_steps_per_second": 5.136, "eval_wer": 0.16721731079928115, "step": 51600 }, { "epoch": 1.8754996729413476, "grad_norm": 1.8270063400268555, "learning_rate": 4.268069652733349e-05, "loss": 0.1154, "step": 51610 }, { "epoch": 1.8758630714441455, "grad_norm": 1.2547001838684082, "learning_rate": 4.267702737259995e-05, "loss": 0.1256, "step": 51620 }, { "epoch": 1.876226469946944, "grad_norm": 2.664400100708008, "learning_rate": 4.26733574562262e-05, "loss": 0.1301, "step": 51630 }, { "epoch": 1.876589868449742, "grad_norm": 1.9625864028930664, "learning_rate": 4.266968677837037e-05, "loss": 0.1347, "step": 51640 }, { "epoch": 1.8769532669525402, "grad_norm": 1.5494035482406616, "learning_rate": 4.266601533919059e-05, "loss": 0.1537, "step": 51650 }, { "epoch": 1.8773166654553384, "grad_norm": 1.1424529552459717, "learning_rate": 4.2662343138845076e-05, "loss": 0.1212, "step": 51660 }, { "epoch": 1.8776800639581364, "grad_norm": 0.8773604035377502, "learning_rate": 4.265867017749203e-05, "loss": 0.2055, "step": 51670 }, { "epoch": 1.8780434624609348, "grad_norm": 0.9486932158470154, "learning_rate": 4.265499645528972e-05, "loss": 0.114, "step": 51680 }, { "epoch": 1.8784068609637328, "grad_norm": 0.5126560926437378, "learning_rate": 4.265132197239643e-05, "loss": 0.1068, "step": 51690 }, { "epoch": 1.878770259466531, "grad_norm": 1.4426672458648682, "learning_rate": 4.264764672897049e-05, "loss": 0.1268, "step": 51700 }, { "epoch": 1.8791336579693292, "grad_norm": 1.2795157432556152, "learning_rate": 4.264397072517023e-05, "loss": 0.1, "step": 51710 }, { "epoch": 1.8794970564721272, "grad_norm": 0.34891799092292786, "learning_rate": 4.2640293961154055e-05, "loss": 0.1683, "step": 51720 }, { "epoch": 1.8798604549749256, "grad_norm": 0.6939824223518372, "learning_rate": 4.2636616437080366e-05, "loss": 0.1131, "step": 51730 }, { "epoch": 1.8802238534777236, "grad_norm": 1.4243013858795166, "learning_rate": 4.2632938153107636e-05, 
"loss": 0.1019, "step": 51740 }, { "epoch": 1.8805872519805218, "grad_norm": 0.699863851070404, "learning_rate": 4.2629259109394335e-05, "loss": 0.1652, "step": 51750 }, { "epoch": 1.88095065048332, "grad_norm": 1.4008554220199585, "learning_rate": 4.2625579306098994e-05, "loss": 0.1062, "step": 51760 }, { "epoch": 1.8813140489861182, "grad_norm": 1.0460174083709717, "learning_rate": 4.2621898743380144e-05, "loss": 0.1216, "step": 51770 }, { "epoch": 1.8816774474889164, "grad_norm": 1.277803897857666, "learning_rate": 4.2618217421396375e-05, "loss": 1.8922, "step": 51780 }, { "epoch": 1.8820408459917144, "grad_norm": 2.2651615142822266, "learning_rate": 4.2614535340306314e-05, "loss": 0.1014, "step": 51790 }, { "epoch": 1.8824042444945128, "grad_norm": 0.8989794254302979, "learning_rate": 4.2610852500268586e-05, "loss": 0.1089, "step": 51800 }, { "epoch": 1.8827676429973108, "grad_norm": 0.9130983948707581, "learning_rate": 4.2607168901441885e-05, "loss": 0.1098, "step": 51810 }, { "epoch": 1.883131041500109, "grad_norm": 0.5734561085700989, "learning_rate": 4.260348454398493e-05, "loss": 0.1674, "step": 51820 }, { "epoch": 1.8834944400029072, "grad_norm": 0.9285537600517273, "learning_rate": 4.259979942805645e-05, "loss": 0.1197, "step": 51830 }, { "epoch": 1.8838578385057052, "grad_norm": 1.951344609260559, "learning_rate": 4.259611355381524e-05, "loss": 0.1009, "step": 51840 }, { "epoch": 1.8842212370085036, "grad_norm": 0.6593104004859924, "learning_rate": 4.2592426921420106e-05, "loss": 0.1121, "step": 51850 }, { "epoch": 1.8845846355113016, "grad_norm": 0.35744279623031616, "learning_rate": 4.258873953102987e-05, "loss": 0.1029, "step": 51860 }, { "epoch": 1.8849480340140998, "grad_norm": 0.7135227918624878, "learning_rate": 4.2585051382803455e-05, "loss": 0.153, "step": 51870 }, { "epoch": 1.885311432516898, "grad_norm": 1.8943212032318115, "learning_rate": 4.258136247689973e-05, "loss": 0.1079, "step": 51880 }, { "epoch": 1.8856748310196962, "grad_norm": 
1.6160852909088135, "learning_rate": 4.2577672813477656e-05, "loss": 0.1259, "step": 51890 }, { "epoch": 1.8860382295224944, "grad_norm": 0.8660845160484314, "learning_rate": 4.25739823926962e-05, "loss": 0.1151, "step": 51900 }, { "epoch": 1.8864016280252924, "grad_norm": 1.074818730354309, "learning_rate": 4.2570291214714365e-05, "loss": 0.1181, "step": 51910 }, { "epoch": 1.8867650265280909, "grad_norm": 0.4410496950149536, "learning_rate": 4.2566599279691205e-05, "loss": 0.1296, "step": 51920 }, { "epoch": 1.8871284250308888, "grad_norm": 0.6509966254234314, "learning_rate": 4.2562906587785776e-05, "loss": 0.1096, "step": 51930 }, { "epoch": 1.887491823533687, "grad_norm": 0.8408392667770386, "learning_rate": 4.25592131391572e-05, "loss": 0.0968, "step": 51940 }, { "epoch": 1.8878552220364853, "grad_norm": 0.6419994831085205, "learning_rate": 4.25555189339646e-05, "loss": 0.1336, "step": 51950 }, { "epoch": 1.8882186205392832, "grad_norm": 1.4039828777313232, "learning_rate": 4.2551823972367156e-05, "loss": 0.1102, "step": 51960 }, { "epoch": 1.8885820190420817, "grad_norm": 1.1315640211105347, "learning_rate": 4.2548128254524066e-05, "loss": 0.1265, "step": 51970 }, { "epoch": 1.8889454175448797, "grad_norm": 0.6739282011985779, "learning_rate": 4.254443178059456e-05, "loss": 0.101, "step": 51980 }, { "epoch": 1.8893088160476779, "grad_norm": 0.9321909546852112, "learning_rate": 4.254073455073792e-05, "loss": 0.1093, "step": 51990 }, { "epoch": 1.889672214550476, "grad_norm": 8.199972152709961, "learning_rate": 4.2537036565113435e-05, "loss": 0.2084, "step": 52000 }, { "epoch": 1.890035613053274, "grad_norm": 0.5499328970909119, "learning_rate": 4.253333782388044e-05, "loss": 0.138, "step": 52010 }, { "epoch": 1.8903990115560725, "grad_norm": 1.2447484731674194, "learning_rate": 4.252963832719831e-05, "loss": 0.1353, "step": 52020 }, { "epoch": 1.8907624100588705, "grad_norm": 1.7561428546905518, "learning_rate": 4.252593807522642e-05, "loss": 0.1267, "step": 
52030 }, { "epoch": 1.8911258085616687, "grad_norm": 0.6353381872177124, "learning_rate": 4.252223706812423e-05, "loss": 0.1105, "step": 52040 }, { "epoch": 1.8914892070644669, "grad_norm": 1.8371816873550415, "learning_rate": 4.251853530605118e-05, "loss": 0.1498, "step": 52050 }, { "epoch": 1.891852605567265, "grad_norm": 2.9866833686828613, "learning_rate": 4.251483278916678e-05, "loss": 0.1133, "step": 52060 }, { "epoch": 1.8922160040700633, "grad_norm": 0.9184136986732483, "learning_rate": 4.2511129517630555e-05, "loss": 0.114, "step": 52070 }, { "epoch": 1.8925794025728613, "grad_norm": 1.407132863998413, "learning_rate": 4.250742549160206e-05, "loss": 0.1055, "step": 52080 }, { "epoch": 1.8929428010756597, "grad_norm": 1.2703722715377808, "learning_rate": 4.250372071124089e-05, "loss": 0.1429, "step": 52090 }, { "epoch": 1.8933061995784577, "grad_norm": 1.260004997253418, "learning_rate": 4.2500385764093334e-05, "loss": 1.4537, "step": 52100 }, { "epoch": 1.893669598081256, "grad_norm": 0.9646703004837036, "learning_rate": 4.249667955093988e-05, "loss": 0.1465, "step": 52110 }, { "epoch": 1.894032996584054, "grad_norm": 0.7287250757217407, "learning_rate": 4.249297258391677e-05, "loss": 0.1387, "step": 52120 }, { "epoch": 1.894396395086852, "grad_norm": 1.042417049407959, "learning_rate": 4.24892648631837e-05, "loss": 0.1031, "step": 52130 }, { "epoch": 1.8947597935896505, "grad_norm": 0.9328198432922363, "learning_rate": 4.248555638890043e-05, "loss": 0.1324, "step": 52140 }, { "epoch": 1.8951231920924485, "grad_norm": 3.1417503356933594, "learning_rate": 4.2481847161226764e-05, "loss": 0.1798, "step": 52150 }, { "epoch": 1.8954865905952467, "grad_norm": 0.8507013916969299, "learning_rate": 4.247813718032249e-05, "loss": 0.1182, "step": 52160 }, { "epoch": 1.895849989098045, "grad_norm": 1.0147353410720825, "learning_rate": 4.247442644634748e-05, "loss": 0.1341, "step": 52170 }, { "epoch": 1.8962133876008431, "grad_norm": 2.492661476135254, "learning_rate": 
4.2470714959461614e-05, "loss": 0.1109, "step": 52180 }, { "epoch": 1.8965767861036413, "grad_norm": 1.8121393918991089, "learning_rate": 4.246700271982479e-05, "loss": 0.1381, "step": 52190 }, { "epoch": 1.8969401846064393, "grad_norm": 1.2550605535507202, "learning_rate": 4.2463289727596965e-05, "loss": 0.1469, "step": 52200 }, { "epoch": 1.8969401846064393, "eval_loss": 0.3317066729068756, "eval_runtime": 179.6114, "eval_samples_per_second": 41.278, "eval_steps_per_second": 5.161, "eval_wer": 0.16958629077640822, "step": 52200 }, { "epoch": 1.8973035831092377, "grad_norm": 0.7204797863960266, "learning_rate": 4.245957598293813e-05, "loss": 0.1328, "step": 52210 }, { "epoch": 1.8976669816120357, "grad_norm": 0.5142366290092468, "learning_rate": 4.245586148600829e-05, "loss": 0.1457, "step": 52220 }, { "epoch": 1.898030380114834, "grad_norm": 0.8868045210838318, "learning_rate": 4.2452146236967474e-05, "loss": 0.1344, "step": 52230 }, { "epoch": 1.8983937786176321, "grad_norm": 0.7489217519760132, "learning_rate": 4.2448430235975777e-05, "loss": 0.1119, "step": 52240 }, { "epoch": 1.8987571771204301, "grad_norm": 1.2009568214416504, "learning_rate": 4.244471348319331e-05, "loss": 0.1503, "step": 52250 }, { "epoch": 1.8991205756232286, "grad_norm": 1.1081483364105225, "learning_rate": 4.24409959787802e-05, "loss": 0.1265, "step": 52260 }, { "epoch": 1.8994839741260265, "grad_norm": 0.42917948961257935, "learning_rate": 4.243727772289663e-05, "loss": 0.1825, "step": 52270 }, { "epoch": 1.8998473726288247, "grad_norm": 1.7099511623382568, "learning_rate": 4.2433558715702804e-05, "loss": 0.133, "step": 52280 }, { "epoch": 1.900210771131623, "grad_norm": 1.212544560432434, "learning_rate": 4.242983895735896e-05, "loss": 0.1306, "step": 52290 }, { "epoch": 1.900574169634421, "grad_norm": 0.48001641035079956, "learning_rate": 4.242611844802538e-05, "loss": 0.1739, "step": 52300 }, { "epoch": 1.9009375681372194, "grad_norm": 2.3596603870391846, "learning_rate": 
4.242239718786235e-05, "loss": 0.129, "step": 52310 }, { "epoch": 1.9013009666400174, "grad_norm": 0.41326409578323364, "learning_rate": 4.241867517703022e-05, "loss": 0.1218, "step": 52320 }, { "epoch": 1.9016643651428156, "grad_norm": 0.9740013480186462, "learning_rate": 4.241495241568935e-05, "loss": 0.1248, "step": 52330 }, { "epoch": 1.9020277636456138, "grad_norm": 0.8275489807128906, "learning_rate": 4.2411228904000136e-05, "loss": 0.1067, "step": 52340 }, { "epoch": 1.902391162148412, "grad_norm": 9.790162086486816, "learning_rate": 4.240750464212303e-05, "loss": 0.1548, "step": 52350 }, { "epoch": 1.9027545606512102, "grad_norm": 3.4635374546051025, "learning_rate": 4.240377963021847e-05, "loss": 0.1157, "step": 52360 }, { "epoch": 1.9031179591540082, "grad_norm": 0.5103577971458435, "learning_rate": 4.2400053868446976e-05, "loss": 0.1356, "step": 52370 }, { "epoch": 1.9034813576568066, "grad_norm": 1.608657956123352, "learning_rate": 4.239632735696908e-05, "loss": 0.114, "step": 52380 }, { "epoch": 1.9038447561596046, "grad_norm": 0.9395160675048828, "learning_rate": 4.2392600095945324e-05, "loss": 0.1079, "step": 52390 }, { "epoch": 1.9042081546624028, "grad_norm": 1.1032116413116455, "learning_rate": 4.2388872085536314e-05, "loss": 0.1789, "step": 52400 }, { "epoch": 1.904571553165201, "grad_norm": 0.765036940574646, "learning_rate": 4.2385143325902675e-05, "loss": 0.0984, "step": 52410 }, { "epoch": 1.904934951667999, "grad_norm": 2.460920572280884, "learning_rate": 4.238141381720507e-05, "loss": 0.1201, "step": 52420 }, { "epoch": 1.9052983501707974, "grad_norm": 0.6005275845527649, "learning_rate": 4.237768355960418e-05, "loss": 0.1145, "step": 52430 }, { "epoch": 1.9056617486735954, "grad_norm": 0.606640636920929, "learning_rate": 4.2373952553260745e-05, "loss": 0.1204, "step": 52440 }, { "epoch": 1.9060251471763936, "grad_norm": 1.0981110334396362, "learning_rate": 4.237022079833551e-05, "loss": 0.1237, "step": 52450 }, { "epoch": 
1.9063885456791918, "grad_norm": 1.2138440608978271, "learning_rate": 4.236648829498926e-05, "loss": 0.1027, "step": 52460 }, { "epoch": 1.90675194418199, "grad_norm": 0.38126930594444275, "learning_rate": 4.2362755043382816e-05, "loss": 0.1787, "step": 52470 }, { "epoch": 1.9071153426847882, "grad_norm": 1.1713272333145142, "learning_rate": 4.235902104367704e-05, "loss": 0.1098, "step": 52480 }, { "epoch": 1.9074787411875862, "grad_norm": 1.0597947835922241, "learning_rate": 4.235528629603282e-05, "loss": 0.1085, "step": 52490 }, { "epoch": 1.9078421396903846, "grad_norm": 0.5749408602714539, "learning_rate": 4.235155080061105e-05, "loss": 0.1295, "step": 52500 }, { "epoch": 1.9082055381931826, "grad_norm": 1.4702091217041016, "learning_rate": 4.234781455757269e-05, "loss": 0.1281, "step": 52510 }, { "epoch": 1.9085689366959808, "grad_norm": 0.586208164691925, "learning_rate": 4.234407756707873e-05, "loss": 0.1412, "step": 52520 }, { "epoch": 1.908932335198779, "grad_norm": 0.8572281002998352, "learning_rate": 4.2340339829290174e-05, "loss": 0.1059, "step": 52530 }, { "epoch": 1.909295733701577, "grad_norm": 0.7896180152893066, "learning_rate": 4.233660134436809e-05, "loss": 0.1144, "step": 52540 }, { "epoch": 1.9096591322043754, "grad_norm": 0.9928715825080872, "learning_rate": 4.233286211247351e-05, "loss": 0.1507, "step": 52550 }, { "epoch": 1.9100225307071734, "grad_norm": 1.1396877765655518, "learning_rate": 4.23291221337676e-05, "loss": 0.1306, "step": 52560 }, { "epoch": 1.9103859292099716, "grad_norm": 0.743976891040802, "learning_rate": 4.232538140841146e-05, "loss": 0.1839, "step": 52570 }, { "epoch": 1.9107493277127698, "grad_norm": 0.40765443444252014, "learning_rate": 4.232163993656628e-05, "loss": 0.1303, "step": 52580 }, { "epoch": 1.911112726215568, "grad_norm": 0.6623360514640808, "learning_rate": 4.231789771839326e-05, "loss": 0.1202, "step": 52590 }, { "epoch": 1.9114761247183663, "grad_norm": 0.9128944873809814, "learning_rate": 
4.2314154754053656e-05, "loss": 0.827, "step": 52600 }, { "epoch": 1.9118395232211642, "grad_norm": 0.9086483716964722, "learning_rate": 4.231041104370872e-05, "loss": 0.1147, "step": 52610 }, { "epoch": 1.9122029217239624, "grad_norm": 1.0172945261001587, "learning_rate": 4.2306666587519765e-05, "loss": 0.1501, "step": 52620 }, { "epoch": 1.9125663202267607, "grad_norm": 1.759474277496338, "learning_rate": 4.2302921385648126e-05, "loss": 0.1389, "step": 52630 }, { "epoch": 1.9129297187295589, "grad_norm": 1.5807387828826904, "learning_rate": 4.229917543825517e-05, "loss": 0.1067, "step": 52640 }, { "epoch": 1.913293117232357, "grad_norm": 0.8961324095726013, "learning_rate": 4.2295428745502284e-05, "loss": 0.1204, "step": 52650 }, { "epoch": 1.913656515735155, "grad_norm": 1.3519996404647827, "learning_rate": 4.229168130755092e-05, "loss": 0.1163, "step": 52660 }, { "epoch": 1.9140199142379535, "grad_norm": 1.4970946311950684, "learning_rate": 4.2287933124562526e-05, "loss": 0.1532, "step": 52670 }, { "epoch": 1.9143833127407515, "grad_norm": 48.62047576904297, "learning_rate": 4.2284184196698615e-05, "loss": 0.474, "step": 52680 }, { "epoch": 1.9147467112435497, "grad_norm": 1.5001195669174194, "learning_rate": 4.22804345241207e-05, "loss": 0.1108, "step": 52690 }, { "epoch": 1.9151101097463479, "grad_norm": 0.6537098288536072, "learning_rate": 4.227668410699034e-05, "loss": 0.1457, "step": 52700 }, { "epoch": 1.9154735082491459, "grad_norm": 1.2610722780227661, "learning_rate": 4.227293294546914e-05, "loss": 0.1223, "step": 52710 }, { "epoch": 1.9158369067519443, "grad_norm": 1.0688477754592896, "learning_rate": 4.226918103971871e-05, "loss": 0.1319, "step": 52720 }, { "epoch": 1.9162003052547423, "grad_norm": 0.532785952091217, "learning_rate": 4.226542838990072e-05, "loss": 0.1355, "step": 52730 }, { "epoch": 1.9165637037575405, "grad_norm": 0.6391937136650085, "learning_rate": 4.226167499617684e-05, "loss": 0.1215, "step": 52740 }, { "epoch": 
1.9169271022603387, "grad_norm": 0.9662737250328064, "learning_rate": 4.225792085870881e-05, "loss": 0.1522, "step": 52750 }, { "epoch": 1.917290500763137, "grad_norm": 1.3882033824920654, "learning_rate": 4.225416597765838e-05, "loss": 0.1234, "step": 52760 }, { "epoch": 1.917653899265935, "grad_norm": 1.2721084356307983, "learning_rate": 4.225041035318732e-05, "loss": 0.1645, "step": 52770 }, { "epoch": 1.918017297768733, "grad_norm": 1.06475830078125, "learning_rate": 4.224665398545745e-05, "loss": 0.1278, "step": 52780 }, { "epoch": 1.9183806962715315, "grad_norm": 1.8718911409378052, "learning_rate": 4.224289687463063e-05, "loss": 0.1081, "step": 52790 }, { "epoch": 1.9187440947743295, "grad_norm": 0.7336494326591492, "learning_rate": 4.223913902086874e-05, "loss": 0.1398, "step": 52800 }, { "epoch": 1.9187440947743295, "eval_loss": 0.33621227741241455, "eval_runtime": 180.2286, "eval_samples_per_second": 41.137, "eval_steps_per_second": 5.143, "eval_wer": 0.16169876740428776, "step": 52800 }, { "epoch": 1.9191074932771277, "grad_norm": 1.407049298286438, "learning_rate": 4.223538042433368e-05, "loss": 0.1219, "step": 52810 }, { "epoch": 1.919470891779926, "grad_norm": 0.27716466784477234, "learning_rate": 4.22316210851874e-05, "loss": 0.1303, "step": 52820 }, { "epoch": 1.919834290282724, "grad_norm": 1.0262128114700317, "learning_rate": 4.222786100359188e-05, "loss": 0.1053, "step": 52830 }, { "epoch": 1.9201976887855223, "grad_norm": 0.6818228960037231, "learning_rate": 4.222410017970913e-05, "loss": 0.09, "step": 52840 }, { "epoch": 1.9205610872883203, "grad_norm": 0.6761994361877441, "learning_rate": 4.2220338613701185e-05, "loss": 0.108, "step": 52850 }, { "epoch": 1.9209244857911185, "grad_norm": 4.313242435455322, "learning_rate": 4.2216576305730104e-05, "loss": 0.1262, "step": 52860 }, { "epoch": 1.9212878842939167, "grad_norm": 0.6098904609680176, "learning_rate": 4.221281325595803e-05, "loss": 0.2005, "step": 52870 }, { "epoch": 1.921651282796715, 
"grad_norm": 1.2213470935821533, "learning_rate": 4.2209049464547064e-05, "loss": 0.1088, "step": 52880 }, { "epoch": 1.9220146812995131, "grad_norm": 0.705827534198761, "learning_rate": 4.220528493165938e-05, "loss": 0.1207, "step": 52890 }, { "epoch": 1.9223780798023111, "grad_norm": 0.8161284327507019, "learning_rate": 4.22015196574572e-05, "loss": 0.1855, "step": 52900 }, { "epoch": 1.9227414783051093, "grad_norm": 0.7296738028526306, "learning_rate": 4.2197753642102734e-05, "loss": 0.1224, "step": 52910 }, { "epoch": 1.9231048768079075, "grad_norm": 1.1311039924621582, "learning_rate": 4.2193986885758255e-05, "loss": 0.1331, "step": 52920 }, { "epoch": 1.9234682753107057, "grad_norm": 1.0949995517730713, "learning_rate": 4.219021938858605e-05, "loss": 0.1172, "step": 52930 }, { "epoch": 1.923831673813504, "grad_norm": 2.3175034523010254, "learning_rate": 4.2186451150748465e-05, "loss": 0.1061, "step": 52940 }, { "epoch": 1.924195072316302, "grad_norm": 0.4657406806945801, "learning_rate": 4.2182682172407853e-05, "loss": 0.1099, "step": 52950 }, { "epoch": 1.9245584708191004, "grad_norm": 1.0153266191482544, "learning_rate": 4.2178912453726585e-05, "loss": 0.1028, "step": 52960 }, { "epoch": 1.9249218693218983, "grad_norm": 0.48774194717407227, "learning_rate": 4.217514199486712e-05, "loss": 0.1196, "step": 52970 }, { "epoch": 1.9252852678246966, "grad_norm": 0.5909627079963684, "learning_rate": 4.2171370795991886e-05, "loss": 3.5511, "step": 52980 }, { "epoch": 1.9256486663274948, "grad_norm": 1.0662988424301147, "learning_rate": 4.216759885726338e-05, "loss": 0.2095, "step": 52990 }, { "epoch": 1.9260120648302927, "grad_norm": 1.2562239170074463, "learning_rate": 4.2163826178844124e-05, "loss": 0.6666, "step": 53000 }, { "epoch": 1.9263754633330912, "grad_norm": 0.6966450214385986, "learning_rate": 4.216005276089666e-05, "loss": 0.1059, "step": 53010 }, { "epoch": 1.9267388618358892, "grad_norm": 0.7130870819091797, "learning_rate": 4.215627860358359e-05, 
"loss": 0.1822, "step": 53020 }, { "epoch": 1.9271022603386874, "grad_norm": 0.8667415380477905, "learning_rate": 4.215250370706752e-05, "loss": 0.1297, "step": 53030 }, { "epoch": 1.9274656588414856, "grad_norm": 0.8106217384338379, "learning_rate": 4.214872807151108e-05, "loss": 0.1198, "step": 53040 }, { "epoch": 1.9278290573442838, "grad_norm": 0.6625964045524597, "learning_rate": 4.214495169707697e-05, "loss": 0.1554, "step": 53050 }, { "epoch": 1.928192455847082, "grad_norm": 1.328296422958374, "learning_rate": 4.214117458392789e-05, "loss": 0.1275, "step": 53060 }, { "epoch": 1.92855585434988, "grad_norm": 0.5741416811943054, "learning_rate": 4.213739673222659e-05, "loss": 0.122, "step": 53070 }, { "epoch": 1.9289192528526784, "grad_norm": 0.6884883046150208, "learning_rate": 4.213361814213584e-05, "loss": 0.2229, "step": 53080 }, { "epoch": 1.9292826513554764, "grad_norm": 1.364357590675354, "learning_rate": 4.212983881381844e-05, "loss": 0.1169, "step": 53090 }, { "epoch": 1.9296460498582746, "grad_norm": 1.6540427207946777, "learning_rate": 4.2126058747437236e-05, "loss": 0.1273, "step": 53100 }, { "epoch": 1.9300094483610728, "grad_norm": 1.8838560581207275, "learning_rate": 4.21222779431551e-05, "loss": 0.1395, "step": 53110 }, { "epoch": 1.9303728468638708, "grad_norm": 1.0048059225082397, "learning_rate": 4.2118496401134925e-05, "loss": 0.1516, "step": 53120 }, { "epoch": 1.9307362453666692, "grad_norm": 1.0288422107696533, "learning_rate": 4.211471412153965e-05, "loss": 0.114, "step": 53130 }, { "epoch": 1.9310996438694672, "grad_norm": 0.8214828968048096, "learning_rate": 4.2110931104532236e-05, "loss": 0.1154, "step": 53140 }, { "epoch": 1.9314630423722654, "grad_norm": 1.7350075244903564, "learning_rate": 4.210714735027568e-05, "loss": 0.1351, "step": 53150 }, { "epoch": 1.9318264408750636, "grad_norm": 1.1846505403518677, "learning_rate": 4.210336285893302e-05, "loss": 0.1213, "step": 53160 }, { "epoch": 1.9321898393778618, "grad_norm": 
0.36710694432258606, "learning_rate": 4.2099577630667295e-05, "loss": 0.1328, "step": 53170 }, { "epoch": 1.93255323788066, "grad_norm": 1.5242916345596313, "learning_rate": 4.209579166564162e-05, "loss": 0.1068, "step": 53180 }, { "epoch": 1.932916636383458, "grad_norm": 0.5341594219207764, "learning_rate": 4.209200496401911e-05, "loss": 0.1132, "step": 53190 }, { "epoch": 1.9332800348862562, "grad_norm": 1.32260000705719, "learning_rate": 4.2088217525962914e-05, "loss": 0.2021, "step": 53200 }, { "epoch": 1.9336434333890544, "grad_norm": 1.7666555643081665, "learning_rate": 4.208442935163622e-05, "loss": 0.1199, "step": 53210 }, { "epoch": 1.9340068318918526, "grad_norm": 0.7060844302177429, "learning_rate": 4.2080640441202265e-05, "loss": 0.2058, "step": 53220 }, { "epoch": 1.9343702303946508, "grad_norm": 0.6064701676368713, "learning_rate": 4.207685079482428e-05, "loss": 0.1163, "step": 53230 }, { "epoch": 1.9347336288974488, "grad_norm": 0.8445596694946289, "learning_rate": 4.2073060412665554e-05, "loss": 0.1094, "step": 53240 }, { "epoch": 1.9350970274002472, "grad_norm": 1.8160717487335205, "learning_rate": 4.20692692948894e-05, "loss": 0.1421, "step": 53250 }, { "epoch": 1.9354604259030452, "grad_norm": 0.8465480208396912, "learning_rate": 4.206547744165918e-05, "loss": 0.1151, "step": 53260 }, { "epoch": 1.9358238244058434, "grad_norm": 0.4361567795276642, "learning_rate": 4.206168485313823e-05, "loss": 0.1343, "step": 53270 }, { "epoch": 1.9361872229086416, "grad_norm": 0.4682723581790924, "learning_rate": 4.2057891529490004e-05, "loss": 0.2349, "step": 53280 }, { "epoch": 1.9365506214114396, "grad_norm": 0.7894558310508728, "learning_rate": 4.205409747087792e-05, "loss": 0.1211, "step": 53290 }, { "epoch": 1.936914019914238, "grad_norm": 1.9797241687774658, "learning_rate": 4.205030267746545e-05, "loss": 0.1403, "step": 53300 }, { "epoch": 1.937277418417036, "grad_norm": 0.7554487586021423, "learning_rate": 4.20465071494161e-05, "loss": 0.133, "step": 
53310 }, { "epoch": 1.9376408169198343, "grad_norm": 0.5056400895118713, "learning_rate": 4.2042710886893414e-05, "loss": 0.1429, "step": 53320 }, { "epoch": 1.9380042154226325, "grad_norm": 4.12957763671875, "learning_rate": 4.203891389006096e-05, "loss": 0.1154, "step": 53330 }, { "epoch": 1.9383676139254307, "grad_norm": 0.7138916850090027, "learning_rate": 4.203511615908232e-05, "loss": 0.1235, "step": 53340 }, { "epoch": 1.9387310124282289, "grad_norm": 0.6101375818252563, "learning_rate": 4.2031317694121144e-05, "loss": 0.1015, "step": 53350 }, { "epoch": 1.9390944109310269, "grad_norm": 0.9244548082351685, "learning_rate": 4.202751849534108e-05, "loss": 0.104, "step": 53360 }, { "epoch": 1.9394578094338253, "grad_norm": 0.38235339522361755, "learning_rate": 4.202371856290583e-05, "loss": 0.2562, "step": 53370 }, { "epoch": 1.9398212079366233, "grad_norm": 1.2204453945159912, "learning_rate": 4.201991789697912e-05, "loss": 0.1074, "step": 53380 }, { "epoch": 1.9401846064394215, "grad_norm": 0.9025306105613708, "learning_rate": 4.2016116497724715e-05, "loss": 0.1154, "step": 53390 }, { "epoch": 1.9405480049422197, "grad_norm": 0.6132228970527649, "learning_rate": 4.201231436530637e-05, "loss": 0.1332, "step": 53400 }, { "epoch": 1.9405480049422197, "eval_loss": 0.34726399183273315, "eval_runtime": 180.5053, "eval_samples_per_second": 41.074, "eval_steps_per_second": 5.136, "eval_wer": 0.16638226804872294, "step": 53400 }, { "epoch": 1.9409114034450177, "grad_norm": 1.0227421522140503, "learning_rate": 4.2008511499887945e-05, "loss": 0.1042, "step": 53410 }, { "epoch": 1.941274801947816, "grad_norm": 1.9135148525238037, "learning_rate": 4.2004707901633274e-05, "loss": 0.1953, "step": 53420 }, { "epoch": 1.941638200450614, "grad_norm": 1.0358216762542725, "learning_rate": 4.200090357070624e-05, "loss": 0.1029, "step": 53430 }, { "epoch": 1.9420015989534123, "grad_norm": 0.9207081198692322, "learning_rate": 4.199709850727076e-05, "loss": 0.1171, "step": 53440 }, 
{ "epoch": 1.9423649974562105, "grad_norm": 0.558474600315094, "learning_rate": 4.1993292711490784e-05, "loss": 0.1185, "step": 53450 }, { "epoch": 1.9427283959590087, "grad_norm": 1.7064687013626099, "learning_rate": 4.198948618353029e-05, "loss": 0.1429, "step": 53460 }, { "epoch": 1.943091794461807, "grad_norm": 0.4840683341026306, "learning_rate": 4.198567892355328e-05, "loss": 0.1566, "step": 53470 }, { "epoch": 1.943455192964605, "grad_norm": 2.152949810028076, "learning_rate": 4.19818709317238e-05, "loss": 0.11, "step": 53480 }, { "epoch": 1.943818591467403, "grad_norm": 1.0784387588500977, "learning_rate": 4.197806220820592e-05, "loss": 0.1211, "step": 53490 }, { "epoch": 1.9441819899702013, "grad_norm": 0.9039841890335083, "learning_rate": 4.197425275316376e-05, "loss": 0.1167, "step": 53500 }, { "epoch": 1.9445453884729995, "grad_norm": 0.8237749934196472, "learning_rate": 4.1970442566761436e-05, "loss": 0.1443, "step": 53510 }, { "epoch": 1.9449087869757977, "grad_norm": 0.5178882479667664, "learning_rate": 4.196663164916313e-05, "loss": 0.1251, "step": 53520 }, { "epoch": 1.9452721854785957, "grad_norm": 0.883787989616394, "learning_rate": 4.196282000053301e-05, "loss": 0.1078, "step": 53530 }, { "epoch": 1.9456355839813941, "grad_norm": 0.6376329064369202, "learning_rate": 4.195900762103535e-05, "loss": 0.1286, "step": 53540 }, { "epoch": 1.9459989824841921, "grad_norm": 1.3312426805496216, "learning_rate": 4.1955194510834394e-05, "loss": 0.1421, "step": 53550 }, { "epoch": 1.9463623809869903, "grad_norm": 0.8462713360786438, "learning_rate": 4.1951380670094424e-05, "loss": 0.1183, "step": 53560 }, { "epoch": 1.9467257794897885, "grad_norm": 0.4300178587436676, "learning_rate": 4.194756609897978e-05, "loss": 0.11, "step": 53570 }, { "epoch": 1.9470891779925865, "grad_norm": 0.5355455875396729, "learning_rate": 4.1943750797654816e-05, "loss": 0.1197, "step": 53580 }, { "epoch": 1.947452576495385, "grad_norm": 0.8750283122062683, "learning_rate": 
4.193993476628391e-05, "loss": 0.1024, "step": 53590 }, { "epoch": 1.947815974998183, "grad_norm": 1.2552978992462158, "learning_rate": 4.193611800503148e-05, "loss": 0.152, "step": 53600 }, { "epoch": 1.9481793735009811, "grad_norm": 0.8852622509002686, "learning_rate": 4.1932300514062e-05, "loss": 0.1077, "step": 53610 }, { "epoch": 1.9485427720037793, "grad_norm": 0.6841835379600525, "learning_rate": 4.192848229353992e-05, "loss": 0.1281, "step": 53620 }, { "epoch": 1.9489061705065776, "grad_norm": 1.0521607398986816, "learning_rate": 4.192466334362978e-05, "loss": 0.1136, "step": 53630 }, { "epoch": 1.9492695690093758, "grad_norm": 4.119276523590088, "learning_rate": 4.192084366449612e-05, "loss": 0.0939, "step": 53640 }, { "epoch": 1.9496329675121737, "grad_norm": 0.8290958404541016, "learning_rate": 4.19170232563035e-05, "loss": 0.1625, "step": 53650 }, { "epoch": 1.9499963660149722, "grad_norm": 0.6359632015228271, "learning_rate": 4.191320211921654e-05, "loss": 0.1208, "step": 53660 }, { "epoch": 1.9503597645177702, "grad_norm": 0.699052631855011, "learning_rate": 4.1909380253399875e-05, "loss": 0.1248, "step": 53670 }, { "epoch": 1.9507231630205684, "grad_norm": 17.17115592956543, "learning_rate": 4.190555765901819e-05, "loss": 0.3458, "step": 53680 }, { "epoch": 1.9510865615233666, "grad_norm": 3.899052858352661, "learning_rate": 4.190173433623618e-05, "loss": 0.1144, "step": 53690 }, { "epoch": 1.9514499600261646, "grad_norm": 1.1907508373260498, "learning_rate": 4.1897910285218556e-05, "loss": 0.152, "step": 53700 }, { "epoch": 1.951813358528963, "grad_norm": 0.7645424008369446, "learning_rate": 4.189408550613011e-05, "loss": 0.1258, "step": 53710 }, { "epoch": 1.952176757031761, "grad_norm": 0.2523237466812134, "learning_rate": 4.1890259999135625e-05, "loss": 0.1213, "step": 53720 }, { "epoch": 1.9525401555345592, "grad_norm": 1.3578497171401978, "learning_rate": 4.188643376439993e-05, "loss": 1.5489, "step": 53730 }, { "epoch": 1.9529035540373574, 
"grad_norm": 0.6249386072158813, "learning_rate": 4.1882606802087896e-05, "loss": 0.1699, "step": 53740 }, { "epoch": 1.9532669525401556, "grad_norm": 0.9699862599372864, "learning_rate": 4.1878779112364394e-05, "loss": 0.1724, "step": 53750 }, { "epoch": 1.9536303510429538, "grad_norm": 1.3478792905807495, "learning_rate": 4.187495069539437e-05, "loss": 0.133, "step": 53760 }, { "epoch": 1.9539937495457518, "grad_norm": 0.6324986815452576, "learning_rate": 4.187112155134275e-05, "loss": 0.1354, "step": 53770 }, { "epoch": 1.95435714804855, "grad_norm": 2.009544610977173, "learning_rate": 4.186729168037453e-05, "loss": 0.1347, "step": 53780 }, { "epoch": 1.9547205465513482, "grad_norm": 0.6510929465293884, "learning_rate": 4.186346108265472e-05, "loss": 0.1227, "step": 53790 }, { "epoch": 1.9550839450541464, "grad_norm": 1.5079245567321777, "learning_rate": 4.185962975834838e-05, "loss": 0.1347, "step": 53800 }, { "epoch": 1.9554473435569446, "grad_norm": 3.214449882507324, "learning_rate": 4.1855797707620586e-05, "loss": 0.1138, "step": 53810 }, { "epoch": 1.9558107420597426, "grad_norm": 0.7995330095291138, "learning_rate": 4.1851964930636434e-05, "loss": 0.1522, "step": 53820 }, { "epoch": 1.956174140562541, "grad_norm": 1.6713122129440308, "learning_rate": 4.184813142756108e-05, "loss": 0.125, "step": 53830 }, { "epoch": 1.956537539065339, "grad_norm": 0.7136033177375793, "learning_rate": 4.184429719855968e-05, "loss": 0.1267, "step": 53840 }, { "epoch": 1.9569009375681372, "grad_norm": 0.5580174922943115, "learning_rate": 4.1840462243797444e-05, "loss": 0.1126, "step": 53850 }, { "epoch": 1.9572643360709354, "grad_norm": 0.8671419024467468, "learning_rate": 4.183662656343961e-05, "loss": 0.1209, "step": 53860 }, { "epoch": 1.9576277345737334, "grad_norm": 0.6624314188957214, "learning_rate": 4.183279015765145e-05, "loss": 0.1397, "step": 53870 }, { "epoch": 1.9579911330765318, "grad_norm": 1.4401901960372925, "learning_rate": 4.182895302659825e-05, "loss": 
0.0894, "step": 53880 }, { "epoch": 1.9583545315793298, "grad_norm": 0.9187797904014587, "learning_rate": 4.182511517044534e-05, "loss": 0.127, "step": 53890 }, { "epoch": 1.958717930082128, "grad_norm": 1.2426072359085083, "learning_rate": 4.1821276589358084e-05, "loss": 0.1381, "step": 53900 }, { "epoch": 1.9590813285849262, "grad_norm": 0.8035231828689575, "learning_rate": 4.1817437283501865e-05, "loss": 0.0953, "step": 53910 }, { "epoch": 1.9594447270877244, "grad_norm": 0.32439205050468445, "learning_rate": 4.1813597253042115e-05, "loss": 0.138, "step": 53920 }, { "epoch": 1.9598081255905226, "grad_norm": 1.0287327766418457, "learning_rate": 4.180975649814428e-05, "loss": 3.1039, "step": 53930 }, { "epoch": 1.9601715240933206, "grad_norm": 1.3450182676315308, "learning_rate": 4.180591501897384e-05, "loss": 0.1081, "step": 53940 }, { "epoch": 1.960534922596119, "grad_norm": 7.1403961181640625, "learning_rate": 4.180207281569633e-05, "loss": 0.1484, "step": 53950 }, { "epoch": 1.960898321098917, "grad_norm": 1.2163225412368774, "learning_rate": 4.179822988847728e-05, "loss": 0.1041, "step": 53960 }, { "epoch": 1.9612617196017152, "grad_norm": 0.9922796487808228, "learning_rate": 4.179438623748228e-05, "loss": 0.1343, "step": 53970 }, { "epoch": 1.9616251181045135, "grad_norm": 2.245447874069214, "learning_rate": 4.1790541862876906e-05, "loss": 0.1015, "step": 53980 }, { "epoch": 1.9619885166073114, "grad_norm": 2.284679651260376, "learning_rate": 4.178669676482685e-05, "loss": 0.0913, "step": 53990 }, { "epoch": 1.9623519151101099, "grad_norm": 0.9692349433898926, "learning_rate": 4.178285094349775e-05, "loss": 0.1282, "step": 54000 }, { "epoch": 1.9623519151101099, "eval_loss": 0.3314037322998047, "eval_runtime": 180.606, "eval_samples_per_second": 41.051, "eval_steps_per_second": 5.133, "eval_wer": 0.16505709150979359, "step": 54000 }, { "epoch": 1.9627153136129079, "grad_norm": 0.4108816683292389, "learning_rate": 4.177900439905531e-05, "loss": 0.1272, 
"step": 54010 }, { "epoch": 1.963078712115706, "grad_norm": 0.3358526825904846, "learning_rate": 4.1775157131665276e-05, "loss": 0.1453, "step": 54020 }, { "epoch": 1.9634421106185043, "grad_norm": 1.476314663887024, "learning_rate": 4.177130914149341e-05, "loss": 0.1162, "step": 54030 }, { "epoch": 1.9638055091213025, "grad_norm": 0.7912114262580872, "learning_rate": 4.17674604287055e-05, "loss": 0.1056, "step": 54040 }, { "epoch": 1.9641689076241007, "grad_norm": 0.4801596403121948, "learning_rate": 4.176361099346738e-05, "loss": 0.1478, "step": 54050 }, { "epoch": 1.9645323061268987, "grad_norm": 0.7710531949996948, "learning_rate": 4.175976083594491e-05, "loss": 0.1131, "step": 54060 }, { "epoch": 1.9648957046296969, "grad_norm": 0.6709341406822205, "learning_rate": 4.175590995630398e-05, "loss": 0.1586, "step": 54070 }, { "epoch": 1.965259103132495, "grad_norm": 1.3941307067871094, "learning_rate": 4.17520583547105e-05, "loss": 0.1131, "step": 54080 }, { "epoch": 1.9656225016352933, "grad_norm": 0.759842038154602, "learning_rate": 4.174820603133043e-05, "loss": 0.0985, "step": 54090 }, { "epoch": 1.9659859001380915, "grad_norm": 0.9153608679771423, "learning_rate": 4.174435298632976e-05, "loss": 0.1547, "step": 54100 }, { "epoch": 1.9663492986408895, "grad_norm": 1.4363652467727661, "learning_rate": 4.174049921987449e-05, "loss": 0.1127, "step": 54110 }, { "epoch": 1.966712697143688, "grad_norm": 0.7368317246437073, "learning_rate": 4.173664473213067e-05, "loss": 0.1302, "step": 54120 }, { "epoch": 1.967076095646486, "grad_norm": 1.2740521430969238, "learning_rate": 4.173278952326438e-05, "loss": 0.1294, "step": 54130 }, { "epoch": 1.967439494149284, "grad_norm": 2.7798774242401123, "learning_rate": 4.1728933593441735e-05, "loss": 0.1011, "step": 54140 }, { "epoch": 1.9678028926520823, "grad_norm": 1.9629179239273071, "learning_rate": 4.172507694282885e-05, "loss": 0.3149, "step": 54150 }, { "epoch": 1.9681662911548803, "grad_norm": 3.5863332748413086, 
"learning_rate": 4.1721219571591915e-05, "loss": 0.1323, "step": 54160 }, { "epoch": 1.9685296896576787, "grad_norm": 0.29740679264068604, "learning_rate": 4.1717361479897116e-05, "loss": 0.1725, "step": 54170 }, { "epoch": 1.9688930881604767, "grad_norm": 1.0469319820404053, "learning_rate": 4.17135026679107e-05, "loss": 0.1138, "step": 54180 }, { "epoch": 1.969256486663275, "grad_norm": 0.5336177945137024, "learning_rate": 4.170964313579891e-05, "loss": 0.2207, "step": 54190 }, { "epoch": 1.9696198851660731, "grad_norm": 0.973862886428833, "learning_rate": 4.1705782883728055e-05, "loss": 0.1328, "step": 54200 }, { "epoch": 1.9699832836688713, "grad_norm": 0.8640954494476318, "learning_rate": 4.170192191186446e-05, "loss": 0.1315, "step": 54210 }, { "epoch": 1.9703466821716695, "grad_norm": 0.47578397393226624, "learning_rate": 4.169806022037447e-05, "loss": 0.1823, "step": 54220 }, { "epoch": 1.9707100806744675, "grad_norm": 1.4527409076690674, "learning_rate": 4.169419780942448e-05, "loss": 2.5822, "step": 54230 }, { "epoch": 1.971073479177266, "grad_norm": 0.48623302578926086, "learning_rate": 4.1690334679180896e-05, "loss": 0.1093, "step": 54240 }, { "epoch": 1.971436877680064, "grad_norm": 1.1767234802246094, "learning_rate": 4.1686470829810185e-05, "loss": 0.1329, "step": 54250 }, { "epoch": 1.9718002761828621, "grad_norm": 1.128841519355774, "learning_rate": 4.1682606261478816e-05, "loss": 0.1102, "step": 54260 }, { "epoch": 1.9721636746856603, "grad_norm": 1.4685746431350708, "learning_rate": 4.16787409743533e-05, "loss": 0.14, "step": 54270 }, { "epoch": 1.9725270731884583, "grad_norm": 0.9918948411941528, "learning_rate": 4.167487496860018e-05, "loss": 0.1093, "step": 54280 }, { "epoch": 1.9728904716912568, "grad_norm": 0.5849924683570862, "learning_rate": 4.167100824438602e-05, "loss": 0.3633, "step": 54290 }, { "epoch": 1.9732538701940547, "grad_norm": 1.0083026885986328, "learning_rate": 4.1667140801877433e-05, "loss": 0.3471, "step": 54300 }, { 
"epoch": 1.973617268696853, "grad_norm": 4.210540771484375, "learning_rate": 4.1663272641241056e-05, "loss": 0.111, "step": 54310 }, { "epoch": 1.9739806671996512, "grad_norm": 0.47457021474838257, "learning_rate": 4.165940376264354e-05, "loss": 0.1304, "step": 54320 }, { "epoch": 1.9743440657024494, "grad_norm": 0.6626879572868347, "learning_rate": 4.1655534166251596e-05, "loss": 0.1362, "step": 54330 }, { "epoch": 1.9747074642052476, "grad_norm": 1.0823551416397095, "learning_rate": 4.1651663852231946e-05, "loss": 0.1009, "step": 54340 }, { "epoch": 1.9750708627080455, "grad_norm": 1.6723361015319824, "learning_rate": 4.164779282075134e-05, "loss": 0.1539, "step": 54350 }, { "epoch": 1.9754342612108438, "grad_norm": 1.5842360258102417, "learning_rate": 4.1643921071976584e-05, "loss": 0.1342, "step": 54360 }, { "epoch": 1.975797659713642, "grad_norm": 1.055336594581604, "learning_rate": 4.164004860607448e-05, "loss": 0.176, "step": 54370 }, { "epoch": 1.9761610582164402, "grad_norm": 0.81571364402771, "learning_rate": 4.16361754232119e-05, "loss": 0.1187, "step": 54380 }, { "epoch": 1.9765244567192384, "grad_norm": 1.0346819162368774, "learning_rate": 4.1632301523555693e-05, "loss": 0.1255, "step": 54390 }, { "epoch": 1.9768878552220364, "grad_norm": 1.1211163997650146, "learning_rate": 4.162842690727281e-05, "loss": 0.1165, "step": 54400 }, { "epoch": 1.9772512537248348, "grad_norm": 0.5160552263259888, "learning_rate": 4.162455157453017e-05, "loss": 0.1393, "step": 54410 }, { "epoch": 1.9776146522276328, "grad_norm": 0.767784833908081, "learning_rate": 4.1620675525494746e-05, "loss": 0.1552, "step": 54420 }, { "epoch": 1.977978050730431, "grad_norm": 1.101317286491394, "learning_rate": 4.1616798760333554e-05, "loss": 0.1182, "step": 54430 }, { "epoch": 1.9783414492332292, "grad_norm": 0.7279396653175354, "learning_rate": 4.161292127921363e-05, "loss": 0.12, "step": 54440 }, { "epoch": 1.9787048477360272, "grad_norm": 1.5998153686523438, "learning_rate": 
4.1609043082302036e-05, "loss": 0.1335, "step": 54450 }, { "epoch": 1.9790682462388256, "grad_norm": 0.8245583772659302, "learning_rate": 4.160516416976587e-05, "loss": 0.1249, "step": 54460 }, { "epoch": 1.9794316447416236, "grad_norm": 0.5749397277832031, "learning_rate": 4.1601284541772255e-05, "loss": 0.1939, "step": 54470 }, { "epoch": 1.9797950432444218, "grad_norm": 0.7786006927490234, "learning_rate": 4.159740419848837e-05, "loss": 1.8059, "step": 54480 }, { "epoch": 1.98015844174722, "grad_norm": 0.41233259439468384, "learning_rate": 4.159352314008138e-05, "loss": 0.1208, "step": 54490 }, { "epoch": 1.9805218402500182, "grad_norm": 0.5091323256492615, "learning_rate": 4.158964136671852e-05, "loss": 0.1279, "step": 54500 }, { "epoch": 1.9808852387528164, "grad_norm": 4.300207138061523, "learning_rate": 4.158575887856704e-05, "loss": 0.1744, "step": 54510 }, { "epoch": 1.9812486372556144, "grad_norm": 0.7447227239608765, "learning_rate": 4.1581875675794226e-05, "loss": 0.1652, "step": 54520 }, { "epoch": 1.9816120357584128, "grad_norm": 0.6846696734428406, "learning_rate": 4.157799175856738e-05, "loss": 0.1027, "step": 54530 }, { "epoch": 1.9819754342612108, "grad_norm": 0.8642467260360718, "learning_rate": 4.157410712705386e-05, "loss": 0.1165, "step": 54540 }, { "epoch": 1.982338832764009, "grad_norm": 0.8407902121543884, "learning_rate": 4.157022178142104e-05, "loss": 0.1623, "step": 54550 }, { "epoch": 1.9827022312668072, "grad_norm": 0.8839777708053589, "learning_rate": 4.156633572183631e-05, "loss": 0.1131, "step": 54560 }, { "epoch": 1.9830656297696052, "grad_norm": 1.39069402217865, "learning_rate": 4.1562448948467126e-05, "loss": 0.1906, "step": 54570 }, { "epoch": 1.9834290282724036, "grad_norm": 2.1196155548095703, "learning_rate": 4.1558561461480936e-05, "loss": 0.1261, "step": 54580 }, { "epoch": 1.9837924267752016, "grad_norm": 1.092934250831604, "learning_rate": 4.155467326104525e-05, "loss": 0.1029, "step": 54590 }, { "epoch": 
1.9841558252779998, "grad_norm": 0.7902958989143372, "learning_rate": 4.1550784347327607e-05, "loss": 0.1159, "step": 54600 }, { "epoch": 1.9841558252779998, "eval_loss": 0.3433511555194855, "eval_runtime": 180.0868, "eval_samples_per_second": 41.169, "eval_steps_per_second": 5.148, "eval_wer": 0.16009221776462687, "step": 54600 }, { "epoch": 1.984519223780798, "grad_norm": 1.3083094358444214, "learning_rate": 4.1546894720495546e-05, "loss": 0.1172, "step": 54610 }, { "epoch": 1.9848826222835962, "grad_norm": 1.9061583280563354, "learning_rate": 4.154300438071666e-05, "loss": 0.1335, "step": 54620 }, { "epoch": 1.9852460207863944, "grad_norm": 1.9469786882400513, "learning_rate": 4.153911332815859e-05, "loss": 0.1014, "step": 54630 }, { "epoch": 1.9856094192891924, "grad_norm": 6.232102394104004, "learning_rate": 4.153522156298896e-05, "loss": 0.1216, "step": 54640 }, { "epoch": 1.9859728177919909, "grad_norm": 0.6339765191078186, "learning_rate": 4.153132908537547e-05, "loss": 0.1236, "step": 54650 }, { "epoch": 1.9863362162947888, "grad_norm": 0.9476169943809509, "learning_rate": 4.152743589548582e-05, "loss": 0.0962, "step": 54660 }, { "epoch": 1.986699614797587, "grad_norm": 1.0691879987716675, "learning_rate": 4.152354199348777e-05, "loss": 0.3789, "step": 54670 }, { "epoch": 1.9870630133003853, "grad_norm": 0.9338876605033875, "learning_rate": 4.1519647379549084e-05, "loss": 0.0914, "step": 54680 }, { "epoch": 1.9874264118031832, "grad_norm": 0.6754772663116455, "learning_rate": 4.151575205383758e-05, "loss": 0.1044, "step": 54690 }, { "epoch": 1.9877898103059817, "grad_norm": 0.6961863040924072, "learning_rate": 4.151185601652107e-05, "loss": 0.1322, "step": 54700 }, { "epoch": 1.9881532088087797, "grad_norm": 1.1425034999847412, "learning_rate": 4.150795926776744e-05, "loss": 0.1381, "step": 54710 }, { "epoch": 1.9885166073115779, "grad_norm": 1.4080971479415894, "learning_rate": 4.150406180774458e-05, "loss": 0.1234, "step": 54720 }, { "epoch": 
1.988880005814376, "grad_norm": 0.7941197752952576, "learning_rate": 4.1500163636620414e-05, "loss": 0.0903, "step": 54730 }, { "epoch": 1.989243404317174, "grad_norm": 0.8813301920890808, "learning_rate": 4.149626475456291e-05, "loss": 0.0965, "step": 54740 }, { "epoch": 1.9896068028199725, "grad_norm": 0.727293848991394, "learning_rate": 4.1492365161740054e-05, "loss": 0.1269, "step": 54750 }, { "epoch": 1.9899702013227705, "grad_norm": 3.7548305988311768, "learning_rate": 4.148846485831986e-05, "loss": 0.0992, "step": 54760 }, { "epoch": 1.9903335998255687, "grad_norm": 0.5141910910606384, "learning_rate": 4.148456384447037e-05, "loss": 0.1275, "step": 54770 }, { "epoch": 1.9906969983283669, "grad_norm": 0.5424654483795166, "learning_rate": 4.1480662120359696e-05, "loss": 0.6733, "step": 54780 }, { "epoch": 1.991060396831165, "grad_norm": 0.8342083096504211, "learning_rate": 4.147675968615592e-05, "loss": 0.1126, "step": 54790 }, { "epoch": 1.9914237953339633, "grad_norm": 0.3992403745651245, "learning_rate": 4.147285654202719e-05, "loss": 0.1589, "step": 54800 }, { "epoch": 1.9917871938367613, "grad_norm": 0.9092950820922852, "learning_rate": 4.146895268814169e-05, "loss": 0.1217, "step": 54810 }, { "epoch": 1.9921505923395597, "grad_norm": 0.4327254295349121, "learning_rate": 4.1465048124667605e-05, "loss": 0.1615, "step": 54820 }, { "epoch": 1.9925139908423577, "grad_norm": 1.1109565496444702, "learning_rate": 4.146114285177319e-05, "loss": 0.1965, "step": 54830 }, { "epoch": 1.992877389345156, "grad_norm": 10.526979446411133, "learning_rate": 4.145723686962669e-05, "loss": 0.2047, "step": 54840 }, { "epoch": 1.993240787847954, "grad_norm": 1.4240983724594116, "learning_rate": 4.1453330178396415e-05, "loss": 0.1261, "step": 54850 }, { "epoch": 1.993604186350752, "grad_norm": 3.436688184738159, "learning_rate": 4.144942277825068e-05, "loss": 0.1194, "step": 54860 }, { "epoch": 1.9939675848535505, "grad_norm": 0.3504880666732788, "learning_rate": 
4.1445514669357846e-05, "loss": 0.1269, "step": 54870 }, { "epoch": 1.9943309833563485, "grad_norm": 1.8600322008132935, "learning_rate": 4.14416058518863e-05, "loss": 0.1202, "step": 54880 }, { "epoch": 1.9946943818591467, "grad_norm": 0.7843186259269714, "learning_rate": 4.1437696326004456e-05, "loss": 0.1047, "step": 54890 }, { "epoch": 1.995057780361945, "grad_norm": 1.593837022781372, "learning_rate": 4.1433786091880765e-05, "loss": 0.1269, "step": 54900 }, { "epoch": 1.9954211788647431, "grad_norm": 1.9453426599502563, "learning_rate": 4.14298751496837e-05, "loss": 0.1054, "step": 54910 }, { "epoch": 1.9957845773675413, "grad_norm": 0.7861382365226746, "learning_rate": 4.142596349958177e-05, "loss": 0.1467, "step": 54920 }, { "epoch": 1.9961479758703393, "grad_norm": 0.9338520169258118, "learning_rate": 4.142205114174352e-05, "loss": 0.1014, "step": 54930 }, { "epoch": 1.9965113743731377, "grad_norm": 3.8717129230499268, "learning_rate": 4.1418138076337516e-05, "loss": 0.1426, "step": 54940 }, { "epoch": 1.9968747728759357, "grad_norm": 0.579759418964386, "learning_rate": 4.141422430353236e-05, "loss": 0.1154, "step": 54950 }, { "epoch": 1.997238171378734, "grad_norm": 1.129913091659546, "learning_rate": 4.141030982349668e-05, "loss": 0.1019, "step": 54960 }, { "epoch": 1.9976015698815321, "grad_norm": 0.5852164626121521, "learning_rate": 4.140639463639913e-05, "loss": 0.1719, "step": 54970 }, { "epoch": 1.9979649683843301, "grad_norm": 3.1367127895355225, "learning_rate": 4.1402478742408415e-05, "loss": 0.0909, "step": 54980 }, { "epoch": 1.9983283668871286, "grad_norm": 0.5207622051239014, "learning_rate": 4.1398562141693253e-05, "loss": 0.1212, "step": 54990 }, { "epoch": 1.9986917653899265, "grad_norm": 0.5118950605392456, "learning_rate": 4.1394644834422394e-05, "loss": 0.1217, "step": 55000 }, { "epoch": 1.9990551638927248, "grad_norm": 0.45482707023620605, "learning_rate": 4.1390726820764614e-05, "loss": 0.0986, "step": 55010 }, { "epoch": 
1.999418562395523, "grad_norm": 1.9805399179458618, "learning_rate": 4.138680810088875e-05, "loss": 0.1356, "step": 55020 }, { "epoch": 1.999781960898321, "grad_norm": 1.0094414949417114, "learning_rate": 4.138288867496362e-05, "loss": 0.1751, "step": 55030 }, { "epoch": 2.0001453594011194, "grad_norm": 1.6492732763290405, "learning_rate": 4.1378968543158106e-05, "loss": 0.1792, "step": 55040 }, { "epoch": 2.0005087579039174, "grad_norm": 6.960714340209961, "learning_rate": 4.137504770564111e-05, "loss": 0.1707, "step": 55050 }, { "epoch": 2.000872156406716, "grad_norm": 0.483518123626709, "learning_rate": 4.1371126162581576e-05, "loss": 0.1043, "step": 55060 }, { "epoch": 2.0012355549095138, "grad_norm": 0.5076984763145447, "learning_rate": 4.1367203914148464e-05, "loss": 0.1356, "step": 55070 }, { "epoch": 2.0015989534123118, "grad_norm": 2.341773509979248, "learning_rate": 4.136328096051077e-05, "loss": 0.1096, "step": 55080 }, { "epoch": 2.00196235191511, "grad_norm": 0.5860946178436279, "learning_rate": 4.135935730183752e-05, "loss": 0.1076, "step": 55090 }, { "epoch": 2.002325750417908, "grad_norm": 0.4653785824775696, "learning_rate": 4.1355432938297774e-05, "loss": 0.1517, "step": 55100 }, { "epoch": 2.0026891489207066, "grad_norm": 1.198096513748169, "learning_rate": 4.135150787006061e-05, "loss": 0.369, "step": 55110 }, { "epoch": 2.0030525474235046, "grad_norm": 1.07427978515625, "learning_rate": 4.134758209729516e-05, "loss": 0.1476, "step": 55120 }, { "epoch": 2.0034159459263026, "grad_norm": 0.7984631657600403, "learning_rate": 4.134365562017055e-05, "loss": 0.0972, "step": 55130 }, { "epoch": 2.003779344429101, "grad_norm": 1.2470594644546509, "learning_rate": 4.133972843885598e-05, "loss": 0.0884, "step": 55140 }, { "epoch": 2.004142742931899, "grad_norm": 0.6046581268310547, "learning_rate": 4.133580055352064e-05, "loss": 0.2083, "step": 55150 }, { "epoch": 2.0045061414346974, "grad_norm": 0.8026099801063538, "learning_rate": 4.133187196433379e-05, 
"loss": 0.1278, "step": 55160 }, { "epoch": 2.0048695399374954, "grad_norm": 0.6957481503486633, "learning_rate": 4.132794267146467e-05, "loss": 0.1106, "step": 55170 }, { "epoch": 2.005232938440294, "grad_norm": 1.2208986282348633, "learning_rate": 4.13240126750826e-05, "loss": 0.1058, "step": 55180 }, { "epoch": 2.005596336943092, "grad_norm": 0.9665369391441345, "learning_rate": 4.132008197535692e-05, "loss": 0.1195, "step": 55190 }, { "epoch": 2.00595973544589, "grad_norm": 1.0869636535644531, "learning_rate": 4.131615057245696e-05, "loss": 0.1004, "step": 55200 }, { "epoch": 2.00595973544589, "eval_loss": 0.3372127115726471, "eval_runtime": 180.3164, "eval_samples_per_second": 41.117, "eval_steps_per_second": 5.141, "eval_wer": 0.16414035979450686, "step": 55200 }, { "epoch": 2.006323133948688, "grad_norm": 1.0461617708206177, "learning_rate": 4.131221846655212e-05, "loss": 0.1003, "step": 55210 }, { "epoch": 2.006686532451486, "grad_norm": 1.1234357357025146, "learning_rate": 4.130828565781183e-05, "loss": 0.131, "step": 55220 }, { "epoch": 2.0070499309542846, "grad_norm": 0.792592465877533, "learning_rate": 4.1304352146405544e-05, "loss": 0.1236, "step": 55230 }, { "epoch": 2.0074133294570826, "grad_norm": 2.0296480655670166, "learning_rate": 4.130041793250273e-05, "loss": 0.1162, "step": 55240 }, { "epoch": 2.0077767279598806, "grad_norm": 0.8490334153175354, "learning_rate": 4.12964830162729e-05, "loss": 0.0891, "step": 55250 }, { "epoch": 2.008140126462679, "grad_norm": 2.996204376220703, "learning_rate": 4.129254739788561e-05, "loss": 0.088, "step": 55260 }, { "epoch": 2.008503524965477, "grad_norm": 0.785502016544342, "learning_rate": 4.128861107751041e-05, "loss": 2.8838, "step": 55270 }, { "epoch": 2.0088669234682754, "grad_norm": 0.9276618957519531, "learning_rate": 4.128467405531693e-05, "loss": 0.1125, "step": 55280 }, { "epoch": 2.0092303219710734, "grad_norm": 0.6827619671821594, "learning_rate": 4.128073633147477e-05, "loss": 0.1538, "step": 
55290 }, { "epoch": 2.009593720473872, "grad_norm": 0.5531404614448547, "learning_rate": 4.1276797906153614e-05, "loss": 0.1451, "step": 55300 }, { "epoch": 2.00995711897667, "grad_norm": 1.3195756673812866, "learning_rate": 4.127285877952315e-05, "loss": 0.0831, "step": 55310 }, { "epoch": 2.010320517479468, "grad_norm": 1.291306734085083, "learning_rate": 4.12689189517531e-05, "loss": 0.1168, "step": 55320 }, { "epoch": 2.0106839159822663, "grad_norm": 0.7740198373794556, "learning_rate": 4.126497842301322e-05, "loss": 0.1293, "step": 55330 }, { "epoch": 2.0110473144850642, "grad_norm": 0.619372546672821, "learning_rate": 4.126103719347329e-05, "loss": 0.1151, "step": 55340 }, { "epoch": 2.0114107129878627, "grad_norm": 0.6809590458869934, "learning_rate": 4.1257095263303114e-05, "loss": 0.0808, "step": 55350 }, { "epoch": 2.0117741114906607, "grad_norm": 0.7653446197509766, "learning_rate": 4.125315263267255e-05, "loss": 0.0847, "step": 55360 }, { "epoch": 2.0121375099934586, "grad_norm": 0.7010202407836914, "learning_rate": 4.124920930175148e-05, "loss": 0.1856, "step": 55370 }, { "epoch": 2.012500908496257, "grad_norm": 0.8760896921157837, "learning_rate": 4.1245265270709786e-05, "loss": 0.0966, "step": 55380 }, { "epoch": 2.012864306999055, "grad_norm": 0.8872328400611877, "learning_rate": 4.124132053971741e-05, "loss": 0.1225, "step": 55390 }, { "epoch": 2.0132277055018535, "grad_norm": 0.7111076712608337, "learning_rate": 4.123737510894433e-05, "loss": 0.0917, "step": 55400 }, { "epoch": 2.0135911040046515, "grad_norm": 0.2959582209587097, "learning_rate": 4.1233428978560515e-05, "loss": 0.084, "step": 55410 }, { "epoch": 2.0139545025074495, "grad_norm": 0.5472272038459778, "learning_rate": 4.122948214873602e-05, "loss": 0.1165, "step": 55420 }, { "epoch": 2.014317901010248, "grad_norm": 1.7232263088226318, "learning_rate": 4.1225534619640874e-05, "loss": 0.1483, "step": 55430 }, { "epoch": 2.014681299513046, "grad_norm": 0.6070485711097717, 
"learning_rate": 4.1221586391445164e-05, "loss": 0.1181, "step": 55440 }, { "epoch": 2.0150446980158443, "grad_norm": 0.42631739377975464, "learning_rate": 4.121763746431903e-05, "loss": 0.2435, "step": 55450 }, { "epoch": 2.0154080965186423, "grad_norm": 0.4716903865337372, "learning_rate": 4.1213687838432594e-05, "loss": 0.102, "step": 55460 }, { "epoch": 2.0157714950214407, "grad_norm": 1.0024840831756592, "learning_rate": 4.120973751395604e-05, "loss": 2.0817, "step": 55470 }, { "epoch": 2.0161348935242387, "grad_norm": 0.6983594298362732, "learning_rate": 4.1205786491059565e-05, "loss": 0.1091, "step": 55480 }, { "epoch": 2.0164982920270367, "grad_norm": 3.18595814704895, "learning_rate": 4.1201834769913405e-05, "loss": 0.1334, "step": 55490 }, { "epoch": 2.016861690529835, "grad_norm": 1.0065993070602417, "learning_rate": 4.119788235068785e-05, "loss": 0.0893, "step": 55500 }, { "epoch": 2.017225089032633, "grad_norm": 1.9013348817825317, "learning_rate": 4.119392923355315e-05, "loss": 0.1055, "step": 55510 }, { "epoch": 2.0175884875354315, "grad_norm": 0.727342963218689, "learning_rate": 4.118997541867968e-05, "loss": 0.1577, "step": 55520 }, { "epoch": 2.0179518860382295, "grad_norm": 1.3305946588516235, "learning_rate": 4.118602090623777e-05, "loss": 0.1156, "step": 55530 }, { "epoch": 2.0183152845410275, "grad_norm": 166.5440673828125, "learning_rate": 4.11820656963978e-05, "loss": 3.13, "step": 55540 }, { "epoch": 2.018678683043826, "grad_norm": 1.1718511581420898, "learning_rate": 4.11781097893302e-05, "loss": 1.2695, "step": 55550 }, { "epoch": 2.019042081546624, "grad_norm": 0.7488642930984497, "learning_rate": 4.117415318520541e-05, "loss": 0.1179, "step": 55560 }, { "epoch": 2.0194054800494223, "grad_norm": 0.8934155702590942, "learning_rate": 4.117019588419391e-05, "loss": 0.0957, "step": 55570 }, { "epoch": 2.0197688785522203, "grad_norm": 1.2470290660858154, "learning_rate": 4.11662378864662e-05, "loss": 0.0974, "step": 55580 }, { "epoch": 
2.0201322770550187, "grad_norm": 2.387202501296997, "learning_rate": 4.116227919219282e-05, "loss": 0.2065, "step": 55590 }, { "epoch": 2.0204956755578167, "grad_norm": 0.9765509963035583, "learning_rate": 4.115831980154434e-05, "loss": 0.083, "step": 55600 }, { "epoch": 2.0208590740606147, "grad_norm": 1.544554591178894, "learning_rate": 4.115435971469135e-05, "loss": 0.1067, "step": 55610 }, { "epoch": 2.021222472563413, "grad_norm": 1.8516936302185059, "learning_rate": 4.1150398931804465e-05, "loss": 0.1292, "step": 55620 }, { "epoch": 2.021585871066211, "grad_norm": 1.211599349975586, "learning_rate": 4.114643745305437e-05, "loss": 0.098, "step": 55630 }, { "epoch": 2.0219492695690096, "grad_norm": 0.8160383105278015, "learning_rate": 4.114247527861173e-05, "loss": 0.0919, "step": 55640 }, { "epoch": 2.0223126680718075, "grad_norm": 0.8116459846496582, "learning_rate": 4.1138512408647256e-05, "loss": 0.0929, "step": 55650 }, { "epoch": 2.0226760665746055, "grad_norm": 0.9536616206169128, "learning_rate": 4.113454884333171e-05, "loss": 0.1269, "step": 55660 }, { "epoch": 2.023039465077404, "grad_norm": 0.6211200952529907, "learning_rate": 4.113058458283586e-05, "loss": 0.1285, "step": 55670 }, { "epoch": 2.023402863580202, "grad_norm": 1.3393282890319824, "learning_rate": 4.112661962733052e-05, "loss": 0.1211, "step": 55680 }, { "epoch": 2.0237662620830004, "grad_norm": 0.9137499928474426, "learning_rate": 4.1122653976986514e-05, "loss": 0.1492, "step": 55690 }, { "epoch": 2.0241296605857984, "grad_norm": 8.595315933227539, "learning_rate": 4.1118687631974705e-05, "loss": 0.0813, "step": 55700 }, { "epoch": 2.0244930590885963, "grad_norm": 8.519613265991211, "learning_rate": 4.111472059246601e-05, "loss": 0.0971, "step": 55710 }, { "epoch": 2.0248564575913948, "grad_norm": 0.906406819820404, "learning_rate": 4.111075285863133e-05, "loss": 0.1068, "step": 55720 }, { "epoch": 2.0252198560941927, "grad_norm": 0.6413214206695557, "learning_rate": 
4.1106784430641634e-05, "loss": 0.0904, "step": 55730 }, { "epoch": 2.025583254596991, "grad_norm": 1.054943561553955, "learning_rate": 4.110281530866791e-05, "loss": 0.1087, "step": 55740 }, { "epoch": 2.025946653099789, "grad_norm": 0.686661958694458, "learning_rate": 4.1098845492881164e-05, "loss": 0.1022, "step": 55750 }, { "epoch": 2.0263100516025876, "grad_norm": 1.9529190063476562, "learning_rate": 4.109487498345245e-05, "loss": 0.1089, "step": 55760 }, { "epoch": 2.0266734501053856, "grad_norm": 0.5279061198234558, "learning_rate": 4.109090378055284e-05, "loss": 0.1115, "step": 55770 }, { "epoch": 2.0270368486081836, "grad_norm": 1.3651883602142334, "learning_rate": 4.108693188435343e-05, "loss": 0.1206, "step": 55780 }, { "epoch": 2.027400247110982, "grad_norm": 0.9911472201347351, "learning_rate": 4.108295929502536e-05, "loss": 0.1235, "step": 55790 }, { "epoch": 2.02776364561378, "grad_norm": 1.1165162324905396, "learning_rate": 4.107898601273981e-05, "loss": 0.0944, "step": 55800 }, { "epoch": 2.02776364561378, "eval_loss": 0.342909038066864, "eval_runtime": 179.8346, "eval_samples_per_second": 41.227, "eval_steps_per_second": 5.155, "eval_wer": 0.16486648392542705, "step": 55800 }, { "epoch": 2.0281270441165784, "grad_norm": 0.5809179544448853, "learning_rate": 4.107501203766795e-05, "loss": 0.0912, "step": 55810 }, { "epoch": 2.0284904426193764, "grad_norm": 0.5710409283638, "learning_rate": 4.1071037369981025e-05, "loss": 0.1165, "step": 55820 }, { "epoch": 2.0288538411221744, "grad_norm": 1.5615267753601074, "learning_rate": 4.1067062009850276e-05, "loss": 0.1126, "step": 55830 }, { "epoch": 2.029217239624973, "grad_norm": 0.5823513269424438, "learning_rate": 4.106308595744699e-05, "loss": 0.1649, "step": 55840 }, { "epoch": 2.029580638127771, "grad_norm": 1.1584099531173706, "learning_rate": 4.105910921294249e-05, "loss": 3.2733, "step": 55850 }, { "epoch": 2.029944036630569, "grad_norm": 3.6284244060516357, "learning_rate": 4.105513177650811e-05, 
"loss": 0.1123, "step": 55860 }, { "epoch": 2.030307435133367, "grad_norm": 0.2692999839782715, "learning_rate": 4.105115364831522e-05, "loss": 0.1022, "step": 55870 }, { "epoch": 2.0306708336361656, "grad_norm": 0.7533041834831238, "learning_rate": 4.1047174828535236e-05, "loss": 0.1051, "step": 55880 }, { "epoch": 2.0310342321389636, "grad_norm": 1.988377332687378, "learning_rate": 4.104319531733958e-05, "loss": 0.1112, "step": 55890 }, { "epoch": 2.0313976306417616, "grad_norm": 1.1997753381729126, "learning_rate": 4.103921511489972e-05, "loss": 0.1166, "step": 55900 }, { "epoch": 2.03176102914456, "grad_norm": 0.9296682476997375, "learning_rate": 4.1035234221387154e-05, "loss": 0.0914, "step": 55910 }, { "epoch": 2.032124427647358, "grad_norm": 1.5428096055984497, "learning_rate": 4.1031252636973394e-05, "loss": 0.1383, "step": 55920 }, { "epoch": 2.0324878261501564, "grad_norm": 1.2502493858337402, "learning_rate": 4.1027270361829995e-05, "loss": 0.0876, "step": 55930 }, { "epoch": 2.0328512246529544, "grad_norm": 0.9851539731025696, "learning_rate": 4.102328739612855e-05, "loss": 0.1441, "step": 55940 }, { "epoch": 2.0332146231557524, "grad_norm": 0.6906759738922119, "learning_rate": 4.101930374004066e-05, "loss": 0.0894, "step": 55950 }, { "epoch": 2.033578021658551, "grad_norm": 0.6129600405693054, "learning_rate": 4.101531939373796e-05, "loss": 0.0935, "step": 55960 }, { "epoch": 2.033941420161349, "grad_norm": 0.7423244118690491, "learning_rate": 4.101133435739214e-05, "loss": 0.0996, "step": 55970 }, { "epoch": 2.0343048186641473, "grad_norm": 0.9887922406196594, "learning_rate": 4.100734863117489e-05, "loss": 0.1159, "step": 55980 }, { "epoch": 2.0346682171669452, "grad_norm": 0.701602041721344, "learning_rate": 4.100336221525794e-05, "loss": 0.1278, "step": 55990 }, { "epoch": 2.0350316156697437, "grad_norm": 1.088302493095398, "learning_rate": 4.099937510981304e-05, "loss": 0.0966, "step": 56000 }, { "epoch": 2.0353950141725417, "grad_norm": 
1.022271990776062, "learning_rate": 4.099538731501201e-05, "loss": 0.1055, "step": 56010 }, { "epoch": 2.0357584126753396, "grad_norm": 1.5955030918121338, "learning_rate": 4.099139883102664e-05, "loss": 0.1654, "step": 56020 }, { "epoch": 2.036121811178138, "grad_norm": 1.2459834814071655, "learning_rate": 4.0987409658028805e-05, "loss": 0.1179, "step": 56030 }, { "epoch": 2.036485209680936, "grad_norm": 2.4748172760009766, "learning_rate": 4.098341979619036e-05, "loss": 0.1511, "step": 56040 }, { "epoch": 2.0368486081837345, "grad_norm": 1.0897467136383057, "learning_rate": 4.097942924568323e-05, "loss": 0.1033, "step": 56050 }, { "epoch": 2.0372120066865325, "grad_norm": 0.9883999228477478, "learning_rate": 4.097543800667935e-05, "loss": 0.0879, "step": 56060 }, { "epoch": 2.0375754051893304, "grad_norm": 0.9798412919044495, "learning_rate": 4.097144607935068e-05, "loss": 0.9193, "step": 56070 }, { "epoch": 2.037938803692129, "grad_norm": 0.8659210801124573, "learning_rate": 4.0967453463869233e-05, "loss": 0.1121, "step": 56080 }, { "epoch": 2.038302202194927, "grad_norm": 0.825116753578186, "learning_rate": 4.096346016040703e-05, "loss": 0.1276, "step": 56090 }, { "epoch": 2.0386656006977253, "grad_norm": 0.6097813844680786, "learning_rate": 4.0959466169136115e-05, "loss": 0.1045, "step": 56100 }, { "epoch": 2.0390289992005233, "grad_norm": 2.39689564704895, "learning_rate": 4.0955471490228604e-05, "loss": 0.1286, "step": 56110 }, { "epoch": 2.0393923977033213, "grad_norm": 0.4547784626483917, "learning_rate": 4.095147612385658e-05, "loss": 0.1266, "step": 56120 }, { "epoch": 2.0397557962061197, "grad_norm": 1.1596136093139648, "learning_rate": 4.094748007019221e-05, "loss": 0.1022, "step": 56130 }, { "epoch": 2.0401191947089177, "grad_norm": 1.1768062114715576, "learning_rate": 4.094348332940767e-05, "loss": 0.1214, "step": 56140 }, { "epoch": 2.040482593211716, "grad_norm": 0.6755580902099609, "learning_rate": 4.0939485901675153e-05, "loss": 0.1078, "step": 
56150 }, { "epoch": 2.040845991714514, "grad_norm": 0.7698992490768433, "learning_rate": 4.0935487787166914e-05, "loss": 0.0923, "step": 56160 }, { "epoch": 2.0412093902173125, "grad_norm": 0.6949880123138428, "learning_rate": 4.093148898605519e-05, "loss": 0.1065, "step": 56170 }, { "epoch": 2.0415727887201105, "grad_norm": 0.5093110203742981, "learning_rate": 4.09274894985123e-05, "loss": 0.1358, "step": 56180 }, { "epoch": 2.0419361872229085, "grad_norm": 0.8544941544532776, "learning_rate": 4.092348932471055e-05, "loss": 0.1215, "step": 56190 }, { "epoch": 2.042299585725707, "grad_norm": 0.8509432673454285, "learning_rate": 4.091948846482231e-05, "loss": 0.1024, "step": 56200 }, { "epoch": 2.042662984228505, "grad_norm": 0.5665140151977539, "learning_rate": 4.091548691901995e-05, "loss": 0.089, "step": 56210 }, { "epoch": 2.0430263827313033, "grad_norm": 0.9858969449996948, "learning_rate": 4.0911484687475886e-05, "loss": 0.1107, "step": 56220 }, { "epoch": 2.0433897812341013, "grad_norm": 0.8955181241035461, "learning_rate": 4.0907481770362556e-05, "loss": 0.113, "step": 56230 }, { "epoch": 2.0437531797368993, "grad_norm": 0.9829466938972473, "learning_rate": 4.090347816785244e-05, "loss": 0.1631, "step": 56240 }, { "epoch": 2.0441165782396977, "grad_norm": 0.5513119101524353, "learning_rate": 4.089947388011803e-05, "loss": 0.0959, "step": 56250 }, { "epoch": 2.0444799767424957, "grad_norm": 0.4322792887687683, "learning_rate": 4.089546890733187e-05, "loss": 0.0982, "step": 56260 }, { "epoch": 2.044843375245294, "grad_norm": 1.2782713174819946, "learning_rate": 4.0891463249666504e-05, "loss": 0.1112, "step": 56270 }, { "epoch": 2.045206773748092, "grad_norm": 0.9792034029960632, "learning_rate": 4.088745690729453e-05, "loss": 0.0975, "step": 56280 }, { "epoch": 2.0455701722508906, "grad_norm": 2.263601064682007, "learning_rate": 4.088344988038857e-05, "loss": 0.1418, "step": 56290 }, { "epoch": 2.0459335707536885, "grad_norm": 1.011856198310852, 
"learning_rate": 4.087944216912126e-05, "loss": 0.1022, "step": 56300 }, { "epoch": 2.0462969692564865, "grad_norm": 1.0281102657318115, "learning_rate": 4.0875433773665286e-05, "loss": 0.0925, "step": 56310 }, { "epoch": 2.046660367759285, "grad_norm": 0.5272021889686584, "learning_rate": 4.087142469419336e-05, "loss": 0.1297, "step": 56320 }, { "epoch": 2.047023766262083, "grad_norm": 0.9789879322052002, "learning_rate": 4.0867414930878224e-05, "loss": 0.0946, "step": 56330 }, { "epoch": 2.0473871647648814, "grad_norm": 0.8782264590263367, "learning_rate": 4.086340448389262e-05, "loss": 0.1246, "step": 56340 }, { "epoch": 2.0477505632676793, "grad_norm": 0.66651850938797, "learning_rate": 4.0859393353409364e-05, "loss": 0.0898, "step": 56350 }, { "epoch": 2.0481139617704773, "grad_norm": 1.6209585666656494, "learning_rate": 4.085538153960128e-05, "loss": 0.1016, "step": 56360 }, { "epoch": 2.0484773602732758, "grad_norm": 1.2438985109329224, "learning_rate": 4.085136904264121e-05, "loss": 0.1419, "step": 56370 }, { "epoch": 2.0488407587760737, "grad_norm": 0.7311316728591919, "learning_rate": 4.0847355862702055e-05, "loss": 0.102, "step": 56380 }, { "epoch": 2.049204157278872, "grad_norm": 1.2164160013198853, "learning_rate": 4.084334199995672e-05, "loss": 0.0936, "step": 56390 }, { "epoch": 2.04956755578167, "grad_norm": 0.746296226978302, "learning_rate": 4.083932745457815e-05, "loss": 0.1268, "step": 56400 }, { "epoch": 2.04956755578167, "eval_loss": 0.31779325008392334, "eval_runtime": 178.9992, "eval_samples_per_second": 41.419, "eval_steps_per_second": 5.179, "eval_wer": 0.15986530397371432, "step": 56400 }, { "epoch": 2.049930954284468, "grad_norm": 0.94898521900177, "learning_rate": 4.083531222673931e-05, "loss": 0.1344, "step": 56410 }, { "epoch": 2.0502943527872666, "grad_norm": 0.5653538703918457, "learning_rate": 4.083129631661322e-05, "loss": 0.1026, "step": 56420 }, { "epoch": 2.0506577512900646, "grad_norm": 0.6599397659301758, "learning_rate": 
4.0827279724372884e-05, "loss": 0.0923, "step": 56430 }, { "epoch": 2.051021149792863, "grad_norm": 2.0571577548980713, "learning_rate": 4.082326245019139e-05, "loss": 0.1894, "step": 56440 }, { "epoch": 2.051384548295661, "grad_norm": 1.0707124471664429, "learning_rate": 4.081924449424182e-05, "loss": 0.1098, "step": 56450 }, { "epoch": 2.0517479467984594, "grad_norm": 0.450382798910141, "learning_rate": 4.081522585669728e-05, "loss": 0.0928, "step": 56460 }, { "epoch": 2.0521113453012574, "grad_norm": 0.9119880795478821, "learning_rate": 4.081120653773093e-05, "loss": 0.1102, "step": 56470 }, { "epoch": 2.0524747438040554, "grad_norm": 0.39113524556159973, "learning_rate": 4.080718653751595e-05, "loss": 0.1048, "step": 56480 }, { "epoch": 2.052838142306854, "grad_norm": 1.069718837738037, "learning_rate": 4.080316585622554e-05, "loss": 0.1165, "step": 56490 }, { "epoch": 2.053201540809652, "grad_norm": 1.0856863260269165, "learning_rate": 4.0799144494032936e-05, "loss": 0.0966, "step": 56500 }, { "epoch": 2.05356493931245, "grad_norm": 0.9092361927032471, "learning_rate": 4.079512245111142e-05, "loss": 0.1041, "step": 56510 }, { "epoch": 2.053928337815248, "grad_norm": 0.6025703549385071, "learning_rate": 4.079109972763428e-05, "loss": 0.1167, "step": 56520 }, { "epoch": 2.054291736318046, "grad_norm": 2.7288074493408203, "learning_rate": 4.078707632377483e-05, "loss": 0.1092, "step": 56530 }, { "epoch": 2.0546551348208446, "grad_norm": 1.0255563259124756, "learning_rate": 4.078305223970643e-05, "loss": 0.13, "step": 56540 }, { "epoch": 2.0550185333236426, "grad_norm": 0.556890070438385, "learning_rate": 4.0779429982609526e-05, "loss": 1.9074, "step": 56550 }, { "epoch": 2.055381931826441, "grad_norm": 0.8369362950325012, "learning_rate": 4.077540460662182e-05, "loss": 0.0925, "step": 56560 }, { "epoch": 2.055745330329239, "grad_norm": 0.6321738958358765, "learning_rate": 4.0771378550928064e-05, "loss": 0.1274, "step": 56570 }, { "epoch": 2.0561087288320374, 
"grad_norm": 2.1743392944335938, "learning_rate": 4.076735181570172e-05, "loss": 0.1147, "step": 56580 }, { "epoch": 2.0564721273348354, "grad_norm": 0.35284000635147095, "learning_rate": 4.076332440111629e-05, "loss": 0.2721, "step": 56590 }, { "epoch": 2.0568355258376334, "grad_norm": 0.4702494740486145, "learning_rate": 4.0759296307345285e-05, "loss": 0.0972, "step": 56600 }, { "epoch": 2.057198924340432, "grad_norm": 0.5263124704360962, "learning_rate": 4.075526753456229e-05, "loss": 0.1004, "step": 56610 }, { "epoch": 2.05756232284323, "grad_norm": 0.5063189268112183, "learning_rate": 4.0751238082940864e-05, "loss": 0.1254, "step": 56620 }, { "epoch": 2.0579257213460282, "grad_norm": 0.8294627070426941, "learning_rate": 4.074720795265463e-05, "loss": 0.1067, "step": 56630 }, { "epoch": 2.0582891198488262, "grad_norm": 0.8625883460044861, "learning_rate": 4.0743177143877244e-05, "loss": 0.1176, "step": 56640 }, { "epoch": 2.058652518351624, "grad_norm": 0.7036715745925903, "learning_rate": 4.073914565678236e-05, "loss": 0.0807, "step": 56650 }, { "epoch": 2.0590159168544226, "grad_norm": 1.748412013053894, "learning_rate": 4.07351134915437e-05, "loss": 0.0877, "step": 56660 }, { "epoch": 2.0593793153572206, "grad_norm": 1.1321426630020142, "learning_rate": 4.0731080648334975e-05, "loss": 0.097, "step": 56670 }, { "epoch": 2.059742713860019, "grad_norm": 2.1829307079315186, "learning_rate": 4.0727047127329964e-05, "loss": 0.1159, "step": 56680 }, { "epoch": 2.060106112362817, "grad_norm": 1.0120956897735596, "learning_rate": 4.0723012928702443e-05, "loss": 0.1096, "step": 56690 }, { "epoch": 2.060469510865615, "grad_norm": 0.6774507761001587, "learning_rate": 4.071897805262624e-05, "loss": 0.0925, "step": 56700 }, { "epoch": 2.0608329093684135, "grad_norm": 0.7925111651420593, "learning_rate": 4.07149424992752e-05, "loss": 0.0944, "step": 56710 }, { "epoch": 2.0611963078712114, "grad_norm": 0.41430070996284485, "learning_rate": 4.07109062688232e-05, "loss": 
0.1082, "step": 56720 }, { "epoch": 2.06155970637401, "grad_norm": 0.5457510948181152, "learning_rate": 4.070686936144415e-05, "loss": 0.1724, "step": 56730 }, { "epoch": 2.061923104876808, "grad_norm": 1.1867283582687378, "learning_rate": 4.070283177731199e-05, "loss": 0.121, "step": 56740 }, { "epoch": 2.0622865033796063, "grad_norm": 0.5466375946998596, "learning_rate": 4.0698793516600676e-05, "loss": 0.0729, "step": 56750 }, { "epoch": 2.0626499018824043, "grad_norm": 0.802174985408783, "learning_rate": 4.0694754579484204e-05, "loss": 0.0983, "step": 56760 }, { "epoch": 2.0630133003852023, "grad_norm": 0.37869808077812195, "learning_rate": 4.06907149661366e-05, "loss": 0.1225, "step": 56770 }, { "epoch": 2.0633766988880007, "grad_norm": 1.0356521606445312, "learning_rate": 4.068667467673192e-05, "loss": 0.1046, "step": 56780 }, { "epoch": 2.0637400973907987, "grad_norm": 1.9794261455535889, "learning_rate": 4.068263371144423e-05, "loss": 0.1497, "step": 56790 }, { "epoch": 2.064103495893597, "grad_norm": 0.7426532506942749, "learning_rate": 4.067859207044766e-05, "loss": 0.1153, "step": 56800 }, { "epoch": 2.064466894396395, "grad_norm": 0.4637458622455597, "learning_rate": 4.0674549753916344e-05, "loss": 0.0885, "step": 56810 }, { "epoch": 2.064830292899193, "grad_norm": 0.44504934549331665, "learning_rate": 4.067050676202445e-05, "loss": 0.1483, "step": 56820 }, { "epoch": 2.0651936914019915, "grad_norm": 0.8600061535835266, "learning_rate": 4.066646309494617e-05, "loss": 0.113, "step": 56830 }, { "epoch": 2.0655570899047895, "grad_norm": 1.6054418087005615, "learning_rate": 4.0662418752855746e-05, "loss": 0.1626, "step": 56840 }, { "epoch": 2.065920488407588, "grad_norm": 0.9366486072540283, "learning_rate": 4.0658373735927415e-05, "loss": 0.1035, "step": 56850 }, { "epoch": 2.066283886910386, "grad_norm": 0.9057123064994812, "learning_rate": 4.065432804433548e-05, "loss": 0.0976, "step": 56860 }, { "epoch": 2.0666472854131843, "grad_norm": 
0.7718061804771423, "learning_rate": 4.065028167825424e-05, "loss": 1.6687, "step": 56870 }, { "epoch": 2.0670106839159823, "grad_norm": 1.0670592784881592, "learning_rate": 4.064623463785805e-05, "loss": 0.0958, "step": 56880 }, { "epoch": 2.0673740824187803, "grad_norm": 0.6497521996498108, "learning_rate": 4.064218692332128e-05, "loss": 0.1301, "step": 56890 }, { "epoch": 2.0677374809215787, "grad_norm": 0.5239264369010925, "learning_rate": 4.063813853481833e-05, "loss": 0.0843, "step": 56900 }, { "epoch": 2.0681008794243767, "grad_norm": 0.7263264060020447, "learning_rate": 4.0634089472523626e-05, "loss": 0.0928, "step": 56910 }, { "epoch": 2.068464277927175, "grad_norm": 0.6024682521820068, "learning_rate": 4.063003973661164e-05, "loss": 0.1402, "step": 56920 }, { "epoch": 2.068827676429973, "grad_norm": 0.8949540853500366, "learning_rate": 4.0625989327256855e-05, "loss": 0.1171, "step": 56930 }, { "epoch": 2.069191074932771, "grad_norm": 0.9099026322364807, "learning_rate": 4.062193824463378e-05, "loss": 0.1184, "step": 56940 }, { "epoch": 2.0695544734355695, "grad_norm": 8.538558959960938, "learning_rate": 4.0617886488916976e-05, "loss": 0.0981, "step": 56950 }, { "epoch": 2.0699178719383675, "grad_norm": 0.8870179653167725, "learning_rate": 4.061383406028101e-05, "loss": 0.0796, "step": 56960 }, { "epoch": 2.070281270441166, "grad_norm": 0.8997694253921509, "learning_rate": 4.060978095890049e-05, "loss": 0.1289, "step": 56970 }, { "epoch": 2.070644668943964, "grad_norm": 1.018744707107544, "learning_rate": 4.060572718495004e-05, "loss": 0.1006, "step": 56980 }, { "epoch": 2.071008067446762, "grad_norm": 0.5158216953277588, "learning_rate": 4.0601672738604346e-05, "loss": 0.161, "step": 56990 }, { "epoch": 2.0713714659495603, "grad_norm": 1.025295615196228, "learning_rate": 4.059761762003807e-05, "loss": 0.086, "step": 57000 }, { "epoch": 2.0713714659495603, "eval_loss": 0.34686627984046936, "eval_runtime": 180.463, "eval_samples_per_second": 41.083, 
"eval_steps_per_second": 5.137, "eval_wer": 0.16041897362354093, "step": 57000 }, { "epoch": 2.0717348644523583, "grad_norm": 3.3658320903778076, "learning_rate": 4.0593561829425955e-05, "loss": 0.1124, "step": 57010 }, { "epoch": 2.0720982629551568, "grad_norm": 7.979375839233398, "learning_rate": 4.058950536694274e-05, "loss": 0.133, "step": 57020 }, { "epoch": 2.0724616614579547, "grad_norm": 0.7676217555999756, "learning_rate": 4.058544823276321e-05, "loss": 0.1096, "step": 57030 }, { "epoch": 2.072825059960753, "grad_norm": 0.6934232711791992, "learning_rate": 4.058139042706216e-05, "loss": 0.1132, "step": 57040 }, { "epoch": 2.073188458463551, "grad_norm": 0.9430510401725769, "learning_rate": 4.057733195001444e-05, "loss": 0.0998, "step": 57050 }, { "epoch": 2.073551856966349, "grad_norm": 3.497431993484497, "learning_rate": 4.057327280179491e-05, "loss": 0.089, "step": 57060 }, { "epoch": 2.0739152554691476, "grad_norm": 0.7105191349983215, "learning_rate": 4.056921298257847e-05, "loss": 0.1243, "step": 57070 }, { "epoch": 2.0742786539719456, "grad_norm": 7.004267692565918, "learning_rate": 4.0565152492540034e-05, "loss": 0.1229, "step": 57080 }, { "epoch": 2.074642052474744, "grad_norm": 0.42751577496528625, "learning_rate": 4.0561091331854555e-05, "loss": 0.1073, "step": 57090 }, { "epoch": 2.075005450977542, "grad_norm": 1.4434239864349365, "learning_rate": 4.055702950069702e-05, "loss": 0.1044, "step": 57100 }, { "epoch": 2.07536884948034, "grad_norm": 0.6759265661239624, "learning_rate": 4.055296699924244e-05, "loss": 0.089, "step": 57110 }, { "epoch": 2.0757322479831384, "grad_norm": 0.48018431663513184, "learning_rate": 4.0548903827665846e-05, "loss": 0.106, "step": 57120 }, { "epoch": 2.0760956464859364, "grad_norm": 1.510313630104065, "learning_rate": 4.054483998614231e-05, "loss": 0.1295, "step": 57130 }, { "epoch": 2.076459044988735, "grad_norm": 0.807949960231781, "learning_rate": 4.054077547484693e-05, "loss": 0.4319, "step": 57140 }, { "epoch": 
2.0768224434915328, "grad_norm": 1.3790713548660278, "learning_rate": 4.0536710293954824e-05, "loss": 0.1006, "step": 57150 }, { "epoch": 2.077185841994331, "grad_norm": 0.507022500038147, "learning_rate": 4.0532644443641156e-05, "loss": 0.0751, "step": 57160 }, { "epoch": 2.077549240497129, "grad_norm": 1.7080292701721191, "learning_rate": 4.0528577924081104e-05, "loss": 0.1266, "step": 57170 }, { "epoch": 2.077912638999927, "grad_norm": 1.9344823360443115, "learning_rate": 4.052451073544987e-05, "loss": 0.094, "step": 57180 }, { "epoch": 2.0782760375027256, "grad_norm": 1.0933985710144043, "learning_rate": 4.0520442877922715e-05, "loss": 0.1295, "step": 57190 }, { "epoch": 2.0786394360055236, "grad_norm": 0.6466109752655029, "learning_rate": 4.05163743516749e-05, "loss": 0.108, "step": 57200 }, { "epoch": 2.079002834508322, "grad_norm": 0.5679341554641724, "learning_rate": 4.051230515688171e-05, "loss": 0.1205, "step": 57210 }, { "epoch": 2.07936623301112, "grad_norm": 0.5203921794891357, "learning_rate": 4.0508235293718495e-05, "loss": 0.1202, "step": 57220 }, { "epoch": 2.079729631513918, "grad_norm": 4.9159393310546875, "learning_rate": 4.050416476236059e-05, "loss": 0.0901, "step": 57230 }, { "epoch": 2.0800930300167164, "grad_norm": 0.7785301208496094, "learning_rate": 4.05000935629834e-05, "loss": 0.0885, "step": 57240 }, { "epoch": 2.0804564285195144, "grad_norm": 1.5235596895217896, "learning_rate": 4.049602169576232e-05, "loss": 0.1163, "step": 57250 }, { "epoch": 2.080819827022313, "grad_norm": 0.7558295726776123, "learning_rate": 4.0491949160872805e-05, "loss": 0.0969, "step": 57260 }, { "epoch": 2.081183225525111, "grad_norm": 0.8465888500213623, "learning_rate": 4.048787595849032e-05, "loss": 0.1061, "step": 57270 }, { "epoch": 2.081546624027909, "grad_norm": 1.5089519023895264, "learning_rate": 4.048380208879037e-05, "loss": 0.0918, "step": 57280 }, { "epoch": 2.0819100225307072, "grad_norm": 0.5132701992988586, "learning_rate": 
4.047972755194847e-05, "loss": 0.0971, "step": 57290 }, { "epoch": 2.082273421033505, "grad_norm": 2.1400113105773926, "learning_rate": 4.047565234814019e-05, "loss": 0.0934, "step": 57300 }, { "epoch": 2.0826368195363036, "grad_norm": 0.6013107299804688, "learning_rate": 4.047157647754112e-05, "loss": 0.1349, "step": 57310 }, { "epoch": 2.0830002180391016, "grad_norm": 2.676640272140503, "learning_rate": 4.046749994032686e-05, "loss": 0.13, "step": 57320 }, { "epoch": 2.0833636165419, "grad_norm": 0.9156673550605774, "learning_rate": 4.046342273667306e-05, "loss": 0.1068, "step": 57330 }, { "epoch": 2.083727015044698, "grad_norm": 1.0060288906097412, "learning_rate": 4.04593448667554e-05, "loss": 0.1091, "step": 57340 }, { "epoch": 2.084090413547496, "grad_norm": 2.746476650238037, "learning_rate": 4.0455266330749567e-05, "loss": 0.1001, "step": 57350 }, { "epoch": 2.0844538120502945, "grad_norm": 1.1911275386810303, "learning_rate": 4.04511871288313e-05, "loss": 0.0981, "step": 57360 }, { "epoch": 2.0848172105530924, "grad_norm": 10.354631423950195, "learning_rate": 4.044710726117636e-05, "loss": 0.1354, "step": 57370 }, { "epoch": 2.085180609055891, "grad_norm": 1.2562741041183472, "learning_rate": 4.044302672796053e-05, "loss": 0.0962, "step": 57380 }, { "epoch": 2.085544007558689, "grad_norm": 0.48360708355903625, "learning_rate": 4.043894552935962e-05, "loss": 0.1203, "step": 57390 }, { "epoch": 2.085907406061487, "grad_norm": 1.9491641521453857, "learning_rate": 4.043486366554948e-05, "loss": 0.0984, "step": 57400 }, { "epoch": 2.0862708045642853, "grad_norm": 0.48460692167282104, "learning_rate": 4.0430781136705975e-05, "loss": 0.0984, "step": 57410 }, { "epoch": 2.0866342030670832, "grad_norm": 0.9770491719245911, "learning_rate": 4.042669794300502e-05, "loss": 0.1173, "step": 57420 }, { "epoch": 2.0869976015698817, "grad_norm": 0.4919109642505646, "learning_rate": 4.042261408462255e-05, "loss": 0.1162, "step": 57430 }, { "epoch": 2.0873610000726797, 
"grad_norm": 0.555167019367218, "learning_rate": 4.0418529561734495e-05, "loss": 0.1137, "step": 57440 }, { "epoch": 2.087724398575478, "grad_norm": 0.8190045356750488, "learning_rate": 4.041444437451687e-05, "loss": 0.0972, "step": 57450 }, { "epoch": 2.088087797078276, "grad_norm": 0.5673350691795349, "learning_rate": 4.041035852314568e-05, "loss": 0.0985, "step": 57460 }, { "epoch": 2.088451195581074, "grad_norm": 2.584392547607422, "learning_rate": 4.040627200779697e-05, "loss": 0.1159, "step": 57470 }, { "epoch": 2.0888145940838725, "grad_norm": 3.240104913711548, "learning_rate": 4.040218482864682e-05, "loss": 0.0886, "step": 57480 }, { "epoch": 2.0891779925866705, "grad_norm": 1.0577195882797241, "learning_rate": 4.039809698587132e-05, "loss": 0.1079, "step": 57490 }, { "epoch": 2.089541391089469, "grad_norm": 1.1150219440460205, "learning_rate": 4.039400847964661e-05, "loss": 0.091, "step": 57500 }, { "epoch": 2.089904789592267, "grad_norm": 0.42998915910720825, "learning_rate": 4.038991931014885e-05, "loss": 0.1038, "step": 57510 }, { "epoch": 2.090268188095065, "grad_norm": 1.2772380113601685, "learning_rate": 4.0385829477554216e-05, "loss": 0.1114, "step": 57520 }, { "epoch": 2.0906315865978633, "grad_norm": 0.6975306868553162, "learning_rate": 4.0381738982038944e-05, "loss": 0.1107, "step": 57530 }, { "epoch": 2.0909949851006613, "grad_norm": 0.5228861570358276, "learning_rate": 4.0377647823779257e-05, "loss": 0.1217, "step": 57540 }, { "epoch": 2.0913583836034597, "grad_norm": 0.8819922208786011, "learning_rate": 4.0373556002951444e-05, "loss": 0.1149, "step": 57550 }, { "epoch": 2.0917217821062577, "grad_norm": 0.47613778710365295, "learning_rate": 4.036946351973181e-05, "loss": 0.1037, "step": 57560 }, { "epoch": 2.0920851806090557, "grad_norm": 1.3058334589004517, "learning_rate": 4.0365370374296666e-05, "loss": 0.1195, "step": 57570 }, { "epoch": 2.092448579111854, "grad_norm": 0.9610320329666138, "learning_rate": 4.0361276566822383e-05, "loss": 
0.1286, "step": 57580 }, { "epoch": 2.092811977614652, "grad_norm": 0.9065276980400085, "learning_rate": 4.035718209748536e-05, "loss": 0.1146, "step": 57590 }, { "epoch": 2.0931753761174505, "grad_norm": 1.189386248588562, "learning_rate": 4.0353086966461984e-05, "loss": 0.0853, "step": 57600 }, { "epoch": 2.0931753761174505, "eval_loss": 0.35443732142448425, "eval_runtime": 179.8419, "eval_samples_per_second": 41.225, "eval_steps_per_second": 5.155, "eval_wer": 0.16203459981483834, "step": 57600 }, { "epoch": 2.0935387746202485, "grad_norm": 1.9661606550216675, "learning_rate": 4.034899117392873e-05, "loss": 0.0915, "step": 57610 }, { "epoch": 2.093902173123047, "grad_norm": 1.0128490924835205, "learning_rate": 4.0344894720062055e-05, "loss": 2.6837, "step": 57620 }, { "epoch": 2.094265571625845, "grad_norm": 0.9373286962509155, "learning_rate": 4.0340797605038464e-05, "loss": 0.1149, "step": 57630 }, { "epoch": 2.094628970128643, "grad_norm": 0.7361924052238464, "learning_rate": 4.033669982903449e-05, "loss": 0.1473, "step": 57640 }, { "epoch": 2.0949923686314413, "grad_norm": 0.6584343314170837, "learning_rate": 4.0332601392226673e-05, "loss": 0.0983, "step": 57650 }, { "epoch": 2.0953557671342393, "grad_norm": 3.030869960784912, "learning_rate": 4.0328502294791634e-05, "loss": 0.0874, "step": 57660 }, { "epoch": 2.0957191656370378, "grad_norm": 0.4622768759727478, "learning_rate": 4.0324402536905964e-05, "loss": 0.122, "step": 57670 }, { "epoch": 2.0960825641398357, "grad_norm": 0.7545061111450195, "learning_rate": 4.0320302118746314e-05, "loss": 0.1077, "step": 57680 }, { "epoch": 2.0964459626426337, "grad_norm": 1.838789939880371, "learning_rate": 4.0316201040489355e-05, "loss": 0.1814, "step": 57690 }, { "epoch": 2.096809361145432, "grad_norm": 0.5931621789932251, "learning_rate": 4.031209930231179e-05, "loss": 0.1053, "step": 57700 }, { "epoch": 2.09717275964823, "grad_norm": 0.698026180267334, "learning_rate": 4.0307996904390336e-05, "loss": 0.0843, 
"step": 57710 }, { "epoch": 2.0975361581510286, "grad_norm": 0.663277804851532, "learning_rate": 4.030389384690177e-05, "loss": 0.1109, "step": 57720 }, { "epoch": 2.0978995566538265, "grad_norm": 0.6599337458610535, "learning_rate": 4.0299790130022874e-05, "loss": 0.1007, "step": 57730 }, { "epoch": 2.098262955156625, "grad_norm": 0.5328543186187744, "learning_rate": 4.0295685753930454e-05, "loss": 0.1004, "step": 57740 }, { "epoch": 2.098626353659423, "grad_norm": 0.5420628190040588, "learning_rate": 4.029158071880136e-05, "loss": 0.0959, "step": 57750 }, { "epoch": 2.098989752162221, "grad_norm": 1.3125580549240112, "learning_rate": 4.028747502481245e-05, "loss": 0.0835, "step": 57760 }, { "epoch": 2.0993531506650194, "grad_norm": 0.6729845404624939, "learning_rate": 4.028336867214064e-05, "loss": 0.1596, "step": 57770 }, { "epoch": 2.0997165491678174, "grad_norm": 0.8784998655319214, "learning_rate": 4.0279261660962854e-05, "loss": 0.1261, "step": 57780 }, { "epoch": 2.100079947670616, "grad_norm": 0.8162268996238708, "learning_rate": 4.027515399145605e-05, "loss": 0.0996, "step": 57790 }, { "epoch": 2.1004433461734138, "grad_norm": 0.8188743591308594, "learning_rate": 4.02710456637972e-05, "loss": 0.1043, "step": 57800 }, { "epoch": 2.1008067446762118, "grad_norm": 2.6283457279205322, "learning_rate": 4.0266936678163333e-05, "loss": 0.1207, "step": 57810 }, { "epoch": 2.10117014317901, "grad_norm": 0.9076483249664307, "learning_rate": 4.0262827034731486e-05, "loss": 0.1283, "step": 57820 }, { "epoch": 2.101533541681808, "grad_norm": 1.4384301900863647, "learning_rate": 4.025871673367873e-05, "loss": 0.0942, "step": 57830 }, { "epoch": 2.1018969401846066, "grad_norm": 0.7651816010475159, "learning_rate": 4.025460577518215e-05, "loss": 0.1171, "step": 57840 }, { "epoch": 2.1022603386874046, "grad_norm": 1.075475811958313, "learning_rate": 4.025049415941889e-05, "loss": 0.1002, "step": 57850 }, { "epoch": 2.1026237371902026, "grad_norm": 0.5640189051628113, 
"learning_rate": 4.02463818865661e-05, "loss": 0.0797, "step": 57860 }, { "epoch": 2.102987135693001, "grad_norm": 2.052508592605591, "learning_rate": 4.024226895680097e-05, "loss": 0.114, "step": 57870 }, { "epoch": 2.103350534195799, "grad_norm": 0.8014973998069763, "learning_rate": 4.023815537030068e-05, "loss": 0.1304, "step": 57880 }, { "epoch": 2.1037139326985974, "grad_norm": 0.9665643572807312, "learning_rate": 4.02340411272425e-05, "loss": 0.1191, "step": 57890 }, { "epoch": 2.1040773312013954, "grad_norm": 1.0654706954956055, "learning_rate": 4.02299262278037e-05, "loss": 0.0836, "step": 57900 }, { "epoch": 2.104440729704194, "grad_norm": 1.1803388595581055, "learning_rate": 4.022581067216157e-05, "loss": 0.0988, "step": 57910 }, { "epoch": 2.104804128206992, "grad_norm": 0.5792093276977539, "learning_rate": 4.022169446049342e-05, "loss": 0.1177, "step": 57920 }, { "epoch": 2.10516752670979, "grad_norm": 0.9450294375419617, "learning_rate": 4.021757759297662e-05, "loss": 0.094, "step": 57930 }, { "epoch": 2.1055309252125882, "grad_norm": 0.5335323810577393, "learning_rate": 4.021346006978854e-05, "loss": 0.1358, "step": 57940 }, { "epoch": 2.105894323715386, "grad_norm": 1.1108689308166504, "learning_rate": 4.02093418911066e-05, "loss": 0.0964, "step": 57950 }, { "epoch": 2.1062577222181846, "grad_norm": 0.3482346534729004, "learning_rate": 4.020522305710823e-05, "loss": 0.0928, "step": 57960 }, { "epoch": 2.1066211207209826, "grad_norm": 0.6527045369148254, "learning_rate": 4.02011035679709e-05, "loss": 0.1033, "step": 57970 }, { "epoch": 2.1069845192237806, "grad_norm": 0.9047361612319946, "learning_rate": 4.019698342387211e-05, "loss": 0.0939, "step": 57980 }, { "epoch": 2.107347917726579, "grad_norm": 1.3960262537002563, "learning_rate": 4.019286262498937e-05, "loss": 0.1275, "step": 57990 }, { "epoch": 2.107711316229377, "grad_norm": 0.49838632345199585, "learning_rate": 4.0188741171500234e-05, "loss": 0.1133, "step": 58000 }, { "epoch": 
2.1080747147321754, "grad_norm": 0.6651538014411926, "learning_rate": 4.0184619063582284e-05, "loss": 0.1361, "step": 58010 }, { "epoch": 2.1084381132349734, "grad_norm": 0.7778026461601257, "learning_rate": 4.018049630141313e-05, "loss": 0.117, "step": 58020 }, { "epoch": 2.108801511737772, "grad_norm": 1.0851924419403076, "learning_rate": 4.0176372885170396e-05, "loss": 0.096, "step": 58030 }, { "epoch": 2.10916491024057, "grad_norm": 0.5920321345329285, "learning_rate": 4.017224881503176e-05, "loss": 0.1812, "step": 58040 }, { "epoch": 2.109528308743368, "grad_norm": 1.2104512453079224, "learning_rate": 4.0168124091174896e-05, "loss": 0.1002, "step": 58050 }, { "epoch": 2.1098917072461663, "grad_norm": 0.8000385761260986, "learning_rate": 4.016399871377754e-05, "loss": 0.099, "step": 58060 }, { "epoch": 2.1102551057489642, "grad_norm": 0.9628605246543884, "learning_rate": 4.015987268301742e-05, "loss": 0.1322, "step": 58070 }, { "epoch": 2.1106185042517627, "grad_norm": 1.1031752824783325, "learning_rate": 4.015574599907235e-05, "loss": 0.1089, "step": 58080 }, { "epoch": 2.1109819027545607, "grad_norm": 0.7440558075904846, "learning_rate": 4.0151618662120084e-05, "loss": 0.1255, "step": 58090 }, { "epoch": 2.1113453012573586, "grad_norm": 0.7492482662200928, "learning_rate": 4.0147490672338494e-05, "loss": 0.0787, "step": 58100 }, { "epoch": 2.111708699760157, "grad_norm": 1.2699692249298096, "learning_rate": 4.0143362029905415e-05, "loss": 0.0835, "step": 58110 }, { "epoch": 2.112072098262955, "grad_norm": 0.5075403451919556, "learning_rate": 4.013923273499876e-05, "loss": 0.113, "step": 58120 }, { "epoch": 2.1124354967657535, "grad_norm": 0.47074371576309204, "learning_rate": 4.013510278779643e-05, "loss": 0.1045, "step": 58130 }, { "epoch": 2.1127988952685515, "grad_norm": 1.9055145978927612, "learning_rate": 4.013097218847636e-05, "loss": 0.1096, "step": 58140 }, { "epoch": 2.1131622937713495, "grad_norm": 0.8922753930091858, "learning_rate": 
4.0126840937216545e-05, "loss": 0.1129, "step": 58150 }, { "epoch": 2.113525692274148, "grad_norm": 1.5678116083145142, "learning_rate": 4.012270903419497e-05, "loss": 0.1314, "step": 58160 }, { "epoch": 2.113889090776946, "grad_norm": 1.4676604270935059, "learning_rate": 4.0118576479589675e-05, "loss": 0.119, "step": 58170 }, { "epoch": 2.1142524892797443, "grad_norm": 1.0103446245193481, "learning_rate": 4.0114443273578714e-05, "loss": 0.1036, "step": 58180 }, { "epoch": 2.1146158877825423, "grad_norm": 0.5744931101799011, "learning_rate": 4.011030941634016e-05, "loss": 0.1493, "step": 58190 }, { "epoch": 2.1149792862853407, "grad_norm": 1.430180311203003, "learning_rate": 4.010617490805214e-05, "loss": 0.0928, "step": 58200 }, { "epoch": 2.1149792862853407, "eval_loss": 0.3442366421222687, "eval_runtime": 180.0152, "eval_samples_per_second": 41.185, "eval_steps_per_second": 5.15, "eval_wer": 0.16051881569154247, "step": 58200 }, { "epoch": 2.1153426847881387, "grad_norm": 0.7342690825462341, "learning_rate": 4.0102039748892786e-05, "loss": 0.0878, "step": 58210 }, { "epoch": 2.1157060832909367, "grad_norm": 1.540487289428711, "learning_rate": 4.0097903939040284e-05, "loss": 0.1158, "step": 58220 }, { "epoch": 2.116069481793735, "grad_norm": 0.9415495991706848, "learning_rate": 4.009376747867281e-05, "loss": 0.105, "step": 58230 }, { "epoch": 2.116432880296533, "grad_norm": 0.8002855181694031, "learning_rate": 4.008963036796861e-05, "loss": 0.0855, "step": 58240 }, { "epoch": 2.1167962787993315, "grad_norm": 0.7064021825790405, "learning_rate": 4.008549260710591e-05, "loss": 0.1319, "step": 58250 }, { "epoch": 2.1171596773021295, "grad_norm": 0.5867117047309875, "learning_rate": 4.008135419626302e-05, "loss": 0.1232, "step": 58260 }, { "epoch": 2.1175230758049275, "grad_norm": 0.7439972162246704, "learning_rate": 4.007721513561824e-05, "loss": 0.1359, "step": 58270 }, { "epoch": 2.117886474307726, "grad_norm": 0.9335612058639526, "learning_rate": 
4.007307542534989e-05, "loss": 0.0763, "step": 58280 }, { "epoch": 2.118249872810524, "grad_norm": 0.6899220943450928, "learning_rate": 4.006893506563637e-05, "loss": 0.129, "step": 58290 }, { "epoch": 2.1186132713133223, "grad_norm": 0.9896695613861084, "learning_rate": 4.006479405665604e-05, "loss": 0.0756, "step": 58300 }, { "epoch": 2.1189766698161203, "grad_norm": 0.8844881057739258, "learning_rate": 4.0060652398587335e-05, "loss": 0.111, "step": 58310 }, { "epoch": 2.1193400683189187, "grad_norm": 0.3384082615375519, "learning_rate": 4.0056510091608706e-05, "loss": 0.1182, "step": 58320 }, { "epoch": 2.1197034668217167, "grad_norm": 1.8488768339157104, "learning_rate": 4.005236713589863e-05, "loss": 0.1045, "step": 58330 }, { "epoch": 2.1200668653245147, "grad_norm": 4.640181064605713, "learning_rate": 4.004822353163561e-05, "loss": 0.1085, "step": 58340 }, { "epoch": 2.120430263827313, "grad_norm": 2.7104008197784424, "learning_rate": 4.004407927899817e-05, "loss": 0.0996, "step": 58350 }, { "epoch": 2.120793662330111, "grad_norm": 0.8320967555046082, "learning_rate": 4.00399343781649e-05, "loss": 0.0887, "step": 58360 }, { "epoch": 2.1211570608329096, "grad_norm": 0.5715747475624084, "learning_rate": 4.003578882931436e-05, "loss": 0.0961, "step": 58370 }, { "epoch": 2.1215204593357075, "grad_norm": 1.0619550943374634, "learning_rate": 4.003164263262518e-05, "loss": 0.1733, "step": 58380 }, { "epoch": 2.1218838578385055, "grad_norm": 0.6880344748497009, "learning_rate": 4.0027495788275995e-05, "loss": 0.1722, "step": 58390 }, { "epoch": 2.122247256341304, "grad_norm": 0.503822922706604, "learning_rate": 4.0023348296445483e-05, "loss": 0.0772, "step": 58400 }, { "epoch": 2.122610654844102, "grad_norm": 0.4914768636226654, "learning_rate": 4.001920015731235e-05, "loss": 0.0752, "step": 58410 }, { "epoch": 2.1229740533469004, "grad_norm": 0.7141969799995422, "learning_rate": 4.001505137105532e-05, "loss": 0.1247, "step": 58420 }, { "epoch": 2.1233374518496984, 
"grad_norm": 1.7771844863891602, "learning_rate": 4.0010901937853164e-05, "loss": 0.123, "step": 58430 }, { "epoch": 2.1237008503524963, "grad_norm": 0.8636963367462158, "learning_rate": 4.0006751857884636e-05, "loss": 0.1224, "step": 58440 }, { "epoch": 2.1240642488552948, "grad_norm": 0.6579970121383667, "learning_rate": 4.000260113132857e-05, "loss": 0.0992, "step": 58450 }, { "epoch": 2.1244276473580928, "grad_norm": 0.5212269425392151, "learning_rate": 3.99984497583638e-05, "loss": 0.1097, "step": 58460 }, { "epoch": 2.124791045860891, "grad_norm": 0.44934549927711487, "learning_rate": 3.999429773916919e-05, "loss": 0.1304, "step": 58470 }, { "epoch": 2.125154444363689, "grad_norm": 0.7750062942504883, "learning_rate": 3.999014507392365e-05, "loss": 0.1233, "step": 58480 }, { "epoch": 2.1255178428664876, "grad_norm": 0.9064908623695374, "learning_rate": 3.9985991762806087e-05, "loss": 0.2681, "step": 58490 }, { "epoch": 2.1258812413692856, "grad_norm": 0.9376353025436401, "learning_rate": 3.998183780599546e-05, "loss": 0.0911, "step": 58500 }, { "epoch": 2.1262446398720836, "grad_norm": 1.2456096410751343, "learning_rate": 3.9977683203670755e-05, "loss": 0.1072, "step": 58510 }, { "epoch": 2.126608038374882, "grad_norm": 1.1492791175842285, "learning_rate": 3.997352795601096e-05, "loss": 0.1181, "step": 58520 }, { "epoch": 2.12697143687768, "grad_norm": 1.6713447570800781, "learning_rate": 3.996937206319513e-05, "loss": 0.1018, "step": 58530 }, { "epoch": 2.1273348353804784, "grad_norm": 1.8490865230560303, "learning_rate": 3.996521552540231e-05, "loss": 0.1267, "step": 58540 }, { "epoch": 2.1276982338832764, "grad_norm": 0.8250418305397034, "learning_rate": 3.9961058342811606e-05, "loss": 0.1118, "step": 58550 }, { "epoch": 2.1280616323860744, "grad_norm": 1.141861915588379, "learning_rate": 3.995690051560213e-05, "loss": 0.0958, "step": 58560 }, { "epoch": 2.128425030888873, "grad_norm": 0.9268454313278198, "learning_rate": 3.995274204395303e-05, "loss": 
0.1196, "step": 58570 }, { "epoch": 2.128788429391671, "grad_norm": 0.6160836219787598, "learning_rate": 3.994858292804347e-05, "loss": 0.1017, "step": 58580 }, { "epoch": 2.129151827894469, "grad_norm": 0.9815055131912231, "learning_rate": 3.994442316805266e-05, "loss": 0.0977, "step": 58590 }, { "epoch": 2.129515226397267, "grad_norm": 0.887614369392395, "learning_rate": 3.994026276415983e-05, "loss": 0.0924, "step": 58600 }, { "epoch": 2.1298786249000656, "grad_norm": 1.7379142045974731, "learning_rate": 3.993610171654424e-05, "loss": 0.1115, "step": 58610 }, { "epoch": 2.1302420234028636, "grad_norm": 0.9149182438850403, "learning_rate": 3.993194002538516e-05, "loss": 0.4902, "step": 58620 }, { "epoch": 2.1306054219056616, "grad_norm": 0.4498516619205475, "learning_rate": 3.992777769086192e-05, "loss": 0.1172, "step": 58630 }, { "epoch": 2.13096882040846, "grad_norm": 0.8547645807266235, "learning_rate": 3.992361471315385e-05, "loss": 0.1816, "step": 58640 }, { "epoch": 2.131332218911258, "grad_norm": 0.6961509585380554, "learning_rate": 3.991945109244032e-05, "loss": 0.1024, "step": 58650 }, { "epoch": 2.1316956174140564, "grad_norm": 0.989095151424408, "learning_rate": 3.9915286828900725e-05, "loss": 0.0871, "step": 58660 }, { "epoch": 2.1320590159168544, "grad_norm": 0.6588122844696045, "learning_rate": 3.9911121922714496e-05, "loss": 0.2563, "step": 58670 }, { "epoch": 2.1324224144196524, "grad_norm": 0.6134093999862671, "learning_rate": 3.9906956374061075e-05, "loss": 0.1274, "step": 58680 }, { "epoch": 2.132785812922451, "grad_norm": 1.8236083984375, "learning_rate": 3.990279018311993e-05, "loss": 0.1083, "step": 58690 }, { "epoch": 2.133149211425249, "grad_norm": 0.8734591007232666, "learning_rate": 3.989862335007059e-05, "loss": 0.0925, "step": 58700 }, { "epoch": 2.1335126099280473, "grad_norm": 0.5155262351036072, "learning_rate": 3.9894455875092587e-05, "loss": 0.1428, "step": 58710 }, { "epoch": 2.1338760084308452, "grad_norm": 1.4302911758422852, 
"learning_rate": 3.989028775836546e-05, "loss": 0.1089, "step": 58720 }, { "epoch": 2.1342394069336432, "grad_norm": 1.1335387229919434, "learning_rate": 3.988611900006882e-05, "loss": 0.1031, "step": 58730 }, { "epoch": 2.1346028054364417, "grad_norm": 0.45461785793304443, "learning_rate": 3.988194960038228e-05, "loss": 0.1125, "step": 58740 }, { "epoch": 2.1349662039392396, "grad_norm": 2.3098812103271484, "learning_rate": 3.9877779559485484e-05, "loss": 0.6339, "step": 58750 }, { "epoch": 2.135329602442038, "grad_norm": 2.491065502166748, "learning_rate": 3.98736088775581e-05, "loss": 0.1011, "step": 58760 }, { "epoch": 2.135693000944836, "grad_norm": 2.0698654651641846, "learning_rate": 3.986943755477983e-05, "loss": 1.3794, "step": 58770 }, { "epoch": 2.1360563994476345, "grad_norm": 1.4950264692306519, "learning_rate": 3.9865265591330394e-05, "loss": 0.108, "step": 58780 }, { "epoch": 2.1364197979504325, "grad_norm": 0.3976856768131256, "learning_rate": 3.986109298738957e-05, "loss": 0.1407, "step": 58790 }, { "epoch": 2.1367831964532304, "grad_norm": 1.319399356842041, "learning_rate": 3.985691974313711e-05, "loss": 0.1168, "step": 58800 }, { "epoch": 2.1367831964532304, "eval_loss": 0.33521324396133423, "eval_runtime": 180.0938, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.16158984878464974, "step": 58800 }, { "epoch": 2.137146594956029, "grad_norm": 3.924207925796509, "learning_rate": 3.985274585875284e-05, "loss": 0.0899, "step": 58810 }, { "epoch": 2.137509993458827, "grad_norm": 0.7248135805130005, "learning_rate": 3.984857133441661e-05, "loss": 0.1256, "step": 58820 }, { "epoch": 2.1378733919616253, "grad_norm": 0.5945442914962769, "learning_rate": 3.984439617030826e-05, "loss": 0.0891, "step": 58830 }, { "epoch": 2.1382367904644233, "grad_norm": 2.0642237663269043, "learning_rate": 3.98402203666077e-05, "loss": 0.1359, "step": 58840 }, { "epoch": 2.1386001889672213, "grad_norm": 1.0051828622817993, "learning_rate": 
3.983604392349485e-05, "loss": 0.099, "step": 58850 }, { "epoch": 2.1389635874700197, "grad_norm": 1.7241709232330322, "learning_rate": 3.983186684114965e-05, "loss": 0.1353, "step": 58860 }, { "epoch": 2.1393269859728177, "grad_norm": 0.6430028080940247, "learning_rate": 3.9827689119752076e-05, "loss": 0.6511, "step": 58870 }, { "epoch": 2.139690384475616, "grad_norm": 0.76287442445755, "learning_rate": 3.9823510759482134e-05, "loss": 0.1082, "step": 58880 }, { "epoch": 2.140053782978414, "grad_norm": 0.6280699372291565, "learning_rate": 3.981933176051986e-05, "loss": 0.114, "step": 58890 }, { "epoch": 2.1404171814812125, "grad_norm": 0.8308879733085632, "learning_rate": 3.9815152123045305e-05, "loss": 0.1072, "step": 58900 }, { "epoch": 2.1407805799840105, "grad_norm": 0.5416497588157654, "learning_rate": 3.981097184723856e-05, "loss": 0.1809, "step": 58910 }, { "epoch": 2.1411439784868085, "grad_norm": 0.5450316071510315, "learning_rate": 3.9806790933279745e-05, "loss": 0.1198, "step": 58920 }, { "epoch": 2.141507376989607, "grad_norm": 0.6177099347114563, "learning_rate": 3.980260938134898e-05, "loss": 0.0926, "step": 58930 }, { "epoch": 2.141870775492405, "grad_norm": 1.365262746810913, "learning_rate": 3.9798427191626455e-05, "loss": 0.0998, "step": 58940 }, { "epoch": 2.1422341739952033, "grad_norm": 0.4065784513950348, "learning_rate": 3.979424436429234e-05, "loss": 0.0958, "step": 58950 }, { "epoch": 2.1425975724980013, "grad_norm": 0.7803066372871399, "learning_rate": 3.979006089952688e-05, "loss": 0.0997, "step": 58960 }, { "epoch": 2.1429609710007993, "grad_norm": 0.41044801473617554, "learning_rate": 3.978587679751032e-05, "loss": 0.1265, "step": 58970 }, { "epoch": 2.1433243695035977, "grad_norm": 1.1145354509353638, "learning_rate": 3.9781692058422936e-05, "loss": 0.0965, "step": 58980 }, { "epoch": 2.1436877680063957, "grad_norm": 0.6286850571632385, "learning_rate": 3.977750668244504e-05, "loss": 0.1237, "step": 58990 }, { "epoch": 
2.144051166509194, "grad_norm": 0.7701926827430725, "learning_rate": 3.977332066975695e-05, "loss": 0.0984, "step": 59000 }, { "epoch": 2.144414565011992, "grad_norm": 2.205230236053467, "learning_rate": 3.976913402053904e-05, "loss": 0.1007, "step": 59010 }, { "epoch": 2.14477796351479, "grad_norm": 0.7837009429931641, "learning_rate": 3.97649467349717e-05, "loss": 0.1412, "step": 59020 }, { "epoch": 2.1451413620175885, "grad_norm": 1.4856473207473755, "learning_rate": 3.9760758813235336e-05, "loss": 0.1069, "step": 59030 }, { "epoch": 2.1455047605203865, "grad_norm": 0.7916889190673828, "learning_rate": 3.975657025551039e-05, "loss": 0.1216, "step": 59040 }, { "epoch": 2.145868159023185, "grad_norm": 2.3275558948516846, "learning_rate": 3.975238106197734e-05, "loss": 0.0862, "step": 59050 }, { "epoch": 2.146231557525983, "grad_norm": 1.2247077226638794, "learning_rate": 3.974819123281668e-05, "loss": 0.09, "step": 59060 }, { "epoch": 2.1465949560287814, "grad_norm": 3.578880548477173, "learning_rate": 3.9744000768208926e-05, "loss": 0.1694, "step": 59070 }, { "epoch": 2.1469583545315793, "grad_norm": 0.7688897848129272, "learning_rate": 3.973980966833465e-05, "loss": 0.1064, "step": 59080 }, { "epoch": 2.1473217530343773, "grad_norm": 0.7360697388648987, "learning_rate": 3.973561793337441e-05, "loss": 0.1038, "step": 59090 }, { "epoch": 2.1476851515371758, "grad_norm": 1.5406807661056519, "learning_rate": 3.9731425563508826e-05, "loss": 0.0949, "step": 59100 }, { "epoch": 2.1480485500399737, "grad_norm": 1.3897796869277954, "learning_rate": 3.972723255891853e-05, "loss": 0.1097, "step": 59110 }, { "epoch": 2.148411948542772, "grad_norm": 0.9940290451049805, "learning_rate": 3.9723038919784176e-05, "loss": 0.1342, "step": 59120 }, { "epoch": 2.14877534704557, "grad_norm": 1.5705652236938477, "learning_rate": 3.971884464628647e-05, "loss": 0.1225, "step": 59130 }, { "epoch": 2.149138745548368, "grad_norm": 0.8528106212615967, "learning_rate": 3.971464973860611e-05, 
"loss": 0.1127, "step": 59140 }, { "epoch": 2.1495021440511666, "grad_norm": 0.5715293884277344, "learning_rate": 3.971045419692385e-05, "loss": 0.1089, "step": 59150 }, { "epoch": 2.1498655425539646, "grad_norm": 1.5109196901321411, "learning_rate": 3.970625802142046e-05, "loss": 0.0809, "step": 59160 }, { "epoch": 2.150228941056763, "grad_norm": 0.4277292788028717, "learning_rate": 3.9702061212276744e-05, "loss": 0.1368, "step": 59170 }, { "epoch": 2.150592339559561, "grad_norm": 0.692513644695282, "learning_rate": 3.969786376967351e-05, "loss": 0.1399, "step": 59180 }, { "epoch": 2.1509557380623594, "grad_norm": 3.4921178817749023, "learning_rate": 3.969366569379162e-05, "loss": 0.1315, "step": 59190 }, { "epoch": 2.1513191365651574, "grad_norm": 1.5540839433670044, "learning_rate": 3.9689466984811964e-05, "loss": 0.1015, "step": 59200 }, { "epoch": 2.1516825350679554, "grad_norm": 0.6076385378837585, "learning_rate": 3.9685267642915436e-05, "loss": 0.111, "step": 59210 }, { "epoch": 2.152045933570754, "grad_norm": 0.5078336596488953, "learning_rate": 3.968106766828298e-05, "loss": 0.1122, "step": 59220 }, { "epoch": 2.152409332073552, "grad_norm": 1.294973373413086, "learning_rate": 3.967686706109554e-05, "loss": 0.1202, "step": 59230 }, { "epoch": 2.15277273057635, "grad_norm": 0.5963008999824524, "learning_rate": 3.967350612002765e-05, "loss": 6.9715, "step": 59240 }, { "epoch": 2.153136129079148, "grad_norm": 0.9680716395378113, "learning_rate": 3.966930437469738e-05, "loss": 0.761, "step": 59250 }, { "epoch": 2.153499527581946, "grad_norm": 0.5637746453285217, "learning_rate": 3.966510199731898e-05, "loss": 0.127, "step": 59260 }, { "epoch": 2.1538629260847446, "grad_norm": 0.5631716251373291, "learning_rate": 3.9660898988073514e-05, "loss": 0.1065, "step": 59270 }, { "epoch": 2.1542263245875426, "grad_norm": 2.773534059524536, "learning_rate": 3.965669534714208e-05, "loss": 0.1039, "step": 59280 }, { "epoch": 2.154589723090341, "grad_norm": 
0.5603722333908081, "learning_rate": 3.965249107470579e-05, "loss": 0.1243, "step": 59290 }, { "epoch": 2.154953121593139, "grad_norm": 0.8897901177406311, "learning_rate": 3.964828617094579e-05, "loss": 0.0867, "step": 59300 }, { "epoch": 2.155316520095937, "grad_norm": 0.9018154144287109, "learning_rate": 3.9644080636043255e-05, "loss": 0.1066, "step": 59310 }, { "epoch": 2.1556799185987354, "grad_norm": 1.247503399848938, "learning_rate": 3.963987447017939e-05, "loss": 0.1193, "step": 59320 }, { "epoch": 2.1560433171015334, "grad_norm": 0.5965039730072021, "learning_rate": 3.963566767353544e-05, "loss": 0.1065, "step": 59330 }, { "epoch": 2.156406715604332, "grad_norm": 0.6746231913566589, "learning_rate": 3.9631460246292616e-05, "loss": 0.1096, "step": 59340 }, { "epoch": 2.15677011410713, "grad_norm": 0.8131401538848877, "learning_rate": 3.9627252188632246e-05, "loss": 0.0903, "step": 59350 }, { "epoch": 2.1571335126099282, "grad_norm": 0.8984467387199402, "learning_rate": 3.962304350073562e-05, "loss": 0.1095, "step": 59360 }, { "epoch": 2.1574969111127262, "grad_norm": 0.7640008926391602, "learning_rate": 3.961883418278408e-05, "loss": 0.1255, "step": 59370 }, { "epoch": 2.157860309615524, "grad_norm": 0.522688627243042, "learning_rate": 3.961462423495899e-05, "loss": 0.1144, "step": 59380 }, { "epoch": 2.1582237081183226, "grad_norm": 0.4221755266189575, "learning_rate": 3.961041365744174e-05, "loss": 0.1031, "step": 59390 }, { "epoch": 2.1585871066211206, "grad_norm": 1.1756844520568848, "learning_rate": 3.960620245041374e-05, "loss": 0.1034, "step": 59400 }, { "epoch": 2.1585871066211206, "eval_loss": 0.33832496404647827, "eval_runtime": 179.7531, "eval_samples_per_second": 41.245, "eval_steps_per_second": 5.157, "eval_wer": 0.16059142810463448, "step": 59400 }, { "epoch": 2.158950505123919, "grad_norm": 0.6600112915039062, "learning_rate": 3.960199061405646e-05, "loss": 0.1055, "step": 59410 }, { "epoch": 2.159313903626717, "grad_norm": 
0.6152768135070801, "learning_rate": 3.959777814855135e-05, "loss": 0.1349, "step": 59420 }, { "epoch": 2.159677302129515, "grad_norm": 0.9786444306373596, "learning_rate": 3.959356505407992e-05, "loss": 0.1021, "step": 59430 }, { "epoch": 2.1600407006323135, "grad_norm": 1.3649888038635254, "learning_rate": 3.9589351330823697e-05, "loss": 0.1002, "step": 59440 }, { "epoch": 2.1604040991351114, "grad_norm": 0.8674107789993286, "learning_rate": 3.958513697896423e-05, "loss": 0.0963, "step": 59450 }, { "epoch": 2.16076749763791, "grad_norm": 0.7542990446090698, "learning_rate": 3.9580921998683114e-05, "loss": 0.0837, "step": 59460 }, { "epoch": 2.161130896140708, "grad_norm": 1.032072901725769, "learning_rate": 3.957670639016194e-05, "loss": 0.1991, "step": 59470 }, { "epoch": 2.1614942946435063, "grad_norm": 0.5288215279579163, "learning_rate": 3.9572490153582354e-05, "loss": 0.0821, "step": 59480 }, { "epoch": 2.1618576931463043, "grad_norm": 1.010878562927246, "learning_rate": 3.956827328912602e-05, "loss": 0.1697, "step": 59490 }, { "epoch": 2.1622210916491023, "grad_norm": 0.9703467488288879, "learning_rate": 3.956405579697462e-05, "loss": 0.135, "step": 59500 }, { "epoch": 2.1625844901519007, "grad_norm": 0.8474395275115967, "learning_rate": 3.9559837677309874e-05, "loss": 0.0969, "step": 59510 }, { "epoch": 2.1629478886546987, "grad_norm": 0.6262643933296204, "learning_rate": 3.955561893031353e-05, "loss": 0.6284, "step": 59520 }, { "epoch": 2.163311287157497, "grad_norm": 1.7965657711029053, "learning_rate": 3.955139955616735e-05, "loss": 0.103, "step": 59530 }, { "epoch": 2.163674685660295, "grad_norm": 1.317929744720459, "learning_rate": 3.954717955505314e-05, "loss": 0.1266, "step": 59540 }, { "epoch": 2.164038084163093, "grad_norm": 2.5945920944213867, "learning_rate": 3.954295892715272e-05, "loss": 0.2541, "step": 59550 }, { "epoch": 2.1644014826658915, "grad_norm": 0.8854953050613403, "learning_rate": 3.9538737672647955e-05, "loss": 0.0872, "step": 
59560 }, { "epoch": 2.1647648811686895, "grad_norm": 1.2449252605438232, "learning_rate": 3.953451579172069e-05, "loss": 0.1297, "step": 59570 }, { "epoch": 2.165128279671488, "grad_norm": 0.9489690661430359, "learning_rate": 3.9530293284552876e-05, "loss": 0.1213, "step": 59580 }, { "epoch": 2.165491678174286, "grad_norm": 1.2009365558624268, "learning_rate": 3.952607015132642e-05, "loss": 0.1116, "step": 59590 }, { "epoch": 2.165855076677084, "grad_norm": 2.0308213233947754, "learning_rate": 3.952184639222327e-05, "loss": 0.104, "step": 59600 }, { "epoch": 2.1662184751798823, "grad_norm": 0.9132998585700989, "learning_rate": 3.951762200742544e-05, "loss": 0.0821, "step": 59610 }, { "epoch": 2.1665818736826803, "grad_norm": 0.3481888473033905, "learning_rate": 3.951339699711493e-05, "loss": 0.1061, "step": 59620 }, { "epoch": 2.1669452721854787, "grad_norm": 1.2526309490203857, "learning_rate": 3.950917136147378e-05, "loss": 0.134, "step": 59630 }, { "epoch": 2.1673086706882767, "grad_norm": 1.3150311708450317, "learning_rate": 3.950494510068407e-05, "loss": 0.1387, "step": 59640 }, { "epoch": 2.167672069191075, "grad_norm": 0.6540773510932922, "learning_rate": 3.950071821492787e-05, "loss": 0.1038, "step": 59650 }, { "epoch": 2.168035467693873, "grad_norm": 0.7014539837837219, "learning_rate": 3.949649070438732e-05, "loss": 0.1047, "step": 59660 }, { "epoch": 2.168398866196671, "grad_norm": 1.7086548805236816, "learning_rate": 3.9492262569244566e-05, "loss": 0.1298, "step": 59670 }, { "epoch": 2.1687622646994695, "grad_norm": 0.5339615941047668, "learning_rate": 3.9488033809681785e-05, "loss": 0.0818, "step": 59680 }, { "epoch": 2.1691256632022675, "grad_norm": 1.4150161743164062, "learning_rate": 3.9483804425881167e-05, "loss": 0.0952, "step": 59690 }, { "epoch": 2.169489061705066, "grad_norm": 1.182112216949463, "learning_rate": 3.947957441802496e-05, "loss": 0.0855, "step": 59700 }, { "epoch": 2.169852460207864, "grad_norm": 22.265352249145508, 
"learning_rate": 3.94753437862954e-05, "loss": 0.2064, "step": 59710 }, { "epoch": 2.170215858710662, "grad_norm": 1.3365362882614136, "learning_rate": 3.9471112530874784e-05, "loss": 0.1314, "step": 59720 }, { "epoch": 2.1705792572134603, "grad_norm": 0.5914321541786194, "learning_rate": 3.946688065194543e-05, "loss": 0.1072, "step": 59730 }, { "epoch": 2.1709426557162583, "grad_norm": 1.0717413425445557, "learning_rate": 3.946264814968964e-05, "loss": 0.1144, "step": 59740 }, { "epoch": 2.1713060542190568, "grad_norm": 0.7842442393302917, "learning_rate": 3.945841502428981e-05, "loss": 0.0989, "step": 59750 }, { "epoch": 2.1716694527218547, "grad_norm": 0.4757680594921112, "learning_rate": 3.9454181275928315e-05, "loss": 0.0909, "step": 59760 }, { "epoch": 2.172032851224653, "grad_norm": 0.9192887544631958, "learning_rate": 3.944994690478758e-05, "loss": 2.1207, "step": 59770 }, { "epoch": 2.172396249727451, "grad_norm": 1.9832956790924072, "learning_rate": 3.9445711911050055e-05, "loss": 0.1235, "step": 59780 }, { "epoch": 2.172759648230249, "grad_norm": 12.941081047058105, "learning_rate": 3.944147629489819e-05, "loss": 0.3816, "step": 59790 }, { "epoch": 2.1731230467330476, "grad_norm": 1.5549241304397583, "learning_rate": 3.9437240056514504e-05, "loss": 0.109, "step": 59800 }, { "epoch": 2.1734864452358456, "grad_norm": 3.1633951663970947, "learning_rate": 3.9433003196081495e-05, "loss": 0.1156, "step": 59810 }, { "epoch": 2.173849843738644, "grad_norm": 1.274003505706787, "learning_rate": 3.9428765713781744e-05, "loss": 0.0984, "step": 59820 }, { "epoch": 2.174213242241442, "grad_norm": 0.5220558047294617, "learning_rate": 3.9424527609797825e-05, "loss": 0.1151, "step": 59830 }, { "epoch": 2.17457664074424, "grad_norm": 1.241507887840271, "learning_rate": 3.942028888431232e-05, "loss": 0.1219, "step": 59840 }, { "epoch": 2.1749400392470384, "grad_norm": 0.5816989541053772, "learning_rate": 3.9416049537507875e-05, "loss": 0.0976, "step": 59850 }, { "epoch": 
2.1753034377498364, "grad_norm": 0.6653616428375244, "learning_rate": 3.941180956956715e-05, "loss": 0.1196, "step": 59860 }, { "epoch": 2.175666836252635, "grad_norm": 0.6018986105918884, "learning_rate": 3.940756898067283e-05, "loss": 0.1151, "step": 59870 }, { "epoch": 2.176030234755433, "grad_norm": 0.5224238038063049, "learning_rate": 3.940332777100762e-05, "loss": 0.0892, "step": 59880 }, { "epoch": 2.1763936332582308, "grad_norm": 0.7985048294067383, "learning_rate": 3.939908594075427e-05, "loss": 0.1244, "step": 59890 }, { "epoch": 2.176757031761029, "grad_norm": 1.0602693557739258, "learning_rate": 3.9394843490095535e-05, "loss": 0.107, "step": 59900 }, { "epoch": 2.177120430263827, "grad_norm": 0.789055347442627, "learning_rate": 3.939060041921421e-05, "loss": 0.1354, "step": 59910 }, { "epoch": 2.1774838287666256, "grad_norm": 0.27713751792907715, "learning_rate": 3.9386356728293123e-05, "loss": 0.1047, "step": 59920 }, { "epoch": 2.1778472272694236, "grad_norm": 1.9695335626602173, "learning_rate": 3.9382112417515106e-05, "loss": 0.0788, "step": 59930 }, { "epoch": 2.178210625772222, "grad_norm": 1.5898009538650513, "learning_rate": 3.937786748706304e-05, "loss": 0.1194, "step": 59940 }, { "epoch": 2.17857402427502, "grad_norm": 1.2933491468429565, "learning_rate": 3.937362193711981e-05, "loss": 0.0878, "step": 59950 }, { "epoch": 2.178937422777818, "grad_norm": 0.5345110297203064, "learning_rate": 3.9369375767868355e-05, "loss": 0.1, "step": 59960 }, { "epoch": 2.1793008212806164, "grad_norm": 0.5044030547142029, "learning_rate": 3.936512897949163e-05, "loss": 0.1144, "step": 59970 }, { "epoch": 2.1796642197834144, "grad_norm": 0.5815631151199341, "learning_rate": 3.9360881572172605e-05, "loss": 0.0789, "step": 59980 }, { "epoch": 2.180027618286213, "grad_norm": 0.8639971613883972, "learning_rate": 3.9356633546094297e-05, "loss": 0.0971, "step": 59990 }, { "epoch": 2.180391016789011, "grad_norm": 1.318261981010437, "learning_rate": 
3.935238490143972e-05, "loss": 0.0979, "step": 60000 }, { "epoch": 2.180391016789011, "eval_loss": 0.339672327041626, "eval_runtime": 179.2051, "eval_samples_per_second": 41.372, "eval_steps_per_second": 5.173, "eval_wer": 0.1550910378129141, "step": 60000 }, { "epoch": 2.180754415291809, "grad_norm": 1.4749493598937988, "learning_rate": 3.934813563839195e-05, "loss": 0.0857, "step": 60010 }, { "epoch": 2.1811178137946072, "grad_norm": 0.6420970559120178, "learning_rate": 3.934388575713407e-05, "loss": 0.1378, "step": 60020 }, { "epoch": 2.181481212297405, "grad_norm": 2.692276954650879, "learning_rate": 3.9339635257849176e-05, "loss": 0.1229, "step": 60030 }, { "epoch": 2.1818446108002036, "grad_norm": 0.6107433438301086, "learning_rate": 3.9335384140720435e-05, "loss": 0.1196, "step": 60040 }, { "epoch": 2.1822080093030016, "grad_norm": 8.781155586242676, "learning_rate": 3.933113240593098e-05, "loss": 0.1229, "step": 60050 }, { "epoch": 2.1825714078058, "grad_norm": 2.4440197944641113, "learning_rate": 3.9326880053664026e-05, "loss": 0.1012, "step": 60060 }, { "epoch": 2.182934806308598, "grad_norm": 0.6593974828720093, "learning_rate": 3.932262708410279e-05, "loss": 0.0975, "step": 60070 }, { "epoch": 2.183298204811396, "grad_norm": 1.740123987197876, "learning_rate": 3.931837349743051e-05, "loss": 0.1086, "step": 60080 }, { "epoch": 2.1836616033141945, "grad_norm": 0.8486297130584717, "learning_rate": 3.9314119293830466e-05, "loss": 0.1325, "step": 60090 }, { "epoch": 2.1840250018169924, "grad_norm": 1.1630836725234985, "learning_rate": 3.9309864473485945e-05, "loss": 0.0936, "step": 60100 }, { "epoch": 2.184388400319791, "grad_norm": 1.5026519298553467, "learning_rate": 3.930560903658028e-05, "loss": 0.1088, "step": 60110 }, { "epoch": 2.184751798822589, "grad_norm": 0.8840125799179077, "learning_rate": 3.9301352983296816e-05, "loss": 0.7203, "step": 60120 }, { "epoch": 2.185115197325387, "grad_norm": 1.5866588354110718, "learning_rate": 
3.929709631381895e-05, "loss": 0.1023, "step": 60130 }, { "epoch": 2.1854785958281853, "grad_norm": 1.0091042518615723, "learning_rate": 3.9292839028330065e-05, "loss": 0.1165, "step": 60140 }, { "epoch": 2.1858419943309833, "grad_norm": 0.8317708969116211, "learning_rate": 3.9288581127013603e-05, "loss": 0.084, "step": 60150 }, { "epoch": 2.1862053928337817, "grad_norm": 0.5231217741966248, "learning_rate": 3.9284322610053016e-05, "loss": 0.0832, "step": 60160 }, { "epoch": 2.1865687913365797, "grad_norm": 1.9025609493255615, "learning_rate": 3.928006347763179e-05, "loss": 0.1349, "step": 60170 }, { "epoch": 2.1869321898393776, "grad_norm": 1.5179822444915771, "learning_rate": 3.927580372993344e-05, "loss": 0.1029, "step": 60180 }, { "epoch": 2.187295588342176, "grad_norm": 1.7581968307495117, "learning_rate": 3.9271543367141494e-05, "loss": 0.1232, "step": 60190 }, { "epoch": 2.187658986844974, "grad_norm": 1.4503281116485596, "learning_rate": 3.926728238943953e-05, "loss": 0.0832, "step": 60200 }, { "epoch": 2.1880223853477725, "grad_norm": 1.222233533859253, "learning_rate": 3.926302079701113e-05, "loss": 0.0918, "step": 60210 }, { "epoch": 2.1883857838505705, "grad_norm": 2.6328423023223877, "learning_rate": 3.9258758590039915e-05, "loss": 0.1229, "step": 60220 }, { "epoch": 2.188749182353369, "grad_norm": 1.2800387144088745, "learning_rate": 3.925449576870952e-05, "loss": 0.1132, "step": 60230 }, { "epoch": 2.189112580856167, "grad_norm": 1.5218274593353271, "learning_rate": 3.925023233320362e-05, "loss": 0.1508, "step": 60240 }, { "epoch": 2.189475979358965, "grad_norm": 0.6339848041534424, "learning_rate": 3.9245968283705916e-05, "loss": 0.0934, "step": 60250 }, { "epoch": 2.1898393778617633, "grad_norm": 0.6518699526786804, "learning_rate": 3.924170362040012e-05, "loss": 0.0979, "step": 60260 }, { "epoch": 2.1902027763645613, "grad_norm": 0.6267105340957642, "learning_rate": 3.923743834346999e-05, "loss": 0.3877, "step": 60270 }, { "epoch": 
2.1905661748673597, "grad_norm": 0.5715605616569519, "learning_rate": 3.92331724530993e-05, "loss": 0.1171, "step": 60280 }, { "epoch": 2.1909295733701577, "grad_norm": 1.068161129951477, "learning_rate": 3.922890594947185e-05, "loss": 0.1452, "step": 60290 }, { "epoch": 2.1912929718729557, "grad_norm": 0.9280456304550171, "learning_rate": 3.9224638832771475e-05, "loss": 0.0951, "step": 60300 }, { "epoch": 2.191656370375754, "grad_norm": 1.1696865558624268, "learning_rate": 3.922037110318201e-05, "loss": 0.1019, "step": 60310 }, { "epoch": 2.192019768878552, "grad_norm": 0.8494959473609924, "learning_rate": 3.921610276088736e-05, "loss": 0.1189, "step": 60320 }, { "epoch": 2.1923831673813505, "grad_norm": 3.686048746109009, "learning_rate": 3.921183380607142e-05, "loss": 0.1161, "step": 60330 }, { "epoch": 2.1927465658841485, "grad_norm": 1.5831258296966553, "learning_rate": 3.920756423891814e-05, "loss": 0.1309, "step": 60340 }, { "epoch": 2.193109964386947, "grad_norm": 1.9985876083374023, "learning_rate": 3.920329405961145e-05, "loss": 0.1393, "step": 60350 }, { "epoch": 2.193473362889745, "grad_norm": 4.160605430603027, "learning_rate": 3.919902326833536e-05, "loss": 0.1535, "step": 60360 }, { "epoch": 2.193836761392543, "grad_norm": 0.43690192699432373, "learning_rate": 3.919475186527388e-05, "loss": 0.1186, "step": 60370 }, { "epoch": 2.1942001598953413, "grad_norm": 0.8073493242263794, "learning_rate": 3.9190479850611044e-05, "loss": 0.1047, "step": 60380 }, { "epoch": 2.1945635583981393, "grad_norm": 9.085131645202637, "learning_rate": 3.9186207224530925e-05, "loss": 0.1332, "step": 60390 }, { "epoch": 2.1949269569009378, "grad_norm": 1.6787877082824707, "learning_rate": 3.9181933987217614e-05, "loss": 0.0998, "step": 60400 }, { "epoch": 2.1952903554037357, "grad_norm": 0.6496911644935608, "learning_rate": 3.917766013885522e-05, "loss": 0.103, "step": 60410 }, { "epoch": 2.1956537539065337, "grad_norm": 0.9650323987007141, "learning_rate": 
3.9173385679627896e-05, "loss": 0.1055, "step": 60420 }, { "epoch": 2.196017152409332, "grad_norm": 2.345998525619507, "learning_rate": 3.916911060971981e-05, "loss": 0.1, "step": 60430 }, { "epoch": 2.19638055091213, "grad_norm": 0.6440123915672302, "learning_rate": 3.9164834929315165e-05, "loss": 0.1562, "step": 60440 }, { "epoch": 2.1967439494149286, "grad_norm": 6.226611614227295, "learning_rate": 3.916055863859818e-05, "loss": 0.0971, "step": 60450 }, { "epoch": 2.1971073479177265, "grad_norm": 6.518206596374512, "learning_rate": 3.915628173775311e-05, "loss": 0.0858, "step": 60460 }, { "epoch": 2.1974707464205245, "grad_norm": 0.48097607493400574, "learning_rate": 3.915200422696423e-05, "loss": 0.1248, "step": 60470 }, { "epoch": 2.197834144923323, "grad_norm": 1.0158125162124634, "learning_rate": 3.914772610641584e-05, "loss": 0.0952, "step": 60480 }, { "epoch": 2.198197543426121, "grad_norm": 0.9592711925506592, "learning_rate": 3.914344737629226e-05, "loss": 0.1202, "step": 60490 }, { "epoch": 2.1985609419289194, "grad_norm": 0.8496592044830322, "learning_rate": 3.9139168036777864e-05, "loss": 0.1377, "step": 60500 }, { "epoch": 2.1989243404317174, "grad_norm": 0.9268959760665894, "learning_rate": 3.913488808805702e-05, "loss": 0.1058, "step": 60510 }, { "epoch": 2.199287738934516, "grad_norm": 1.091874122619629, "learning_rate": 3.913060753031414e-05, "loss": 0.1232, "step": 60520 }, { "epoch": 2.1996511374373138, "grad_norm": 45.63993835449219, "learning_rate": 3.912632636373367e-05, "loss": 0.4101, "step": 60530 }, { "epoch": 2.2000145359401118, "grad_norm": 1.5052204132080078, "learning_rate": 3.912204458850005e-05, "loss": 0.1542, "step": 60540 }, { "epoch": 2.20037793444291, "grad_norm": 0.9882798790931702, "learning_rate": 3.911776220479777e-05, "loss": 0.1096, "step": 60550 }, { "epoch": 2.200741332945708, "grad_norm": 2.0385029315948486, "learning_rate": 3.9113479212811356e-05, "loss": 0.0945, "step": 60560 }, { "epoch": 2.2011047314485066, 
"grad_norm": 0.5360209345817566, "learning_rate": 3.910919561272533e-05, "loss": 0.1064, "step": 60570 }, { "epoch": 2.2014681299513046, "grad_norm": 2.028599739074707, "learning_rate": 3.910491140472428e-05, "loss": 0.1076, "step": 60580 }, { "epoch": 2.2018315284541026, "grad_norm": 2.3928070068359375, "learning_rate": 3.910062658899277e-05, "loss": 0.1278, "step": 60590 }, { "epoch": 2.202194926956901, "grad_norm": 0.851287305355072, "learning_rate": 3.9096341165715436e-05, "loss": 0.0905, "step": 60600 }, { "epoch": 2.202194926956901, "eval_loss": 0.3481411039829254, "eval_runtime": 180.2524, "eval_samples_per_second": 41.131, "eval_steps_per_second": 5.143, "eval_wer": 0.16283333635885056, "step": 60600 }, { "epoch": 2.202558325459699, "grad_norm": 2.9646081924438477, "learning_rate": 3.9092055135076915e-05, "loss": 0.1062, "step": 60610 }, { "epoch": 2.2029217239624974, "grad_norm": 0.6181505918502808, "learning_rate": 3.908776849726188e-05, "loss": 0.102, "step": 60620 }, { "epoch": 2.2032851224652954, "grad_norm": 0.49643078446388245, "learning_rate": 3.908348125245502e-05, "loss": 0.1266, "step": 60630 }, { "epoch": 2.203648520968094, "grad_norm": 4.227423667907715, "learning_rate": 3.907919340084106e-05, "loss": 0.1613, "step": 60640 }, { "epoch": 2.204011919470892, "grad_norm": 0.5859548449516296, "learning_rate": 3.9074904942604764e-05, "loss": 0.0863, "step": 60650 }, { "epoch": 2.20437531797369, "grad_norm": 0.9373226761817932, "learning_rate": 3.9070615877930886e-05, "loss": 0.1071, "step": 60660 }, { "epoch": 2.2047387164764882, "grad_norm": 0.8272415399551392, "learning_rate": 3.906632620700422e-05, "loss": 0.1139, "step": 60670 }, { "epoch": 2.205102114979286, "grad_norm": 1.1634105443954468, "learning_rate": 3.9062035930009625e-05, "loss": 0.0981, "step": 60680 }, { "epoch": 2.2054655134820846, "grad_norm": 1.0491262674331665, "learning_rate": 3.905774504713192e-05, "loss": 0.1312, "step": 60690 }, { "epoch": 2.2058289119848826, "grad_norm": 
0.6341159343719482, "learning_rate": 3.905345355855601e-05, "loss": 0.0847, "step": 60700 }, { "epoch": 2.2061923104876806, "grad_norm": 0.8382464647293091, "learning_rate": 3.904916146446678e-05, "loss": 0.0945, "step": 60710 }, { "epoch": 2.206555708990479, "grad_norm": 0.5253706574440002, "learning_rate": 3.904486876504917e-05, "loss": 0.1328, "step": 60720 }, { "epoch": 2.206919107493277, "grad_norm": 3.4987101554870605, "learning_rate": 3.904057546048815e-05, "loss": 0.0862, "step": 60730 }, { "epoch": 2.2072825059960755, "grad_norm": 1.807373285293579, "learning_rate": 3.903628155096867e-05, "loss": 0.1005, "step": 60740 }, { "epoch": 2.2076459044988734, "grad_norm": 1.1272157430648804, "learning_rate": 3.9031987036675774e-05, "loss": 0.1044, "step": 60750 }, { "epoch": 2.2080093030016714, "grad_norm": 0.47526538372039795, "learning_rate": 3.902769191779448e-05, "loss": 0.0947, "step": 60760 }, { "epoch": 2.20837270150447, "grad_norm": 0.8546761274337769, "learning_rate": 3.9023396194509846e-05, "loss": 0.1129, "step": 60770 }, { "epoch": 2.208736100007268, "grad_norm": 0.557783305644989, "learning_rate": 3.901909986700697e-05, "loss": 0.1198, "step": 60780 }, { "epoch": 2.2090994985100663, "grad_norm": 0.5007415413856506, "learning_rate": 3.901480293547096e-05, "loss": 0.1154, "step": 60790 }, { "epoch": 2.2094628970128642, "grad_norm": 1.8647228479385376, "learning_rate": 3.901050540008696e-05, "loss": 0.1997, "step": 60800 }, { "epoch": 2.2098262955156627, "grad_norm": 0.7277741432189941, "learning_rate": 3.900620726104012e-05, "loss": 0.1032, "step": 60810 }, { "epoch": 2.2101896940184607, "grad_norm": 0.4809872806072235, "learning_rate": 3.9001908518515656e-05, "loss": 0.1162, "step": 60820 }, { "epoch": 2.2105530925212586, "grad_norm": 0.7930201888084412, "learning_rate": 3.899760917269877e-05, "loss": 0.1207, "step": 60830 }, { "epoch": 2.210916491024057, "grad_norm": 1.0866421461105347, "learning_rate": 3.89933092237747e-05, "loss": 0.1334, "step": 
60840 }, { "epoch": 2.211279889526855, "grad_norm": 2.5568645000457764, "learning_rate": 3.898900867192874e-05, "loss": 0.0934, "step": 60850 }, { "epoch": 2.2116432880296535, "grad_norm": 1.1865488290786743, "learning_rate": 3.8984707517346154e-05, "loss": 0.1072, "step": 60860 }, { "epoch": 2.2120066865324515, "grad_norm": 1.0457924604415894, "learning_rate": 3.8980405760212284e-05, "loss": 0.1108, "step": 60870 }, { "epoch": 2.2123700850352495, "grad_norm": 1.0669806003570557, "learning_rate": 3.897610340071247e-05, "loss": 0.1128, "step": 60880 }, { "epoch": 2.212733483538048, "grad_norm": 4.467153072357178, "learning_rate": 3.897180043903209e-05, "loss": 0.1347, "step": 60890 }, { "epoch": 2.213096882040846, "grad_norm": 0.48086392879486084, "learning_rate": 3.896749687535655e-05, "loss": 0.1018, "step": 60900 }, { "epoch": 2.2134602805436443, "grad_norm": 0.6917502284049988, "learning_rate": 3.8963192709871253e-05, "loss": 0.0779, "step": 60910 }, { "epoch": 2.2138236790464423, "grad_norm": 0.9939578771591187, "learning_rate": 3.8958887942761665e-05, "loss": 0.1278, "step": 60920 }, { "epoch": 2.2141870775492407, "grad_norm": 0.8723199963569641, "learning_rate": 3.895458257421327e-05, "loss": 0.1034, "step": 60930 }, { "epoch": 2.2145504760520387, "grad_norm": 2.1347460746765137, "learning_rate": 3.8950276604411554e-05, "loss": 0.1086, "step": 60940 }, { "epoch": 2.2149138745548367, "grad_norm": 0.6032381653785706, "learning_rate": 3.894597003354206e-05, "loss": 0.1141, "step": 60950 }, { "epoch": 2.215277273057635, "grad_norm": 0.540093719959259, "learning_rate": 3.894166286179033e-05, "loss": 0.0892, "step": 60960 }, { "epoch": 2.215640671560433, "grad_norm": 0.6019798517227173, "learning_rate": 3.893735508934197e-05, "loss": 0.0911, "step": 60970 }, { "epoch": 2.2160040700632315, "grad_norm": 1.290984869003296, "learning_rate": 3.893304671638254e-05, "loss": 0.1283, "step": 60980 }, { "epoch": 2.2163674685660295, "grad_norm": 0.5830800533294678, 
"learning_rate": 3.892873774309772e-05, "loss": 0.1094, "step": 60990 }, { "epoch": 2.2167308670688275, "grad_norm": 1.1006908416748047, "learning_rate": 3.892442816967315e-05, "loss": 0.1157, "step": 61000 }, { "epoch": 2.217094265571626, "grad_norm": 0.42782625555992126, "learning_rate": 3.8920117996294505e-05, "loss": 0.0852, "step": 61010 }, { "epoch": 2.217457664074424, "grad_norm": 1.036010503768921, "learning_rate": 3.8915807223147506e-05, "loss": 0.1175, "step": 61020 }, { "epoch": 2.2178210625772223, "grad_norm": 1.0316133499145508, "learning_rate": 3.891149585041789e-05, "loss": 0.1007, "step": 61030 }, { "epoch": 2.2181844610800203, "grad_norm": 1.3433195352554321, "learning_rate": 3.890718387829141e-05, "loss": 2.4829, "step": 61040 }, { "epoch": 2.2185478595828183, "grad_norm": 1.0637513399124146, "learning_rate": 3.890287130695386e-05, "loss": 0.1012, "step": 61050 }, { "epoch": 2.2189112580856167, "grad_norm": 0.9853934645652771, "learning_rate": 3.8898558136591055e-05, "loss": 0.0983, "step": 61060 }, { "epoch": 2.2192746565884147, "grad_norm": 0.6070169806480408, "learning_rate": 3.889424436738882e-05, "loss": 0.0933, "step": 61070 }, { "epoch": 2.219638055091213, "grad_norm": 0.9032323360443115, "learning_rate": 3.8889929999533045e-05, "loss": 0.1039, "step": 61080 }, { "epoch": 2.220001453594011, "grad_norm": 1.1702359914779663, "learning_rate": 3.888561503320961e-05, "loss": 0.1674, "step": 61090 }, { "epoch": 2.2203648520968096, "grad_norm": 1.5377318859100342, "learning_rate": 3.888129946860442e-05, "loss": 0.0977, "step": 61100 }, { "epoch": 2.2207282505996075, "grad_norm": 0.8765788078308105, "learning_rate": 3.887698330590342e-05, "loss": 0.1133, "step": 61110 }, { "epoch": 2.2210916491024055, "grad_norm": 1.543609857559204, "learning_rate": 3.887266654529259e-05, "loss": 0.1335, "step": 61120 }, { "epoch": 2.221455047605204, "grad_norm": 2.144033908843994, "learning_rate": 3.886834918695792e-05, "loss": 0.1097, "step": 61130 }, { "epoch": 
2.221818446108002, "grad_norm": 0.9922833442687988, "learning_rate": 3.886403123108542e-05, "loss": 0.1245, "step": 61140 }, { "epoch": 2.2221818446108004, "grad_norm": 0.7214832305908203, "learning_rate": 3.885971267786115e-05, "loss": 0.3578, "step": 61150 }, { "epoch": 2.2225452431135984, "grad_norm": 0.3823475241661072, "learning_rate": 3.8855393527471175e-05, "loss": 0.1396, "step": 61160 }, { "epoch": 2.2229086416163963, "grad_norm": 0.4039243459701538, "learning_rate": 3.885107378010158e-05, "loss": 0.0998, "step": 61170 }, { "epoch": 2.2232720401191948, "grad_norm": 0.6202207207679749, "learning_rate": 3.884675343593851e-05, "loss": 0.1278, "step": 61180 }, { "epoch": 2.2236354386219928, "grad_norm": 1.5638877153396606, "learning_rate": 3.884243249516809e-05, "loss": 0.1162, "step": 61190 }, { "epoch": 2.223998837124791, "grad_norm": 2.9136829376220703, "learning_rate": 3.8838110957976514e-05, "loss": 0.1007, "step": 61200 }, { "epoch": 2.223998837124791, "eval_loss": 0.3254208564758301, "eval_runtime": 180.3346, "eval_samples_per_second": 41.112, "eval_steps_per_second": 5.14, "eval_wer": 0.15656143917802748, "step": 61200 }, { "epoch": 2.224362235627589, "grad_norm": 0.40365439653396606, "learning_rate": 3.883378882454998e-05, "loss": 0.1016, "step": 61210 }, { "epoch": 2.2247256341303876, "grad_norm": 0.48598694801330566, "learning_rate": 3.882946609507468e-05, "loss": 0.1089, "step": 61220 }, { "epoch": 2.2250890326331856, "grad_norm": 1.7332137823104858, "learning_rate": 3.882514276973692e-05, "loss": 0.1101, "step": 61230 }, { "epoch": 2.2254524311359836, "grad_norm": 2.3783786296844482, "learning_rate": 3.882081884872293e-05, "loss": 0.0936, "step": 61240 }, { "epoch": 2.225815829638782, "grad_norm": 0.684394896030426, "learning_rate": 3.881649433221904e-05, "loss": 0.0868, "step": 61250 }, { "epoch": 2.22617922814158, "grad_norm": 0.43269750475883484, "learning_rate": 3.881216922041156e-05, "loss": 0.1026, "step": 61260 }, { "epoch": 
2.2265426266443784, "grad_norm": 0.9126709699630737, "learning_rate": 3.8807843513486866e-05, "loss": 0.1436, "step": 61270 }, { "epoch": 2.2269060251471764, "grad_norm": 1.7345128059387207, "learning_rate": 3.880351721163131e-05, "loss": 0.0992, "step": 61280 }, { "epoch": 2.2272694236499744, "grad_norm": 1.4722065925598145, "learning_rate": 3.879919031503131e-05, "loss": 0.1637, "step": 61290 }, { "epoch": 2.227632822152773, "grad_norm": 0.6145905256271362, "learning_rate": 3.879486282387331e-05, "loss": 0.0881, "step": 61300 }, { "epoch": 2.227996220655571, "grad_norm": 0.5936566591262817, "learning_rate": 3.879053473834374e-05, "loss": 0.0947, "step": 61310 }, { "epoch": 2.228359619158369, "grad_norm": 2.5217325687408447, "learning_rate": 3.87862060586291e-05, "loss": 0.1251, "step": 61320 }, { "epoch": 2.228723017661167, "grad_norm": 2.556070327758789, "learning_rate": 3.878187678491589e-05, "loss": 0.129, "step": 61330 }, { "epoch": 2.229086416163965, "grad_norm": 1.7533297538757324, "learning_rate": 3.877754691739065e-05, "loss": 0.1331, "step": 61340 }, { "epoch": 2.2294498146667636, "grad_norm": 0.6436717510223389, "learning_rate": 3.877321645623994e-05, "loss": 0.0836, "step": 61350 }, { "epoch": 2.2298132131695616, "grad_norm": 0.5834245085716248, "learning_rate": 3.8768885401650325e-05, "loss": 0.0953, "step": 61360 }, { "epoch": 2.23017661167236, "grad_norm": 2.3103013038635254, "learning_rate": 3.8764553753808436e-05, "loss": 0.1138, "step": 61370 }, { "epoch": 2.230540010175158, "grad_norm": 1.5668505430221558, "learning_rate": 3.87602215129009e-05, "loss": 0.1569, "step": 61380 }, { "epoch": 2.2309034086779564, "grad_norm": 0.719791054725647, "learning_rate": 3.875588867911437e-05, "loss": 0.108, "step": 61390 }, { "epoch": 2.2312668071807544, "grad_norm": 0.729350745677948, "learning_rate": 3.875155525263555e-05, "loss": 0.0832, "step": 61400 }, { "epoch": 2.2316302056835524, "grad_norm": 1.3647226095199585, "learning_rate": 3.874722123365113e-05, 
"loss": 0.0913, "step": 61410 }, { "epoch": 2.231993604186351, "grad_norm": 0.6896275877952576, "learning_rate": 3.8742886622347876e-05, "loss": 0.1133, "step": 61420 }, { "epoch": 2.232357002689149, "grad_norm": 0.8130580186843872, "learning_rate": 3.8738551418912526e-05, "loss": 0.0909, "step": 61430 }, { "epoch": 2.2327204011919473, "grad_norm": 1.155916690826416, "learning_rate": 3.873421562353188e-05, "loss": 0.114, "step": 61440 }, { "epoch": 2.2330837996947452, "grad_norm": 1.4737950563430786, "learning_rate": 3.872987923639274e-05, "loss": 0.4289, "step": 61450 }, { "epoch": 2.2334471981975432, "grad_norm": 0.41144660115242004, "learning_rate": 3.8725542257681966e-05, "loss": 0.0862, "step": 61460 }, { "epoch": 2.2338105967003417, "grad_norm": 0.5804570913314819, "learning_rate": 3.872120468758641e-05, "loss": 0.1067, "step": 61470 }, { "epoch": 2.2341739952031396, "grad_norm": 0.8408393263816833, "learning_rate": 3.871686652629296e-05, "loss": 0.1097, "step": 61480 }, { "epoch": 2.234537393705938, "grad_norm": 1.0146747827529907, "learning_rate": 3.871252777398854e-05, "loss": 0.1391, "step": 61490 }, { "epoch": 2.234900792208736, "grad_norm": 0.8638483881950378, "learning_rate": 3.8708188430860084e-05, "loss": 0.5518, "step": 61500 }, { "epoch": 2.2352641907115345, "grad_norm": 0.9493032693862915, "learning_rate": 3.8703848497094565e-05, "loss": 0.3308, "step": 61510 }, { "epoch": 2.2356275892143325, "grad_norm": 2.7466158866882324, "learning_rate": 3.8699507972878974e-05, "loss": 0.1196, "step": 61520 }, { "epoch": 2.2359909877171305, "grad_norm": 0.737774133682251, "learning_rate": 3.869516685840032e-05, "loss": 0.0988, "step": 61530 }, { "epoch": 2.236354386219929, "grad_norm": 0.4675132632255554, "learning_rate": 3.8690825153845667e-05, "loss": 0.1314, "step": 61540 }, { "epoch": 2.236717784722727, "grad_norm": 0.9533403515815735, "learning_rate": 3.8686482859402055e-05, "loss": 0.1024, "step": 61550 }, { "epoch": 2.2370811832255253, "grad_norm": 
0.7988652586936951, "learning_rate": 3.8682139975256605e-05, "loss": 0.1002, "step": 61560 }, { "epoch": 2.2374445817283233, "grad_norm": 0.45931610465049744, "learning_rate": 3.867779650159642e-05, "loss": 0.1012, "step": 61570 }, { "epoch": 2.2378079802311213, "grad_norm": 2.8576176166534424, "learning_rate": 3.8673452438608646e-05, "loss": 0.2177, "step": 61580 }, { "epoch": 2.2381713787339197, "grad_norm": 1.2942947149276733, "learning_rate": 3.8669107786480464e-05, "loss": 0.1286, "step": 61590 }, { "epoch": 2.2385347772367177, "grad_norm": 0.4589090049266815, "learning_rate": 3.866476254539906e-05, "loss": 0.0818, "step": 61600 }, { "epoch": 2.238898175739516, "grad_norm": 0.5710172057151794, "learning_rate": 3.866041671555166e-05, "loss": 0.2093, "step": 61610 }, { "epoch": 2.239261574242314, "grad_norm": 0.6458502411842346, "learning_rate": 3.86560702971255e-05, "loss": 0.1259, "step": 61620 }, { "epoch": 2.239624972745112, "grad_norm": 1.265261173248291, "learning_rate": 3.865172329030786e-05, "loss": 0.1009, "step": 61630 }, { "epoch": 2.2399883712479105, "grad_norm": 0.49177274107933044, "learning_rate": 3.8647375695286036e-05, "loss": 0.1111, "step": 61640 }, { "epoch": 2.2403517697507085, "grad_norm": 1.8626538515090942, "learning_rate": 3.864302751224736e-05, "loss": 0.1194, "step": 61650 }, { "epoch": 2.240715168253507, "grad_norm": 0.9763522148132324, "learning_rate": 3.8638678741379166e-05, "loss": 0.0953, "step": 61660 }, { "epoch": 2.241078566756305, "grad_norm": 2.4940896034240723, "learning_rate": 3.863432938286883e-05, "loss": 0.1028, "step": 61670 }, { "epoch": 2.2414419652591033, "grad_norm": 1.7410259246826172, "learning_rate": 3.862997943690375e-05, "loss": 0.1435, "step": 61680 }, { "epoch": 2.2418053637619013, "grad_norm": 2.1346585750579834, "learning_rate": 3.862562890367135e-05, "loss": 0.1286, "step": 61690 }, { "epoch": 2.2421687622646993, "grad_norm": 0.6110004782676697, "learning_rate": 3.862127778335909e-05, "loss": 0.1131, 
"step": 61700 }, { "epoch": 2.2425321607674977, "grad_norm": 0.7446867227554321, "learning_rate": 3.8616926076154426e-05, "loss": 0.0888, "step": 61710 }, { "epoch": 2.2428955592702957, "grad_norm": 0.820365846157074, "learning_rate": 3.861257378224488e-05, "loss": 0.104, "step": 61720 }, { "epoch": 2.243258957773094, "grad_norm": 0.5953546166419983, "learning_rate": 3.860822090181795e-05, "loss": 0.0993, "step": 61730 }, { "epoch": 2.243622356275892, "grad_norm": 1.5128546953201294, "learning_rate": 3.86038674350612e-05, "loss": 0.2106, "step": 61740 }, { "epoch": 2.24398575477869, "grad_norm": 0.7002906799316406, "learning_rate": 3.859951338216221e-05, "loss": 0.0863, "step": 61750 }, { "epoch": 2.2443491532814885, "grad_norm": 0.7450056076049805, "learning_rate": 3.859515874330857e-05, "loss": 0.0991, "step": 61760 }, { "epoch": 2.2447125517842865, "grad_norm": 0.5604157447814941, "learning_rate": 3.859080351868792e-05, "loss": 0.1255, "step": 61770 }, { "epoch": 2.245075950287085, "grad_norm": 1.1846556663513184, "learning_rate": 3.85864477084879e-05, "loss": 0.1329, "step": 61780 }, { "epoch": 2.245439348789883, "grad_norm": 1.1680017709732056, "learning_rate": 3.8582091312896186e-05, "loss": 0.1409, "step": 61790 }, { "epoch": 2.2458027472926814, "grad_norm": 0.34896320104599, "learning_rate": 3.857773433210048e-05, "loss": 1.4069, "step": 61800 }, { "epoch": 2.2458027472926814, "eval_loss": 0.31015458703041077, "eval_runtime": 179.4469, "eval_samples_per_second": 41.316, "eval_steps_per_second": 5.166, "eval_wer": 0.15887595984533556, "step": 61800 }, { "epoch": 2.2461661457954794, "grad_norm": 8.44802188873291, "learning_rate": 3.8573376766288515e-05, "loss": 0.0953, "step": 61810 }, { "epoch": 2.2465295442982773, "grad_norm": 0.44796204566955566, "learning_rate": 3.8569018615648034e-05, "loss": 0.1058, "step": 61820 }, { "epoch": 2.2468929428010758, "grad_norm": 0.7886875867843628, "learning_rate": 3.8564659880366826e-05, "loss": 0.163, "step": 61830 }, { 
"epoch": 2.2472563413038738, "grad_norm": 0.5576759576797485, "learning_rate": 3.856030056063269e-05, "loss": 0.1326, "step": 61840 }, { "epoch": 2.247619739806672, "grad_norm": 0.8255923986434937, "learning_rate": 3.855594065663345e-05, "loss": 0.1045, "step": 61850 }, { "epoch": 2.24798313830947, "grad_norm": 1.2470930814743042, "learning_rate": 3.855158016855695e-05, "loss": 0.0788, "step": 61860 }, { "epoch": 2.248346536812268, "grad_norm": 0.9577877521514893, "learning_rate": 3.854721909659108e-05, "loss": 0.1024, "step": 61870 }, { "epoch": 2.2487099353150666, "grad_norm": 1.3195165395736694, "learning_rate": 3.854285744092375e-05, "loss": 0.0975, "step": 61880 }, { "epoch": 2.2490733338178646, "grad_norm": 0.8952762484550476, "learning_rate": 3.853849520174286e-05, "loss": 0.1479, "step": 61890 }, { "epoch": 2.249436732320663, "grad_norm": 0.9849411249160767, "learning_rate": 3.85341323792364e-05, "loss": 0.0954, "step": 61900 }, { "epoch": 2.249800130823461, "grad_norm": 1.1869410276412964, "learning_rate": 3.8529768973592325e-05, "loss": 0.104, "step": 61910 }, { "epoch": 2.250163529326259, "grad_norm": 0.7452064752578735, "learning_rate": 3.852540498499864e-05, "loss": 0.1, "step": 61920 }, { "epoch": 2.2505269278290574, "grad_norm": 0.7757828831672668, "learning_rate": 3.8521040413643385e-05, "loss": 0.1397, "step": 61930 }, { "epoch": 2.2508903263318554, "grad_norm": 1.0734906196594238, "learning_rate": 3.8516675259714594e-05, "loss": 0.1162, "step": 61940 }, { "epoch": 2.251253724834654, "grad_norm": 1.4619065523147583, "learning_rate": 3.851230952340037e-05, "loss": 0.6661, "step": 61950 }, { "epoch": 2.251617123337452, "grad_norm": 1.221156120300293, "learning_rate": 3.850794320488881e-05, "loss": 0.1048, "step": 61960 }, { "epoch": 2.2519805218402498, "grad_norm": 1.1556357145309448, "learning_rate": 3.8503576304368025e-05, "loss": 0.1435, "step": 61970 }, { "epoch": 2.252343920343048, "grad_norm": 0.5849198698997498, "learning_rate": 
3.849920882202619e-05, "loss": 0.1031, "step": 61980 }, { "epoch": 2.252707318845846, "grad_norm": 0.5589366555213928, "learning_rate": 3.849484075805148e-05, "loss": 0.1123, "step": 61990 }, { "epoch": 2.2530707173486446, "grad_norm": 1.709695816040039, "learning_rate": 3.849047211263209e-05, "loss": 0.1071, "step": 62000 }, { "epoch": 2.2534341158514426, "grad_norm": 5.30033016204834, "learning_rate": 3.848610288595626e-05, "loss": 0.1276, "step": 62010 }, { "epoch": 2.253797514354241, "grad_norm": 1.242638349533081, "learning_rate": 3.848173307821224e-05, "loss": 0.1183, "step": 62020 }, { "epoch": 2.254160912857039, "grad_norm": 0.650566816329956, "learning_rate": 3.84773626895883e-05, "loss": 0.1074, "step": 62030 }, { "epoch": 2.2545243113598374, "grad_norm": 0.8243488669395447, "learning_rate": 3.847299172027277e-05, "loss": 0.2269, "step": 62040 }, { "epoch": 2.2548877098626354, "grad_norm": 0.5993553996086121, "learning_rate": 3.846862017045396e-05, "loss": 0.1093, "step": 62050 }, { "epoch": 2.2552511083654334, "grad_norm": 1.5640254020690918, "learning_rate": 3.846424804032023e-05, "loss": 0.1403, "step": 62060 }, { "epoch": 2.255614506868232, "grad_norm": 2.9386844635009766, "learning_rate": 3.8459875330059946e-05, "loss": 0.1261, "step": 62070 }, { "epoch": 2.25597790537103, "grad_norm": 0.45292994379997253, "learning_rate": 3.845550203986154e-05, "loss": 0.0919, "step": 62080 }, { "epoch": 2.2563413038738283, "grad_norm": 1.122269868850708, "learning_rate": 3.845112816991341e-05, "loss": 0.1646, "step": 62090 }, { "epoch": 2.2567047023766262, "grad_norm": 0.33831652998924255, "learning_rate": 3.844675372040403e-05, "loss": 0.0923, "step": 62100 }, { "epoch": 2.257068100879424, "grad_norm": 0.6775882244110107, "learning_rate": 3.844237869152188e-05, "loss": 0.092, "step": 62110 }, { "epoch": 2.2574314993822227, "grad_norm": 1.5221953392028809, "learning_rate": 3.843800308345547e-05, "loss": 0.1027, "step": 62120 }, { "epoch": 2.2577948978850206, 
"grad_norm": 1.1137598752975464, "learning_rate": 3.8433626896393306e-05, "loss": 0.1145, "step": 62130 }, { "epoch": 2.258158296387819, "grad_norm": 1.5517561435699463, "learning_rate": 3.842925013052395e-05, "loss": 0.0914, "step": 62140 }, { "epoch": 2.258521694890617, "grad_norm": 0.9319009184837341, "learning_rate": 3.8424872786036006e-05, "loss": 0.0987, "step": 62150 }, { "epoch": 2.258885093393415, "grad_norm": 1.056016206741333, "learning_rate": 3.842049486311805e-05, "loss": 0.0768, "step": 62160 }, { "epoch": 2.2592484918962135, "grad_norm": 0.3143411874771118, "learning_rate": 3.8416116361958724e-05, "loss": 0.124, "step": 62170 }, { "epoch": 2.2596118903990114, "grad_norm": 0.5706644058227539, "learning_rate": 3.841173728274668e-05, "loss": 0.0781, "step": 62180 }, { "epoch": 2.25997528890181, "grad_norm": 0.7634672522544861, "learning_rate": 3.840735762567058e-05, "loss": 0.1179, "step": 62190 }, { "epoch": 2.260338687404608, "grad_norm": 1.0519330501556396, "learning_rate": 3.840297739091916e-05, "loss": 0.0966, "step": 62200 }, { "epoch": 2.260702085907406, "grad_norm": 0.7548292875289917, "learning_rate": 3.839859657868112e-05, "loss": 0.1004, "step": 62210 }, { "epoch": 2.2610654844102043, "grad_norm": 0.5876504182815552, "learning_rate": 3.8394215189145236e-05, "loss": 0.1199, "step": 62220 }, { "epoch": 2.2614288829130023, "grad_norm": 0.7557339668273926, "learning_rate": 3.838983322250028e-05, "loss": 0.1043, "step": 62230 }, { "epoch": 2.2617922814158007, "grad_norm": 0.9950221180915833, "learning_rate": 3.838545067893504e-05, "loss": 0.103, "step": 62240 }, { "epoch": 2.2621556799185987, "grad_norm": 1.2867968082427979, "learning_rate": 3.838106755863836e-05, "loss": 0.0987, "step": 62250 }, { "epoch": 2.2625190784213967, "grad_norm": 0.8998819589614868, "learning_rate": 3.837668386179909e-05, "loss": 0.1041, "step": 62260 }, { "epoch": 2.262882476924195, "grad_norm": 1.0797913074493408, "learning_rate": 3.837229958860611e-05, "loss": 0.1054, 
"step": 62270 }, { "epoch": 2.263245875426993, "grad_norm": 1.4692394733428955, "learning_rate": 3.836791473924831e-05, "loss": 0.1027, "step": 62280 }, { "epoch": 2.2636092739297915, "grad_norm": 1.2375293970108032, "learning_rate": 3.836352931391464e-05, "loss": 0.1983, "step": 62290 }, { "epoch": 2.2639726724325895, "grad_norm": 1.2827754020690918, "learning_rate": 3.8359143312794035e-05, "loss": 0.0914, "step": 62300 }, { "epoch": 2.264336070935388, "grad_norm": 0.5154075622558594, "learning_rate": 3.835475673607547e-05, "loss": 0.0992, "step": 62310 }, { "epoch": 2.264699469438186, "grad_norm": 0.3848717510700226, "learning_rate": 3.8350369583947956e-05, "loss": 0.115, "step": 62320 }, { "epoch": 2.2650628679409843, "grad_norm": 0.6954711675643921, "learning_rate": 3.834598185660052e-05, "loss": 0.1018, "step": 62330 }, { "epoch": 2.2654262664437823, "grad_norm": 1.0320098400115967, "learning_rate": 3.834159355422221e-05, "loss": 0.1365, "step": 62340 }, { "epoch": 2.2657896649465803, "grad_norm": 0.6527755856513977, "learning_rate": 3.83372046770021e-05, "loss": 0.0761, "step": 62350 }, { "epoch": 2.2661530634493787, "grad_norm": 1.2087364196777344, "learning_rate": 3.8332815225129303e-05, "loss": 0.0941, "step": 62360 }, { "epoch": 2.2665164619521767, "grad_norm": 1.7340302467346191, "learning_rate": 3.8328425198792926e-05, "loss": 0.1125, "step": 62370 }, { "epoch": 2.266879860454975, "grad_norm": 1.7903550863265991, "learning_rate": 3.8324034598182135e-05, "loss": 0.1045, "step": 62380 }, { "epoch": 2.267243258957773, "grad_norm": 0.498909592628479, "learning_rate": 3.8319643423486105e-05, "loss": 0.1317, "step": 62390 }, { "epoch": 2.267606657460571, "grad_norm": 1.1796486377716064, "learning_rate": 3.831525167489403e-05, "loss": 0.0968, "step": 62400 }, { "epoch": 2.267606657460571, "eval_loss": 0.3475956916809082, "eval_runtime": 179.5468, "eval_samples_per_second": 41.293, "eval_steps_per_second": 5.163, "eval_wer": 0.1566340515911195, "step": 62400 }, 
{ "epoch": 2.2679700559633695, "grad_norm": 0.7915635704994202, "learning_rate": 3.831085935259513e-05, "loss": 0.0949, "step": 62410 }, { "epoch": 2.2683334544661675, "grad_norm": 0.5292233824729919, "learning_rate": 3.8306466456778655e-05, "loss": 0.1073, "step": 62420 }, { "epoch": 2.268696852968966, "grad_norm": 0.5092893242835999, "learning_rate": 3.8302072987633895e-05, "loss": 0.1053, "step": 62430 }, { "epoch": 2.269060251471764, "grad_norm": 0.5169047117233276, "learning_rate": 3.829767894535013e-05, "loss": 0.1986, "step": 62440 }, { "epoch": 2.269423649974562, "grad_norm": 0.4594692587852478, "learning_rate": 3.829328433011671e-05, "loss": 0.1058, "step": 62450 }, { "epoch": 2.2697870484773603, "grad_norm": 0.4674893021583557, "learning_rate": 3.8288889142122955e-05, "loss": 0.1126, "step": 62460 }, { "epoch": 2.2701504469801583, "grad_norm": 1.422492504119873, "learning_rate": 3.828449338155825e-05, "loss": 0.1232, "step": 62470 }, { "epoch": 2.2705138454829568, "grad_norm": 2.171562671661377, "learning_rate": 3.828009704861199e-05, "loss": 0.1213, "step": 62480 }, { "epoch": 2.2708772439857547, "grad_norm": 1.4158885478973389, "learning_rate": 3.8275700143473595e-05, "loss": 0.1294, "step": 62490 }, { "epoch": 2.2712406424885527, "grad_norm": 0.7011764049530029, "learning_rate": 3.827130266633253e-05, "loss": 0.0875, "step": 62500 }, { "epoch": 2.271604040991351, "grad_norm": 2.2935948371887207, "learning_rate": 3.8266904617378235e-05, "loss": 0.2261, "step": 62510 }, { "epoch": 2.271967439494149, "grad_norm": 0.653005063533783, "learning_rate": 3.826250599680023e-05, "loss": 0.1304, "step": 62520 }, { "epoch": 2.2723308379969476, "grad_norm": 0.6509010791778564, "learning_rate": 3.8258106804788035e-05, "loss": 0.0971, "step": 62530 }, { "epoch": 2.2726942364997456, "grad_norm": 1.473751425743103, "learning_rate": 3.8253707041531186e-05, "loss": 0.1029, "step": 62540 }, { "epoch": 2.2730576350025435, "grad_norm": 0.31367307901382446, "learning_rate": 
3.824930670721926e-05, "loss": 0.0996, "step": 62550 }, { "epoch": 2.273421033505342, "grad_norm": 0.6324036121368408, "learning_rate": 3.824490580204185e-05, "loss": 0.0984, "step": 62560 }, { "epoch": 2.27378443200814, "grad_norm": 1.8539944887161255, "learning_rate": 3.824050432618858e-05, "loss": 0.157, "step": 62570 }, { "epoch": 2.2741478305109384, "grad_norm": 1.1299885511398315, "learning_rate": 3.823610227984907e-05, "loss": 0.1997, "step": 62580 }, { "epoch": 2.2745112290137364, "grad_norm": 1.0749928951263428, "learning_rate": 3.823169966321302e-05, "loss": 0.092, "step": 62590 }, { "epoch": 2.274874627516535, "grad_norm": 1.3616483211517334, "learning_rate": 3.822729647647011e-05, "loss": 0.1034, "step": 62600 }, { "epoch": 2.275238026019333, "grad_norm": 1.2886927127838135, "learning_rate": 3.8222892719810057e-05, "loss": 0.0943, "step": 62610 }, { "epoch": 2.275601424522131, "grad_norm": 0.5466746091842651, "learning_rate": 3.82184883934226e-05, "loss": 0.1075, "step": 62620 }, { "epoch": 2.275964823024929, "grad_norm": 0.6999200582504272, "learning_rate": 3.821408349749751e-05, "loss": 0.1001, "step": 62630 }, { "epoch": 2.276328221527727, "grad_norm": 0.6271117329597473, "learning_rate": 3.820967803222458e-05, "loss": 0.0887, "step": 62640 }, { "epoch": 2.2766916200305256, "grad_norm": 1.088416337966919, "learning_rate": 3.820527199779362e-05, "loss": 0.2975, "step": 62650 }, { "epoch": 2.2770550185333236, "grad_norm": 0.5583050847053528, "learning_rate": 3.820086539439448e-05, "loss": 0.0849, "step": 62660 }, { "epoch": 2.277418417036122, "grad_norm": 0.5963543057441711, "learning_rate": 3.819645822221701e-05, "loss": 0.1082, "step": 62670 }, { "epoch": 2.27778181553892, "grad_norm": 2.868208408355713, "learning_rate": 3.819205048145113e-05, "loss": 0.0928, "step": 62680 }, { "epoch": 2.278145214041718, "grad_norm": 0.9108635187149048, "learning_rate": 3.8187642172286706e-05, "loss": 0.1155, "step": 62690 }, { "epoch": 2.2785086125445164, 
"grad_norm": 0.9071031808853149, "learning_rate": 3.8183233294913725e-05, "loss": 0.0974, "step": 62700 }, { "epoch": 2.2788720110473144, "grad_norm": 0.5449077486991882, "learning_rate": 3.817882384952212e-05, "loss": 0.0807, "step": 62710 }, { "epoch": 2.279235409550113, "grad_norm": 0.7269715666770935, "learning_rate": 3.817441383630187e-05, "loss": 0.1273, "step": 62720 }, { "epoch": 2.279598808052911, "grad_norm": 1.493605375289917, "learning_rate": 3.817000325544302e-05, "loss": 0.1112, "step": 62730 }, { "epoch": 2.279962206555709, "grad_norm": 0.6935878992080688, "learning_rate": 3.816559210713558e-05, "loss": 0.1291, "step": 62740 }, { "epoch": 2.2803256050585072, "grad_norm": 1.932387113571167, "learning_rate": 3.8161180391569625e-05, "loss": 0.0937, "step": 62750 }, { "epoch": 2.280689003561305, "grad_norm": 1.2899200916290283, "learning_rate": 3.8156768108935226e-05, "loss": 0.0894, "step": 62760 }, { "epoch": 2.2810524020641036, "grad_norm": 1.262176752090454, "learning_rate": 3.815235525942251e-05, "loss": 0.4695, "step": 62770 }, { "epoch": 2.2814158005669016, "grad_norm": 0.48227742314338684, "learning_rate": 3.8147941843221604e-05, "loss": 0.1126, "step": 62780 }, { "epoch": 2.2817791990696996, "grad_norm": 1.0351576805114746, "learning_rate": 3.814352786052266e-05, "loss": 0.1762, "step": 62790 }, { "epoch": 2.282142597572498, "grad_norm": 1.1177520751953125, "learning_rate": 3.813911331151586e-05, "loss": 0.1046, "step": 62800 }, { "epoch": 2.282505996075296, "grad_norm": 7.0832295417785645, "learning_rate": 3.8134698196391427e-05, "loss": 0.1436, "step": 62810 }, { "epoch": 2.2828693945780945, "grad_norm": 0.9384248852729797, "learning_rate": 3.8130282515339576e-05, "loss": 0.1185, "step": 62820 }, { "epoch": 2.2832327930808924, "grad_norm": 2.5718233585357666, "learning_rate": 3.812586626855057e-05, "loss": 0.1172, "step": 62830 }, { "epoch": 2.2835961915836904, "grad_norm": 0.9541994333267212, "learning_rate": 3.812144945621469e-05, "loss": 
0.1141, "step": 62840 }, { "epoch": 2.283959590086489, "grad_norm": 0.7058838605880737, "learning_rate": 3.811703207852224e-05, "loss": 0.0813, "step": 62850 }, { "epoch": 2.284322988589287, "grad_norm": 0.6324445605278015, "learning_rate": 3.811261413566354e-05, "loss": 0.1308, "step": 62860 }, { "epoch": 2.2846863870920853, "grad_norm": 0.5424672365188599, "learning_rate": 3.810819562782896e-05, "loss": 0.1055, "step": 62870 }, { "epoch": 2.2850497855948833, "grad_norm": 0.5509172677993774, "learning_rate": 3.810377655520887e-05, "loss": 0.1323, "step": 62880 }, { "epoch": 2.2854131840976817, "grad_norm": 1.111088752746582, "learning_rate": 3.8099356917993664e-05, "loss": 0.142, "step": 62890 }, { "epoch": 2.2857765826004797, "grad_norm": 0.44855383038520813, "learning_rate": 3.8094936716373784e-05, "loss": 0.0943, "step": 62900 }, { "epoch": 2.286139981103278, "grad_norm": 3.367194890975952, "learning_rate": 3.8090515950539674e-05, "loss": 2.8216, "step": 62910 }, { "epoch": 2.286503379606076, "grad_norm": 0.8625146746635437, "learning_rate": 3.80860946206818e-05, "loss": 0.1108, "step": 62920 }, { "epoch": 2.286866778108874, "grad_norm": 0.6024346351623535, "learning_rate": 3.808167272699067e-05, "loss": 0.2589, "step": 62930 }, { "epoch": 2.2872301766116725, "grad_norm": 0.3697529733181, "learning_rate": 3.8077250269656813e-05, "loss": 0.1722, "step": 62940 }, { "epoch": 2.2875935751144705, "grad_norm": 1.8003566265106201, "learning_rate": 3.807282724887077e-05, "loss": 0.1144, "step": 62950 }, { "epoch": 2.287956973617269, "grad_norm": 0.6778300404548645, "learning_rate": 3.806840366482311e-05, "loss": 0.0946, "step": 62960 }, { "epoch": 2.288320372120067, "grad_norm": 0.5251741409301758, "learning_rate": 3.806397951770444e-05, "loss": 0.1181, "step": 62970 }, { "epoch": 2.288683770622865, "grad_norm": 1.101876974105835, "learning_rate": 3.805955480770537e-05, "loss": 0.1195, "step": 62980 }, { "epoch": 2.2890471691256633, "grad_norm": 0.5283622741699219, 
"learning_rate": 3.805512953501655e-05, "loss": 0.1299, "step": 62990 }, { "epoch": 2.2894105676284613, "grad_norm": 0.3856213390827179, "learning_rate": 3.8050703699828636e-05, "loss": 0.0909, "step": 63000 }, { "epoch": 2.2894105676284613, "eval_loss": 0.32783856987953186, "eval_runtime": 179.5121, "eval_samples_per_second": 41.301, "eval_steps_per_second": 5.164, "eval_wer": 0.15972915569916676, "step": 63000 }, { "epoch": 2.2897739661312597, "grad_norm": 0.5770326256752014, "learning_rate": 3.8046277302332357e-05, "loss": 0.1017, "step": 63010 }, { "epoch": 2.2901373646340577, "grad_norm": 0.5281986594200134, "learning_rate": 3.804185034271839e-05, "loss": 0.1164, "step": 63020 }, { "epoch": 2.2905007631368557, "grad_norm": 1.011020302772522, "learning_rate": 3.803742282117751e-05, "loss": 0.0986, "step": 63030 }, { "epoch": 2.290864161639654, "grad_norm": 0.9110655784606934, "learning_rate": 3.803299473790046e-05, "loss": 0.1498, "step": 63040 }, { "epoch": 2.291227560142452, "grad_norm": 1.5773357152938843, "learning_rate": 3.8028566093078036e-05, "loss": 0.0975, "step": 63050 }, { "epoch": 2.2915909586452505, "grad_norm": 3.258551597595215, "learning_rate": 3.802413688690105e-05, "loss": 0.0939, "step": 63060 }, { "epoch": 2.2919543571480485, "grad_norm": 0.6953330039978027, "learning_rate": 3.801970711956036e-05, "loss": 0.1111, "step": 63070 }, { "epoch": 2.2923177556508465, "grad_norm": 0.8726534843444824, "learning_rate": 3.80152767912468e-05, "loss": 0.1196, "step": 63080 }, { "epoch": 2.292681154153645, "grad_norm": 0.5163739323616028, "learning_rate": 3.801084590215128e-05, "loss": 0.1048, "step": 63090 }, { "epoch": 2.293044552656443, "grad_norm": 2.2029974460601807, "learning_rate": 3.80064144524647e-05, "loss": 0.099, "step": 63100 }, { "epoch": 2.2934079511592413, "grad_norm": 0.8436546921730042, "learning_rate": 3.8001982442378004e-05, "loss": 0.0945, "step": 63110 }, { "epoch": 2.2937713496620393, "grad_norm": 0.5407220721244812, 
"learning_rate": 3.799754987208214e-05, "loss": 0.1066, "step": 63120 }, { "epoch": 2.2941347481648373, "grad_norm": 0.7019248008728027, "learning_rate": 3.7993116741768095e-05, "loss": 0.11, "step": 63130 }, { "epoch": 2.2944981466676357, "grad_norm": 0.5901986956596375, "learning_rate": 3.7988683051626886e-05, "loss": 0.119, "step": 63140 }, { "epoch": 2.2948615451704337, "grad_norm": 0.6483830809593201, "learning_rate": 3.798424880184954e-05, "loss": 0.0827, "step": 63150 }, { "epoch": 2.295224943673232, "grad_norm": 0.41235288977622986, "learning_rate": 3.7979813992627103e-05, "loss": 0.0921, "step": 63160 }, { "epoch": 2.29558834217603, "grad_norm": 3.996107339859009, "learning_rate": 3.797537862415066e-05, "loss": 0.0967, "step": 63170 }, { "epoch": 2.2959517406788286, "grad_norm": 2.476738452911377, "learning_rate": 3.7970942696611335e-05, "loss": 0.1186, "step": 63180 }, { "epoch": 2.2963151391816266, "grad_norm": 0.8279284238815308, "learning_rate": 3.7966506210200224e-05, "loss": 0.1086, "step": 63190 }, { "epoch": 2.296678537684425, "grad_norm": 5.163793087005615, "learning_rate": 3.79620691651085e-05, "loss": 0.0827, "step": 63200 }, { "epoch": 2.297041936187223, "grad_norm": 0.577820360660553, "learning_rate": 3.795763156152734e-05, "loss": 0.1126, "step": 63210 }, { "epoch": 2.297405334690021, "grad_norm": 0.29767242074012756, "learning_rate": 3.7953193399647934e-05, "loss": 0.1169, "step": 63220 }, { "epoch": 2.2977687331928194, "grad_norm": 1.0364243984222412, "learning_rate": 3.794875467966152e-05, "loss": 0.1004, "step": 63230 }, { "epoch": 2.2981321316956174, "grad_norm": 0.5417031049728394, "learning_rate": 3.794475935465031e-05, "loss": 4.3853, "step": 63240 }, { "epoch": 2.298495530198416, "grad_norm": 0.8863941431045532, "learning_rate": 3.794031957478746e-05, "loss": 0.0942, "step": 63250 }, { "epoch": 2.2988589287012138, "grad_norm": 0.6005067825317383, "learning_rate": 3.7935879237372296e-05, "loss": 0.0879, "step": 63260 }, { "epoch": 
2.2992223272040118, "grad_norm": 0.5702997446060181, "learning_rate": 3.793143834259612e-05, "loss": 0.1344, "step": 63270 }, { "epoch": 2.29958572570681, "grad_norm": 0.5447559356689453, "learning_rate": 3.7926996890650265e-05, "loss": 0.1249, "step": 63280 }, { "epoch": 2.299949124209608, "grad_norm": 0.9382325410842896, "learning_rate": 3.7922554881726125e-05, "loss": 0.1639, "step": 63290 }, { "epoch": 2.3003125227124066, "grad_norm": 1.128554344177246, "learning_rate": 3.791811231601506e-05, "loss": 0.124, "step": 63300 }, { "epoch": 2.3006759212152046, "grad_norm": 1.2001831531524658, "learning_rate": 3.7913669193708505e-05, "loss": 0.1115, "step": 63310 }, { "epoch": 2.3010393197180026, "grad_norm": 0.5141827464103699, "learning_rate": 3.790922551499789e-05, "loss": 0.1934, "step": 63320 }, { "epoch": 2.301402718220801, "grad_norm": 1.1889158487319946, "learning_rate": 3.7904781280074674e-05, "loss": 0.112, "step": 63330 }, { "epoch": 2.301766116723599, "grad_norm": 1.0070478916168213, "learning_rate": 3.7900336489130355e-05, "loss": 0.1434, "step": 63340 }, { "epoch": 2.3021295152263974, "grad_norm": 0.6805721521377563, "learning_rate": 3.789589114235643e-05, "loss": 0.088, "step": 63350 }, { "epoch": 2.3024929137291954, "grad_norm": 0.8101871013641357, "learning_rate": 3.789144523994445e-05, "loss": 0.0786, "step": 63360 }, { "epoch": 2.3028563122319934, "grad_norm": 0.5728216767311096, "learning_rate": 3.788699878208595e-05, "loss": 0.1176, "step": 63370 }, { "epoch": 2.303219710734792, "grad_norm": 6.430160999298096, "learning_rate": 3.788255176897253e-05, "loss": 0.1395, "step": 63380 }, { "epoch": 2.30358310923759, "grad_norm": 0.8273718953132629, "learning_rate": 3.78781042007958e-05, "loss": 0.1157, "step": 63390 }, { "epoch": 2.3039465077403882, "grad_norm": 0.7474293112754822, "learning_rate": 3.787365607774736e-05, "loss": 0.6462, "step": 63400 }, { "epoch": 2.304309906243186, "grad_norm": 4.311099052429199, "learning_rate": 
3.7869207400018905e-05, "loss": 0.1136, "step": 63410 }, { "epoch": 2.304673304745984, "grad_norm": 0.8261300921440125, "learning_rate": 3.7864758167802074e-05, "loss": 0.1357, "step": 63420 }, { "epoch": 2.3050367032487826, "grad_norm": 2.123488187789917, "learning_rate": 3.78603083812886e-05, "loss": 0.0907, "step": 63430 }, { "epoch": 2.3054001017515806, "grad_norm": 0.7351600527763367, "learning_rate": 3.7855858040670175e-05, "loss": 0.1308, "step": 63440 }, { "epoch": 2.305763500254379, "grad_norm": 3.070939064025879, "learning_rate": 3.785140714613859e-05, "loss": 2.963, "step": 63450 }, { "epoch": 2.306126898757177, "grad_norm": 0.4340088963508606, "learning_rate": 3.7846955697885586e-05, "loss": 0.0927, "step": 63460 }, { "epoch": 2.3064902972599755, "grad_norm": 0.6686544418334961, "learning_rate": 3.7842503696102976e-05, "loss": 0.1441, "step": 63470 }, { "epoch": 2.3068536957627734, "grad_norm": 7.206737041473389, "learning_rate": 3.7838051140982575e-05, "loss": 0.1299, "step": 63480 }, { "epoch": 2.307217094265572, "grad_norm": 0.6773508191108704, "learning_rate": 3.7833598032716225e-05, "loss": 0.1177, "step": 63490 }, { "epoch": 2.30758049276837, "grad_norm": 0.5695934295654297, "learning_rate": 3.78291443714958e-05, "loss": 0.0918, "step": 63500 }, { "epoch": 2.307943891271168, "grad_norm": 0.6884729862213135, "learning_rate": 3.782469015751319e-05, "loss": 0.1068, "step": 63510 }, { "epoch": 2.3083072897739663, "grad_norm": 0.5272583365440369, "learning_rate": 3.782023539096031e-05, "loss": 0.1021, "step": 63520 }, { "epoch": 2.3086706882767642, "grad_norm": 3.166252613067627, "learning_rate": 3.7815780072029103e-05, "loss": 0.1035, "step": 63530 }, { "epoch": 2.3090340867795627, "grad_norm": 0.47669315338134766, "learning_rate": 3.781132420091153e-05, "loss": 0.1203, "step": 63540 }, { "epoch": 2.3093974852823607, "grad_norm": 1.5463957786560059, "learning_rate": 3.780686777779958e-05, "loss": 0.0845, "step": 63550 }, { "epoch": 2.3097608837851586, 
"grad_norm": 0.9264553785324097, "learning_rate": 3.780241080288527e-05, "loss": 0.0906, "step": 63560 }, { "epoch": 2.310124282287957, "grad_norm": 0.604017436504364, "learning_rate": 3.7797953276360624e-05, "loss": 0.128, "step": 63570 }, { "epoch": 2.310487680790755, "grad_norm": 0.6354121565818787, "learning_rate": 3.779349519841771e-05, "loss": 0.1044, "step": 63580 }, { "epoch": 2.3108510792935535, "grad_norm": 0.6733710169792175, "learning_rate": 3.7789036569248606e-05, "loss": 0.1096, "step": 63590 }, { "epoch": 2.3112144777963515, "grad_norm": 0.6780581474304199, "learning_rate": 3.778457738904542e-05, "loss": 0.1325, "step": 63600 }, { "epoch": 2.3112144777963515, "eval_loss": 0.33778947591781616, "eval_runtime": 180.1308, "eval_samples_per_second": 41.159, "eval_steps_per_second": 5.146, "eval_wer": 0.15696988400167008, "step": 63600 }, { "epoch": 2.3115778762991495, "grad_norm": 0.42744994163513184, "learning_rate": 3.778011765800028e-05, "loss": 0.0901, "step": 63610 }, { "epoch": 2.311941274801948, "grad_norm": 0.5375288724899292, "learning_rate": 3.777565737630534e-05, "loss": 0.1126, "step": 63620 }, { "epoch": 2.312304673304746, "grad_norm": 0.6710574626922607, "learning_rate": 3.777119654415279e-05, "loss": 0.4501, "step": 63630 }, { "epoch": 2.3126680718075443, "grad_norm": 1.513808012008667, "learning_rate": 3.77667351617348e-05, "loss": 0.1793, "step": 63640 }, { "epoch": 2.3130314703103423, "grad_norm": 0.4935424029827118, "learning_rate": 3.776227322924364e-05, "loss": 0.0848, "step": 63650 }, { "epoch": 2.3133948688131403, "grad_norm": 1.5648393630981445, "learning_rate": 3.775781074687152e-05, "loss": 0.0965, "step": 63660 }, { "epoch": 2.3137582673159387, "grad_norm": 0.3886503279209137, "learning_rate": 3.775334771481073e-05, "loss": 0.1081, "step": 63670 }, { "epoch": 2.3141216658187367, "grad_norm": 1.0196889638900757, "learning_rate": 3.7748884133253566e-05, "loss": 0.0974, "step": 63680 }, { "epoch": 2.314485064321535, "grad_norm": 
1.0317192077636719, "learning_rate": 3.7744420002392345e-05, "loss": 0.1129, "step": 63690 }, { "epoch": 2.314848462824333, "grad_norm": 1.3416907787322998, "learning_rate": 3.773995532241941e-05, "loss": 0.0985, "step": 63700 }, { "epoch": 2.3152118613271315, "grad_norm": 1.2515931129455566, "learning_rate": 3.7735490093527126e-05, "loss": 0.1034, "step": 63710 }, { "epoch": 2.3155752598299295, "grad_norm": 0.6103869676589966, "learning_rate": 3.773102431590789e-05, "loss": 0.1153, "step": 63720 }, { "epoch": 2.3159386583327275, "grad_norm": 2.0320076942443848, "learning_rate": 3.772655798975412e-05, "loss": 0.1065, "step": 63730 }, { "epoch": 2.316302056835526, "grad_norm": 0.9758360385894775, "learning_rate": 3.772209111525824e-05, "loss": 0.1187, "step": 63740 }, { "epoch": 2.316665455338324, "grad_norm": 1.36004638671875, "learning_rate": 3.771762369261272e-05, "loss": 0.1045, "step": 63750 }, { "epoch": 2.3170288538411223, "grad_norm": 0.45251816511154175, "learning_rate": 3.771315572201004e-05, "loss": 0.1054, "step": 63760 }, { "epoch": 2.3173922523439203, "grad_norm": 3.3651912212371826, "learning_rate": 3.7708687203642724e-05, "loss": 0.1215, "step": 63770 }, { "epoch": 2.3177556508467188, "grad_norm": 0.9686463475227356, "learning_rate": 3.7704218137703284e-05, "loss": 0.1114, "step": 63780 }, { "epoch": 2.3181190493495167, "grad_norm": 0.7810651659965515, "learning_rate": 3.769974852438429e-05, "loss": 0.1284, "step": 63790 }, { "epoch": 2.3184824478523147, "grad_norm": 0.600099503993988, "learning_rate": 3.7695278363878325e-05, "loss": 0.0929, "step": 63800 }, { "epoch": 2.318845846355113, "grad_norm": 0.9034928679466248, "learning_rate": 3.769080765637798e-05, "loss": 0.099, "step": 63810 }, { "epoch": 2.319209244857911, "grad_norm": 1.4133280515670776, "learning_rate": 3.7686336402075885e-05, "loss": 0.1076, "step": 63820 }, { "epoch": 2.3195726433607096, "grad_norm": 0.6236594319343567, "learning_rate": 3.768186460116469e-05, "loss": 0.1036, "step": 
63830 }, { "epoch": 2.3199360418635075, "grad_norm": 2.116008996963501, "learning_rate": 3.7677392253837076e-05, "loss": 0.1521, "step": 63840 }, { "epoch": 2.3202994403663055, "grad_norm": 1.0746735334396362, "learning_rate": 3.767291936028574e-05, "loss": 0.5176, "step": 63850 }, { "epoch": 2.320662838869104, "grad_norm": 1.0380078554153442, "learning_rate": 3.766844592070339e-05, "loss": 0.0889, "step": 63860 }, { "epoch": 2.321026237371902, "grad_norm": 0.835041344165802, "learning_rate": 3.766397193528278e-05, "loss": 0.1305, "step": 63870 }, { "epoch": 2.3213896358747004, "grad_norm": 3.784654140472412, "learning_rate": 3.7659497404216685e-05, "loss": 0.1531, "step": 63880 }, { "epoch": 2.3217530343774984, "grad_norm": 1.1239734888076782, "learning_rate": 3.765502232769789e-05, "loss": 0.1244, "step": 63890 }, { "epoch": 2.3221164328802963, "grad_norm": 0.5810584425926208, "learning_rate": 3.7650546705919204e-05, "loss": 0.1013, "step": 63900 }, { "epoch": 2.3224798313830948, "grad_norm": 0.6790658831596375, "learning_rate": 3.7646070539073475e-05, "loss": 0.1047, "step": 63910 }, { "epoch": 2.3228432298858928, "grad_norm": 0.3619256615638733, "learning_rate": 3.7641593827353556e-05, "loss": 0.0927, "step": 63920 }, { "epoch": 2.323206628388691, "grad_norm": 2.329050064086914, "learning_rate": 3.7637116570952346e-05, "loss": 0.12, "step": 63930 }, { "epoch": 2.323570026891489, "grad_norm": 1.9159663915634155, "learning_rate": 3.763263877006273e-05, "loss": 0.1296, "step": 63940 }, { "epoch": 2.323933425394287, "grad_norm": 1.206432819366455, "learning_rate": 3.762816042487768e-05, "loss": 0.0802, "step": 63950 }, { "epoch": 2.3242968238970856, "grad_norm": 0.9730502963066101, "learning_rate": 3.762368153559012e-05, "loss": 0.1171, "step": 63960 }, { "epoch": 2.3246602223998836, "grad_norm": 0.3301490247249603, "learning_rate": 3.761920210239303e-05, "loss": 0.1111, "step": 63970 }, { "epoch": 2.325023620902682, "grad_norm": 0.580382227897644, "learning_rate": 
3.7614722125479425e-05, "loss": 0.0951, "step": 63980 }, { "epoch": 2.32538701940548, "grad_norm": 1.5714104175567627, "learning_rate": 3.761024160504232e-05, "loss": 0.1407, "step": 63990 }, { "epoch": 2.3257504179082784, "grad_norm": 0.6567360162734985, "learning_rate": 3.7605760541274784e-05, "loss": 0.0823, "step": 64000 }, { "epoch": 2.3261138164110764, "grad_norm": 1.1323597431182861, "learning_rate": 3.760127893436988e-05, "loss": 0.108, "step": 64010 }, { "epoch": 2.3264772149138744, "grad_norm": 0.9358565807342529, "learning_rate": 3.7596796784520684e-05, "loss": 0.1014, "step": 64020 }, { "epoch": 2.326840613416673, "grad_norm": 0.7020303010940552, "learning_rate": 3.759231409192034e-05, "loss": 0.098, "step": 64030 }, { "epoch": 2.327204011919471, "grad_norm": 1.7113333940505981, "learning_rate": 3.7587830856761996e-05, "loss": 0.1149, "step": 64040 }, { "epoch": 2.3275674104222692, "grad_norm": 2.211527109146118, "learning_rate": 3.75833470792388e-05, "loss": 0.085, "step": 64050 }, { "epoch": 2.327930808925067, "grad_norm": 0.6617085933685303, "learning_rate": 3.7578862759543954e-05, "loss": 0.1596, "step": 64060 }, { "epoch": 2.3282942074278656, "grad_norm": 0.6133392453193665, "learning_rate": 3.757437789787066e-05, "loss": 0.1044, "step": 64070 }, { "epoch": 2.3286576059306636, "grad_norm": 5.026115894317627, "learning_rate": 3.7569892494412175e-05, "loss": 0.1376, "step": 64080 }, { "epoch": 2.3290210044334616, "grad_norm": 1.0616756677627563, "learning_rate": 3.756540654936174e-05, "loss": 0.1029, "step": 64090 }, { "epoch": 2.32938440293626, "grad_norm": 0.9439811706542969, "learning_rate": 3.756092006291264e-05, "loss": 0.0779, "step": 64100 }, { "epoch": 2.329747801439058, "grad_norm": 1.4717971086502075, "learning_rate": 3.755643303525819e-05, "loss": 0.0926, "step": 64110 }, { "epoch": 2.3301111999418564, "grad_norm": 0.3808611035346985, "learning_rate": 3.7551945466591716e-05, "loss": 0.1099, "step": 64120 }, { "epoch": 2.3304745984446544, 
"grad_norm": 1.0548149347305298, "learning_rate": 3.754745735710657e-05, "loss": 0.1072, "step": 64130 }, { "epoch": 2.3308379969474524, "grad_norm": 2.6092560291290283, "learning_rate": 3.7542968706996136e-05, "loss": 0.1365, "step": 64140 }, { "epoch": 2.331201395450251, "grad_norm": 1.3453460931777954, "learning_rate": 3.7538479516453805e-05, "loss": 0.0904, "step": 64150 }, { "epoch": 2.331564793953049, "grad_norm": 0.8132860660552979, "learning_rate": 3.7533989785673e-05, "loss": 0.0848, "step": 64160 }, { "epoch": 2.3319281924558473, "grad_norm": 1.1206045150756836, "learning_rate": 3.7529499514847175e-05, "loss": 0.1255, "step": 64170 }, { "epoch": 2.3322915909586452, "grad_norm": 1.248970866203308, "learning_rate": 3.7525008704169795e-05, "loss": 0.0924, "step": 64180 }, { "epoch": 2.3326549894614432, "grad_norm": 0.9841907620429993, "learning_rate": 3.752051735383436e-05, "loss": 0.1479, "step": 64190 }, { "epoch": 2.3330183879642417, "grad_norm": 2.1250979900360107, "learning_rate": 3.7516025464034376e-05, "loss": 0.0934, "step": 64200 }, { "epoch": 2.3330183879642417, "eval_loss": 0.3528118431568146, "eval_runtime": 179.4186, "eval_samples_per_second": 41.322, "eval_steps_per_second": 5.167, "eval_wer": 0.15433768402708442, "step": 64200 }, { "epoch": 2.3333817864670396, "grad_norm": 0.7961970567703247, "learning_rate": 3.7511533034963384e-05, "loss": 0.0977, "step": 64210 }, { "epoch": 2.333745184969838, "grad_norm": 0.8082739114761353, "learning_rate": 3.750704006681495e-05, "loss": 0.1188, "step": 64220 }, { "epoch": 2.334108583472636, "grad_norm": 1.3309545516967773, "learning_rate": 3.7502546559782656e-05, "loss": 0.1256, "step": 64230 }, { "epoch": 2.334471981975434, "grad_norm": 0.8950253129005432, "learning_rate": 3.749805251406013e-05, "loss": 0.097, "step": 64240 }, { "epoch": 2.3348353804782325, "grad_norm": 1.537735939025879, "learning_rate": 3.7493557929840974e-05, "loss": 0.1024, "step": 64250 }, { "epoch": 2.3351987789810305, "grad_norm": 
0.9132232666015625, "learning_rate": 3.748906280731887e-05, "loss": 0.1118, "step": 64260 }, { "epoch": 2.335562177483829, "grad_norm": 0.540766179561615, "learning_rate": 3.7484567146687485e-05, "loss": 0.1203, "step": 64270 }, { "epoch": 2.335925575986627, "grad_norm": 0.5811611413955688, "learning_rate": 3.748007094814051e-05, "loss": 0.1067, "step": 64280 }, { "epoch": 2.3362889744894253, "grad_norm": 0.715090274810791, "learning_rate": 3.747557421187169e-05, "loss": 0.1685, "step": 64290 }, { "epoch": 2.3366523729922233, "grad_norm": 0.6315838694572449, "learning_rate": 3.747107693807477e-05, "loss": 0.1196, "step": 64300 }, { "epoch": 2.3370157714950213, "grad_norm": 1.2922756671905518, "learning_rate": 3.7466579126943514e-05, "loss": 0.0906, "step": 64310 }, { "epoch": 2.3373791699978197, "grad_norm": 9.100321769714355, "learning_rate": 3.746208077867172e-05, "loss": 0.1205, "step": 64320 }, { "epoch": 2.3377425685006177, "grad_norm": 0.5777522921562195, "learning_rate": 3.74575818934532e-05, "loss": 0.0787, "step": 64330 }, { "epoch": 2.338105967003416, "grad_norm": 0.5407727360725403, "learning_rate": 3.74530824714818e-05, "loss": 0.0998, "step": 64340 }, { "epoch": 2.338469365506214, "grad_norm": 0.6790062785148621, "learning_rate": 3.744858251295139e-05, "loss": 0.0855, "step": 64350 }, { "epoch": 2.3388327640090125, "grad_norm": 0.552946925163269, "learning_rate": 3.744408201805585e-05, "loss": 0.0864, "step": 64360 }, { "epoch": 2.3391961625118105, "grad_norm": 0.36451128125190735, "learning_rate": 3.743958098698909e-05, "loss": 0.1093, "step": 64370 }, { "epoch": 2.3395595610146085, "grad_norm": 0.834068775177002, "learning_rate": 3.743507941994505e-05, "loss": 0.1181, "step": 64380 }, { "epoch": 2.339922959517407, "grad_norm": 1.2418774366378784, "learning_rate": 3.743057731711768e-05, "loss": 0.1128, "step": 64390 }, { "epoch": 2.340286358020205, "grad_norm": 1.0964419841766357, "learning_rate": 3.7426074678700964e-05, "loss": 0.1012, "step": 64400 
}, { "epoch": 2.3406497565230033, "grad_norm": 0.7740904092788696, "learning_rate": 3.74215715048889e-05, "loss": 0.0835, "step": 64410 }, { "epoch": 2.3410131550258013, "grad_norm": 0.6739581823348999, "learning_rate": 3.741706779587551e-05, "loss": 0.1302, "step": 64420 }, { "epoch": 2.3413765535285993, "grad_norm": 1.141020655632019, "learning_rate": 3.7412563551854854e-05, "loss": 0.1881, "step": 64430 }, { "epoch": 2.3417399520313977, "grad_norm": 0.7994565367698669, "learning_rate": 3.7408058773020994e-05, "loss": 0.1099, "step": 64440 }, { "epoch": 2.3421033505341957, "grad_norm": 0.7365929484367371, "learning_rate": 3.740355345956804e-05, "loss": 0.088, "step": 64450 }, { "epoch": 2.342466749036994, "grad_norm": 0.5093470215797424, "learning_rate": 3.7399047611690095e-05, "loss": 0.1143, "step": 64460 }, { "epoch": 2.342830147539792, "grad_norm": 1.3585693836212158, "learning_rate": 3.7394541229581295e-05, "loss": 0.12, "step": 64470 }, { "epoch": 2.34319354604259, "grad_norm": 0.9446144700050354, "learning_rate": 3.739003431343583e-05, "loss": 2.1812, "step": 64480 }, { "epoch": 2.3435569445453885, "grad_norm": 0.4501352310180664, "learning_rate": 3.738552686344786e-05, "loss": 0.1086, "step": 64490 }, { "epoch": 2.3439203430481865, "grad_norm": 0.5260722637176514, "learning_rate": 3.73810188798116e-05, "loss": 0.1806, "step": 64500 }, { "epoch": 2.344283741550985, "grad_norm": 2.6056125164031982, "learning_rate": 3.73765103627213e-05, "loss": 0.1072, "step": 64510 }, { "epoch": 2.344647140053783, "grad_norm": 0.6981383562088013, "learning_rate": 3.73720013123712e-05, "loss": 0.1415, "step": 64520 }, { "epoch": 2.345010538556581, "grad_norm": 0.5633025169372559, "learning_rate": 3.7367491728955585e-05, "loss": 0.0986, "step": 64530 }, { "epoch": 2.3453739370593794, "grad_norm": 0.5558316111564636, "learning_rate": 3.7362981612668745e-05, "loss": 0.1791, "step": 64540 }, { "epoch": 2.3457373355621773, "grad_norm": 0.543397068977356, "learning_rate": 
3.735847096370503e-05, "loss": 0.1002, "step": 64550 }, { "epoch": 2.3461007340649758, "grad_norm": 0.5885327458381653, "learning_rate": 3.7353959782258755e-05, "loss": 0.0944, "step": 64560 }, { "epoch": 2.3464641325677738, "grad_norm": 0.9266073107719421, "learning_rate": 3.7349448068524325e-05, "loss": 1.9036, "step": 64570 }, { "epoch": 2.346827531070572, "grad_norm": 0.4478204548358917, "learning_rate": 3.7344935822696116e-05, "loss": 0.1028, "step": 64580 }, { "epoch": 2.34719092957337, "grad_norm": 1.6678454875946045, "learning_rate": 3.7340423044968534e-05, "loss": 0.1299, "step": 64590 }, { "epoch": 2.347554328076168, "grad_norm": 2.0704760551452637, "learning_rate": 3.733590973553604e-05, "loss": 0.086, "step": 64600 }, { "epoch": 2.3479177265789666, "grad_norm": 1.1192750930786133, "learning_rate": 3.733139589459308e-05, "loss": 0.0977, "step": 64610 }, { "epoch": 2.3482811250817646, "grad_norm": 1.0189874172210693, "learning_rate": 3.732688152233415e-05, "loss": 0.0966, "step": 64620 }, { "epoch": 2.348644523584563, "grad_norm": 0.8110418319702148, "learning_rate": 3.7322366618953755e-05, "loss": 0.1015, "step": 64630 }, { "epoch": 2.349007922087361, "grad_norm": 0.7661551833152771, "learning_rate": 3.731785118464642e-05, "loss": 0.1056, "step": 64640 }, { "epoch": 2.3493713205901594, "grad_norm": 0.6133613586425781, "learning_rate": 3.731333521960672e-05, "loss": 0.0862, "step": 64650 }, { "epoch": 2.3497347190929574, "grad_norm": 1.6493825912475586, "learning_rate": 3.73088187240292e-05, "loss": 0.1103, "step": 64660 }, { "epoch": 2.3500981175957554, "grad_norm": 0.7170090675354004, "learning_rate": 3.7304301698108486e-05, "loss": 1.3385, "step": 64670 }, { "epoch": 2.350461516098554, "grad_norm": 0.3917316794395447, "learning_rate": 3.7299784142039186e-05, "loss": 0.0838, "step": 64680 }, { "epoch": 2.350824914601352, "grad_norm": 0.85912024974823, "learning_rate": 3.729526605601595e-05, "loss": 0.1025, "step": 64690 }, { "epoch": 2.35118831310415, 
"grad_norm": 0.3880862295627594, "learning_rate": 3.729074744023345e-05, "loss": 0.0867, "step": 64700 }, { "epoch": 2.351551711606948, "grad_norm": 0.5538926124572754, "learning_rate": 3.728622829488637e-05, "loss": 0.1349, "step": 64710 }, { "epoch": 2.351915110109746, "grad_norm": 0.4051951766014099, "learning_rate": 3.7281708620169424e-05, "loss": 0.1023, "step": 64720 }, { "epoch": 2.3522785086125446, "grad_norm": 3.5087623596191406, "learning_rate": 3.7277188416277354e-05, "loss": 0.1052, "step": 64730 }, { "epoch": 2.3526419071153426, "grad_norm": 0.7415525317192078, "learning_rate": 3.727266768340492e-05, "loss": 0.1682, "step": 64740 }, { "epoch": 2.353005305618141, "grad_norm": 0.41486695408821106, "learning_rate": 3.7268146421746895e-05, "loss": 0.123, "step": 64750 }, { "epoch": 2.353368704120939, "grad_norm": 1.6847058534622192, "learning_rate": 3.726362463149811e-05, "loss": 0.108, "step": 64760 }, { "epoch": 2.353732102623737, "grad_norm": 0.6038152575492859, "learning_rate": 3.7259102312853356e-05, "loss": 0.1165, "step": 64770 }, { "epoch": 2.3540955011265354, "grad_norm": 1.413368821144104, "learning_rate": 3.7254579466007505e-05, "loss": 0.0947, "step": 64780 }, { "epoch": 2.3544588996293334, "grad_norm": 0.9277619123458862, "learning_rate": 3.7250056091155427e-05, "loss": 0.1269, "step": 64790 }, { "epoch": 2.354822298132132, "grad_norm": 1.1914100646972656, "learning_rate": 3.724553218849202e-05, "loss": 0.0882, "step": 64800 }, { "epoch": 2.354822298132132, "eval_loss": 0.34669631719589233, "eval_runtime": 180.4215, "eval_samples_per_second": 41.093, "eval_steps_per_second": 5.138, "eval_wer": 0.15537241091364568, "step": 64800 }, { "epoch": 2.35518569663493, "grad_norm": 0.8063227534294128, "learning_rate": 3.7241007758212195e-05, "loss": 0.0882, "step": 64810 }, { "epoch": 2.355549095137728, "grad_norm": 8.477306365966797, "learning_rate": 3.723648280051091e-05, "loss": 0.1379, "step": 64820 }, { "epoch": 2.3559124936405262, "grad_norm": 
1.4196289777755737, "learning_rate": 3.723195731558311e-05, "loss": 0.0822, "step": 64830 }, { "epoch": 2.3562758921433242, "grad_norm": 0.8000519275665283, "learning_rate": 3.722743130362379e-05, "loss": 0.0988, "step": 64840 }, { "epoch": 2.3566392906461227, "grad_norm": 5.073339939117432, "learning_rate": 3.722290476482796e-05, "loss": 0.0932, "step": 64850 }, { "epoch": 2.3570026891489206, "grad_norm": 0.8329682946205139, "learning_rate": 3.7218377699390666e-05, "loss": 0.091, "step": 64860 }, { "epoch": 2.357366087651719, "grad_norm": 33.570316314697266, "learning_rate": 3.7213850107506936e-05, "loss": 2.4337, "step": 64870 }, { "epoch": 2.357729486154517, "grad_norm": 0.5240826606750488, "learning_rate": 3.720932198937187e-05, "loss": 0.118, "step": 64880 }, { "epoch": 2.358092884657315, "grad_norm": 0.7187747955322266, "learning_rate": 3.720479334518056e-05, "loss": 0.1019, "step": 64890 }, { "epoch": 2.3584562831601135, "grad_norm": 5.0828351974487305, "learning_rate": 3.720026417512812e-05, "loss": 0.0869, "step": 64900 }, { "epoch": 2.3588196816629114, "grad_norm": 0.688025176525116, "learning_rate": 3.719573447940972e-05, "loss": 0.1029, "step": 64910 }, { "epoch": 2.35918308016571, "grad_norm": 1.203792691230774, "learning_rate": 3.71912042582205e-05, "loss": 0.1196, "step": 64920 }, { "epoch": 2.359546478668508, "grad_norm": 0.5731534361839294, "learning_rate": 3.718667351175567e-05, "loss": 0.0968, "step": 64930 }, { "epoch": 2.3599098771713063, "grad_norm": 1.7429757118225098, "learning_rate": 3.718214224021044e-05, "loss": 0.104, "step": 64940 }, { "epoch": 2.3602732756741043, "grad_norm": 1.9315886497497559, "learning_rate": 3.7177610443780045e-05, "loss": 0.1186, "step": 64950 }, { "epoch": 2.3606366741769023, "grad_norm": 0.8713351488113403, "learning_rate": 3.717307812265974e-05, "loss": 0.0962, "step": 64960 }, { "epoch": 2.3610000726797007, "grad_norm": 1.1917448043823242, "learning_rate": 3.716854527704482e-05, "loss": 0.1117, "step": 64970 
}, { "epoch": 2.3613634711824987, "grad_norm": 1.092644214630127, "learning_rate": 3.716401190713057e-05, "loss": 0.0889, "step": 64980 }, { "epoch": 2.361726869685297, "grad_norm": 1.172472357749939, "learning_rate": 3.715947801311233e-05, "loss": 0.1126, "step": 64990 }, { "epoch": 2.362090268188095, "grad_norm": 1.0360251665115356, "learning_rate": 3.715494359518545e-05, "loss": 0.104, "step": 65000 }, { "epoch": 2.362453666690893, "grad_norm": 0.88475102186203, "learning_rate": 3.715040865354529e-05, "loss": 0.0905, "step": 65010 }, { "epoch": 2.3628170651936915, "grad_norm": 2.172114849090576, "learning_rate": 3.714587318838726e-05, "loss": 0.1047, "step": 65020 }, { "epoch": 2.3631804636964895, "grad_norm": 1.0699172019958496, "learning_rate": 3.7141337199906766e-05, "loss": 0.1052, "step": 65030 }, { "epoch": 2.363543862199288, "grad_norm": 0.365556925535202, "learning_rate": 3.713680068829925e-05, "loss": 0.1084, "step": 65040 }, { "epoch": 2.363907260702086, "grad_norm": 0.6626974940299988, "learning_rate": 3.713226365376018e-05, "loss": 0.0883, "step": 65050 }, { "epoch": 2.364270659204884, "grad_norm": 1.7596914768218994, "learning_rate": 3.7127726096485026e-05, "loss": 0.0966, "step": 65060 }, { "epoch": 2.3646340577076823, "grad_norm": 0.5741199254989624, "learning_rate": 3.712318801666932e-05, "loss": 0.1532, "step": 65070 }, { "epoch": 2.3649974562104803, "grad_norm": 1.754315733909607, "learning_rate": 3.711864941450856e-05, "loss": 0.1066, "step": 65080 }, { "epoch": 2.3653608547132787, "grad_norm": 0.7265182137489319, "learning_rate": 3.711411029019833e-05, "loss": 0.1318, "step": 65090 }, { "epoch": 2.3657242532160767, "grad_norm": 0.5546099543571472, "learning_rate": 3.7109570643934185e-05, "loss": 0.1076, "step": 65100 }, { "epoch": 2.3660876517188747, "grad_norm": 0.5260456800460815, "learning_rate": 3.7105030475911716e-05, "loss": 0.0945, "step": 65110 }, { "epoch": 2.366451050221673, "grad_norm": 0.733099639415741, "learning_rate": 
3.710048978632657e-05, "loss": 0.0959, "step": 65120 }, { "epoch": 2.366814448724471, "grad_norm": 0.7349701523780823, "learning_rate": 3.709594857537436e-05, "loss": 0.0935, "step": 65130 }, { "epoch": 2.3671778472272695, "grad_norm": 1.6328225135803223, "learning_rate": 3.7091406843250774e-05, "loss": 0.158, "step": 65140 }, { "epoch": 2.3675412457300675, "grad_norm": 0.4439161717891693, "learning_rate": 3.7086864590151484e-05, "loss": 0.0965, "step": 65150 }, { "epoch": 2.367904644232866, "grad_norm": 1.500626564025879, "learning_rate": 3.70823218162722e-05, "loss": 0.0838, "step": 65160 }, { "epoch": 2.368268042735664, "grad_norm": 0.5546636581420898, "learning_rate": 3.7077778521808656e-05, "loss": 0.1117, "step": 65170 }, { "epoch": 2.368631441238462, "grad_norm": 2.3335354328155518, "learning_rate": 3.707323470695662e-05, "loss": 0.5259, "step": 65180 }, { "epoch": 2.3689948397412603, "grad_norm": 1.3475418090820312, "learning_rate": 3.706869037191185e-05, "loss": 0.1113, "step": 65190 }, { "epoch": 2.3693582382440583, "grad_norm": 1.5157225131988525, "learning_rate": 3.706414551687015e-05, "loss": 0.1229, "step": 65200 }, { "epoch": 2.3697216367468568, "grad_norm": 0.707976758480072, "learning_rate": 3.7059600142027354e-05, "loss": 0.1022, "step": 65210 }, { "epoch": 2.3700850352496547, "grad_norm": 0.48478442430496216, "learning_rate": 3.7055054247579285e-05, "loss": 0.1455, "step": 65220 }, { "epoch": 2.370448433752453, "grad_norm": 1.4668298959732056, "learning_rate": 3.7050507833721824e-05, "loss": 0.1318, "step": 65230 }, { "epoch": 2.370811832255251, "grad_norm": 0.6836544275283813, "learning_rate": 3.704596090065085e-05, "loss": 0.1028, "step": 65240 }, { "epoch": 2.371175230758049, "grad_norm": 0.5317667722702026, "learning_rate": 3.70414134485623e-05, "loss": 0.082, "step": 65250 }, { "epoch": 2.3715386292608476, "grad_norm": 0.5413720607757568, "learning_rate": 3.703686547765208e-05, "loss": 0.0988, "step": 65260 }, { "epoch": 2.3719020277636456, 
"grad_norm": 2.2720227241516113, "learning_rate": 3.703231698811614e-05, "loss": 0.1121, "step": 65270 }, { "epoch": 2.372265426266444, "grad_norm": 1.079412579536438, "learning_rate": 3.7027767980150485e-05, "loss": 0.1999, "step": 65280 }, { "epoch": 2.372628824769242, "grad_norm": 2.772294282913208, "learning_rate": 3.70232184539511e-05, "loss": 0.1109, "step": 65290 }, { "epoch": 2.37299222327204, "grad_norm": 1.1685398817062378, "learning_rate": 3.701866840971401e-05, "loss": 0.0708, "step": 65300 }, { "epoch": 2.3733556217748384, "grad_norm": 1.359842300415039, "learning_rate": 3.701411784763526e-05, "loss": 0.074, "step": 65310 }, { "epoch": 2.3737190202776364, "grad_norm": 0.42569172382354736, "learning_rate": 3.700956676791092e-05, "loss": 0.1001, "step": 65320 }, { "epoch": 2.374082418780435, "grad_norm": 0.6070738434791565, "learning_rate": 3.700501517073707e-05, "loss": 0.1083, "step": 65330 }, { "epoch": 2.374445817283233, "grad_norm": 5.960649490356445, "learning_rate": 3.700046305630984e-05, "loss": 0.1321, "step": 65340 }, { "epoch": 2.3748092157860308, "grad_norm": 0.613503098487854, "learning_rate": 3.699591042482536e-05, "loss": 0.1093, "step": 65350 }, { "epoch": 2.375172614288829, "grad_norm": 0.5209415555000305, "learning_rate": 3.699135727647977e-05, "loss": 0.103, "step": 65360 }, { "epoch": 2.375536012791627, "grad_norm": 0.7532001733779907, "learning_rate": 3.698680361146926e-05, "loss": 0.1061, "step": 65370 }, { "epoch": 2.3758994112944256, "grad_norm": 0.7915641665458679, "learning_rate": 3.6982249429990035e-05, "loss": 0.0951, "step": 65380 }, { "epoch": 2.3762628097972236, "grad_norm": 0.6081142425537109, "learning_rate": 3.697769473223832e-05, "loss": 0.115, "step": 65390 }, { "epoch": 2.3766262083000216, "grad_norm": 6.751429080963135, "learning_rate": 3.697313951841035e-05, "loss": 0.1017, "step": 65400 }, { "epoch": 2.3766262083000216, "eval_loss": 0.341545969247818, "eval_runtime": 180.2661, "eval_samples_per_second": 41.128, 
"eval_steps_per_second": 5.142, "eval_wer": 0.15476428195400005, "step": 65400 }, { "epoch": 2.37698960680282, "grad_norm": 0.7073554992675781, "learning_rate": 3.69685837887024e-05, "loss": 0.0784, "step": 65410 }, { "epoch": 2.377353005305618, "grad_norm": 1.2818964719772339, "learning_rate": 3.696402754331076e-05, "loss": 0.1119, "step": 65420 }, { "epoch": 2.3777164038084164, "grad_norm": 1.005615234375, "learning_rate": 3.695947078243174e-05, "loss": 0.1153, "step": 65430 }, { "epoch": 2.3780798023112144, "grad_norm": 0.8593710660934448, "learning_rate": 3.695491350626168e-05, "loss": 0.0793, "step": 65440 }, { "epoch": 2.378443200814013, "grad_norm": 2.337388038635254, "learning_rate": 3.695035571499692e-05, "loss": 0.097, "step": 65450 }, { "epoch": 2.378806599316811, "grad_norm": 0.9329900741577148, "learning_rate": 3.694579740883387e-05, "loss": 0.1108, "step": 65460 }, { "epoch": 2.379169997819609, "grad_norm": 0.7032762765884399, "learning_rate": 3.69412385879689e-05, "loss": 0.1192, "step": 65470 }, { "epoch": 2.3795333963224072, "grad_norm": 0.7048949599266052, "learning_rate": 3.693667925259845e-05, "loss": 0.0932, "step": 65480 }, { "epoch": 2.379896794825205, "grad_norm": 0.8367437124252319, "learning_rate": 3.693211940291896e-05, "loss": 0.139, "step": 65490 }, { "epoch": 2.3802601933280036, "grad_norm": 0.9763396978378296, "learning_rate": 3.69275590391269e-05, "loss": 0.0914, "step": 65500 }, { "epoch": 2.3806235918308016, "grad_norm": 1.1304420232772827, "learning_rate": 3.6922998161418764e-05, "loss": 0.0912, "step": 65510 }, { "epoch": 2.3809869903336, "grad_norm": 4.609717845916748, "learning_rate": 3.691843676999105e-05, "loss": 0.099, "step": 65520 }, { "epoch": 2.381350388836398, "grad_norm": 1.2089684009552002, "learning_rate": 3.6913874865040307e-05, "loss": 0.1087, "step": 65530 }, { "epoch": 2.381713787339196, "grad_norm": 0.7825998663902283, "learning_rate": 3.690931244676309e-05, "loss": 0.1141, "step": 65540 }, { "epoch": 
2.3820771858419945, "grad_norm": 0.3670007586479187, "learning_rate": 3.690474951535597e-05, "loss": 0.0831, "step": 65550 }, { "epoch": 2.3824405843447924, "grad_norm": 0.8454808592796326, "learning_rate": 3.6900186071015545e-05, "loss": 2.7237, "step": 65560 }, { "epoch": 2.382803982847591, "grad_norm": 0.6918748617172241, "learning_rate": 3.689562211393845e-05, "loss": 0.4282, "step": 65570 }, { "epoch": 2.383167381350389, "grad_norm": 0.8183717727661133, "learning_rate": 3.6891057644321326e-05, "loss": 0.0964, "step": 65580 }, { "epoch": 2.383530779853187, "grad_norm": 1.358555793762207, "learning_rate": 3.688649266236083e-05, "loss": 0.1215, "step": 65590 }, { "epoch": 2.3838941783559853, "grad_norm": 0.7757040858268738, "learning_rate": 3.688192716825366e-05, "loss": 0.0865, "step": 65600 }, { "epoch": 2.3842575768587833, "grad_norm": 2.875025510787964, "learning_rate": 3.687736116219652e-05, "loss": 0.0834, "step": 65610 }, { "epoch": 2.3846209753615817, "grad_norm": 0.5162243247032166, "learning_rate": 3.6872794644386156e-05, "loss": 0.1043, "step": 65620 }, { "epoch": 2.3849843738643797, "grad_norm": 0.7602340579032898, "learning_rate": 3.68682276150193e-05, "loss": 0.1051, "step": 65630 }, { "epoch": 2.3853477723671777, "grad_norm": 0.4563780128955841, "learning_rate": 3.686366007429276e-05, "loss": 0.1144, "step": 65640 }, { "epoch": 2.385711170869976, "grad_norm": 1.0391710996627808, "learning_rate": 3.685909202240331e-05, "loss": 0.0894, "step": 65650 }, { "epoch": 2.386074569372774, "grad_norm": 0.5064871907234192, "learning_rate": 3.685452345954778e-05, "loss": 0.0882, "step": 65660 }, { "epoch": 2.3864379678755725, "grad_norm": 1.3617416620254517, "learning_rate": 3.684995438592301e-05, "loss": 0.1068, "step": 65670 }, { "epoch": 2.3868013663783705, "grad_norm": 0.7488900423049927, "learning_rate": 3.684538480172587e-05, "loss": 0.1205, "step": 65680 }, { "epoch": 2.3871647648811685, "grad_norm": 0.4754915237426758, "learning_rate": 
3.684081470715325e-05, "loss": 0.1073, "step": 65690 }, { "epoch": 2.387528163383967, "grad_norm": 1.4789927005767822, "learning_rate": 3.6836244102402053e-05, "loss": 0.1681, "step": 65700 }, { "epoch": 2.387891561886765, "grad_norm": 0.6496606469154358, "learning_rate": 3.68316729876692e-05, "loss": 0.0993, "step": 65710 }, { "epoch": 2.3882549603895633, "grad_norm": 0.8786084651947021, "learning_rate": 3.6827101363151676e-05, "loss": 0.1326, "step": 65720 }, { "epoch": 2.3886183588923613, "grad_norm": 0.7775259613990784, "learning_rate": 3.682252922904641e-05, "loss": 0.1014, "step": 65730 }, { "epoch": 2.3889817573951597, "grad_norm": 1.226577877998352, "learning_rate": 3.681795658555044e-05, "loss": 0.1135, "step": 65740 }, { "epoch": 2.3893451558979577, "grad_norm": 0.6108711957931519, "learning_rate": 3.681338343286077e-05, "loss": 0.1047, "step": 65750 }, { "epoch": 2.3897085544007557, "grad_norm": 1.8602646589279175, "learning_rate": 3.6808809771174435e-05, "loss": 0.0888, "step": 65760 }, { "epoch": 2.390071952903554, "grad_norm": 0.9307143092155457, "learning_rate": 3.6804235600688503e-05, "loss": 0.1109, "step": 65770 }, { "epoch": 2.390435351406352, "grad_norm": 0.7531790733337402, "learning_rate": 3.679966092160005e-05, "loss": 0.1228, "step": 65780 }, { "epoch": 2.3907987499091505, "grad_norm": 0.608249843120575, "learning_rate": 3.679508573410621e-05, "loss": 0.1062, "step": 65790 }, { "epoch": 2.3911621484119485, "grad_norm": 1.1337485313415527, "learning_rate": 3.679051003840408e-05, "loss": 0.5558, "step": 65800 }, { "epoch": 2.391525546914747, "grad_norm": 0.5861150622367859, "learning_rate": 3.678593383469083e-05, "loss": 0.1104, "step": 65810 }, { "epoch": 2.391888945417545, "grad_norm": 0.3788084089756012, "learning_rate": 3.678135712316362e-05, "loss": 0.1413, "step": 65820 }, { "epoch": 2.392252343920343, "grad_norm": 2.359208106994629, "learning_rate": 3.6776779904019656e-05, "loss": 0.1204, "step": 65830 }, { "epoch": 2.3926157424231413, 
"grad_norm": 0.5361478328704834, "learning_rate": 3.677220217745614e-05, "loss": 0.1073, "step": 65840 }, { "epoch": 2.3929791409259393, "grad_norm": 1.1404966115951538, "learning_rate": 3.676762394367032e-05, "loss": 0.0842, "step": 65850 }, { "epoch": 2.3933425394287378, "grad_norm": 0.6131421327590942, "learning_rate": 3.676304520285946e-05, "loss": 0.0872, "step": 65860 }, { "epoch": 2.3937059379315357, "grad_norm": 0.7355049848556519, "learning_rate": 3.675846595522082e-05, "loss": 0.1181, "step": 65870 }, { "epoch": 2.3940693364343337, "grad_norm": 0.7013423442840576, "learning_rate": 3.675388620095174e-05, "loss": 0.0889, "step": 65880 }, { "epoch": 2.394432734937132, "grad_norm": 0.5543515086174011, "learning_rate": 3.674930594024951e-05, "loss": 0.1213, "step": 65890 }, { "epoch": 2.39479613343993, "grad_norm": 0.709343671798706, "learning_rate": 3.674472517331149e-05, "loss": 0.0912, "step": 65900 }, { "epoch": 2.3951595319427286, "grad_norm": 0.6905022859573364, "learning_rate": 3.674014390033506e-05, "loss": 0.6845, "step": 65910 }, { "epoch": 2.3955229304455266, "grad_norm": 0.6566099524497986, "learning_rate": 3.6735562121517593e-05, "loss": 0.1347, "step": 65920 }, { "epoch": 2.3958863289483245, "grad_norm": 1.4284336566925049, "learning_rate": 3.673097983705651e-05, "loss": 0.1064, "step": 65930 }, { "epoch": 2.396249727451123, "grad_norm": 0.9759535193443298, "learning_rate": 3.672639704714925e-05, "loss": 0.149, "step": 65940 }, { "epoch": 2.396613125953921, "grad_norm": 1.247986078262329, "learning_rate": 3.6721813751993255e-05, "loss": 0.1248, "step": 65950 }, { "epoch": 2.3969765244567194, "grad_norm": 0.7816616296768188, "learning_rate": 3.671722995178603e-05, "loss": 0.0935, "step": 65960 }, { "epoch": 2.3973399229595174, "grad_norm": 2.142498016357422, "learning_rate": 3.671264564672503e-05, "loss": 0.1141, "step": 65970 }, { "epoch": 2.3977033214623154, "grad_norm": 0.7998883724212646, "learning_rate": 3.670806083700782e-05, "loss": 0.0958, 
"step": 65980 }, { "epoch": 2.398066719965114, "grad_norm": 1.2408504486083984, "learning_rate": 3.6703475522831924e-05, "loss": 0.1669, "step": 65990 }, { "epoch": 2.3984301184679118, "grad_norm": 0.7468869686126709, "learning_rate": 3.669888970439491e-05, "loss": 0.0939, "step": 66000 }, { "epoch": 2.3984301184679118, "eval_loss": 0.3319300711154938, "eval_runtime": 179.03, "eval_samples_per_second": 41.412, "eval_steps_per_second": 5.178, "eval_wer": 0.1522319240474159, "step": 66000 }, { "epoch": 2.39879351697071, "grad_norm": 0.3617503046989441, "learning_rate": 3.669430338189436e-05, "loss": 0.0804, "step": 66010 }, { "epoch": 2.399156915473508, "grad_norm": 1.2790522575378418, "learning_rate": 3.668971655552788e-05, "loss": 0.1073, "step": 66020 }, { "epoch": 2.3995203139763066, "grad_norm": 0.5524618029594421, "learning_rate": 3.668512922549312e-05, "loss": 0.1024, "step": 66030 }, { "epoch": 2.3998837124791046, "grad_norm": 0.6617368459701538, "learning_rate": 3.6680541391987706e-05, "loss": 0.1241, "step": 66040 }, { "epoch": 2.4002471109819026, "grad_norm": 1.515463948249817, "learning_rate": 3.667595305520933e-05, "loss": 0.0991, "step": 66050 }, { "epoch": 2.400610509484701, "grad_norm": 1.0713670253753662, "learning_rate": 3.667136421535567e-05, "loss": 0.0983, "step": 66060 }, { "epoch": 2.400973907987499, "grad_norm": 0.4194028973579407, "learning_rate": 3.666677487262446e-05, "loss": 0.1354, "step": 66070 }, { "epoch": 2.4013373064902974, "grad_norm": 1.1584357023239136, "learning_rate": 3.6662185027213436e-05, "loss": 0.1073, "step": 66080 }, { "epoch": 2.4017007049930954, "grad_norm": 0.9621077179908752, "learning_rate": 3.6657594679320346e-05, "loss": 0.1, "step": 66090 }, { "epoch": 2.402064103495894, "grad_norm": 0.8532549738883972, "learning_rate": 3.665300382914298e-05, "loss": 0.0972, "step": 66100 }, { "epoch": 2.402427501998692, "grad_norm": 0.44833171367645264, "learning_rate": 3.664841247687914e-05, "loss": 0.094, "step": 66110 }, { 
"epoch": 2.40279090050149, "grad_norm": 0.8976952433586121, "learning_rate": 3.6643820622726654e-05, "loss": 0.1537, "step": 66120 }, { "epoch": 2.4031542990042882, "grad_norm": 1.280044674873352, "learning_rate": 3.663922826688336e-05, "loss": 0.1071, "step": 66130 }, { "epoch": 2.403517697507086, "grad_norm": 1.037636160850525, "learning_rate": 3.6634635409547144e-05, "loss": 0.1271, "step": 66140 }, { "epoch": 2.4038810960098846, "grad_norm": 0.6089548468589783, "learning_rate": 3.663004205091588e-05, "loss": 0.106, "step": 66150 }, { "epoch": 2.4042444945126826, "grad_norm": 0.5719799995422363, "learning_rate": 3.662544819118748e-05, "loss": 0.1048, "step": 66160 }, { "epoch": 2.4046078930154806, "grad_norm": 1.0621087551116943, "learning_rate": 3.662131328915747e-05, "loss": 0.1121, "step": 66170 }, { "epoch": 2.404971291518279, "grad_norm": 0.9576284289360046, "learning_rate": 3.6616718477889837e-05, "loss": 0.1075, "step": 66180 }, { "epoch": 2.405334690021077, "grad_norm": 0.6212823987007141, "learning_rate": 3.661212316609915e-05, "loss": 0.1262, "step": 66190 }, { "epoch": 2.4056980885238755, "grad_norm": 0.9172229170799255, "learning_rate": 3.660752735398338e-05, "loss": 0.0759, "step": 66200 }, { "epoch": 2.4060614870266734, "grad_norm": 0.7851585745811462, "learning_rate": 3.660293104174057e-05, "loss": 0.0873, "step": 66210 }, { "epoch": 2.4064248855294714, "grad_norm": 0.6783828735351562, "learning_rate": 3.659833422956873e-05, "loss": 0.1069, "step": 66220 }, { "epoch": 2.40678828403227, "grad_norm": 3.5662567615509033, "learning_rate": 3.659373691766594e-05, "loss": 0.0983, "step": 66230 }, { "epoch": 2.407151682535068, "grad_norm": 0.9401397705078125, "learning_rate": 3.658913910623028e-05, "loss": 0.1096, "step": 66240 }, { "epoch": 2.4075150810378663, "grad_norm": 0.5327457189559937, "learning_rate": 3.658454079545985e-05, "loss": 0.0867, "step": 66250 }, { "epoch": 2.4078784795406643, "grad_norm": 0.5370202660560608, "learning_rate": 
3.657994198555278e-05, "loss": 0.0901, "step": 66260 }, { "epoch": 2.4082418780434622, "grad_norm": 0.2850395143032074, "learning_rate": 3.65753426767072e-05, "loss": 0.1159, "step": 66270 }, { "epoch": 2.4086052765462607, "grad_norm": 3.4857585430145264, "learning_rate": 3.65707428691213e-05, "loss": 0.117, "step": 66280 }, { "epoch": 2.4089686750490586, "grad_norm": 0.8752036690711975, "learning_rate": 3.656614256299325e-05, "loss": 0.1518, "step": 66290 }, { "epoch": 2.409332073551857, "grad_norm": 0.7939157485961914, "learning_rate": 3.656154175852128e-05, "loss": 0.0889, "step": 66300 }, { "epoch": 2.409695472054655, "grad_norm": 0.5964920520782471, "learning_rate": 3.6556940455903603e-05, "loss": 0.0838, "step": 66310 }, { "epoch": 2.4100588705574535, "grad_norm": 0.3993948996067047, "learning_rate": 3.655233865533848e-05, "loss": 0.1318, "step": 66320 }, { "epoch": 2.4104222690602515, "grad_norm": 0.5623260736465454, "learning_rate": 3.65477363570242e-05, "loss": 0.1193, "step": 66330 }, { "epoch": 2.4107856675630495, "grad_norm": 0.5477907061576843, "learning_rate": 3.654313356115903e-05, "loss": 0.1197, "step": 66340 }, { "epoch": 2.411149066065848, "grad_norm": 0.8918854594230652, "learning_rate": 3.653853026794132e-05, "loss": 0.1546, "step": 66350 }, { "epoch": 2.411512464568646, "grad_norm": 0.5984349250793457, "learning_rate": 3.6533926477569384e-05, "loss": 0.0822, "step": 66360 }, { "epoch": 2.4118758630714443, "grad_norm": 0.3398670554161072, "learning_rate": 3.65293221902416e-05, "loss": 0.1134, "step": 66370 }, { "epoch": 2.4122392615742423, "grad_norm": 1.7309616804122925, "learning_rate": 3.652471740615634e-05, "loss": 0.0953, "step": 66380 }, { "epoch": 2.4126026600770407, "grad_norm": 0.5632598996162415, "learning_rate": 3.6520112125512016e-05, "loss": 0.1023, "step": 66390 }, { "epoch": 2.4129660585798387, "grad_norm": 1.3867424726486206, "learning_rate": 3.6515506348507054e-05, "loss": 0.0856, "step": 66400 }, { "epoch": 2.4133294570826367, 
"grad_norm": 1.5078961849212646, "learning_rate": 3.651090007533989e-05, "loss": 0.0795, "step": 66410 }, { "epoch": 2.413692855585435, "grad_norm": 0.596082866191864, "learning_rate": 3.650629330620899e-05, "loss": 0.1231, "step": 66420 }, { "epoch": 2.414056254088233, "grad_norm": 0.9665220379829407, "learning_rate": 3.6501686041312865e-05, "loss": 0.1076, "step": 66430 }, { "epoch": 2.4144196525910315, "grad_norm": 0.4780147075653076, "learning_rate": 3.649753907919114e-05, "loss": 3.842, "step": 66440 }, { "epoch": 2.4147830510938295, "grad_norm": 0.7537965774536133, "learning_rate": 3.6492930872887963e-05, "loss": 0.0878, "step": 66450 }, { "epoch": 2.4151464495966275, "grad_norm": 2.1421070098876953, "learning_rate": 3.6488322171395295e-05, "loss": 0.0812, "step": 66460 }, { "epoch": 2.415509848099426, "grad_norm": 0.36758169531822205, "learning_rate": 3.648371297491169e-05, "loss": 0.1109, "step": 66470 }, { "epoch": 2.415873246602224, "grad_norm": 1.2690719366073608, "learning_rate": 3.647910328363577e-05, "loss": 0.1199, "step": 66480 }, { "epoch": 2.4162366451050223, "grad_norm": 1.2424167394638062, "learning_rate": 3.647449309776612e-05, "loss": 0.1087, "step": 66490 }, { "epoch": 2.4166000436078203, "grad_norm": 0.6070811748504639, "learning_rate": 3.6469882417501386e-05, "loss": 0.1042, "step": 66500 }, { "epoch": 2.4169634421106183, "grad_norm": 0.3652547597885132, "learning_rate": 3.646527124304024e-05, "loss": 0.13, "step": 66510 }, { "epoch": 2.4173268406134167, "grad_norm": 0.8389589190483093, "learning_rate": 3.646065957458134e-05, "loss": 0.1059, "step": 66520 }, { "epoch": 2.4176902391162147, "grad_norm": 4.236841678619385, "learning_rate": 3.64560474123234e-05, "loss": 0.1248, "step": 66530 }, { "epoch": 2.418053637619013, "grad_norm": 0.4040025770664215, "learning_rate": 3.645143475646514e-05, "loss": 0.1224, "step": 66540 }, { "epoch": 2.418417036121811, "grad_norm": 1.0393097400665283, "learning_rate": 3.6446821607205294e-05, "loss": 
0.0945, "step": 66550 }, { "epoch": 2.418780434624609, "grad_norm": 1.010204792022705, "learning_rate": 3.644220796474264e-05, "loss": 0.0797, "step": 66560 }, { "epoch": 2.4191438331274076, "grad_norm": 0.8821393847465515, "learning_rate": 3.643759382927595e-05, "loss": 0.1042, "step": 66570 }, { "epoch": 2.4195072316302055, "grad_norm": 0.35728177428245544, "learning_rate": 3.643297920100404e-05, "loss": 0.0965, "step": 66580 }, { "epoch": 2.419870630133004, "grad_norm": 1.833901286125183, "learning_rate": 3.642836408012573e-05, "loss": 0.176, "step": 66590 }, { "epoch": 2.420234028635802, "grad_norm": 1.3145054578781128, "learning_rate": 3.6423748466839884e-05, "loss": 0.0881, "step": 66600 }, { "epoch": 2.420234028635802, "eval_loss": 0.3247428834438324, "eval_runtime": 179.5083, "eval_samples_per_second": 41.302, "eval_steps_per_second": 5.164, "eval_wer": 0.15563563091110424, "step": 66600 }, { "epoch": 2.4205974271386004, "grad_norm": 0.4729728102684021, "learning_rate": 3.6419132361345366e-05, "loss": 0.1078, "step": 66610 }, { "epoch": 2.4209608256413984, "grad_norm": 0.780598521232605, "learning_rate": 3.6414515763841054e-05, "loss": 0.104, "step": 66620 }, { "epoch": 2.4213242241441963, "grad_norm": 0.8436282873153687, "learning_rate": 3.6409898674525865e-05, "loss": 0.1148, "step": 66630 }, { "epoch": 2.4216876226469948, "grad_norm": 1.6270266771316528, "learning_rate": 3.640528109359875e-05, "loss": 1.2437, "step": 66640 }, { "epoch": 2.4220510211497928, "grad_norm": 0.6060745716094971, "learning_rate": 3.640066302125865e-05, "loss": 0.0958, "step": 66650 }, { "epoch": 2.422414419652591, "grad_norm": 1.076560139656067, "learning_rate": 3.6396044457704535e-05, "loss": 0.105, "step": 66660 }, { "epoch": 2.422777818155389, "grad_norm": 0.4505023956298828, "learning_rate": 3.6391425403135425e-05, "loss": 0.1123, "step": 66670 }, { "epoch": 2.4231412166581876, "grad_norm": 0.5208647847175598, "learning_rate": 3.6386805857750315e-05, "loss": 0.0909, "step": 
66680 }, { "epoch": 2.4235046151609856, "grad_norm": 0.7721276879310608, "learning_rate": 3.638218582174826e-05, "loss": 0.113, "step": 66690 }, { "epoch": 2.4238680136637836, "grad_norm": 1.676924467086792, "learning_rate": 3.6377565295328316e-05, "loss": 0.1015, "step": 66700 }, { "epoch": 2.424231412166582, "grad_norm": 0.7819331288337708, "learning_rate": 3.6372944278689566e-05, "loss": 0.1092, "step": 66710 }, { "epoch": 2.42459481066938, "grad_norm": 0.6924077272415161, "learning_rate": 3.636832277203111e-05, "loss": 0.1739, "step": 66720 }, { "epoch": 2.4249582091721784, "grad_norm": 0.48950478434562683, "learning_rate": 3.636370077555208e-05, "loss": 0.1288, "step": 66730 }, { "epoch": 2.4253216076749764, "grad_norm": 1.9735438823699951, "learning_rate": 3.6359078289451604e-05, "loss": 0.1444, "step": 66740 }, { "epoch": 2.4256850061777744, "grad_norm": 2.684687852859497, "learning_rate": 3.635445531392887e-05, "loss": 0.0867, "step": 66750 }, { "epoch": 2.426048404680573, "grad_norm": 0.4811551570892334, "learning_rate": 3.634983184918305e-05, "loss": 0.1158, "step": 66760 }, { "epoch": 2.426411803183371, "grad_norm": 1.0460630655288696, "learning_rate": 3.6345207895413367e-05, "loss": 0.1113, "step": 66770 }, { "epoch": 2.4267752016861692, "grad_norm": 0.47594699263572693, "learning_rate": 3.634058345281903e-05, "loss": 0.119, "step": 66780 }, { "epoch": 2.427138600188967, "grad_norm": 1.4716179370880127, "learning_rate": 3.633595852159931e-05, "loss": 0.1607, "step": 66790 }, { "epoch": 2.427501998691765, "grad_norm": 2.8937737941741943, "learning_rate": 3.6331333101953465e-05, "loss": 0.0732, "step": 66800 }, { "epoch": 2.4278653971945636, "grad_norm": 0.4008066654205322, "learning_rate": 3.63267071940808e-05, "loss": 0.108, "step": 66810 }, { "epoch": 2.4282287956973616, "grad_norm": 0.6345723271369934, "learning_rate": 3.632208079818062e-05, "loss": 0.1021, "step": 66820 }, { "epoch": 2.42859219420016, "grad_norm": 4.007993221282959, "learning_rate": 
3.631745391445226e-05, "loss": 0.1041, "step": 66830 }, { "epoch": 2.428955592702958, "grad_norm": 1.5959880352020264, "learning_rate": 3.631282654309508e-05, "loss": 0.1387, "step": 66840 }, { "epoch": 2.429318991205756, "grad_norm": 2.597745180130005, "learning_rate": 3.6308198684308465e-05, "loss": 0.1163, "step": 66850 }, { "epoch": 2.4296823897085544, "grad_norm": 0.8064637184143066, "learning_rate": 3.630357033829179e-05, "loss": 0.1064, "step": 66860 }, { "epoch": 2.4300457882113524, "grad_norm": 0.9430283308029175, "learning_rate": 3.629894150524449e-05, "loss": 0.1267, "step": 66870 }, { "epoch": 2.430409186714151, "grad_norm": 0.7025822997093201, "learning_rate": 3.629431218536601e-05, "loss": 0.0967, "step": 66880 }, { "epoch": 2.430772585216949, "grad_norm": 1.0002391338348389, "learning_rate": 3.628968237885579e-05, "loss": 0.1241, "step": 66890 }, { "epoch": 2.4311359837197473, "grad_norm": 1.6046959161758423, "learning_rate": 3.628505208591334e-05, "loss": 0.0894, "step": 66900 }, { "epoch": 2.4314993822225452, "grad_norm": 0.773638129234314, "learning_rate": 3.628042130673814e-05, "loss": 0.0885, "step": 66910 }, { "epoch": 2.4318627807253432, "grad_norm": 0.7153804898262024, "learning_rate": 3.627579004152972e-05, "loss": 0.127, "step": 66920 }, { "epoch": 2.4322261792281417, "grad_norm": 0.8669637441635132, "learning_rate": 3.627115829048763e-05, "loss": 0.0891, "step": 66930 }, { "epoch": 2.4325895777309396, "grad_norm": 2.438815116882324, "learning_rate": 3.6266526053811434e-05, "loss": 3.7705, "step": 66940 }, { "epoch": 2.432952976233738, "grad_norm": 0.46661120653152466, "learning_rate": 3.626189333170071e-05, "loss": 0.0928, "step": 66950 }, { "epoch": 2.433316374736536, "grad_norm": 1.2738080024719238, "learning_rate": 3.625726012435508e-05, "loss": 0.0838, "step": 66960 }, { "epoch": 2.4336797732393345, "grad_norm": 0.8235649466514587, "learning_rate": 3.6252626431974155e-05, "loss": 0.1173, "step": 66970 }, { "epoch": 2.4340431717421325, 
"grad_norm": 0.8627928495407104, "learning_rate": 3.62479922547576e-05, "loss": 0.0915, "step": 66980 }, { "epoch": 2.4344065702449305, "grad_norm": 0.746405839920044, "learning_rate": 3.624335759290509e-05, "loss": 0.2557, "step": 66990 }, { "epoch": 2.434769968747729, "grad_norm": 1.1601886749267578, "learning_rate": 3.6238722446616285e-05, "loss": 0.0987, "step": 67000 }, { "epoch": 2.435133367250527, "grad_norm": 1.9349639415740967, "learning_rate": 3.623408681609093e-05, "loss": 0.0899, "step": 67010 }, { "epoch": 2.4354967657533253, "grad_norm": 0.6410073637962341, "learning_rate": 3.622945070152874e-05, "loss": 0.164, "step": 67020 }, { "epoch": 2.4358601642561233, "grad_norm": 0.46642959117889404, "learning_rate": 3.622481410312948e-05, "loss": 0.1156, "step": 67030 }, { "epoch": 2.4362235627589213, "grad_norm": 1.0162826776504517, "learning_rate": 3.6220177021092916e-05, "loss": 0.1193, "step": 67040 }, { "epoch": 2.4365869612617197, "grad_norm": 1.3133575916290283, "learning_rate": 3.621553945561884e-05, "loss": 0.0849, "step": 67050 }, { "epoch": 2.4369503597645177, "grad_norm": 0.6921333074569702, "learning_rate": 3.621090140690708e-05, "loss": 0.0855, "step": 67060 }, { "epoch": 2.437313758267316, "grad_norm": 0.8446233868598938, "learning_rate": 3.620626287515746e-05, "loss": 0.9796, "step": 67070 }, { "epoch": 2.437677156770114, "grad_norm": 1.3895478248596191, "learning_rate": 3.620162386056985e-05, "loss": 0.1147, "step": 67080 }, { "epoch": 2.438040555272912, "grad_norm": 0.5276104807853699, "learning_rate": 3.619698436334412e-05, "loss": 0.0987, "step": 67090 }, { "epoch": 2.4384039537757105, "grad_norm": 1.7694755792617798, "learning_rate": 3.619234438368018e-05, "loss": 0.1291, "step": 67100 }, { "epoch": 2.4387673522785085, "grad_norm": 0.5948963761329651, "learning_rate": 3.618770392177794e-05, "loss": 0.0976, "step": 67110 }, { "epoch": 2.439130750781307, "grad_norm": 0.2391016185283661, "learning_rate": 3.618306297783734e-05, "loss": 
0.0982, "step": 67120 }, { "epoch": 2.439494149284105, "grad_norm": 0.9383694529533386, "learning_rate": 3.617842155205835e-05, "loss": 0.0995, "step": 67130 }, { "epoch": 2.439857547786903, "grad_norm": 0.9149391055107117, "learning_rate": 3.617377964464094e-05, "loss": 0.1012, "step": 67140 }, { "epoch": 2.4402209462897013, "grad_norm": 0.5762970447540283, "learning_rate": 3.616913725578513e-05, "loss": 0.0943, "step": 67150 }, { "epoch": 2.4405843447924993, "grad_norm": 0.7008225321769714, "learning_rate": 3.6164494385690936e-05, "loss": 0.0838, "step": 67160 }, { "epoch": 2.4409477432952977, "grad_norm": 1.0070174932479858, "learning_rate": 3.61598510345584e-05, "loss": 0.1395, "step": 67170 }, { "epoch": 2.4413111417980957, "grad_norm": 0.7962942123413086, "learning_rate": 3.6155207202587596e-05, "loss": 0.1115, "step": 67180 }, { "epoch": 2.441674540300894, "grad_norm": 0.4024165868759155, "learning_rate": 3.615056288997859e-05, "loss": 0.0848, "step": 67190 }, { "epoch": 2.442037938803692, "grad_norm": 0.39084872603416443, "learning_rate": 3.6145918096931515e-05, "loss": 0.0967, "step": 67200 }, { "epoch": 2.442037938803692, "eval_loss": 0.33589035272598267, "eval_runtime": 179.6199, "eval_samples_per_second": 41.276, "eval_steps_per_second": 5.161, "eval_wer": 0.15488227712527455, "step": 67200 }, { "epoch": 2.44240133730649, "grad_norm": 0.6485455632209778, "learning_rate": 3.614127282364648e-05, "loss": 1.4456, "step": 67210 }, { "epoch": 2.4427647358092885, "grad_norm": 0.4933464229106903, "learning_rate": 3.613662707032364e-05, "loss": 0.1259, "step": 67220 }, { "epoch": 2.4431281343120865, "grad_norm": 2.199694871902466, "learning_rate": 3.613198083716317e-05, "loss": 0.1013, "step": 67230 }, { "epoch": 2.443491532814885, "grad_norm": 1.2690855264663696, "learning_rate": 3.612733412436524e-05, "loss": 0.1256, "step": 67240 }, { "epoch": 2.443854931317683, "grad_norm": 1.8013975620269775, "learning_rate": 3.612268693213009e-05, "loss": 0.4379, "step": 
67250 }, { "epoch": 2.4442183298204814, "grad_norm": 4.287527561187744, "learning_rate": 3.611803926065792e-05, "loss": 0.0803, "step": 67260 }, { "epoch": 2.4445817283232794, "grad_norm": 0.6265177726745605, "learning_rate": 3.6113391110149006e-05, "loss": 0.1046, "step": 67270 }, { "epoch": 2.4449451268260773, "grad_norm": 1.527327537536621, "learning_rate": 3.6108742480803606e-05, "loss": 0.1095, "step": 67280 }, { "epoch": 2.4453085253288758, "grad_norm": 1.2177270650863647, "learning_rate": 3.6104093372822026e-05, "loss": 0.0972, "step": 67290 }, { "epoch": 2.4456719238316738, "grad_norm": 0.7354857921600342, "learning_rate": 3.609944378640457e-05, "loss": 0.0893, "step": 67300 }, { "epoch": 2.446035322334472, "grad_norm": 0.8578464984893799, "learning_rate": 3.609479372175156e-05, "loss": 0.0914, "step": 67310 }, { "epoch": 2.44639872083727, "grad_norm": 0.5541604161262512, "learning_rate": 3.6090143179063374e-05, "loss": 0.113, "step": 67320 }, { "epoch": 2.446762119340068, "grad_norm": 0.7503251433372498, "learning_rate": 3.608549215854037e-05, "loss": 0.116, "step": 67330 }, { "epoch": 2.4471255178428666, "grad_norm": 0.7713415026664734, "learning_rate": 3.608084066038297e-05, "loss": 0.1122, "step": 67340 }, { "epoch": 2.4474889163456646, "grad_norm": 2.4603497982025146, "learning_rate": 3.607618868479156e-05, "loss": 0.0932, "step": 67350 }, { "epoch": 2.447852314848463, "grad_norm": 0.4980012774467468, "learning_rate": 3.607153623196658e-05, "loss": 0.0905, "step": 67360 }, { "epoch": 2.448215713351261, "grad_norm": 0.5134033560752869, "learning_rate": 3.606688330210851e-05, "loss": 0.1666, "step": 67370 }, { "epoch": 2.448579111854059, "grad_norm": 0.5784050822257996, "learning_rate": 3.60622298954178e-05, "loss": 0.1092, "step": 67380 }, { "epoch": 2.4489425103568574, "grad_norm": 0.4290425777435303, "learning_rate": 3.605757601209497e-05, "loss": 0.1189, "step": 67390 }, { "epoch": 2.4493059088596554, "grad_norm": 1.0926011800765991, "learning_rate": 
3.605292165234053e-05, "loss": 0.0879, "step": 67400 }, { "epoch": 2.449669307362454, "grad_norm": 1.1270503997802734, "learning_rate": 3.604826681635504e-05, "loss": 0.0893, "step": 67410 }, { "epoch": 2.450032705865252, "grad_norm": 0.6691473126411438, "learning_rate": 3.604361150433903e-05, "loss": 0.7363, "step": 67420 }, { "epoch": 2.4503961043680498, "grad_norm": 1.2996752262115479, "learning_rate": 3.603895571649308e-05, "loss": 0.0946, "step": 67430 }, { "epoch": 2.450759502870848, "grad_norm": 1.3618733882904053, "learning_rate": 3.603429945301783e-05, "loss": 0.1242, "step": 67440 }, { "epoch": 2.451122901373646, "grad_norm": 0.7978112101554871, "learning_rate": 3.6029642714113853e-05, "loss": 0.0783, "step": 67450 }, { "epoch": 2.4514862998764446, "grad_norm": 1.727400302886963, "learning_rate": 3.602498549998183e-05, "loss": 0.106, "step": 67460 }, { "epoch": 2.4518496983792426, "grad_norm": 0.9686618447303772, "learning_rate": 3.602032781082241e-05, "loss": 0.1259, "step": 67470 }, { "epoch": 2.452213096882041, "grad_norm": 0.4624063968658447, "learning_rate": 3.601566964683627e-05, "loss": 0.1066, "step": 67480 }, { "epoch": 2.452576495384839, "grad_norm": 0.38952404260635376, "learning_rate": 3.601101100822412e-05, "loss": 0.0993, "step": 67490 }, { "epoch": 2.452939893887637, "grad_norm": 1.37151300907135, "learning_rate": 3.600635189518668e-05, "loss": 0.0988, "step": 67500 }, { "epoch": 2.4533032923904354, "grad_norm": 0.4988241195678711, "learning_rate": 3.60016923079247e-05, "loss": 0.0942, "step": 67510 }, { "epoch": 2.4536666908932334, "grad_norm": 0.8300676941871643, "learning_rate": 3.599703224663894e-05, "loss": 0.1087, "step": 67520 }, { "epoch": 2.454030089396032, "grad_norm": 9.264083862304688, "learning_rate": 3.599237171153019e-05, "loss": 0.1155, "step": 67530 }, { "epoch": 2.45439348789883, "grad_norm": 0.9220635294914246, "learning_rate": 3.598771070279926e-05, "loss": 0.1134, "step": 67540 }, { "epoch": 2.4547568864016283, 
"grad_norm": 0.6584560871124268, "learning_rate": 3.598304922064696e-05, "loss": 0.0906, "step": 67550 }, { "epoch": 2.4551202849044262, "grad_norm": 2.7506167888641357, "learning_rate": 3.5978387265274157e-05, "loss": 0.1129, "step": 67560 }, { "epoch": 2.4554836834072242, "grad_norm": 1.5210083723068237, "learning_rate": 3.5973724836881694e-05, "loss": 0.1005, "step": 67570 }, { "epoch": 2.4558470819100227, "grad_norm": 0.7032837271690369, "learning_rate": 3.596906193567049e-05, "loss": 0.0681, "step": 67580 }, { "epoch": 2.4562104804128206, "grad_norm": 1.5217934846878052, "learning_rate": 3.596439856184142e-05, "loss": 0.1203, "step": 67590 }, { "epoch": 2.456573878915619, "grad_norm": 0.5665151476860046, "learning_rate": 3.595973471559544e-05, "loss": 0.0865, "step": 67600 }, { "epoch": 2.456937277418417, "grad_norm": 1.023913025856018, "learning_rate": 3.595507039713348e-05, "loss": 0.0941, "step": 67610 }, { "epoch": 2.457300675921215, "grad_norm": 0.6718622446060181, "learning_rate": 3.595040560665651e-05, "loss": 0.1392, "step": 67620 }, { "epoch": 2.4576640744240135, "grad_norm": 0.5096120238304138, "learning_rate": 3.594574034436553e-05, "loss": 0.1164, "step": 67630 }, { "epoch": 2.4580274729268115, "grad_norm": 0.776214063167572, "learning_rate": 3.594107461046154e-05, "loss": 0.1106, "step": 67640 }, { "epoch": 2.45839087142961, "grad_norm": 1.91248619556427, "learning_rate": 3.5936408405145575e-05, "loss": 4.1324, "step": 67650 }, { "epoch": 2.458754269932408, "grad_norm": 1.217971920967102, "learning_rate": 3.593174172861868e-05, "loss": 0.0972, "step": 67660 }, { "epoch": 2.459117668435206, "grad_norm": 6.793942451477051, "learning_rate": 3.5927074581081935e-05, "loss": 0.7676, "step": 67670 }, { "epoch": 2.4594810669380043, "grad_norm": 0.5515997409820557, "learning_rate": 3.592240696273643e-05, "loss": 0.0907, "step": 67680 }, { "epoch": 2.4598444654408023, "grad_norm": 0.4186965227127075, "learning_rate": 3.591773887378326e-05, "loss": 0.0876, 
"step": 67690 }, { "epoch": 2.4602078639436007, "grad_norm": 0.4198078215122223, "learning_rate": 3.5913070314423575e-05, "loss": 0.0872, "step": 67700 }, { "epoch": 2.4605712624463987, "grad_norm": 0.7509788870811462, "learning_rate": 3.5908401284858514e-05, "loss": 0.0912, "step": 67710 }, { "epoch": 2.4609346609491967, "grad_norm": 0.8919647336006165, "learning_rate": 3.590373178528926e-05, "loss": 0.1003, "step": 67720 }, { "epoch": 2.461298059451995, "grad_norm": 1.2128369808197021, "learning_rate": 3.5899061815917e-05, "loss": 0.1129, "step": 67730 }, { "epoch": 2.461661457954793, "grad_norm": 0.5779681205749512, "learning_rate": 3.589439137694293e-05, "loss": 0.1169, "step": 67740 }, { "epoch": 2.4620248564575915, "grad_norm": 0.6092358827590942, "learning_rate": 3.588972046856831e-05, "loss": 0.0884, "step": 67750 }, { "epoch": 2.4623882549603895, "grad_norm": 1.222869873046875, "learning_rate": 3.588504909099438e-05, "loss": 0.0993, "step": 67760 }, { "epoch": 2.462751653463188, "grad_norm": 0.26627829670906067, "learning_rate": 3.5880377244422416e-05, "loss": 0.1261, "step": 67770 }, { "epoch": 2.463115051965986, "grad_norm": 1.2034231424331665, "learning_rate": 3.58757049290537e-05, "loss": 0.0899, "step": 67780 }, { "epoch": 2.463478450468784, "grad_norm": 0.3671499192714691, "learning_rate": 3.5871032145089565e-05, "loss": 0.1387, "step": 67790 }, { "epoch": 2.4638418489715823, "grad_norm": 0.5502142310142517, "learning_rate": 3.586635889273133e-05, "loss": 0.1053, "step": 67800 }, { "epoch": 2.4638418489715823, "eval_loss": 0.32282206416130066, "eval_runtime": 179.8955, "eval_samples_per_second": 41.213, "eval_steps_per_second": 5.153, "eval_wer": 0.1538112440321673, "step": 67800 }, { "epoch": 2.4642052474743803, "grad_norm": 1.090920329093933, "learning_rate": 3.5861685172180346e-05, "loss": 0.1039, "step": 67810 }, { "epoch": 2.4645686459771787, "grad_norm": 0.406110018491745, "learning_rate": 3.5857010983638e-05, "loss": 0.1042, "step": 67820 }, { 
"epoch": 2.4649320444799767, "grad_norm": 1.2592461109161377, "learning_rate": 3.585233632730568e-05, "loss": 0.0835, "step": 67830 }, { "epoch": 2.465295442982775, "grad_norm": 0.5883360505104065, "learning_rate": 3.58476612033848e-05, "loss": 0.1341, "step": 67840 }, { "epoch": 2.465658841485573, "grad_norm": 1.322466492652893, "learning_rate": 3.58429856120768e-05, "loss": 0.0797, "step": 67850 }, { "epoch": 2.466022239988371, "grad_norm": 0.4922407567501068, "learning_rate": 3.583830955358312e-05, "loss": 0.0859, "step": 67860 }, { "epoch": 2.4663856384911695, "grad_norm": 0.7841882705688477, "learning_rate": 3.583363302810525e-05, "loss": 0.1096, "step": 67870 }, { "epoch": 2.4667490369939675, "grad_norm": 0.7191815376281738, "learning_rate": 3.582895603584467e-05, "loss": 0.0956, "step": 67880 }, { "epoch": 2.467112435496766, "grad_norm": 0.43222716450691223, "learning_rate": 3.5824278577002925e-05, "loss": 0.139, "step": 67890 }, { "epoch": 2.467475833999564, "grad_norm": 1.4954817295074463, "learning_rate": 3.581960065178151e-05, "loss": 0.0903, "step": 67900 }, { "epoch": 2.467839232502362, "grad_norm": 0.6472924947738647, "learning_rate": 3.5814922260382e-05, "loss": 0.0989, "step": 67910 }, { "epoch": 2.4682026310051604, "grad_norm": 1.0343185663223267, "learning_rate": 3.581024340300598e-05, "loss": 0.0951, "step": 67920 }, { "epoch": 2.4685660295079583, "grad_norm": 0.6948789358139038, "learning_rate": 3.580556407985503e-05, "loss": 0.3052, "step": 67930 }, { "epoch": 2.4689294280107568, "grad_norm": 0.5896201729774475, "learning_rate": 3.580088429113077e-05, "loss": 0.0787, "step": 67940 }, { "epoch": 2.4692928265135548, "grad_norm": 0.7022304534912109, "learning_rate": 3.5796204037034834e-05, "loss": 0.086, "step": 67950 }, { "epoch": 2.4696562250163527, "grad_norm": 0.6120296120643616, "learning_rate": 3.579152331776888e-05, "loss": 0.101, "step": 67960 }, { "epoch": 2.470019623519151, "grad_norm": 0.7050819993019104, "learning_rate": 
3.5786842133534584e-05, "loss": 0.1042, "step": 67970 }, { "epoch": 2.470383022021949, "grad_norm": 0.728625476360321, "learning_rate": 3.578216048453364e-05, "loss": 0.1194, "step": 67980 }, { "epoch": 2.4707464205247476, "grad_norm": 5.270279884338379, "learning_rate": 3.577747837096776e-05, "loss": 0.1007, "step": 67990 }, { "epoch": 2.4711098190275456, "grad_norm": 1.098525047302246, "learning_rate": 3.577279579303868e-05, "loss": 0.1017, "step": 68000 }, { "epoch": 2.4714732175303435, "grad_norm": 2.74465012550354, "learning_rate": 3.576811275094817e-05, "loss": 0.3871, "step": 68010 }, { "epoch": 2.471836616033142, "grad_norm": 0.6227459907531738, "learning_rate": 3.576342924489799e-05, "loss": 0.1103, "step": 68020 }, { "epoch": 2.47220001453594, "grad_norm": 2.293656349182129, "learning_rate": 3.5758745275089945e-05, "loss": 0.0953, "step": 68030 }, { "epoch": 2.4725634130387384, "grad_norm": 1.2598451375961304, "learning_rate": 3.575406084172584e-05, "loss": 0.1743, "step": 68040 }, { "epoch": 2.4729268115415364, "grad_norm": 1.4611924886703491, "learning_rate": 3.574937594500751e-05, "loss": 0.0955, "step": 68050 }, { "epoch": 2.473290210044335, "grad_norm": 0.6100664138793945, "learning_rate": 3.5744690585136834e-05, "loss": 0.0935, "step": 68060 }, { "epoch": 2.473653608547133, "grad_norm": 1.22284996509552, "learning_rate": 3.574000476231566e-05, "loss": 0.1435, "step": 68070 }, { "epoch": 2.4740170070499308, "grad_norm": 0.8457713723182678, "learning_rate": 3.5735318476745887e-05, "loss": 0.0832, "step": 68080 }, { "epoch": 2.474380405552729, "grad_norm": 1.3872827291488647, "learning_rate": 3.573063172862944e-05, "loss": 0.1453, "step": 68090 }, { "epoch": 2.474743804055527, "grad_norm": 1.066683292388916, "learning_rate": 3.572594451816826e-05, "loss": 0.0809, "step": 68100 }, { "epoch": 2.4751072025583256, "grad_norm": 1.5101946592330933, "learning_rate": 3.5721256845564286e-05, "loss": 0.0854, "step": 68110 }, { "epoch": 2.4754706010611236, 
"grad_norm": 0.6682563424110413, "learning_rate": 3.571656871101951e-05, "loss": 0.1077, "step": 68120 }, { "epoch": 2.475833999563922, "grad_norm": 1.0795047283172607, "learning_rate": 3.5711880114735917e-05, "loss": 0.0855, "step": 68130 }, { "epoch": 2.47619739806672, "grad_norm": 4.4557671546936035, "learning_rate": 3.570719105691551e-05, "loss": 0.1676, "step": 68140 }, { "epoch": 2.476560796569518, "grad_norm": 0.7962543368339539, "learning_rate": 3.570250153776035e-05, "loss": 0.0869, "step": 68150 }, { "epoch": 2.4769241950723164, "grad_norm": 12.166545867919922, "learning_rate": 3.569781155747247e-05, "loss": 0.2161, "step": 68160 }, { "epoch": 2.4772875935751144, "grad_norm": 0.4934634864330292, "learning_rate": 3.569312111625396e-05, "loss": 0.1146, "step": 68170 }, { "epoch": 2.477650992077913, "grad_norm": 1.008591651916504, "learning_rate": 3.56884302143069e-05, "loss": 0.1029, "step": 68180 }, { "epoch": 2.478014390580711, "grad_norm": 1.2141749858856201, "learning_rate": 3.568373885183342e-05, "loss": 0.1215, "step": 68190 }, { "epoch": 2.478377789083509, "grad_norm": 1.004011631011963, "learning_rate": 3.567904702903564e-05, "loss": 0.0831, "step": 68200 }, { "epoch": 2.4787411875863072, "grad_norm": 31.751787185668945, "learning_rate": 3.567435474611572e-05, "loss": 0.4307, "step": 68210 }, { "epoch": 2.4791045860891052, "grad_norm": 0.7640292048454285, "learning_rate": 3.566966200327584e-05, "loss": 0.1086, "step": 68220 }, { "epoch": 2.4794679845919037, "grad_norm": 0.5559817552566528, "learning_rate": 3.566496880071817e-05, "loss": 0.1082, "step": 68230 }, { "epoch": 2.4798313830947016, "grad_norm": 2.7342145442962646, "learning_rate": 3.566027513864496e-05, "loss": 0.1049, "step": 68240 }, { "epoch": 2.4801947815974996, "grad_norm": 1.2804802656173706, "learning_rate": 3.565558101725841e-05, "loss": 0.0957, "step": 68250 }, { "epoch": 2.480558180100298, "grad_norm": 6.4595770835876465, "learning_rate": 3.565088643676079e-05, "loss": 0.0967, 
"step": 68260 }, { "epoch": 2.480921578603096, "grad_norm": 0.7362810373306274, "learning_rate": 3.564619139735437e-05, "loss": 0.1271, "step": 68270 }, { "epoch": 2.4812849771058945, "grad_norm": 2.1541872024536133, "learning_rate": 3.564149589924145e-05, "loss": 0.1168, "step": 68280 }, { "epoch": 2.4816483756086924, "grad_norm": 1.1019583940505981, "learning_rate": 3.563679994262433e-05, "loss": 0.1151, "step": 68290 }, { "epoch": 2.4820117741114904, "grad_norm": 0.7224584817886353, "learning_rate": 3.563210352770534e-05, "loss": 0.2149, "step": 68300 }, { "epoch": 2.482375172614289, "grad_norm": 0.6910248398780823, "learning_rate": 3.562740665468684e-05, "loss": 0.0971, "step": 68310 }, { "epoch": 2.482738571117087, "grad_norm": 1.294913411140442, "learning_rate": 3.56227093237712e-05, "loss": 0.1336, "step": 68320 }, { "epoch": 2.4831019696198853, "grad_norm": 0.5386795401573181, "learning_rate": 3.561801153516082e-05, "loss": 0.1147, "step": 68330 }, { "epoch": 2.4834653681226833, "grad_norm": 0.5479850769042969, "learning_rate": 3.561331328905809e-05, "loss": 0.0878, "step": 68340 }, { "epoch": 2.4838287666254817, "grad_norm": 0.24666792154312134, "learning_rate": 3.560861458566546e-05, "loss": 0.9362, "step": 68350 }, { "epoch": 2.4841921651282797, "grad_norm": 0.776744544506073, "learning_rate": 3.560391542518537e-05, "loss": 0.1084, "step": 68360 }, { "epoch": 2.4845555636310777, "grad_norm": 0.7053751945495605, "learning_rate": 3.55992158078203e-05, "loss": 0.1096, "step": 68370 }, { "epoch": 2.484918962133876, "grad_norm": 0.5632005929946899, "learning_rate": 3.559451573377272e-05, "loss": 0.1125, "step": 68380 }, { "epoch": 2.485282360636674, "grad_norm": 0.5601955652236938, "learning_rate": 3.558981520324516e-05, "loss": 0.1011, "step": 68390 }, { "epoch": 2.4856457591394725, "grad_norm": 0.9751861691474915, "learning_rate": 3.558511421644014e-05, "loss": 0.1193, "step": 68400 }, { "epoch": 2.4856457591394725, "eval_loss": 0.3299192190170288, 
"eval_runtime": 180.0784, "eval_samples_per_second": 41.171, "eval_steps_per_second": 5.148, "eval_wer": 0.15812260605950587, "step": 68400 }, { "epoch": 2.4860091576422705, "grad_norm": 3.060753107070923, "learning_rate": 3.5580412773560214e-05, "loss": 0.1417, "step": 68410 }, { "epoch": 2.486372556145069, "grad_norm": 0.9213599562644958, "learning_rate": 3.557571087480794e-05, "loss": 0.1066, "step": 68420 }, { "epoch": 2.486735954647867, "grad_norm": 0.6596553921699524, "learning_rate": 3.557100852038592e-05, "loss": 0.0984, "step": 68430 }, { "epoch": 2.487099353150665, "grad_norm": 0.7937065362930298, "learning_rate": 3.556630571049675e-05, "loss": 0.1673, "step": 68440 }, { "epoch": 2.4874627516534633, "grad_norm": 1.1487483978271484, "learning_rate": 3.556160244534307e-05, "loss": 0.0982, "step": 68450 }, { "epoch": 2.4878261501562613, "grad_norm": 0.7516663074493408, "learning_rate": 3.5556898725127504e-05, "loss": 0.0879, "step": 68460 }, { "epoch": 2.4881895486590597, "grad_norm": 3.729604721069336, "learning_rate": 3.5552194550052745e-05, "loss": 0.1866, "step": 68470 }, { "epoch": 2.4885529471618577, "grad_norm": 0.6454250812530518, "learning_rate": 3.554748992032146e-05, "loss": 0.1261, "step": 68480 }, { "epoch": 2.4889163456646557, "grad_norm": 1.3000408411026, "learning_rate": 3.554278483613637e-05, "loss": 0.1297, "step": 68490 }, { "epoch": 2.489279744167454, "grad_norm": 1.060686707496643, "learning_rate": 3.5538079297700185e-05, "loss": 0.0863, "step": 68500 }, { "epoch": 2.489643142670252, "grad_norm": 1.2778925895690918, "learning_rate": 3.5533373305215665e-05, "loss": 0.0819, "step": 68510 }, { "epoch": 2.4900065411730505, "grad_norm": 0.9975671172142029, "learning_rate": 3.5528666858885565e-05, "loss": 0.101, "step": 68520 }, { "epoch": 2.4903699396758485, "grad_norm": 0.8623627424240112, "learning_rate": 3.5523959958912666e-05, "loss": 0.1161, "step": 68530 }, { "epoch": 2.4907333381786465, "grad_norm": 0.5452187061309814, "learning_rate": 
3.551925260549979e-05, "loss": 0.0967, "step": 68540 }, { "epoch": 2.491096736681445, "grad_norm": 0.7726628184318542, "learning_rate": 3.5514544798849736e-05, "loss": 0.1111, "step": 68550 }, { "epoch": 2.491460135184243, "grad_norm": 2.074589490890503, "learning_rate": 3.550983653916536e-05, "loss": 0.0911, "step": 68560 }, { "epoch": 2.4918235336870413, "grad_norm": 0.777515709400177, "learning_rate": 3.550512782664952e-05, "loss": 0.1118, "step": 68570 }, { "epoch": 2.4921869321898393, "grad_norm": 0.7411642074584961, "learning_rate": 3.55004186615051e-05, "loss": 0.0832, "step": 68580 }, { "epoch": 2.4925503306926373, "grad_norm": 1.0494729280471802, "learning_rate": 3.5495709043935e-05, "loss": 0.1126, "step": 68590 }, { "epoch": 2.4929137291954357, "grad_norm": 0.825706422328949, "learning_rate": 3.5490998974142144e-05, "loss": 2.8725, "step": 68600 }, { "epoch": 2.4932771276982337, "grad_norm": 0.7414544820785522, "learning_rate": 3.548628845232947e-05, "loss": 0.1034, "step": 68610 }, { "epoch": 2.493640526201032, "grad_norm": 1.752670168876648, "learning_rate": 3.548157747869993e-05, "loss": 0.4002, "step": 68620 }, { "epoch": 2.49400392470383, "grad_norm": 0.9184174537658691, "learning_rate": 3.547686605345651e-05, "loss": 0.101, "step": 68630 }, { "epoch": 2.4943673232066286, "grad_norm": 0.540532112121582, "learning_rate": 3.547215417680222e-05, "loss": 0.1, "step": 68640 }, { "epoch": 2.4947307217094266, "grad_norm": 0.7241819500923157, "learning_rate": 3.5467441848940056e-05, "loss": 0.0812, "step": 68650 }, { "epoch": 2.4950941202122245, "grad_norm": 0.5261086225509644, "learning_rate": 3.546272907007307e-05, "loss": 0.1093, "step": 68660 }, { "epoch": 2.495457518715023, "grad_norm": 0.5485601425170898, "learning_rate": 3.545801584040431e-05, "loss": 0.7212, "step": 68670 }, { "epoch": 2.495820917217821, "grad_norm": 0.5442925691604614, "learning_rate": 3.545330216013687e-05, "loss": 0.1235, "step": 68680 }, { "epoch": 2.4961843157206194, 
"grad_norm": 0.6182003021240234, "learning_rate": 3.5448588029473825e-05, "loss": 0.1382, "step": 68690 }, { "epoch": 2.4965477142234174, "grad_norm": 0.8053919076919556, "learning_rate": 3.5443873448618296e-05, "loss": 0.1266, "step": 68700 }, { "epoch": 2.496911112726216, "grad_norm": 2.04055118560791, "learning_rate": 3.5439158417773424e-05, "loss": 0.1026, "step": 68710 }, { "epoch": 2.497274511229014, "grad_norm": 0.5255793929100037, "learning_rate": 3.5434442937142354e-05, "loss": 0.1031, "step": 68720 }, { "epoch": 2.4976379097318118, "grad_norm": 1.7394444942474365, "learning_rate": 3.5429727006928266e-05, "loss": 0.081, "step": 68730 }, { "epoch": 2.49800130823461, "grad_norm": 1.1095107793807983, "learning_rate": 3.542501062733435e-05, "loss": 0.1198, "step": 68740 }, { "epoch": 2.498364706737408, "grad_norm": 1.0827983617782593, "learning_rate": 3.542029379856382e-05, "loss": 0.0985, "step": 68750 }, { "epoch": 2.4987281052402066, "grad_norm": 0.5815703868865967, "learning_rate": 3.54155765208199e-05, "loss": 0.0946, "step": 68760 }, { "epoch": 2.4990915037430046, "grad_norm": 1.133452296257019, "learning_rate": 3.541085879430585e-05, "loss": 0.0897, "step": 68770 }, { "epoch": 2.4994549022458026, "grad_norm": 1.6809009313583374, "learning_rate": 3.5406140619224936e-05, "loss": 0.1182, "step": 68780 }, { "epoch": 2.499818300748601, "grad_norm": 0.6066719889640808, "learning_rate": 3.540142199578045e-05, "loss": 0.1223, "step": 68790 }, { "epoch": 2.500181699251399, "grad_norm": 0.45101696252822876, "learning_rate": 3.53967029241757e-05, "loss": 0.0951, "step": 68800 }, { "epoch": 2.5005450977541974, "grad_norm": 2.0316238403320312, "learning_rate": 3.5391983404614e-05, "loss": 0.0941, "step": 68810 }, { "epoch": 2.5009084962569954, "grad_norm": 0.8582636117935181, "learning_rate": 3.538726343729873e-05, "loss": 0.1308, "step": 68820 }, { "epoch": 2.5012718947597934, "grad_norm": 1.0573068857192993, "learning_rate": 3.538254302243322e-05, "loss": 0.1064, 
"step": 68830 }, { "epoch": 2.501635293262592, "grad_norm": 1.7201263904571533, "learning_rate": 3.537782216022088e-05, "loss": 0.1303, "step": 68840 }, { "epoch": 2.50199869176539, "grad_norm": 0.8848857879638672, "learning_rate": 3.53731008508651e-05, "loss": 0.0885, "step": 68850 }, { "epoch": 2.5023620902681882, "grad_norm": 0.6936333775520325, "learning_rate": 3.5368379094569325e-05, "loss": 0.0989, "step": 68860 }, { "epoch": 2.502725488770986, "grad_norm": 0.7901983261108398, "learning_rate": 3.536365689153698e-05, "loss": 0.1984, "step": 68870 }, { "epoch": 2.503088887273784, "grad_norm": 0.5054183602333069, "learning_rate": 3.5358934241971534e-05, "loss": 0.0928, "step": 68880 }, { "epoch": 2.5034522857765826, "grad_norm": 1.7566126585006714, "learning_rate": 3.535421114607647e-05, "loss": 0.1212, "step": 68890 }, { "epoch": 2.5038156842793806, "grad_norm": 0.5128380656242371, "learning_rate": 3.5349487604055274e-05, "loss": 0.0774, "step": 68900 }, { "epoch": 2.504179082782179, "grad_norm": 0.994647741317749, "learning_rate": 3.53447636161115e-05, "loss": 0.1288, "step": 68910 }, { "epoch": 2.504542481284977, "grad_norm": 0.35602259635925293, "learning_rate": 3.534003918244866e-05, "loss": 0.1006, "step": 68920 }, { "epoch": 2.504905879787775, "grad_norm": 0.9458356499671936, "learning_rate": 3.533531430327032e-05, "loss": 0.1199, "step": 68930 }, { "epoch": 2.5052692782905734, "grad_norm": 1.100160837173462, "learning_rate": 3.533058897878006e-05, "loss": 0.0892, "step": 68940 }, { "epoch": 2.505632676793372, "grad_norm": 0.695726215839386, "learning_rate": 3.532586320918147e-05, "loss": 0.0928, "step": 68950 }, { "epoch": 2.50599607529617, "grad_norm": 1.826897382736206, "learning_rate": 3.532113699467819e-05, "loss": 0.105, "step": 68960 }, { "epoch": 2.506359473798968, "grad_norm": 1.4014049768447876, "learning_rate": 3.531641033547383e-05, "loss": 0.2298, "step": 68970 }, { "epoch": 2.5067228723017663, "grad_norm": 1.4749367237091064, 
"learning_rate": 3.531168323177206e-05, "loss": 0.0966, "step": 68980 }, { "epoch": 2.5070862708045643, "grad_norm": 4.613848686218262, "learning_rate": 3.530695568377655e-05, "loss": 0.1281, "step": 68990 }, { "epoch": 2.5074496693073627, "grad_norm": 0.9928845167160034, "learning_rate": 3.5302227691690984e-05, "loss": 0.1213, "step": 69000 }, { "epoch": 2.5074496693073627, "eval_loss": 0.30671653151512146, "eval_runtime": 179.555, "eval_samples_per_second": 41.291, "eval_steps_per_second": 5.163, "eval_wer": 0.1598017681122588, "step": 69000 }, { "epoch": 2.5078130678101607, "grad_norm": 0.24582041800022125, "learning_rate": 3.5297499255719094e-05, "loss": 0.0949, "step": 69010 }, { "epoch": 2.5081764663129587, "grad_norm": 0.4762285053730011, "learning_rate": 3.529277037606458e-05, "loss": 0.0983, "step": 69020 }, { "epoch": 2.508539864815757, "grad_norm": 0.6749287843704224, "learning_rate": 3.528804105293123e-05, "loss": 0.0911, "step": 69030 }, { "epoch": 2.508903263318555, "grad_norm": 0.4179406762123108, "learning_rate": 3.528331128652279e-05, "loss": 0.1979, "step": 69040 }, { "epoch": 2.5092666618213535, "grad_norm": 1.1406326293945312, "learning_rate": 3.5278581077043047e-05, "loss": 0.0918, "step": 69050 }, { "epoch": 2.5096300603241515, "grad_norm": 0.8093327879905701, "learning_rate": 3.527385042469583e-05, "loss": 0.0978, "step": 69060 }, { "epoch": 2.5099934588269495, "grad_norm": 1.7931946516036987, "learning_rate": 3.5269119329684945e-05, "loss": 0.109, "step": 69070 }, { "epoch": 2.510356857329748, "grad_norm": 0.6986146569252014, "learning_rate": 3.526438779221425e-05, "loss": 0.0993, "step": 69080 }, { "epoch": 2.510720255832546, "grad_norm": 1.2395824193954468, "learning_rate": 3.5259655812487604e-05, "loss": 0.1468, "step": 69090 }, { "epoch": 2.5110836543353443, "grad_norm": 3.537288188934326, "learning_rate": 3.525492339070889e-05, "loss": 0.0997, "step": 69100 }, { "epoch": 2.5114470528381423, "grad_norm": 0.8501663208007812, 
"learning_rate": 3.525019052708202e-05, "loss": 0.0933, "step": 69110 }, { "epoch": 2.5118104513409403, "grad_norm": 1.3228484392166138, "learning_rate": 3.524545722181091e-05, "loss": 0.1387, "step": 69120 }, { "epoch": 2.5121738498437387, "grad_norm": 1.2074254751205444, "learning_rate": 3.52407234750995e-05, "loss": 0.1062, "step": 69130 }, { "epoch": 2.5125372483465367, "grad_norm": 0.6108558177947998, "learning_rate": 3.523598928715174e-05, "loss": 0.1207, "step": 69140 }, { "epoch": 2.512900646849335, "grad_norm": 0.6959209442138672, "learning_rate": 3.523125465817164e-05, "loss": 0.0823, "step": 69150 }, { "epoch": 2.513264045352133, "grad_norm": 0.5447746515274048, "learning_rate": 3.5226519588363164e-05, "loss": 0.1009, "step": 69160 }, { "epoch": 2.513627443854931, "grad_norm": 6.87611198425293, "learning_rate": 3.522178407793036e-05, "loss": 0.1082, "step": 69170 }, { "epoch": 2.5139908423577295, "grad_norm": 1.2013996839523315, "learning_rate": 3.5217048127077246e-05, "loss": 0.1041, "step": 69180 }, { "epoch": 2.5143542408605275, "grad_norm": 2.1484246253967285, "learning_rate": 3.521231173600787e-05, "loss": 0.1174, "step": 69190 }, { "epoch": 2.514717639363326, "grad_norm": 0.6024388670921326, "learning_rate": 3.520757490492633e-05, "loss": 0.0968, "step": 69200 }, { "epoch": 2.515081037866124, "grad_norm": 0.621998131275177, "learning_rate": 3.5202837634036696e-05, "loss": 0.1441, "step": 69210 }, { "epoch": 2.515444436368922, "grad_norm": 0.7772573828697205, "learning_rate": 3.519809992354309e-05, "loss": 0.1199, "step": 69220 }, { "epoch": 2.5158078348717203, "grad_norm": 0.8994972109794617, "learning_rate": 3.519336177364966e-05, "loss": 0.1099, "step": 69230 }, { "epoch": 2.5161712333745188, "grad_norm": 0.7937003970146179, "learning_rate": 3.5188623184560524e-05, "loss": 0.1091, "step": 69240 }, { "epoch": 2.5165346318773167, "grad_norm": 1.3785254955291748, "learning_rate": 3.518388415647986e-05, "loss": 0.1035, "step": 69250 }, { "epoch": 
2.5168980303801147, "grad_norm": 0.6472801566123962, "learning_rate": 3.517914468961188e-05, "loss": 0.1054, "step": 69260 }, { "epoch": 2.517261428882913, "grad_norm": 2.0437135696411133, "learning_rate": 3.517440478416076e-05, "loss": 0.1224, "step": 69270 }, { "epoch": 2.517624827385711, "grad_norm": 0.9029390811920166, "learning_rate": 3.516966444033074e-05, "loss": 0.0865, "step": 69280 }, { "epoch": 2.5179882258885096, "grad_norm": 0.801255464553833, "learning_rate": 3.5164923658326064e-05, "loss": 0.0891, "step": 69290 }, { "epoch": 2.5183516243913076, "grad_norm": 1.0700057744979858, "learning_rate": 3.5160182438350995e-05, "loss": 0.0928, "step": 69300 }, { "epoch": 2.5187150228941055, "grad_norm": 0.6255751848220825, "learning_rate": 3.515544078060982e-05, "loss": 0.1071, "step": 69310 }, { "epoch": 2.519078421396904, "grad_norm": 0.784589409828186, "learning_rate": 3.515069868530683e-05, "loss": 0.0892, "step": 69320 }, { "epoch": 2.519441819899702, "grad_norm": 0.8623689413070679, "learning_rate": 3.514595615264635e-05, "loss": 0.1024, "step": 69330 }, { "epoch": 2.5198052184025004, "grad_norm": 1.3670728206634521, "learning_rate": 3.514121318283272e-05, "loss": 0.1021, "step": 69340 }, { "epoch": 2.5201686169052984, "grad_norm": 1.2742701768875122, "learning_rate": 3.513646977607029e-05, "loss": 0.0916, "step": 69350 }, { "epoch": 2.5205320154080963, "grad_norm": 2.6667962074279785, "learning_rate": 3.513172593256345e-05, "loss": 0.0921, "step": 69360 }, { "epoch": 2.5208954139108948, "grad_norm": 0.8958526849746704, "learning_rate": 3.512698165251659e-05, "loss": 0.0989, "step": 69370 }, { "epoch": 2.5212588124136928, "grad_norm": 1.1172994375228882, "learning_rate": 3.512223693613412e-05, "loss": 0.1104, "step": 69380 }, { "epoch": 2.521622210916491, "grad_norm": 0.5839262008666992, "learning_rate": 3.5117491783620475e-05, "loss": 0.128, "step": 69390 }, { "epoch": 2.521985609419289, "grad_norm": 0.9729129672050476, "learning_rate": 
3.51127461951801e-05, "loss": 0.1229, "step": 69400 }, { "epoch": 2.522349007922087, "grad_norm": 3.964264154434204, "learning_rate": 3.510800017101749e-05, "loss": 0.096, "step": 69410 }, { "epoch": 2.5227124064248856, "grad_norm": 0.4221835732460022, "learning_rate": 3.51032537113371e-05, "loss": 0.1109, "step": 69420 }, { "epoch": 2.5230758049276836, "grad_norm": 0.6467729806900024, "learning_rate": 3.5098506816343466e-05, "loss": 0.116, "step": 69430 }, { "epoch": 2.523439203430482, "grad_norm": 3.9705393314361572, "learning_rate": 3.50937594862411e-05, "loss": 0.1349, "step": 69440 }, { "epoch": 2.52380260193328, "grad_norm": 1.3955297470092773, "learning_rate": 3.508901172123455e-05, "loss": 0.1116, "step": 69450 }, { "epoch": 2.524166000436078, "grad_norm": 0.8039283156394958, "learning_rate": 3.508426352152838e-05, "loss": 0.0905, "step": 69460 }, { "epoch": 2.5245293989388764, "grad_norm": 1.1199578046798706, "learning_rate": 3.507951488732718e-05, "loss": 0.1136, "step": 69470 }, { "epoch": 2.5248927974416744, "grad_norm": 0.7925732731819153, "learning_rate": 3.507476581883555e-05, "loss": 0.1058, "step": 69480 }, { "epoch": 2.525256195944473, "grad_norm": 0.8125994205474854, "learning_rate": 3.5070016316258106e-05, "loss": 0.1033, "step": 69490 }, { "epoch": 2.525619594447271, "grad_norm": 0.4621226489543915, "learning_rate": 3.5065266379799475e-05, "loss": 1.4773, "step": 69500 }, { "epoch": 2.525982992950069, "grad_norm": 1.0948034524917603, "learning_rate": 3.506051600966434e-05, "loss": 0.0797, "step": 69510 }, { "epoch": 2.526346391452867, "grad_norm": 1.1567878723144531, "learning_rate": 3.5055765206057354e-05, "loss": 0.1143, "step": 69520 }, { "epoch": 2.5267097899556656, "grad_norm": 0.95686936378479, "learning_rate": 3.505101396918324e-05, "loss": 0.1188, "step": 69530 }, { "epoch": 2.5270731884584636, "grad_norm": 0.34038084745407104, "learning_rate": 3.504626229924669e-05, "loss": 0.1076, "step": 69540 }, { "epoch": 2.5274365869612616, 
"grad_norm": 4.851949214935303, "learning_rate": 3.504151019645243e-05, "loss": 0.0955, "step": 69550 }, { "epoch": 2.52779998546406, "grad_norm": 0.8883131742477417, "learning_rate": 3.503675766100524e-05, "loss": 0.1427, "step": 69560 }, { "epoch": 2.528163383966858, "grad_norm": 0.7588313221931458, "learning_rate": 3.5032004693109866e-05, "loss": 0.1198, "step": 69570 }, { "epoch": 2.5285267824696565, "grad_norm": 0.5408293604850769, "learning_rate": 3.50272512929711e-05, "loss": 0.1115, "step": 69580 }, { "epoch": 2.5288901809724544, "grad_norm": 1.0919950008392334, "learning_rate": 3.5022497460793754e-05, "loss": 0.7792, "step": 69590 }, { "epoch": 2.5292535794752524, "grad_norm": 0.9922258853912354, "learning_rate": 3.501774319678266e-05, "loss": 0.079, "step": 69600 }, { "epoch": 2.5292535794752524, "eval_loss": 0.3091621398925781, "eval_runtime": 180.6731, "eval_samples_per_second": 41.035, "eval_steps_per_second": 5.131, "eval_wer": 0.15795015157841233, "step": 69600 }, { "epoch": 2.529616977978051, "grad_norm": 1.5794726610183716, "learning_rate": 3.501298850114266e-05, "loss": 0.1154, "step": 69610 }, { "epoch": 2.529980376480849, "grad_norm": 3.069139003753662, "learning_rate": 3.5008233374078594e-05, "loss": 0.1161, "step": 69620 }, { "epoch": 2.5303437749836473, "grad_norm": 0.8879293203353882, "learning_rate": 3.500347781579537e-05, "loss": 0.0929, "step": 69630 }, { "epoch": 2.5307071734864452, "grad_norm": 2.097984552383423, "learning_rate": 3.4998721826497885e-05, "loss": 0.0873, "step": 69640 }, { "epoch": 2.5310705719892432, "grad_norm": 0.8583676218986511, "learning_rate": 3.499396540639104e-05, "loss": 0.6541, "step": 69650 }, { "epoch": 2.5314339704920417, "grad_norm": 0.44445595145225525, "learning_rate": 3.498920855567979e-05, "loss": 0.0748, "step": 69660 }, { "epoch": 2.5317973689948396, "grad_norm": 0.9186582565307617, "learning_rate": 3.4984451274569094e-05, "loss": 0.1022, "step": 69670 }, { "epoch": 2.532160767497638, "grad_norm": 
1.34561288356781, "learning_rate": 3.497969356326391e-05, "loss": 0.0962, "step": 69680 }, { "epoch": 2.532524166000436, "grad_norm": 1.5889935493469238, "learning_rate": 3.497493542196923e-05, "loss": 0.1013, "step": 69690 }, { "epoch": 2.532887564503234, "grad_norm": 1.0599699020385742, "learning_rate": 3.4970176850890085e-05, "loss": 0.1048, "step": 69700 }, { "epoch": 2.5332509630060325, "grad_norm": 0.7291392087936401, "learning_rate": 3.496541785023149e-05, "loss": 0.1002, "step": 69710 }, { "epoch": 2.5336143615088305, "grad_norm": 0.5541179180145264, "learning_rate": 3.4960658420198494e-05, "loss": 0.1062, "step": 69720 }, { "epoch": 2.533977760011629, "grad_norm": 1.0008395910263062, "learning_rate": 3.495589856099617e-05, "loss": 0.2525, "step": 69730 }, { "epoch": 2.534341158514427, "grad_norm": 0.7523865699768066, "learning_rate": 3.49511382728296e-05, "loss": 0.117, "step": 69740 }, { "epoch": 2.534704557017225, "grad_norm": 1.8582743406295776, "learning_rate": 3.4946377555903886e-05, "loss": 0.0834, "step": 69750 }, { "epoch": 2.5350679555200233, "grad_norm": 0.44991886615753174, "learning_rate": 3.494161641042415e-05, "loss": 0.0895, "step": 69760 }, { "epoch": 2.5354313540228213, "grad_norm": 0.46044957637786865, "learning_rate": 3.4936854836595545e-05, "loss": 0.1333, "step": 69770 }, { "epoch": 2.5357947525256197, "grad_norm": 2.098876476287842, "learning_rate": 3.493209283462321e-05, "loss": 0.1073, "step": 69780 }, { "epoch": 2.5361581510284177, "grad_norm": 0.5006657838821411, "learning_rate": 3.492733040471234e-05, "loss": 0.1205, "step": 69790 }, { "epoch": 2.5365215495312157, "grad_norm": 1.2363359928131104, "learning_rate": 3.492256754706813e-05, "loss": 0.0865, "step": 69800 }, { "epoch": 2.536884948034014, "grad_norm": 0.5873517394065857, "learning_rate": 3.491780426189577e-05, "loss": 0.0842, "step": 69810 }, { "epoch": 2.5372483465368125, "grad_norm": 0.5149590373039246, "learning_rate": 3.491304054940053e-05, "loss": 0.302, "step": 
69820 }, { "epoch": 2.5376117450396105, "grad_norm": 0.613667368888855, "learning_rate": 3.4908276409787635e-05, "loss": 0.1106, "step": 69830 }, { "epoch": 2.5379751435424085, "grad_norm": 1.8323549032211304, "learning_rate": 3.490351184326236e-05, "loss": 0.1301, "step": 69840 }, { "epoch": 2.538338542045207, "grad_norm": 1.859044075012207, "learning_rate": 3.4898746850030005e-05, "loss": 0.0863, "step": 69850 }, { "epoch": 2.538701940548005, "grad_norm": 1.0749214887619019, "learning_rate": 3.4893981430295864e-05, "loss": 0.0798, "step": 69860 }, { "epoch": 2.5390653390508033, "grad_norm": 0.9566397070884705, "learning_rate": 3.488921558426527e-05, "loss": 0.1183, "step": 69870 }, { "epoch": 2.5394287375536013, "grad_norm": 1.2835750579833984, "learning_rate": 3.4884449312143555e-05, "loss": 0.104, "step": 69880 }, { "epoch": 2.5397921360563993, "grad_norm": 0.6767297387123108, "learning_rate": 3.48796826141361e-05, "loss": 0.1889, "step": 69890 }, { "epoch": 2.5401555345591977, "grad_norm": 1.499045729637146, "learning_rate": 3.487491549044826e-05, "loss": 0.1031, "step": 69900 }, { "epoch": 2.5405189330619957, "grad_norm": 0.3522442877292633, "learning_rate": 3.487014794128545e-05, "loss": 0.1065, "step": 69910 }, { "epoch": 2.540882331564794, "grad_norm": 0.6056109070777893, "learning_rate": 3.486537996685309e-05, "loss": 0.1181, "step": 69920 }, { "epoch": 2.541245730067592, "grad_norm": 2.347325563430786, "learning_rate": 3.48606115673566e-05, "loss": 0.3176, "step": 69930 }, { "epoch": 2.54160912857039, "grad_norm": 2.3445467948913574, "learning_rate": 3.4855842743001446e-05, "loss": 0.1717, "step": 69940 }, { "epoch": 2.5419725270731885, "grad_norm": 0.9979462027549744, "learning_rate": 3.485107349399309e-05, "loss": 0.0845, "step": 69950 }, { "epoch": 2.5423359255759865, "grad_norm": 3.576714038848877, "learning_rate": 3.484630382053704e-05, "loss": 0.1516, "step": 69960 }, { "epoch": 2.542699324078785, "grad_norm": 0.4525027573108673, "learning_rate": 
3.484153372283878e-05, "loss": 0.1062, "step": 69970 }, { "epoch": 2.543062722581583, "grad_norm": 1.1381046772003174, "learning_rate": 3.4836763201103854e-05, "loss": 0.1246, "step": 69980 }, { "epoch": 2.543426121084381, "grad_norm": 0.6374491453170776, "learning_rate": 3.48319922555378e-05, "loss": 0.1775, "step": 69990 }, { "epoch": 2.5437895195871794, "grad_norm": 1.7682280540466309, "learning_rate": 3.482722088634618e-05, "loss": 0.1135, "step": 70000 }, { "epoch": 2.5441529180899773, "grad_norm": 1.1015331745147705, "learning_rate": 3.482244909373458e-05, "loss": 0.0904, "step": 70010 }, { "epoch": 2.5445163165927758, "grad_norm": 4.6638689041137695, "learning_rate": 3.481767687790859e-05, "loss": 0.2748, "step": 70020 }, { "epoch": 2.5448797150955738, "grad_norm": 0.8912318348884583, "learning_rate": 3.481290423907384e-05, "loss": 0.094, "step": 70030 }, { "epoch": 2.5452431135983717, "grad_norm": 2.43723726272583, "learning_rate": 3.480813117743596e-05, "loss": 0.1276, "step": 70040 }, { "epoch": 2.54560651210117, "grad_norm": 3.3461971282958984, "learning_rate": 3.480335769320061e-05, "loss": 0.0971, "step": 70050 }, { "epoch": 2.545969910603968, "grad_norm": 3.6578071117401123, "learning_rate": 3.479858378657346e-05, "loss": 0.1575, "step": 70060 }, { "epoch": 2.5463333091067666, "grad_norm": 1.831850290298462, "learning_rate": 3.479380945776018e-05, "loss": 0.0947, "step": 70070 }, { "epoch": 2.5466967076095646, "grad_norm": 0.8772917985916138, "learning_rate": 3.478903470696651e-05, "loss": 0.1189, "step": 70080 }, { "epoch": 2.5470601061123626, "grad_norm": 7.842989921569824, "learning_rate": 3.478425953439816e-05, "loss": 0.1245, "step": 70090 }, { "epoch": 2.547423504615161, "grad_norm": 1.8557602167129517, "learning_rate": 3.4779483940260885e-05, "loss": 0.1099, "step": 70100 }, { "epoch": 2.5477869031179594, "grad_norm": 1.2630740404129028, "learning_rate": 3.477470792476044e-05, "loss": 0.076, "step": 70110 }, { "epoch": 2.5481503016207574, 
"grad_norm": 0.5257185697555542, "learning_rate": 3.4769931488102606e-05, "loss": 0.1972, "step": 70120 }, { "epoch": 2.5485137001235554, "grad_norm": 0.5321794748306274, "learning_rate": 3.4765154630493194e-05, "loss": 0.087, "step": 70130 }, { "epoch": 2.548877098626354, "grad_norm": 0.7569301128387451, "learning_rate": 3.4760377352138e-05, "loss": 0.1349, "step": 70140 }, { "epoch": 2.549240497129152, "grad_norm": 3.5890607833862305, "learning_rate": 3.475559965324289e-05, "loss": 1.1846, "step": 70150 }, { "epoch": 2.5496038956319502, "grad_norm": 0.8748692870140076, "learning_rate": 3.475082153401368e-05, "loss": 0.0899, "step": 70160 }, { "epoch": 2.549967294134748, "grad_norm": 0.45375722646713257, "learning_rate": 3.474604299465628e-05, "loss": 0.1166, "step": 70170 }, { "epoch": 2.550330692637546, "grad_norm": 4.401093006134033, "learning_rate": 3.474126403537656e-05, "loss": 0.1247, "step": 70180 }, { "epoch": 2.5506940911403446, "grad_norm": 0.7887241244316101, "learning_rate": 3.473648465638043e-05, "loss": 0.1344, "step": 70190 }, { "epoch": 2.5510574896431426, "grad_norm": 1.8106690645217896, "learning_rate": 3.4731704857873826e-05, "loss": 0.095, "step": 70200 }, { "epoch": 2.5510574896431426, "eval_loss": 0.32158222794532776, "eval_runtime": 180.3244, "eval_samples_per_second": 41.115, "eval_steps_per_second": 5.141, "eval_wer": 0.15185070887868282, "step": 70200 }, { "epoch": 2.551420888145941, "grad_norm": 0.9934507012367249, "learning_rate": 3.4726924640062676e-05, "loss": 0.0928, "step": 70210 }, { "epoch": 2.551784286648739, "grad_norm": 2.932734966278076, "learning_rate": 3.472214400315296e-05, "loss": 0.1131, "step": 70220 }, { "epoch": 2.552147685151537, "grad_norm": 0.6811621189117432, "learning_rate": 3.471736294735065e-05, "loss": 0.1616, "step": 70230 }, { "epoch": 2.5525110836543354, "grad_norm": 3.0019402503967285, "learning_rate": 3.471258147286173e-05, "loss": 0.1317, "step": 70240 }, { "epoch": 2.5528744821571334, "grad_norm": 
0.5437862873077393, "learning_rate": 3.470779957989225e-05, "loss": 0.0941, "step": 70250 }, { "epoch": 2.553237880659932, "grad_norm": 1.201907992362976, "learning_rate": 3.470301726864822e-05, "loss": 0.0959, "step": 70260 }, { "epoch": 2.55360127916273, "grad_norm": 0.8288230299949646, "learning_rate": 3.469823453933569e-05, "loss": 0.1101, "step": 70270 }, { "epoch": 2.553964677665528, "grad_norm": 0.6374495625495911, "learning_rate": 3.469345139216075e-05, "loss": 0.0854, "step": 70280 }, { "epoch": 2.5543280761683262, "grad_norm": 0.5856258273124695, "learning_rate": 3.468866782732948e-05, "loss": 0.1128, "step": 70290 }, { "epoch": 2.5546914746711242, "grad_norm": 0.42517444491386414, "learning_rate": 3.4683883845047985e-05, "loss": 0.1721, "step": 70300 }, { "epoch": 2.5550548731739227, "grad_norm": 3.486084222793579, "learning_rate": 3.467909944552239e-05, "loss": 0.0961, "step": 70310 }, { "epoch": 2.5554182716767206, "grad_norm": 0.40620315074920654, "learning_rate": 3.467431462895884e-05, "loss": 0.1109, "step": 70320 }, { "epoch": 2.5557816701795186, "grad_norm": 0.5691574811935425, "learning_rate": 3.466952939556349e-05, "loss": 0.1062, "step": 70330 }, { "epoch": 2.556145068682317, "grad_norm": 1.281260371208191, "learning_rate": 3.466474374554252e-05, "loss": 0.096, "step": 70340 }, { "epoch": 2.556508467185115, "grad_norm": 0.9632150530815125, "learning_rate": 3.465995767910213e-05, "loss": 0.0934, "step": 70350 }, { "epoch": 2.5568718656879135, "grad_norm": 1.603409767150879, "learning_rate": 3.4655171196448544e-05, "loss": 0.0931, "step": 70360 }, { "epoch": 2.5572352641907115, "grad_norm": 0.9560374021530151, "learning_rate": 3.465038429778798e-05, "loss": 0.0984, "step": 70370 }, { "epoch": 2.5575986626935094, "grad_norm": 0.6290355920791626, "learning_rate": 3.464559698332669e-05, "loss": 0.113, "step": 70380 }, { "epoch": 2.557962061196308, "grad_norm": 156.17115783691406, "learning_rate": 3.464080925327094e-05, "loss": 0.4685, "step": 70390 
}, { "epoch": 2.5583254596991063, "grad_norm": 1.0737193822860718, "learning_rate": 3.4636021107827026e-05, "loss": 0.0767, "step": 70400 }, { "epoch": 2.5586888582019043, "grad_norm": 0.8538148403167725, "learning_rate": 3.463123254720125e-05, "loss": 0.087, "step": 70410 }, { "epoch": 2.5590522567047023, "grad_norm": 0.826351523399353, "learning_rate": 3.462644357159993e-05, "loss": 0.1008, "step": 70420 }, { "epoch": 2.5594156552075007, "grad_norm": 0.4948084056377411, "learning_rate": 3.462165418122941e-05, "loss": 0.0961, "step": 70430 }, { "epoch": 2.5597790537102987, "grad_norm": 0.9462293982505798, "learning_rate": 3.4616864376296046e-05, "loss": 0.135, "step": 70440 }, { "epoch": 2.560142452213097, "grad_norm": 0.5528499484062195, "learning_rate": 3.4612074157006206e-05, "loss": 0.101, "step": 70450 }, { "epoch": 2.560505850715895, "grad_norm": 0.822938859462738, "learning_rate": 3.4607283523566294e-05, "loss": 0.095, "step": 70460 }, { "epoch": 2.560869249218693, "grad_norm": 0.6554206013679504, "learning_rate": 3.460249247618271e-05, "loss": 0.0966, "step": 70470 }, { "epoch": 2.5612326477214915, "grad_norm": 0.8792755007743835, "learning_rate": 3.4597701015061904e-05, "loss": 0.1041, "step": 70480 }, { "epoch": 2.5615960462242895, "grad_norm": 0.8316457867622375, "learning_rate": 3.4592909140410304e-05, "loss": 0.1166, "step": 70490 }, { "epoch": 2.561959444727088, "grad_norm": 1.183933138847351, "learning_rate": 3.458811685243438e-05, "loss": 0.0911, "step": 70500 }, { "epoch": 2.562322843229886, "grad_norm": 0.6310432553291321, "learning_rate": 3.458332415134062e-05, "loss": 0.0833, "step": 70510 }, { "epoch": 2.562686241732684, "grad_norm": 0.5768032670021057, "learning_rate": 3.457853103733552e-05, "loss": 0.1941, "step": 70520 }, { "epoch": 2.5630496402354823, "grad_norm": 0.5388504266738892, "learning_rate": 3.457373751062559e-05, "loss": 0.1083, "step": 70530 }, { "epoch": 2.5634130387382803, "grad_norm": 9.278057098388672, "learning_rate": 
3.4568943571417376e-05, "loss": 0.102, "step": 70540 }, { "epoch": 2.5637764372410787, "grad_norm": 0.8533729910850525, "learning_rate": 3.456414921991744e-05, "loss": 0.0916, "step": 70550 }, { "epoch": 2.5641398357438767, "grad_norm": 0.8473436832427979, "learning_rate": 3.455935445633234e-05, "loss": 0.0953, "step": 70560 }, { "epoch": 2.5645032342466747, "grad_norm": 0.8911932706832886, "learning_rate": 3.455455928086866e-05, "loss": 0.0992, "step": 70570 }, { "epoch": 2.564866632749473, "grad_norm": 0.9488405585289001, "learning_rate": 3.4549763693733026e-05, "loss": 0.1191, "step": 70580 }, { "epoch": 2.565230031252271, "grad_norm": 0.6498254537582397, "learning_rate": 3.454496769513204e-05, "loss": 0.1442, "step": 70590 }, { "epoch": 2.5655934297550695, "grad_norm": 0.5127254724502563, "learning_rate": 3.4540171285272374e-05, "loss": 0.0843, "step": 70600 }, { "epoch": 2.5659568282578675, "grad_norm": 2.8321163654327393, "learning_rate": 3.453537446436066e-05, "loss": 0.076, "step": 70610 }, { "epoch": 2.5663202267606655, "grad_norm": 0.8829347491264343, "learning_rate": 3.4530577232603584e-05, "loss": 0.1044, "step": 70620 }, { "epoch": 2.566683625263464, "grad_norm": 1.8622163534164429, "learning_rate": 3.452577959020785e-05, "loss": 0.1057, "step": 70630 }, { "epoch": 2.567047023766262, "grad_norm": 0.5306766629219055, "learning_rate": 3.452098153738017e-05, "loss": 0.1118, "step": 70640 }, { "epoch": 2.5674104222690604, "grad_norm": 0.5810162425041199, "learning_rate": 3.451618307432727e-05, "loss": 0.0862, "step": 70650 }, { "epoch": 2.5677738207718583, "grad_norm": 0.790539026260376, "learning_rate": 3.4511384201255895e-05, "loss": 0.1227, "step": 70660 }, { "epoch": 2.5681372192746563, "grad_norm": 3.3890788555145264, "learning_rate": 3.450658491837282e-05, "loss": 0.0883, "step": 70670 }, { "epoch": 2.5685006177774548, "grad_norm": 1.1996808052062988, "learning_rate": 3.4501785225884816e-05, "loss": 0.0989, "step": 70680 }, { "epoch": 
2.568864016280253, "grad_norm": 0.81224524974823, "learning_rate": 3.449698512399871e-05, "loss": 0.1149, "step": 70690 }, { "epoch": 2.569227414783051, "grad_norm": 1.3377439975738525, "learning_rate": 3.4492184612921305e-05, "loss": 0.1048, "step": 70700 }, { "epoch": 2.569590813285849, "grad_norm": 0.9538800716400146, "learning_rate": 3.4487383692859423e-05, "loss": 0.0946, "step": 70710 }, { "epoch": 2.5699542117886476, "grad_norm": 0.49254775047302246, "learning_rate": 3.448258236401994e-05, "loss": 0.1008, "step": 70720 }, { "epoch": 2.5703176102914456, "grad_norm": 0.44506704807281494, "learning_rate": 3.447778062660973e-05, "loss": 0.1111, "step": 70730 }, { "epoch": 2.570681008794244, "grad_norm": 0.8836443424224854, "learning_rate": 3.4472978480835674e-05, "loss": 0.1064, "step": 70740 }, { "epoch": 2.571044407297042, "grad_norm": 0.8320255279541016, "learning_rate": 3.4468175926904666e-05, "loss": 0.0926, "step": 70750 }, { "epoch": 2.57140780579984, "grad_norm": 0.6895723342895508, "learning_rate": 3.446337296502366e-05, "loss": 0.0766, "step": 70760 }, { "epoch": 2.5717712043026384, "grad_norm": 0.6943153738975525, "learning_rate": 3.445856959539958e-05, "loss": 0.1134, "step": 70770 }, { "epoch": 2.5721346028054364, "grad_norm": 0.7596734166145325, "learning_rate": 3.4453765818239387e-05, "loss": 0.0969, "step": 70780 }, { "epoch": 2.572498001308235, "grad_norm": 0.42216864228248596, "learning_rate": 3.4448961633750066e-05, "loss": 0.1094, "step": 70790 }, { "epoch": 2.572861399811033, "grad_norm": 0.8295478224754333, "learning_rate": 3.44441570421386e-05, "loss": 0.0825, "step": 70800 }, { "epoch": 2.572861399811033, "eval_loss": 0.3259897530078888, "eval_runtime": 180.0538, "eval_samples_per_second": 41.177, "eval_steps_per_second": 5.148, "eval_wer": 0.15045291992666146, "step": 70800 }, { "epoch": 2.5732247983138308, "grad_norm": 0.8128442168235779, "learning_rate": 3.4439352043612015e-05, "loss": 0.093, "step": 70810 }, { "epoch": 
2.573588196816629, "grad_norm": 0.5261029601097107, "learning_rate": 3.4434546638377334e-05, "loss": 0.1067, "step": 70820 }, { "epoch": 2.573951595319427, "grad_norm": 2.5018603801727295, "learning_rate": 3.442974082664161e-05, "loss": 0.114, "step": 70830 }, { "epoch": 2.5743149938222256, "grad_norm": 0.37377244234085083, "learning_rate": 3.44249346086119e-05, "loss": 0.1822, "step": 70840 }, { "epoch": 2.5746783923250236, "grad_norm": 6.293512344360352, "learning_rate": 3.4420127984495295e-05, "loss": 0.1651, "step": 70850 }, { "epoch": 2.5750417908278216, "grad_norm": 1.2653559446334839, "learning_rate": 3.4415320954498894e-05, "loss": 0.0936, "step": 70860 }, { "epoch": 2.57540518933062, "grad_norm": 0.4816114008426666, "learning_rate": 3.4410513518829806e-05, "loss": 0.1242, "step": 70870 }, { "epoch": 2.575768587833418, "grad_norm": 0.6479201316833496, "learning_rate": 3.440570567769518e-05, "loss": 0.0924, "step": 70880 }, { "epoch": 2.5761319863362164, "grad_norm": 1.3513591289520264, "learning_rate": 3.440089743130216e-05, "loss": 0.1385, "step": 70890 }, { "epoch": 2.5764953848390144, "grad_norm": 0.4289826452732086, "learning_rate": 3.4396088779857917e-05, "loss": 0.0769, "step": 70900 }, { "epoch": 2.5768587833418124, "grad_norm": 1.5458887815475464, "learning_rate": 3.4391279723569635e-05, "loss": 0.1007, "step": 70910 }, { "epoch": 2.577222181844611, "grad_norm": 0.5470010638237, "learning_rate": 3.438647026264453e-05, "loss": 0.1041, "step": 70920 }, { "epoch": 2.577585580347409, "grad_norm": 0.7723416090011597, "learning_rate": 3.438166039728982e-05, "loss": 0.128, "step": 70930 }, { "epoch": 2.5779489788502072, "grad_norm": 0.7723271250724792, "learning_rate": 3.437685012771274e-05, "loss": 0.1332, "step": 70940 }, { "epoch": 2.5783123773530052, "grad_norm": 0.6610028147697449, "learning_rate": 3.4372039454120556e-05, "loss": 0.093, "step": 70950 }, { "epoch": 2.578675775855803, "grad_norm": 0.8244014978408813, "learning_rate": 
3.436722837672053e-05, "loss": 0.0913, "step": 70960 }, { "epoch": 2.5790391743586016, "grad_norm": 3.8544437885284424, "learning_rate": 3.4362416895719966e-05, "loss": 0.1292, "step": 70970 }, { "epoch": 2.5794025728614, "grad_norm": 0.570715069770813, "learning_rate": 3.4357605011326164e-05, "loss": 0.1142, "step": 70980 }, { "epoch": 2.579765971364198, "grad_norm": 0.6846952438354492, "learning_rate": 3.435279272374647e-05, "loss": 0.1157, "step": 70990 }, { "epoch": 2.580129369866996, "grad_norm": 0.8145487904548645, "learning_rate": 3.4347980033188203e-05, "loss": 0.0972, "step": 71000 }, { "epoch": 2.5804927683697945, "grad_norm": 0.3357942998409271, "learning_rate": 3.434316693985874e-05, "loss": 0.0952, "step": 71010 }, { "epoch": 2.5808561668725924, "grad_norm": 0.6499632000923157, "learning_rate": 3.433835344396546e-05, "loss": 0.2527, "step": 71020 }, { "epoch": 2.581219565375391, "grad_norm": 1.1719329357147217, "learning_rate": 3.4333539545715754e-05, "loss": 0.0954, "step": 71030 }, { "epoch": 2.581582963878189, "grad_norm": 2.707500457763672, "learning_rate": 3.432872524531704e-05, "loss": 0.1674, "step": 71040 }, { "epoch": 2.581946362380987, "grad_norm": 1.7513278722763062, "learning_rate": 3.432391054297674e-05, "loss": 0.0758, "step": 71050 }, { "epoch": 2.5823097608837853, "grad_norm": 0.9239100813865662, "learning_rate": 3.431909543890231e-05, "loss": 0.0919, "step": 71060 }, { "epoch": 2.5826731593865833, "grad_norm": 0.21719126403331757, "learning_rate": 3.431427993330122e-05, "loss": 0.1458, "step": 71070 }, { "epoch": 2.5830365578893817, "grad_norm": 0.7722142338752747, "learning_rate": 3.430946402638095e-05, "loss": 0.0977, "step": 71080 }, { "epoch": 2.5833999563921797, "grad_norm": 0.7812473773956299, "learning_rate": 3.430464771834899e-05, "loss": 0.1204, "step": 71090 }, { "epoch": 2.5837633548949777, "grad_norm": 1.0319454669952393, "learning_rate": 3.429983100941287e-05, "loss": 0.0921, "step": 71100 }, { "epoch": 2.584126753397776, 
"grad_norm": 0.8233940601348877, "learning_rate": 3.429501389978013e-05, "loss": 0.0931, "step": 71110 }, { "epoch": 2.584490151900574, "grad_norm": 0.5543156862258911, "learning_rate": 3.42901963896583e-05, "loss": 0.0936, "step": 71120 }, { "epoch": 2.5848535504033725, "grad_norm": 0.849062979221344, "learning_rate": 3.4285378479254964e-05, "loss": 0.1034, "step": 71130 }, { "epoch": 2.5852169489061705, "grad_norm": 0.7621930837631226, "learning_rate": 3.428056016877771e-05, "loss": 0.1219, "step": 71140 }, { "epoch": 2.5855803474089685, "grad_norm": 1.097886323928833, "learning_rate": 3.427574145843413e-05, "loss": 0.1034, "step": 71150 }, { "epoch": 2.585943745911767, "grad_norm": 1.2844264507293701, "learning_rate": 3.4270922348431866e-05, "loss": 0.0961, "step": 71160 }, { "epoch": 2.586307144414565, "grad_norm": 0.6416186094284058, "learning_rate": 3.4266102838978544e-05, "loss": 0.0982, "step": 71170 }, { "epoch": 2.5866705429173633, "grad_norm": 1.0426020622253418, "learning_rate": 3.426128293028181e-05, "loss": 0.108, "step": 71180 }, { "epoch": 2.5870339414201613, "grad_norm": 1.2115471363067627, "learning_rate": 3.425646262254935e-05, "loss": 0.0981, "step": 71190 }, { "epoch": 2.5873973399229593, "grad_norm": 1.362383484840393, "learning_rate": 3.425164191598885e-05, "loss": 0.0936, "step": 71200 }, { "epoch": 2.5877607384257577, "grad_norm": 1.0915354490280151, "learning_rate": 3.4246820810808025e-05, "loss": 0.0832, "step": 71210 }, { "epoch": 2.5881241369285557, "grad_norm": 1.0872890949249268, "learning_rate": 3.424199930721459e-05, "loss": 0.1135, "step": 71220 }, { "epoch": 2.588487535431354, "grad_norm": 0.5933959484100342, "learning_rate": 3.4237177405416276e-05, "loss": 0.0971, "step": 71230 }, { "epoch": 2.588850933934152, "grad_norm": 1.2194724082946777, "learning_rate": 3.423235510562086e-05, "loss": 0.1171, "step": 71240 }, { "epoch": 2.58921433243695, "grad_norm": 1.0390851497650146, "learning_rate": 3.422753240803612e-05, "loss": 1.326, 
"step": 71250 }, { "epoch": 2.5895777309397485, "grad_norm": 1.630076289176941, "learning_rate": 3.4222709312869825e-05, "loss": 0.0837, "step": 71260 }, { "epoch": 2.589941129442547, "grad_norm": 0.4006626307964325, "learning_rate": 3.421788582032981e-05, "loss": 0.1116, "step": 71270 }, { "epoch": 2.590304527945345, "grad_norm": 0.5512908697128296, "learning_rate": 3.4213061930623884e-05, "loss": 0.1169, "step": 71280 }, { "epoch": 2.590667926448143, "grad_norm": 1.0259326696395874, "learning_rate": 3.420823764395991e-05, "loss": 0.1221, "step": 71290 }, { "epoch": 2.5910313249509414, "grad_norm": 1.1377673149108887, "learning_rate": 3.420341296054574e-05, "loss": 0.1241, "step": 71300 }, { "epoch": 2.5913947234537393, "grad_norm": 0.9114333987236023, "learning_rate": 3.419858788058924e-05, "loss": 0.1012, "step": 71310 }, { "epoch": 2.5917581219565378, "grad_norm": 0.6368651390075684, "learning_rate": 3.4193762404298327e-05, "loss": 0.1096, "step": 71320 }, { "epoch": 2.5921215204593357, "grad_norm": 1.080757975578308, "learning_rate": 3.4188936531880894e-05, "loss": 0.1046, "step": 71330 }, { "epoch": 2.5924849189621337, "grad_norm": 11.998626708984375, "learning_rate": 3.418411026354489e-05, "loss": 0.2426, "step": 71340 }, { "epoch": 2.592848317464932, "grad_norm": 1.4404159784317017, "learning_rate": 3.417928359949824e-05, "loss": 0.0751, "step": 71350 }, { "epoch": 2.59321171596773, "grad_norm": 0.42481374740600586, "learning_rate": 3.417445653994893e-05, "loss": 0.0891, "step": 71360 }, { "epoch": 2.5935751144705286, "grad_norm": 0.6405854225158691, "learning_rate": 3.416962908510493e-05, "loss": 0.1497, "step": 71370 }, { "epoch": 2.5939385129733266, "grad_norm": 0.6601307392120361, "learning_rate": 3.416480123517424e-05, "loss": 0.1041, "step": 71380 }, { "epoch": 2.5943019114761245, "grad_norm": 6.7601318359375, "learning_rate": 3.415997299036486e-05, "loss": 0.1162, "step": 71390 }, { "epoch": 2.594665309978923, "grad_norm": 0.7878421545028687, 
"learning_rate": 3.415514435088485e-05, "loss": 0.089, "step": 71400 }, { "epoch": 2.594665309978923, "eval_loss": 0.32694903016090393, "eval_runtime": 180.1568, "eval_samples_per_second": 41.153, "eval_steps_per_second": 5.146, "eval_wer": 0.15186886198195582, "step": 71400 }, { "epoch": 2.595028708481721, "grad_norm": 0.8284702897071838, "learning_rate": 3.415031531694224e-05, "loss": 0.1116, "step": 71410 }, { "epoch": 2.5953921069845194, "grad_norm": 0.6205730438232422, "learning_rate": 3.41454858887451e-05, "loss": 0.1055, "step": 71420 }, { "epoch": 2.5957555054873174, "grad_norm": 0.6210823655128479, "learning_rate": 3.414065606650151e-05, "loss": 0.1, "step": 71430 }, { "epoch": 2.5961189039901154, "grad_norm": 1.2746903896331787, "learning_rate": 3.4135825850419576e-05, "loss": 0.0938, "step": 71440 }, { "epoch": 2.596482302492914, "grad_norm": 0.6673762202262878, "learning_rate": 3.4130995240707406e-05, "loss": 1.6946, "step": 71450 }, { "epoch": 2.5968457009957118, "grad_norm": 0.6017360687255859, "learning_rate": 3.4126164237573145e-05, "loss": 0.0974, "step": 71460 }, { "epoch": 2.59720909949851, "grad_norm": 0.5965964198112488, "learning_rate": 3.4121332841224926e-05, "loss": 0.1109, "step": 71470 }, { "epoch": 2.597572498001308, "grad_norm": 0.8033668398857117, "learning_rate": 3.411650105187094e-05, "loss": 0.1007, "step": 71480 }, { "epoch": 2.597935896504106, "grad_norm": 0.9280270338058472, "learning_rate": 3.411166886971936e-05, "loss": 0.1341, "step": 71490 }, { "epoch": 2.5982992950069046, "grad_norm": 0.6610667705535889, "learning_rate": 3.4106836294978386e-05, "loss": 0.088, "step": 71500 }, { "epoch": 2.5986626935097026, "grad_norm": 0.6038778424263, "learning_rate": 3.410200332785624e-05, "loss": 0.0901, "step": 71510 }, { "epoch": 2.599026092012501, "grad_norm": 0.673305094242096, "learning_rate": 3.409716996856115e-05, "loss": 0.1105, "step": 71520 }, { "epoch": 2.599389490515299, "grad_norm": 0.5786300301551819, "learning_rate": 
3.409233621730139e-05, "loss": 0.0854, "step": 71530 }, { "epoch": 2.599752889018097, "grad_norm": 0.8676998615264893, "learning_rate": 3.40875020742852e-05, "loss": 0.1241, "step": 71540 }, { "epoch": 2.6001162875208954, "grad_norm": 1.203029990196228, "learning_rate": 3.4083151010791036e-05, "loss": 2.9622, "step": 71550 }, { "epoch": 2.600479686023694, "grad_norm": 0.34861287474632263, "learning_rate": 3.40783161240115e-05, "loss": 0.1029, "step": 71560 }, { "epoch": 2.600843084526492, "grad_norm": 0.649398148059845, "learning_rate": 3.407348084607961e-05, "loss": 0.1265, "step": 71570 }, { "epoch": 2.60120648302929, "grad_norm": 1.2412714958190918, "learning_rate": 3.406864517720373e-05, "loss": 0.0755, "step": 71580 }, { "epoch": 2.6015698815320882, "grad_norm": 0.3836827278137207, "learning_rate": 3.406380911759219e-05, "loss": 0.1238, "step": 71590 }, { "epoch": 2.601933280034886, "grad_norm": 0.7698721885681152, "learning_rate": 3.405897266745337e-05, "loss": 0.09, "step": 71600 }, { "epoch": 2.6022966785376846, "grad_norm": 3.227402448654175, "learning_rate": 3.4054135826995636e-05, "loss": 0.5652, "step": 71610 }, { "epoch": 2.6026600770404826, "grad_norm": 0.5352892279624939, "learning_rate": 3.4049298596427415e-05, "loss": 0.1299, "step": 71620 }, { "epoch": 2.6030234755432806, "grad_norm": 1.8280853033065796, "learning_rate": 3.40444609759571e-05, "loss": 0.0985, "step": 71630 }, { "epoch": 2.603386874046079, "grad_norm": 0.6967837810516357, "learning_rate": 3.403962296579316e-05, "loss": 0.1074, "step": 71640 }, { "epoch": 2.603750272548877, "grad_norm": 0.4568573832511902, "learning_rate": 3.403478456614402e-05, "loss": 0.0929, "step": 71650 }, { "epoch": 2.6041136710516755, "grad_norm": 0.7679555416107178, "learning_rate": 3.402994577721816e-05, "loss": 0.0802, "step": 71660 }, { "epoch": 2.6044770695544734, "grad_norm": 0.9601152539253235, "learning_rate": 3.402510659922407e-05, "loss": 0.1268, "step": 71670 }, { "epoch": 2.6048404680572714, 
"grad_norm": 0.4837740659713745, "learning_rate": 3.4020267032370245e-05, "loss": 0.172, "step": 71680 }, { "epoch": 2.60520386656007, "grad_norm": 0.4992314279079437, "learning_rate": 3.401542707686521e-05, "loss": 0.1196, "step": 71690 }, { "epoch": 2.605567265062868, "grad_norm": 0.8764163255691528, "learning_rate": 3.4010586732917495e-05, "loss": 0.4405, "step": 71700 }, { "epoch": 2.6059306635656663, "grad_norm": 1.7109190225601196, "learning_rate": 3.400574600073566e-05, "loss": 0.1298, "step": 71710 }, { "epoch": 2.6062940620684643, "grad_norm": 1.2784879207611084, "learning_rate": 3.4000904880528275e-05, "loss": 0.1156, "step": 71720 }, { "epoch": 2.6066574605712622, "grad_norm": 8.60650634765625, "learning_rate": 3.399606337250392e-05, "loss": 0.125, "step": 71730 }, { "epoch": 2.6070208590740607, "grad_norm": 0.4801369309425354, "learning_rate": 3.39912214768712e-05, "loss": 0.1208, "step": 71740 }, { "epoch": 2.6073842575768587, "grad_norm": 4.337435722351074, "learning_rate": 3.398637919383873e-05, "loss": 0.1001, "step": 71750 }, { "epoch": 2.607747656079657, "grad_norm": 0.7785841822624207, "learning_rate": 3.398153652361517e-05, "loss": 0.088, "step": 71760 }, { "epoch": 2.608111054582455, "grad_norm": 0.7540931105613708, "learning_rate": 3.3976693466409155e-05, "loss": 0.0973, "step": 71770 }, { "epoch": 2.608474453085253, "grad_norm": 0.8233292698860168, "learning_rate": 3.3971850022429354e-05, "loss": 0.1088, "step": 71780 }, { "epoch": 2.6088378515880515, "grad_norm": 3.5180065631866455, "learning_rate": 3.396700619188446e-05, "loss": 0.1259, "step": 71790 }, { "epoch": 2.6092012500908495, "grad_norm": 0.8610531687736511, "learning_rate": 3.396216197498317e-05, "loss": 0.0909, "step": 71800 }, { "epoch": 2.609564648593648, "grad_norm": 0.7979753613471985, "learning_rate": 3.395731737193421e-05, "loss": 0.0987, "step": 71810 }, { "epoch": 2.609928047096446, "grad_norm": 3.379258871078491, "learning_rate": 3.3952472382946313e-05, "loss": 0.0889, 
"step": 71820 }, { "epoch": 2.610291445599244, "grad_norm": 1.4110392332077026, "learning_rate": 3.394762700822824e-05, "loss": 0.0718, "step": 71830 }, { "epoch": 2.6106548441020423, "grad_norm": 0.41368210315704346, "learning_rate": 3.3942781247988754e-05, "loss": 0.0993, "step": 71840 }, { "epoch": 2.6110182426048407, "grad_norm": 0.6575911045074463, "learning_rate": 3.3937935102436636e-05, "loss": 0.0845, "step": 71850 }, { "epoch": 2.6113816411076387, "grad_norm": 0.545257568359375, "learning_rate": 3.39330885717807e-05, "loss": 0.0896, "step": 71860 }, { "epoch": 2.6117450396104367, "grad_norm": 3.2522082328796387, "learning_rate": 3.392824165622976e-05, "loss": 0.1424, "step": 71870 }, { "epoch": 2.612108438113235, "grad_norm": 1.4843670129776, "learning_rate": 3.392339435599265e-05, "loss": 0.0788, "step": 71880 }, { "epoch": 2.612471836616033, "grad_norm": 1.475480318069458, "learning_rate": 3.3918546671278235e-05, "loss": 0.1693, "step": 71890 }, { "epoch": 2.6128352351188315, "grad_norm": 0.9142501354217529, "learning_rate": 3.3913698602295376e-05, "loss": 0.0975, "step": 71900 }, { "epoch": 2.6131986336216295, "grad_norm": 1.1580731868743896, "learning_rate": 3.390885014925295e-05, "loss": 0.1173, "step": 71910 }, { "epoch": 2.6135620321244275, "grad_norm": 0.5489696860313416, "learning_rate": 3.3904001312359874e-05, "loss": 0.111, "step": 71920 }, { "epoch": 2.613925430627226, "grad_norm": 1.6776854991912842, "learning_rate": 3.3899152091825064e-05, "loss": 0.1057, "step": 71930 }, { "epoch": 2.614288829130024, "grad_norm": 0.8543124198913574, "learning_rate": 3.3894302487857446e-05, "loss": 0.1383, "step": 71940 }, { "epoch": 2.6146522276328223, "grad_norm": 0.4847543239593506, "learning_rate": 3.388945250066599e-05, "loss": 0.1408, "step": 71950 }, { "epoch": 2.6150156261356203, "grad_norm": 0.8432245254516602, "learning_rate": 3.388460213045965e-05, "loss": 0.1051, "step": 71960 }, { "epoch": 2.6153790246384183, "grad_norm": 0.6041918396949768, 
"learning_rate": 3.387975137744742e-05, "loss": 0.0875, "step": 71970 }, { "epoch": 2.6157424231412167, "grad_norm": 0.6646948456764221, "learning_rate": 3.387490024183829e-05, "loss": 0.0961, "step": 71980 }, { "epoch": 2.6161058216440147, "grad_norm": 1.7802671194076538, "learning_rate": 3.387004872384129e-05, "loss": 0.1365, "step": 71990 }, { "epoch": 2.616469220146813, "grad_norm": 5.307714462280273, "learning_rate": 3.3865196823665454e-05, "loss": 0.0987, "step": 72000 }, { "epoch": 2.616469220146813, "eval_loss": 0.3264125883579254, "eval_runtime": 179.4492, "eval_samples_per_second": 41.315, "eval_steps_per_second": 5.166, "eval_wer": 0.15330295714052317, "step": 72000 }, { "epoch": 2.616832618649611, "grad_norm": 2.6637954711914062, "learning_rate": 3.386034454151982e-05, "loss": 0.0894, "step": 72010 }, { "epoch": 2.617196017152409, "grad_norm": 1.1595145463943481, "learning_rate": 3.385549187761347e-05, "loss": 0.1152, "step": 72020 }, { "epoch": 2.6175594156552076, "grad_norm": 0.6032492518424988, "learning_rate": 3.3850638832155486e-05, "loss": 0.097, "step": 72030 }, { "epoch": 2.6179228141580055, "grad_norm": 0.9775734543800354, "learning_rate": 3.3845785405354955e-05, "loss": 0.109, "step": 72040 }, { "epoch": 2.618286212660804, "grad_norm": 1.095192790031433, "learning_rate": 3.384093159742102e-05, "loss": 3.8308, "step": 72050 }, { "epoch": 2.618649611163602, "grad_norm": 0.4287867248058319, "learning_rate": 3.383607740856278e-05, "loss": 0.109, "step": 72060 }, { "epoch": 2.6190130096664, "grad_norm": 0.8537576794624329, "learning_rate": 3.3831222838989416e-05, "loss": 0.1129, "step": 72070 }, { "epoch": 2.6193764081691984, "grad_norm": 0.6496450901031494, "learning_rate": 3.382636788891008e-05, "loss": 0.1073, "step": 72080 }, { "epoch": 2.6197398066719964, "grad_norm": 1.3108080625534058, "learning_rate": 3.382151255853396e-05, "loss": 0.0974, "step": 72090 }, { "epoch": 2.620103205174795, "grad_norm": 0.6950684785842896, "learning_rate": 
3.381665684807024e-05, "loss": 0.0906, "step": 72100 }, { "epoch": 2.6204666036775928, "grad_norm": 0.5899102091789246, "learning_rate": 3.381180075772815e-05, "loss": 0.0891, "step": 72110 }, { "epoch": 2.6208300021803907, "grad_norm": 1.007866382598877, "learning_rate": 3.380694428771692e-05, "loss": 0.1396, "step": 72120 }, { "epoch": 2.621193400683189, "grad_norm": 1.360654592514038, "learning_rate": 3.38020874382458e-05, "loss": 0.1011, "step": 72130 }, { "epoch": 2.6215567991859876, "grad_norm": 0.9132879972457886, "learning_rate": 3.3797230209524046e-05, "loss": 0.1458, "step": 72140 }, { "epoch": 2.6219201976887856, "grad_norm": 0.9610080122947693, "learning_rate": 3.379237260176093e-05, "loss": 0.0807, "step": 72150 }, { "epoch": 2.6222835961915836, "grad_norm": 0.8496606349945068, "learning_rate": 3.378751461516578e-05, "loss": 0.1041, "step": 72160 }, { "epoch": 2.622646994694382, "grad_norm": 0.5927808284759521, "learning_rate": 3.3782656249947894e-05, "loss": 1.5747, "step": 72170 }, { "epoch": 2.62301039319718, "grad_norm": 0.6998677849769592, "learning_rate": 3.3777797506316586e-05, "loss": 0.1019, "step": 72180 }, { "epoch": 2.6233737916999784, "grad_norm": 0.7553665637969971, "learning_rate": 3.3772938384481225e-05, "loss": 0.1208, "step": 72190 }, { "epoch": 2.6237371902027764, "grad_norm": 0.3797171115875244, "learning_rate": 3.376807888465116e-05, "loss": 0.1391, "step": 72200 }, { "epoch": 2.6241005887055744, "grad_norm": 0.8174279928207397, "learning_rate": 3.376321900703576e-05, "loss": 0.0969, "step": 72210 }, { "epoch": 2.624463987208373, "grad_norm": 3.2531015872955322, "learning_rate": 3.375835875184445e-05, "loss": 0.1024, "step": 72220 }, { "epoch": 2.624827385711171, "grad_norm": 1.0118309259414673, "learning_rate": 3.3753498119286616e-05, "loss": 0.1033, "step": 72230 }, { "epoch": 2.6251907842139692, "grad_norm": 1.3845003843307495, "learning_rate": 3.374863710957169e-05, "loss": 0.0972, "step": 72240 }, { "epoch": 2.625554182716767, 
"grad_norm": 0.5679929256439209, "learning_rate": 3.3743775722909124e-05, "loss": 0.0885, "step": 72250 }, { "epoch": 2.625917581219565, "grad_norm": 0.30551066994667053, "learning_rate": 3.373891395950838e-05, "loss": 0.0825, "step": 72260 }, { "epoch": 2.6262809797223636, "grad_norm": 0.40965649485588074, "learning_rate": 3.373405181957891e-05, "loss": 0.1317, "step": 72270 }, { "epoch": 2.6266443782251616, "grad_norm": 0.6095037460327148, "learning_rate": 3.3729189303330236e-05, "loss": 0.0956, "step": 72280 }, { "epoch": 2.62700777672796, "grad_norm": 0.6658949851989746, "learning_rate": 3.3724326410971844e-05, "loss": 0.7257, "step": 72290 }, { "epoch": 2.627371175230758, "grad_norm": 3.251826524734497, "learning_rate": 3.371946314271327e-05, "loss": 0.1154, "step": 72300 }, { "epoch": 2.627734573733556, "grad_norm": 0.6652829051017761, "learning_rate": 3.371459949876406e-05, "loss": 0.0783, "step": 72310 }, { "epoch": 2.6280979722363544, "grad_norm": 7.321001052856445, "learning_rate": 3.370973547933376e-05, "loss": 0.1234, "step": 72320 }, { "epoch": 2.6284613707391524, "grad_norm": 0.5794792771339417, "learning_rate": 3.370487108463195e-05, "loss": 0.0981, "step": 72330 }, { "epoch": 2.628824769241951, "grad_norm": 2.157670736312866, "learning_rate": 3.370000631486822e-05, "loss": 0.1266, "step": 72340 }, { "epoch": 2.629188167744749, "grad_norm": 0.7778168320655823, "learning_rate": 3.369514117025216e-05, "loss": 0.1093, "step": 72350 }, { "epoch": 2.629551566247547, "grad_norm": 0.4507717490196228, "learning_rate": 3.3690275650993416e-05, "loss": 0.0935, "step": 72360 }, { "epoch": 2.6299149647503453, "grad_norm": 0.2999439835548401, "learning_rate": 3.36854097573016e-05, "loss": 0.1175, "step": 72370 }, { "epoch": 2.6302783632531432, "grad_norm": 0.6613568663597107, "learning_rate": 3.36805434893864e-05, "loss": 0.0982, "step": 72380 }, { "epoch": 2.6306417617559417, "grad_norm": 2.211897611618042, "learning_rate": 3.367567684745745e-05, "loss": 0.0877, 
"step": 72390 }, { "epoch": 2.6310051602587397, "grad_norm": 2.100520372390747, "learning_rate": 3.367080983172446e-05, "loss": 0.1534, "step": 72400 }, { "epoch": 2.6313685587615376, "grad_norm": 0.6530410647392273, "learning_rate": 3.366594244239713e-05, "loss": 0.0819, "step": 72410 }, { "epoch": 2.631731957264336, "grad_norm": 0.8508390188217163, "learning_rate": 3.366107467968517e-05, "loss": 1.7912, "step": 72420 }, { "epoch": 2.6320953557671345, "grad_norm": 2.8727540969848633, "learning_rate": 3.365620654379831e-05, "loss": 0.1243, "step": 72430 }, { "epoch": 2.6324587542699325, "grad_norm": 0.42828473448753357, "learning_rate": 3.3651338034946314e-05, "loss": 0.1423, "step": 72440 }, { "epoch": 2.6328221527727305, "grad_norm": 0.5628288388252258, "learning_rate": 3.364646915333895e-05, "loss": 0.4869, "step": 72450 }, { "epoch": 2.633185551275529, "grad_norm": 0.48309531807899475, "learning_rate": 3.364159989918598e-05, "loss": 0.095, "step": 72460 }, { "epoch": 2.633548949778327, "grad_norm": 0.5635913014411926, "learning_rate": 3.3636730272697234e-05, "loss": 0.0948, "step": 72470 }, { "epoch": 2.6339123482811253, "grad_norm": 0.7630922198295593, "learning_rate": 3.3631860274082504e-05, "loss": 0.108, "step": 72480 }, { "epoch": 2.6342757467839233, "grad_norm": 1.6271787881851196, "learning_rate": 3.3626989903551626e-05, "loss": 0.1169, "step": 72490 }, { "epoch": 2.6346391452867213, "grad_norm": 1.0612133741378784, "learning_rate": 3.3622119161314446e-05, "loss": 0.1574, "step": 72500 }, { "epoch": 2.6350025437895197, "grad_norm": 0.5140257477760315, "learning_rate": 3.361724804758083e-05, "loss": 0.0765, "step": 72510 }, { "epoch": 2.6353659422923177, "grad_norm": 1.7995847463607788, "learning_rate": 3.361237656256066e-05, "loss": 0.1069, "step": 72520 }, { "epoch": 2.635729340795116, "grad_norm": 0.6248586177825928, "learning_rate": 3.360750470646383e-05, "loss": 0.0842, "step": 72530 }, { "epoch": 2.636092739297914, "grad_norm": 0.8050362467765808, 
"learning_rate": 3.360263247950023e-05, "loss": 0.2037, "step": 72540 }, { "epoch": 2.636456137800712, "grad_norm": 1.0852928161621094, "learning_rate": 3.359775988187983e-05, "loss": 0.0985, "step": 72550 }, { "epoch": 2.6368195363035105, "grad_norm": 0.591783344745636, "learning_rate": 3.359288691381253e-05, "loss": 0.0948, "step": 72560 }, { "epoch": 2.6371829348063085, "grad_norm": 0.6417847275733948, "learning_rate": 3.358801357550831e-05, "loss": 0.1011, "step": 72570 }, { "epoch": 2.637546333309107, "grad_norm": 0.8405566811561584, "learning_rate": 3.358313986717714e-05, "loss": 0.1016, "step": 72580 }, { "epoch": 2.637909731811905, "grad_norm": 1.3981549739837646, "learning_rate": 3.357826578902901e-05, "loss": 0.1164, "step": 72590 }, { "epoch": 2.638273130314703, "grad_norm": 0.6139928698539734, "learning_rate": 3.357339134127393e-05, "loss": 0.091, "step": 72600 }, { "epoch": 2.638273130314703, "eval_loss": 0.3287167251110077, "eval_runtime": 180.0474, "eval_samples_per_second": 41.178, "eval_steps_per_second": 5.149, "eval_wer": 0.15324849783070416, "step": 72600 }, { "epoch": 2.6386365288175013, "grad_norm": 0.5749704241752625, "learning_rate": 3.356851652412193e-05, "loss": 0.1381, "step": 72610 }, { "epoch": 2.6389999273202993, "grad_norm": 0.48276287317276, "learning_rate": 3.3563641337783035e-05, "loss": 0.1009, "step": 72620 }, { "epoch": 2.6393633258230977, "grad_norm": 1.9593979120254517, "learning_rate": 3.35587657824673e-05, "loss": 0.1092, "step": 72630 }, { "epoch": 2.6397267243258957, "grad_norm": 1.3498990535736084, "learning_rate": 3.35538898583848e-05, "loss": 0.1033, "step": 72640 }, { "epoch": 2.6400901228286937, "grad_norm": 4.862055778503418, "learning_rate": 3.354901356574563e-05, "loss": 0.0958, "step": 72650 }, { "epoch": 2.640453521331492, "grad_norm": 0.3533124029636383, "learning_rate": 3.354413690475987e-05, "loss": 0.0717, "step": 72660 }, { "epoch": 2.64081691983429, "grad_norm": 1.32888662815094, "learning_rate": 
3.3539259875637664e-05, "loss": 0.1151, "step": 72670 }, { "epoch": 2.6411803183370886, "grad_norm": 0.8028721213340759, "learning_rate": 3.353438247858912e-05, "loss": 0.1194, "step": 72680 }, { "epoch": 2.6415437168398865, "grad_norm": 0.8875879645347595, "learning_rate": 3.352950471382441e-05, "loss": 0.1026, "step": 72690 }, { "epoch": 2.6419071153426845, "grad_norm": 0.956411600112915, "learning_rate": 3.3524626581553684e-05, "loss": 0.0575, "step": 72700 }, { "epoch": 2.642270513845483, "grad_norm": 2.3979716300964355, "learning_rate": 3.351974808198713e-05, "loss": 0.1078, "step": 72710 }, { "epoch": 2.6426339123482814, "grad_norm": 3.241748571395874, "learning_rate": 3.351486921533495e-05, "loss": 0.1079, "step": 72720 }, { "epoch": 2.6429973108510794, "grad_norm": 0.5039170384407043, "learning_rate": 3.350998998180735e-05, "loss": 0.112, "step": 72730 }, { "epoch": 2.6433607093538773, "grad_norm": 1.3268622159957886, "learning_rate": 3.350511038161456e-05, "loss": 0.115, "step": 72740 }, { "epoch": 2.6437241078566758, "grad_norm": 0.8192645907402039, "learning_rate": 3.350023041496682e-05, "loss": 0.0873, "step": 72750 }, { "epoch": 2.6440875063594738, "grad_norm": 0.7651393413543701, "learning_rate": 3.34953500820744e-05, "loss": 0.0978, "step": 72760 }, { "epoch": 2.644450904862272, "grad_norm": 3.417607069015503, "learning_rate": 3.3490469383147564e-05, "loss": 0.1212, "step": 72770 }, { "epoch": 2.64481430336507, "grad_norm": 6.00585412979126, "learning_rate": 3.348558831839661e-05, "loss": 0.1212, "step": 72780 }, { "epoch": 2.645177701867868, "grad_norm": 0.36312735080718994, "learning_rate": 3.3480706888031865e-05, "loss": 0.1236, "step": 72790 }, { "epoch": 2.6455411003706666, "grad_norm": 0.6871273517608643, "learning_rate": 3.347582509226362e-05, "loss": 0.0875, "step": 72800 }, { "epoch": 2.6459044988734646, "grad_norm": 1.383457899093628, "learning_rate": 3.3470942931302236e-05, "loss": 0.1213, "step": 72810 }, { "epoch": 2.646267897376263, 
"grad_norm": 0.7942748665809631, "learning_rate": 3.346606040535805e-05, "loss": 0.0837, "step": 72820 }, { "epoch": 2.646631295879061, "grad_norm": 1.3881112337112427, "learning_rate": 3.346117751464146e-05, "loss": 0.085, "step": 72830 }, { "epoch": 2.646994694381859, "grad_norm": 2.80416202545166, "learning_rate": 3.345629425936283e-05, "loss": 4.2814, "step": 72840 }, { "epoch": 2.6473580928846574, "grad_norm": 0.7120780348777771, "learning_rate": 3.345141063973256e-05, "loss": 0.0874, "step": 72850 }, { "epoch": 2.6477214913874554, "grad_norm": 1.7287395000457764, "learning_rate": 3.344652665596108e-05, "loss": 0.1066, "step": 72860 }, { "epoch": 2.648084889890254, "grad_norm": 0.6621074080467224, "learning_rate": 3.344164230825882e-05, "loss": 0.1096, "step": 72870 }, { "epoch": 2.648448288393052, "grad_norm": 1.1361407041549683, "learning_rate": 3.343675759683623e-05, "loss": 0.1008, "step": 72880 }, { "epoch": 2.64881168689585, "grad_norm": 0.6596959829330444, "learning_rate": 3.3431872521903766e-05, "loss": 0.1222, "step": 72890 }, { "epoch": 2.649175085398648, "grad_norm": 1.1007713079452515, "learning_rate": 3.342698708367192e-05, "loss": 0.0837, "step": 72900 }, { "epoch": 2.649538483901446, "grad_norm": 0.754059910774231, "learning_rate": 3.342210128235119e-05, "loss": 0.1006, "step": 72910 }, { "epoch": 2.6499018824042446, "grad_norm": 0.3821747303009033, "learning_rate": 3.341721511815208e-05, "loss": 0.1176, "step": 72920 }, { "epoch": 2.6502652809070426, "grad_norm": 1.3366892337799072, "learning_rate": 3.341232859128511e-05, "loss": 0.0974, "step": 72930 }, { "epoch": 2.6506286794098406, "grad_norm": 0.8863315582275391, "learning_rate": 3.340744170196084e-05, "loss": 0.1181, "step": 72940 }, { "epoch": 2.650992077912639, "grad_norm": 0.8968802094459534, "learning_rate": 3.3402554450389826e-05, "loss": 0.1012, "step": 72950 }, { "epoch": 2.651355476415437, "grad_norm": 0.6080183982849121, "learning_rate": 3.339766683678262e-05, "loss": 0.0997, 
"step": 72960 }, { "epoch": 2.6517188749182354, "grad_norm": 0.8404228687286377, "learning_rate": 3.339277886134985e-05, "loss": 0.098, "step": 72970 }, { "epoch": 2.6520822734210334, "grad_norm": 0.5841569900512695, "learning_rate": 3.338789052430208e-05, "loss": 0.1032, "step": 72980 }, { "epoch": 2.6524456719238314, "grad_norm": 1.0763466358184814, "learning_rate": 3.3383001825849966e-05, "loss": 0.0971, "step": 72990 }, { "epoch": 2.65280907042663, "grad_norm": 2.3036322593688965, "learning_rate": 3.337811276620412e-05, "loss": 1.71, "step": 73000 }, { "epoch": 2.6531724689294283, "grad_norm": 0.29687023162841797, "learning_rate": 3.337322334557521e-05, "loss": 0.1814, "step": 73010 }, { "epoch": 2.6535358674322262, "grad_norm": 1.0260635614395142, "learning_rate": 3.3368333564173905e-05, "loss": 0.1138, "step": 73020 }, { "epoch": 2.6538992659350242, "grad_norm": 1.9731409549713135, "learning_rate": 3.3363443422210875e-05, "loss": 0.1022, "step": 73030 }, { "epoch": 2.6542626644378227, "grad_norm": 1.9720983505249023, "learning_rate": 3.335855291989682e-05, "loss": 0.1169, "step": 73040 }, { "epoch": 2.6546260629406206, "grad_norm": 0.8619999885559082, "learning_rate": 3.335366205744246e-05, "loss": 0.0857, "step": 73050 }, { "epoch": 2.654989461443419, "grad_norm": 1.4044042825698853, "learning_rate": 3.334877083505853e-05, "loss": 0.0945, "step": 73060 }, { "epoch": 2.655352859946217, "grad_norm": 0.6870516538619995, "learning_rate": 3.3343879252955765e-05, "loss": 0.0916, "step": 73070 }, { "epoch": 2.655716258449015, "grad_norm": 1.0098764896392822, "learning_rate": 3.3338987311344935e-05, "loss": 0.1125, "step": 73080 }, { "epoch": 2.6560796569518135, "grad_norm": 3.241283655166626, "learning_rate": 3.333409501043681e-05, "loss": 0.1192, "step": 73090 }, { "epoch": 2.6564430554546115, "grad_norm": 0.5080071687698364, "learning_rate": 3.332920235044219e-05, "loss": 0.0923, "step": 73100 }, { "epoch": 2.65680645395741, "grad_norm": 0.7751922607421875, 
"learning_rate": 3.332430933157187e-05, "loss": 0.1025, "step": 73110 }, { "epoch": 2.657169852460208, "grad_norm": 1.3310539722442627, "learning_rate": 3.3319415954036674e-05, "loss": 0.1309, "step": 73120 }, { "epoch": 2.657533250963006, "grad_norm": 2.5508885383605957, "learning_rate": 3.331452221804745e-05, "loss": 0.1121, "step": 73130 }, { "epoch": 2.6578966494658043, "grad_norm": 4.273713111877441, "learning_rate": 3.330962812381505e-05, "loss": 0.1043, "step": 73140 }, { "epoch": 2.6582600479686023, "grad_norm": 0.32448074221611023, "learning_rate": 3.3304733671550336e-05, "loss": 0.865, "step": 73150 }, { "epoch": 2.6586234464714007, "grad_norm": 0.39604759216308594, "learning_rate": 3.329983886146419e-05, "loss": 0.0736, "step": 73160 }, { "epoch": 2.6589868449741987, "grad_norm": 2.2893431186676025, "learning_rate": 3.3294943693767536e-05, "loss": 0.1108, "step": 73170 }, { "epoch": 2.6593502434769967, "grad_norm": 1.7954707145690918, "learning_rate": 3.3290048168671256e-05, "loss": 0.0905, "step": 73180 }, { "epoch": 2.659713641979795, "grad_norm": 0.5648924708366394, "learning_rate": 3.3285152286386305e-05, "loss": 0.1076, "step": 73190 }, { "epoch": 2.660077040482593, "grad_norm": 0.6963376998901367, "learning_rate": 3.3280256047123614e-05, "loss": 0.0812, "step": 73200 }, { "epoch": 2.660077040482593, "eval_loss": 0.3232385516166687, "eval_runtime": 181.4857, "eval_samples_per_second": 40.852, "eval_steps_per_second": 5.108, "eval_wer": 0.1502078530324759, "step": 73200 }, { "epoch": 2.6604404389853915, "grad_norm": 15.696502685546875, "learning_rate": 3.3275359451094157e-05, "loss": 0.0925, "step": 73210 }, { "epoch": 2.6608038374881895, "grad_norm": 0.43037521839141846, "learning_rate": 3.327046249850891e-05, "loss": 0.1148, "step": 73220 }, { "epoch": 2.6611672359909875, "grad_norm": 2.093749523162842, "learning_rate": 3.326556518957885e-05, "loss": 0.098, "step": 73230 }, { "epoch": 2.661530634493786, "grad_norm": 0.5571810603141785, 
"learning_rate": 3.3260667524514996e-05, "loss": 0.1274, "step": 73240 }, { "epoch": 2.661894032996584, "grad_norm": 0.46131211519241333, "learning_rate": 3.3255769503528374e-05, "loss": 0.0872, "step": 73250 }, { "epoch": 2.6622574314993823, "grad_norm": 2.3554348945617676, "learning_rate": 3.325087112683002e-05, "loss": 0.1181, "step": 73260 }, { "epoch": 2.6626208300021803, "grad_norm": 0.8151417970657349, "learning_rate": 3.324597239463097e-05, "loss": 0.1176, "step": 73270 }, { "epoch": 2.6629842285049783, "grad_norm": 0.97243332862854, "learning_rate": 3.324107330714233e-05, "loss": 0.1183, "step": 73280 }, { "epoch": 2.6633476270077767, "grad_norm": 0.8408851623535156, "learning_rate": 3.3236173864575154e-05, "loss": 0.1119, "step": 73290 }, { "epoch": 2.663711025510575, "grad_norm": 0.8413365483283997, "learning_rate": 3.323127406714055e-05, "loss": 0.1135, "step": 73300 }, { "epoch": 2.664074424013373, "grad_norm": 1.1940609216690063, "learning_rate": 3.3226373915049636e-05, "loss": 0.114, "step": 73310 }, { "epoch": 2.664437822516171, "grad_norm": 0.761283814907074, "learning_rate": 3.3221473408513534e-05, "loss": 0.0885, "step": 73320 }, { "epoch": 2.6648012210189695, "grad_norm": 7.94679594039917, "learning_rate": 3.3216572547743396e-05, "loss": 0.0983, "step": 73330 }, { "epoch": 2.6651646195217675, "grad_norm": 0.7168159484863281, "learning_rate": 3.321167133295038e-05, "loss": 0.1152, "step": 73340 }, { "epoch": 2.665528018024566, "grad_norm": 1.2092477083206177, "learning_rate": 3.3206769764345676e-05, "loss": 0.0965, "step": 73350 }, { "epoch": 2.665891416527364, "grad_norm": 0.47513625025749207, "learning_rate": 3.320186784214045e-05, "loss": 0.4253, "step": 73360 }, { "epoch": 2.666254815030162, "grad_norm": 1.22835111618042, "learning_rate": 3.319696556654592e-05, "loss": 0.1073, "step": 73370 }, { "epoch": 2.6666182135329604, "grad_norm": 0.9767407774925232, "learning_rate": 3.319206293777332e-05, "loss": 0.1118, "step": 73380 }, { "epoch": 
2.6669816120357583, "grad_norm": 0.8150458335876465, "learning_rate": 3.318715995603387e-05, "loss": 0.0999, "step": 73390 }, { "epoch": 2.6673450105385568, "grad_norm": 0.8495771288871765, "learning_rate": 3.3182256621538826e-05, "loss": 0.0753, "step": 73400 }, { "epoch": 2.6677084090413548, "grad_norm": 1.7628092765808105, "learning_rate": 3.317735293449946e-05, "loss": 0.1248, "step": 73410 }, { "epoch": 2.6680718075441527, "grad_norm": 0.7195779085159302, "learning_rate": 3.317244889512704e-05, "loss": 0.1246, "step": 73420 }, { "epoch": 2.668435206046951, "grad_norm": 1.0770323276519775, "learning_rate": 3.316754450363289e-05, "loss": 0.0859, "step": 73430 }, { "epoch": 2.668798604549749, "grad_norm": 0.6442772746086121, "learning_rate": 3.31626397602283e-05, "loss": 0.1092, "step": 73440 }, { "epoch": 2.6691620030525476, "grad_norm": 0.9785648584365845, "learning_rate": 3.315773466512461e-05, "loss": 0.1202, "step": 73450 }, { "epoch": 2.6695254015553456, "grad_norm": 0.7215647101402283, "learning_rate": 3.315282921853316e-05, "loss": 0.1158, "step": 73460 }, { "epoch": 2.6698888000581436, "grad_norm": 0.5394707918167114, "learning_rate": 3.31479234206653e-05, "loss": 0.1174, "step": 73470 }, { "epoch": 2.670252198560942, "grad_norm": 0.6166718602180481, "learning_rate": 3.3143017271732416e-05, "loss": 0.1115, "step": 73480 }, { "epoch": 2.67061559706374, "grad_norm": 0.7951743006706238, "learning_rate": 3.3138110771945876e-05, "loss": 0.0974, "step": 73490 }, { "epoch": 2.6709789955665384, "grad_norm": 1.5483492612838745, "learning_rate": 3.313320392151711e-05, "loss": 0.0771, "step": 73500 }, { "epoch": 2.6713423940693364, "grad_norm": 0.6423888206481934, "learning_rate": 3.3128296720657524e-05, "loss": 0.0939, "step": 73510 }, { "epoch": 2.6717057925721344, "grad_norm": 0.5538840293884277, "learning_rate": 3.3123389169578556e-05, "loss": 0.108, "step": 73520 }, { "epoch": 2.672069191074933, "grad_norm": 0.5549068450927734, "learning_rate": 
3.311848126849165e-05, "loss": 0.1466, "step": 73530 }, { "epoch": 2.6724325895777308, "grad_norm": 0.7564308643341064, "learning_rate": 3.311357301760827e-05, "loss": 0.1241, "step": 73540 }, { "epoch": 2.672795988080529, "grad_norm": 0.683080792427063, "learning_rate": 3.310866441713989e-05, "loss": 0.0924, "step": 73550 }, { "epoch": 2.673159386583327, "grad_norm": 0.5799837708473206, "learning_rate": 3.3103755467298024e-05, "loss": 0.1, "step": 73560 }, { "epoch": 2.673522785086125, "grad_norm": 0.9366778135299683, "learning_rate": 3.309884616829416e-05, "loss": 0.0942, "step": 73570 }, { "epoch": 2.6738861835889236, "grad_norm": 2.1698505878448486, "learning_rate": 3.309393652033984e-05, "loss": 0.1085, "step": 73580 }, { "epoch": 2.674249582091722, "grad_norm": 0.8438801169395447, "learning_rate": 3.308902652364658e-05, "loss": 1.4071, "step": 73590 }, { "epoch": 2.67461298059452, "grad_norm": 0.5139632225036621, "learning_rate": 3.308411617842595e-05, "loss": 0.0993, "step": 73600 }, { "epoch": 2.674976379097318, "grad_norm": 0.5203242301940918, "learning_rate": 3.3079205484889534e-05, "loss": 0.0798, "step": 73610 }, { "epoch": 2.6753397776001164, "grad_norm": 0.8717512488365173, "learning_rate": 3.307429444324888e-05, "loss": 0.1186, "step": 73620 }, { "epoch": 2.6757031761029144, "grad_norm": 2.0422654151916504, "learning_rate": 3.3069383053715617e-05, "loss": 0.1053, "step": 73630 }, { "epoch": 2.676066574605713, "grad_norm": 1.5940972566604614, "learning_rate": 3.306447131650135e-05, "loss": 0.129, "step": 73640 }, { "epoch": 2.676429973108511, "grad_norm": 0.8113188743591309, "learning_rate": 3.30595592318177e-05, "loss": 0.0888, "step": 73650 }, { "epoch": 2.676793371611309, "grad_norm": 0.5296781063079834, "learning_rate": 3.305464679987632e-05, "loss": 0.0927, "step": 73660 }, { "epoch": 2.6771567701141072, "grad_norm": 1.491728663444519, "learning_rate": 3.304973402088887e-05, "loss": 0.1386, "step": 73670 }, { "epoch": 2.6775201686169052, 
"grad_norm": 0.6483957767486572, "learning_rate": 3.304482089506703e-05, "loss": 0.0994, "step": 73680 }, { "epoch": 2.6778835671197037, "grad_norm": 1.6856306791305542, "learning_rate": 3.303990742262247e-05, "loss": 0.1227, "step": 73690 }, { "epoch": 2.6782469656225016, "grad_norm": 1.752510905265808, "learning_rate": 3.3034993603766906e-05, "loss": 0.0956, "step": 73700 }, { "epoch": 2.6786103641252996, "grad_norm": 0.6596788167953491, "learning_rate": 3.303007943871206e-05, "loss": 0.1126, "step": 73710 }, { "epoch": 2.678973762628098, "grad_norm": 1.1651583909988403, "learning_rate": 3.302516492766966e-05, "loss": 0.1116, "step": 73720 }, { "epoch": 2.679337161130896, "grad_norm": 25.031896591186523, "learning_rate": 3.302025007085146e-05, "loss": 0.1139, "step": 73730 }, { "epoch": 2.6797005596336945, "grad_norm": 0.4031795561313629, "learning_rate": 3.301533486846922e-05, "loss": 0.0996, "step": 73740 }, { "epoch": 2.6800639581364925, "grad_norm": 0.6333705186843872, "learning_rate": 3.301041932073472e-05, "loss": 0.4087, "step": 73750 }, { "epoch": 2.6804273566392904, "grad_norm": 0.9752139449119568, "learning_rate": 3.300550342785975e-05, "loss": 0.1121, "step": 73760 }, { "epoch": 2.680790755142089, "grad_norm": 0.5102497339248657, "learning_rate": 3.300058719005612e-05, "loss": 0.1125, "step": 73770 }, { "epoch": 2.681154153644887, "grad_norm": 0.661267876625061, "learning_rate": 3.299567060753565e-05, "loss": 0.116, "step": 73780 }, { "epoch": 2.6815175521476853, "grad_norm": 4.50182580947876, "learning_rate": 3.2990753680510194e-05, "loss": 0.1047, "step": 73790 }, { "epoch": 2.6818809506504833, "grad_norm": 1.0736654996871948, "learning_rate": 3.298583640919159e-05, "loss": 0.0967, "step": 73800 }, { "epoch": 2.6818809506504833, "eval_loss": 0.3250260353088379, "eval_runtime": 180.3631, "eval_samples_per_second": 41.106, "eval_steps_per_second": 5.14, "eval_wer": 0.150552761994663, "step": 73800 }, { "epoch": 2.6822443491532812, "grad_norm": 
0.7733132839202881, "learning_rate": 3.2980918793791706e-05, "loss": 0.0863, "step": 73810 }, { "epoch": 2.6826077476560797, "grad_norm": 1.0003288984298706, "learning_rate": 3.2976000834522424e-05, "loss": 0.1159, "step": 73820 }, { "epoch": 2.6829711461588777, "grad_norm": 1.0312319993972778, "learning_rate": 3.2971082531595646e-05, "loss": 0.1081, "step": 73830 }, { "epoch": 2.683334544661676, "grad_norm": 0.9150594472885132, "learning_rate": 3.2966163885223286e-05, "loss": 0.1042, "step": 73840 }, { "epoch": 2.683697943164474, "grad_norm": 1.0201760530471802, "learning_rate": 3.2961244895617265e-05, "loss": 0.0894, "step": 73850 }, { "epoch": 2.684061341667272, "grad_norm": 0.7284217476844788, "learning_rate": 3.295632556298953e-05, "loss": 0.0844, "step": 73860 }, { "epoch": 2.6844247401700705, "grad_norm": 0.5448662042617798, "learning_rate": 3.2951405887552033e-05, "loss": 0.0978, "step": 73870 }, { "epoch": 2.684788138672869, "grad_norm": 0.558380126953125, "learning_rate": 3.294648586951675e-05, "loss": 0.1168, "step": 73880 }, { "epoch": 2.685151537175667, "grad_norm": 0.5779474377632141, "learning_rate": 3.2941565509095676e-05, "loss": 0.0737, "step": 73890 }, { "epoch": 2.685514935678465, "grad_norm": 1.018198847770691, "learning_rate": 3.293664480650078e-05, "loss": 0.0783, "step": 73900 }, { "epoch": 2.6858783341812633, "grad_norm": 1.5002427101135254, "learning_rate": 3.293172376194411e-05, "loss": 0.103, "step": 73910 }, { "epoch": 2.6862417326840613, "grad_norm": 0.6482456922531128, "learning_rate": 3.2926802375637675e-05, "loss": 0.1128, "step": 73920 }, { "epoch": 2.6866051311868597, "grad_norm": 0.38328006863594055, "learning_rate": 3.292188064779354e-05, "loss": 0.09, "step": 73930 }, { "epoch": 2.6869685296896577, "grad_norm": 0.3640558421611786, "learning_rate": 3.2916958578623746e-05, "loss": 0.0808, "step": 73940 }, { "epoch": 2.6873319281924557, "grad_norm": 0.7267985343933105, "learning_rate": 3.2912036168340376e-05, "loss": 0.0819, 
"step": 73950 }, { "epoch": 2.687695326695254, "grad_norm": 0.9190502166748047, "learning_rate": 3.2907113417155525e-05, "loss": 0.0973, "step": 73960 }, { "epoch": 2.688058725198052, "grad_norm": 1.3987020254135132, "learning_rate": 3.290219032528128e-05, "loss": 0.0987, "step": 73970 }, { "epoch": 2.6884221237008505, "grad_norm": 0.7504600286483765, "learning_rate": 3.289726689292978e-05, "loss": 0.1197, "step": 73980 }, { "epoch": 2.6887855222036485, "grad_norm": 0.8399171829223633, "learning_rate": 3.2892343120313144e-05, "loss": 0.1476, "step": 73990 }, { "epoch": 2.6891489207064465, "grad_norm": 0.37751835584640503, "learning_rate": 3.288741900764353e-05, "loss": 0.0819, "step": 74000 }, { "epoch": 2.689512319209245, "grad_norm": 49.60390853881836, "learning_rate": 3.288249455513308e-05, "loss": 0.8904, "step": 74010 }, { "epoch": 2.689875717712043, "grad_norm": 0.541854977607727, "learning_rate": 3.2877569762994e-05, "loss": 0.1234, "step": 74020 }, { "epoch": 2.6902391162148414, "grad_norm": 1.2786191701889038, "learning_rate": 3.2872644631438466e-05, "loss": 0.0906, "step": 74030 }, { "epoch": 2.6906025147176393, "grad_norm": 0.8454063534736633, "learning_rate": 3.2867719160678676e-05, "loss": 0.1212, "step": 74040 }, { "epoch": 2.6909659132204373, "grad_norm": 0.7831797003746033, "learning_rate": 3.286279335092687e-05, "loss": 0.0792, "step": 74050 }, { "epoch": 2.6913293117232358, "grad_norm": 0.46578606963157654, "learning_rate": 3.285786720239526e-05, "loss": 0.0707, "step": 74060 }, { "epoch": 2.6916927102260337, "grad_norm": 0.9215951561927795, "learning_rate": 3.285294071529613e-05, "loss": 0.1329, "step": 74070 }, { "epoch": 2.692056108728832, "grad_norm": 0.5061529278755188, "learning_rate": 3.284801388984171e-05, "loss": 0.1024, "step": 74080 }, { "epoch": 2.69241950723163, "grad_norm": 1.1045541763305664, "learning_rate": 3.2843086726244307e-05, "loss": 0.0937, "step": 74090 }, { "epoch": 2.692782905734428, "grad_norm": 0.4563618004322052, 
"learning_rate": 3.28381592247162e-05, "loss": 0.0796, "step": 74100 }, { "epoch": 2.6931463042372266, "grad_norm": 0.6136996150016785, "learning_rate": 3.28332313854697e-05, "loss": 0.0797, "step": 74110 }, { "epoch": 2.6935097027400245, "grad_norm": 0.3746108114719391, "learning_rate": 3.2828303208717125e-05, "loss": 0.0847, "step": 74120 }, { "epoch": 2.693873101242823, "grad_norm": 9.6745023727417, "learning_rate": 3.282337469467082e-05, "loss": 0.1017, "step": 74130 }, { "epoch": 2.694236499745621, "grad_norm": 0.8015979528427124, "learning_rate": 3.281844584354314e-05, "loss": 0.7825, "step": 74140 }, { "epoch": 2.694599898248419, "grad_norm": 0.7004625797271729, "learning_rate": 3.281351665554644e-05, "loss": 0.1076, "step": 74150 }, { "epoch": 2.6949632967512174, "grad_norm": 0.49726399779319763, "learning_rate": 3.2808587130893107e-05, "loss": 0.09, "step": 74160 }, { "epoch": 2.695326695254016, "grad_norm": 0.7523669600486755, "learning_rate": 3.280365726979555e-05, "loss": 0.1085, "step": 74170 }, { "epoch": 2.695690093756814, "grad_norm": 0.8919450044631958, "learning_rate": 3.279872707246615e-05, "loss": 0.1102, "step": 74180 }, { "epoch": 2.6960534922596118, "grad_norm": 2.7779541015625, "learning_rate": 3.279379653911736e-05, "loss": 0.1118, "step": 74190 }, { "epoch": 2.69641689076241, "grad_norm": 1.5841718912124634, "learning_rate": 3.27888656699616e-05, "loss": 0.214, "step": 74200 }, { "epoch": 2.696780289265208, "grad_norm": 1.1387628316879272, "learning_rate": 3.278393446521134e-05, "loss": 0.0854, "step": 74210 }, { "epoch": 2.6971436877680066, "grad_norm": 3.4580607414245605, "learning_rate": 3.277900292507904e-05, "loss": 0.1282, "step": 74220 }, { "epoch": 2.6975070862708046, "grad_norm": 1.5573906898498535, "learning_rate": 3.277407104977717e-05, "loss": 0.1394, "step": 74230 }, { "epoch": 2.6978704847736026, "grad_norm": 1.1080759763717651, "learning_rate": 3.276913883951824e-05, "loss": 0.1507, "step": 74240 }, { "epoch": 
2.698233883276401, "grad_norm": 0.7681221961975098, "learning_rate": 3.276420629451476e-05, "loss": 0.0866, "step": 74250 }, { "epoch": 2.698597281779199, "grad_norm": 0.5694213509559631, "learning_rate": 3.275927341497927e-05, "loss": 0.0853, "step": 74260 }, { "epoch": 2.6989606802819974, "grad_norm": 1.044425129890442, "learning_rate": 3.275434020112428e-05, "loss": 0.1089, "step": 74270 }, { "epoch": 2.6993240787847954, "grad_norm": 0.5060895681381226, "learning_rate": 3.274940665316237e-05, "loss": 0.0757, "step": 74280 }, { "epoch": 2.6996874772875934, "grad_norm": 3.3999178409576416, "learning_rate": 3.274447277130611e-05, "loss": 0.112, "step": 74290 }, { "epoch": 2.700050875790392, "grad_norm": 1.1808927059173584, "learning_rate": 3.273953855576805e-05, "loss": 0.0899, "step": 74300 }, { "epoch": 2.70041427429319, "grad_norm": 0.481868177652359, "learning_rate": 3.273460400676083e-05, "loss": 0.0935, "step": 74310 }, { "epoch": 2.7007776727959882, "grad_norm": 3.8246326446533203, "learning_rate": 3.272966912449703e-05, "loss": 0.094, "step": 74320 }, { "epoch": 2.7011410712987862, "grad_norm": 0.8496780395507812, "learning_rate": 3.27247339091893e-05, "loss": 0.1554, "step": 74330 }, { "epoch": 2.701504469801584, "grad_norm": 1.1947115659713745, "learning_rate": 3.271979836105026e-05, "loss": 0.1302, "step": 74340 }, { "epoch": 2.7018678683043826, "grad_norm": 2.5246219635009766, "learning_rate": 3.271486248029258e-05, "loss": 0.088, "step": 74350 }, { "epoch": 2.7022312668071806, "grad_norm": 1.4565434455871582, "learning_rate": 3.270992626712893e-05, "loss": 0.0862, "step": 74360 }, { "epoch": 2.702594665309979, "grad_norm": 0.473636269569397, "learning_rate": 3.270498972177198e-05, "loss": 0.1168, "step": 74370 }, { "epoch": 2.702958063812777, "grad_norm": 2.2672886848449707, "learning_rate": 3.270005284443445e-05, "loss": 0.2479, "step": 74380 }, { "epoch": 2.703321462315575, "grad_norm": 2.7625412940979004, "learning_rate": 3.269511563532903e-05, 
"loss": 0.1127, "step": 74390 }, { "epoch": 2.7036848608183734, "grad_norm": 1.6143194437026978, "learning_rate": 3.2690178094668455e-05, "loss": 0.1106, "step": 74400 }, { "epoch": 2.7036848608183734, "eval_loss": 0.30357053875923157, "eval_runtime": 179.9214, "eval_samples_per_second": 41.207, "eval_steps_per_second": 5.152, "eval_wer": 0.15205946956632235, "step": 74400 }, { "epoch": 2.7040482593211714, "grad_norm": 1.560831069946289, "learning_rate": 3.268524022266547e-05, "loss": 0.0948, "step": 74410 }, { "epoch": 2.70441165782397, "grad_norm": 0.5551114082336426, "learning_rate": 3.2680302019532836e-05, "loss": 0.1299, "step": 74420 }, { "epoch": 2.704775056326768, "grad_norm": 1.2796604633331299, "learning_rate": 3.2675363485483304e-05, "loss": 0.1122, "step": 74430 }, { "epoch": 2.705138454829566, "grad_norm": 1.2246671915054321, "learning_rate": 3.2670424620729675e-05, "loss": 0.1183, "step": 74440 }, { "epoch": 2.7055018533323643, "grad_norm": 2.03642201423645, "learning_rate": 3.266548542548474e-05, "loss": 0.0848, "step": 74450 }, { "epoch": 2.7058652518351627, "grad_norm": 1.559633493423462, "learning_rate": 3.2660545899961305e-05, "loss": 0.0841, "step": 74460 }, { "epoch": 2.7062286503379607, "grad_norm": 0.7185676097869873, "learning_rate": 3.265560604437221e-05, "loss": 0.104, "step": 74470 }, { "epoch": 2.7065920488407587, "grad_norm": 0.627554178237915, "learning_rate": 3.265066585893029e-05, "loss": 0.1016, "step": 74480 }, { "epoch": 2.706955447343557, "grad_norm": 0.7802649736404419, "learning_rate": 3.26457253438484e-05, "loss": 0.0987, "step": 74490 }, { "epoch": 2.707318845846355, "grad_norm": 0.4407544732093811, "learning_rate": 3.2640784499339404e-05, "loss": 0.0948, "step": 74500 }, { "epoch": 2.7076822443491535, "grad_norm": 0.7778434157371521, "learning_rate": 3.263584332561619e-05, "loss": 0.0943, "step": 74510 }, { "epoch": 2.7080456428519515, "grad_norm": 0.46505939960479736, "learning_rate": 3.263090182289166e-05, "loss": 0.1109, 
"step": 74520 }, { "epoch": 2.7084090413547495, "grad_norm": 0.6025874614715576, "learning_rate": 3.2625959991378715e-05, "loss": 0.0936, "step": 74530 }, { "epoch": 2.708772439857548, "grad_norm": 1.0582184791564941, "learning_rate": 3.262101783129029e-05, "loss": 0.0809, "step": 74540 }, { "epoch": 2.709135838360346, "grad_norm": 1.1490769386291504, "learning_rate": 3.261607534283932e-05, "loss": 0.0848, "step": 74550 }, { "epoch": 2.7094992368631443, "grad_norm": 0.6131067276000977, "learning_rate": 3.2611132526238766e-05, "loss": 0.084, "step": 74560 }, { "epoch": 2.7098626353659423, "grad_norm": 1.1936076879501343, "learning_rate": 3.26061893817016e-05, "loss": 0.0876, "step": 74570 }, { "epoch": 2.7102260338687403, "grad_norm": 1.389906406402588, "learning_rate": 3.260124590944078e-05, "loss": 0.0985, "step": 74580 }, { "epoch": 2.7105894323715387, "grad_norm": 1.8453727960586548, "learning_rate": 3.259630210966933e-05, "loss": 0.0959, "step": 74590 }, { "epoch": 2.7109528308743367, "grad_norm": 0.9837631583213806, "learning_rate": 3.259135798260025e-05, "loss": 0.0882, "step": 74600 }, { "epoch": 2.711316229377135, "grad_norm": 1.1993751525878906, "learning_rate": 3.2586413528446555e-05, "loss": 0.0676, "step": 74610 }, { "epoch": 2.711679627879933, "grad_norm": 6.072772026062012, "learning_rate": 3.25814687474213e-05, "loss": 0.0994, "step": 74620 }, { "epoch": 2.712043026382731, "grad_norm": 1.2597107887268066, "learning_rate": 3.257652363973753e-05, "loss": 0.1082, "step": 74630 }, { "epoch": 2.7124064248855295, "grad_norm": 1.3290597200393677, "learning_rate": 3.257157820560831e-05, "loss": 0.128, "step": 74640 }, { "epoch": 2.7127698233883275, "grad_norm": 2.4594569206237793, "learning_rate": 3.256663244524673e-05, "loss": 0.0824, "step": 74650 }, { "epoch": 2.713133221891126, "grad_norm": 0.3981126844882965, "learning_rate": 3.256168635886588e-05, "loss": 0.1033, "step": 74660 }, { "epoch": 2.713496620393924, "grad_norm": 0.6342089772224426, 
"learning_rate": 3.255673994667887e-05, "loss": 0.1291, "step": 74670 }, { "epoch": 2.713860018896722, "grad_norm": 1.3177196979522705, "learning_rate": 3.2551793208898826e-05, "loss": 0.1083, "step": 74680 }, { "epoch": 2.7142234173995203, "grad_norm": 1.9709926843643188, "learning_rate": 3.2546846145738873e-05, "loss": 0.1416, "step": 74690 }, { "epoch": 2.7145868159023183, "grad_norm": 2.019582986831665, "learning_rate": 3.2541898757412174e-05, "loss": 0.1671, "step": 74700 }, { "epoch": 2.7149502144051167, "grad_norm": 0.7411203980445862, "learning_rate": 3.25369510441319e-05, "loss": 0.0905, "step": 74710 }, { "epoch": 2.7153136129079147, "grad_norm": 0.7094716429710388, "learning_rate": 3.2532003006111215e-05, "loss": 0.1127, "step": 74720 }, { "epoch": 2.7156770114107127, "grad_norm": 0.6261359453201294, "learning_rate": 3.252705464356332e-05, "loss": 0.0934, "step": 74730 }, { "epoch": 2.716040409913511, "grad_norm": 0.4718007445335388, "learning_rate": 3.252210595670142e-05, "loss": 0.094, "step": 74740 }, { "epoch": 2.7164038084163096, "grad_norm": 0.8705607652664185, "learning_rate": 3.2517156945738734e-05, "loss": 0.097, "step": 74750 }, { "epoch": 2.7167672069191076, "grad_norm": 0.4735512435436249, "learning_rate": 3.251220761088851e-05, "loss": 0.0741, "step": 74760 }, { "epoch": 2.7171306054219055, "grad_norm": 0.6891077756881714, "learning_rate": 3.250725795236398e-05, "loss": 0.1007, "step": 74770 }, { "epoch": 2.717494003924704, "grad_norm": 2.493716239929199, "learning_rate": 3.250230797037843e-05, "loss": 0.0849, "step": 74780 }, { "epoch": 2.717857402427502, "grad_norm": 1.8947222232818604, "learning_rate": 3.249735766514512e-05, "loss": 0.1433, "step": 74790 }, { "epoch": 2.7182208009303004, "grad_norm": 0.5233703255653381, "learning_rate": 3.2492407036877334e-05, "loss": 0.0772, "step": 74800 }, { "epoch": 2.7185841994330984, "grad_norm": 0.4244493544101715, "learning_rate": 3.2487456085788395e-05, "loss": 0.0771, "step": 74810 }, { "epoch": 
2.7189475979358964, "grad_norm": 0.5100713968276978, "learning_rate": 3.24825048120916e-05, "loss": 0.0915, "step": 74820 }, { "epoch": 2.719310996438695, "grad_norm": 1.360521674156189, "learning_rate": 3.2477553216000314e-05, "loss": 0.1032, "step": 74830 }, { "epoch": 2.7196743949414928, "grad_norm": 0.9262562990188599, "learning_rate": 3.2472601297727853e-05, "loss": 0.0992, "step": 74840 }, { "epoch": 2.720037793444291, "grad_norm": 1.0304359197616577, "learning_rate": 3.246764905748759e-05, "loss": 1.7738, "step": 74850 }, { "epoch": 2.720401191947089, "grad_norm": 0.621614933013916, "learning_rate": 3.246269649549291e-05, "loss": 0.0865, "step": 74860 }, { "epoch": 2.720764590449887, "grad_norm": 0.544062077999115, "learning_rate": 3.245774361195718e-05, "loss": 0.0947, "step": 74870 }, { "epoch": 2.7211279889526856, "grad_norm": 0.8662183284759521, "learning_rate": 3.2452790407093814e-05, "loss": 0.1065, "step": 74880 }, { "epoch": 2.7214913874554836, "grad_norm": 0.6098036170005798, "learning_rate": 3.244783688111622e-05, "loss": 0.1058, "step": 74890 }, { "epoch": 2.721854785958282, "grad_norm": 0.5739080309867859, "learning_rate": 3.2442883034237845e-05, "loss": 1.0575, "step": 74900 }, { "epoch": 2.72221818446108, "grad_norm": 0.8640351891517639, "learning_rate": 3.2437928866672124e-05, "loss": 0.0948, "step": 74910 }, { "epoch": 2.722581582963878, "grad_norm": 1.041825294494629, "learning_rate": 3.2432974378632504e-05, "loss": 0.1079, "step": 74920 }, { "epoch": 2.7229449814666764, "grad_norm": 1.7764942646026611, "learning_rate": 3.242801957033247e-05, "loss": 0.1058, "step": 74930 }, { "epoch": 2.7233083799694744, "grad_norm": 0.5012884140014648, "learning_rate": 3.24230644419855e-05, "loss": 0.0924, "step": 74940 }, { "epoch": 2.723671778472273, "grad_norm": 1.0178241729736328, "learning_rate": 3.241810899380509e-05, "loss": 0.0794, "step": 74950 }, { "epoch": 2.724035176975071, "grad_norm": 0.5228786468505859, "learning_rate": 
3.241315322600476e-05, "loss": 0.0947, "step": 74960 }, { "epoch": 2.724398575477869, "grad_norm": 0.7717702388763428, "learning_rate": 3.2408197138798035e-05, "loss": 0.1196, "step": 74970 }, { "epoch": 2.724761973980667, "grad_norm": 0.5628354549407959, "learning_rate": 3.240324073239846e-05, "loss": 0.086, "step": 74980 }, { "epoch": 2.725125372483465, "grad_norm": 0.535048246383667, "learning_rate": 3.239828400701957e-05, "loss": 0.1447, "step": 74990 }, { "epoch": 2.7254887709862636, "grad_norm": 0.42276647686958313, "learning_rate": 3.2393326962874953e-05, "loss": 0.0784, "step": 75000 }, { "epoch": 2.7254887709862636, "eval_loss": 0.3183054029941559, "eval_runtime": 180.0805, "eval_samples_per_second": 41.17, "eval_steps_per_second": 5.148, "eval_wer": 0.1470129068564271, "step": 75000 }, { "epoch": 2.7258521694890616, "grad_norm": 1.1505578756332397, "learning_rate": 3.238836960017818e-05, "loss": 0.0862, "step": 75010 }, { "epoch": 2.7262155679918596, "grad_norm": 2.7842421531677246, "learning_rate": 3.238341191914285e-05, "loss": 0.1111, "step": 75020 }, { "epoch": 2.726578966494658, "grad_norm": 0.8966996073722839, "learning_rate": 3.237845391998257e-05, "loss": 0.085, "step": 75030 }, { "epoch": 2.7269423649974565, "grad_norm": 2.870903491973877, "learning_rate": 3.237349560291096e-05, "loss": 0.103, "step": 75040 }, { "epoch": 2.7273057635002544, "grad_norm": 1.8146476745605469, "learning_rate": 3.236853696814167e-05, "loss": 0.0806, "step": 75050 }, { "epoch": 2.7276691620030524, "grad_norm": 1.370487928390503, "learning_rate": 3.236357801588833e-05, "loss": 0.0923, "step": 75060 }, { "epoch": 2.728032560505851, "grad_norm": 0.7427790760993958, "learning_rate": 3.235861874636462e-05, "loss": 0.1101, "step": 75070 }, { "epoch": 2.728395959008649, "grad_norm": 0.7309651970863342, "learning_rate": 3.23536591597842e-05, "loss": 0.1063, "step": 75080 }, { "epoch": 2.7287593575114473, "grad_norm": 0.5744786262512207, "learning_rate": 3.2348699256360784e-05, 
"loss": 0.1202, "step": 75090 }, { "epoch": 2.7291227560142453, "grad_norm": 0.7631998658180237, "learning_rate": 3.234373903630806e-05, "loss": 0.7446, "step": 75100 }, { "epoch": 2.7294861545170432, "grad_norm": 0.4925616979598999, "learning_rate": 3.233877849983974e-05, "loss": 0.0862, "step": 75110 }, { "epoch": 2.7298495530198417, "grad_norm": 2.4710123538970947, "learning_rate": 3.233381764716958e-05, "loss": 0.0956, "step": 75120 }, { "epoch": 2.7302129515226397, "grad_norm": 0.8318620920181274, "learning_rate": 3.23288564785113e-05, "loss": 0.1057, "step": 75130 }, { "epoch": 2.730576350025438, "grad_norm": 0.6408945918083191, "learning_rate": 3.2323894994078674e-05, "loss": 0.123, "step": 75140 }, { "epoch": 2.730939748528236, "grad_norm": 1.6067343950271606, "learning_rate": 3.2318933194085474e-05, "loss": 0.1345, "step": 75150 }, { "epoch": 2.731303147031034, "grad_norm": 0.9870972633361816, "learning_rate": 3.231397107874548e-05, "loss": 0.0888, "step": 75160 }, { "epoch": 2.7316665455338325, "grad_norm": 0.7536956071853638, "learning_rate": 3.23090086482725e-05, "loss": 0.1541, "step": 75170 }, { "epoch": 2.7320299440366305, "grad_norm": 0.45174309611320496, "learning_rate": 3.2304045902880334e-05, "loss": 0.0858, "step": 75180 }, { "epoch": 2.732393342539429, "grad_norm": 0.9457273483276367, "learning_rate": 3.229908284278283e-05, "loss": 0.1079, "step": 75190 }, { "epoch": 2.732756741042227, "grad_norm": 0.7832821011543274, "learning_rate": 3.229411946819381e-05, "loss": 0.0844, "step": 75200 }, { "epoch": 2.733120139545025, "grad_norm": 0.7583007216453552, "learning_rate": 3.228915577932713e-05, "loss": 0.1036, "step": 75210 }, { "epoch": 2.7334835380478233, "grad_norm": 0.38852185010910034, "learning_rate": 3.2284191776396675e-05, "loss": 0.092, "step": 75220 }, { "epoch": 2.7338469365506213, "grad_norm": 0.9784302115440369, "learning_rate": 3.22792274596163e-05, "loss": 0.1215, "step": 75230 }, { "epoch": 2.7342103350534197, "grad_norm": 
0.6491109728813171, "learning_rate": 3.227426282919992e-05, "loss": 0.118, "step": 75240 }, { "epoch": 2.7345737335562177, "grad_norm": 1.2278261184692383, "learning_rate": 3.226929788536143e-05, "loss": 0.7961, "step": 75250 }, { "epoch": 2.7349371320590157, "grad_norm": 0.7250826358795166, "learning_rate": 3.226433262831477e-05, "loss": 0.2077, "step": 75260 }, { "epoch": 2.735300530561814, "grad_norm": 0.6161037683486938, "learning_rate": 3.2259367058273855e-05, "loss": 0.0958, "step": 75270 }, { "epoch": 2.735663929064612, "grad_norm": 0.6900900602340698, "learning_rate": 3.2254401175452646e-05, "loss": 0.0848, "step": 75280 }, { "epoch": 2.7360273275674105, "grad_norm": 1.1437780857086182, "learning_rate": 3.2249434980065106e-05, "loss": 0.1336, "step": 75290 }, { "epoch": 2.7363907260702085, "grad_norm": 2.3000125885009766, "learning_rate": 3.2244468472325194e-05, "loss": 0.0859, "step": 75300 }, { "epoch": 2.7367541245730065, "grad_norm": 1.2666622400283813, "learning_rate": 3.2239501652446926e-05, "loss": 0.1021, "step": 75310 }, { "epoch": 2.737117523075805, "grad_norm": 0.4458122253417969, "learning_rate": 3.2234534520644275e-05, "loss": 0.0954, "step": 75320 }, { "epoch": 2.7374809215786033, "grad_norm": 1.7729071378707886, "learning_rate": 3.2229567077131285e-05, "loss": 1.6003, "step": 75330 }, { "epoch": 2.7378443200814013, "grad_norm": 0.8443679213523865, "learning_rate": 3.222459932212196e-05, "loss": 0.0879, "step": 75340 }, { "epoch": 2.7382077185841993, "grad_norm": 0.7365388870239258, "learning_rate": 3.221963125583037e-05, "loss": 0.0823, "step": 75350 }, { "epoch": 2.7385711170869977, "grad_norm": 0.6535968780517578, "learning_rate": 3.2214662878470546e-05, "loss": 0.1252, "step": 75360 }, { "epoch": 2.7389345155897957, "grad_norm": 0.674757719039917, "learning_rate": 3.220969419025657e-05, "loss": 0.1166, "step": 75370 }, { "epoch": 2.739297914092594, "grad_norm": 0.6909737586975098, "learning_rate": 3.220472519140253e-05, "loss": 0.1192, 
"step": 75380 }, { "epoch": 2.739661312595392, "grad_norm": 0.7484961152076721, "learning_rate": 3.219975588212251e-05, "loss": 0.1064, "step": 75390 }, { "epoch": 2.74002471109819, "grad_norm": 2.3563716411590576, "learning_rate": 3.219478626263063e-05, "loss": 0.0755, "step": 75400 }, { "epoch": 2.7403881096009886, "grad_norm": 0.4629516005516052, "learning_rate": 3.2189816333141004e-05, "loss": 0.0974, "step": 75410 }, { "epoch": 2.7407515081037865, "grad_norm": 0.487054705619812, "learning_rate": 3.2184846093867774e-05, "loss": 0.1004, "step": 75420 }, { "epoch": 2.741114906606585, "grad_norm": 3.042552947998047, "learning_rate": 3.2179875545025096e-05, "loss": 0.1113, "step": 75430 }, { "epoch": 2.741478305109383, "grad_norm": 0.5376294255256653, "learning_rate": 3.2174904686827114e-05, "loss": 0.0972, "step": 75440 }, { "epoch": 2.741841703612181, "grad_norm": 0.9033780694007874, "learning_rate": 3.216993351948803e-05, "loss": 0.0784, "step": 75450 }, { "epoch": 2.7422051021149794, "grad_norm": 0.29921913146972656, "learning_rate": 3.2164962043222015e-05, "loss": 0.076, "step": 75460 }, { "epoch": 2.7425685006177773, "grad_norm": 0.30889561772346497, "learning_rate": 3.2159990258243286e-05, "loss": 0.09, "step": 75470 }, { "epoch": 2.742931899120576, "grad_norm": 4.390368938446045, "learning_rate": 3.2155018164766044e-05, "loss": 0.087, "step": 75480 }, { "epoch": 2.7432952976233738, "grad_norm": 2.617569923400879, "learning_rate": 3.2150045763004526e-05, "loss": 0.0989, "step": 75490 }, { "epoch": 2.7436586961261717, "grad_norm": 0.5497812032699585, "learning_rate": 3.214507305317298e-05, "loss": 0.0869, "step": 75500 }, { "epoch": 2.74402209462897, "grad_norm": 2.1748311519622803, "learning_rate": 3.214010003548566e-05, "loss": 0.1087, "step": 75510 }, { "epoch": 2.744385493131768, "grad_norm": 0.7790930271148682, "learning_rate": 3.213512671015683e-05, "loss": 1.0444, "step": 75520 }, { "epoch": 2.7447488916345666, "grad_norm": 0.8997694849967957, 
"learning_rate": 3.2130153077400784e-05, "loss": 0.2153, "step": 75530 }, { "epoch": 2.7451122901373646, "grad_norm": 0.6659709811210632, "learning_rate": 3.2125179137431805e-05, "loss": 0.1144, "step": 75540 }, { "epoch": 2.7454756886401626, "grad_norm": 0.6743984818458557, "learning_rate": 3.212020489046421e-05, "loss": 0.0858, "step": 75550 }, { "epoch": 2.745839087142961, "grad_norm": 0.6624968647956848, "learning_rate": 3.2115230336712316e-05, "loss": 0.0913, "step": 75560 }, { "epoch": 2.7462024856457594, "grad_norm": 0.6814375519752502, "learning_rate": 3.211025547639047e-05, "loss": 0.089, "step": 75570 }, { "epoch": 2.7465658841485574, "grad_norm": 0.9000943303108215, "learning_rate": 3.210528030971301e-05, "loss": 0.1184, "step": 75580 }, { "epoch": 2.7469292826513554, "grad_norm": 1.0685985088348389, "learning_rate": 3.21003048368943e-05, "loss": 0.1682, "step": 75590 }, { "epoch": 2.7472926811541534, "grad_norm": 0.4757719933986664, "learning_rate": 3.209532905814872e-05, "loss": 0.1029, "step": 75600 }, { "epoch": 2.7472926811541534, "eval_loss": 0.304624080657959, "eval_runtime": 179.9156, "eval_samples_per_second": 41.208, "eval_steps_per_second": 5.152, "eval_wer": 0.1494363461433732, "step": 75600 }, { "epoch": 2.747656079656952, "grad_norm": 1.4344089031219482, "learning_rate": 3.209035297369066e-05, "loss": 0.0954, "step": 75610 }, { "epoch": 2.7480194781597502, "grad_norm": 0.8026723265647888, "learning_rate": 3.208537658373451e-05, "loss": 0.1161, "step": 75620 }, { "epoch": 2.748382876662548, "grad_norm": 0.8414619565010071, "learning_rate": 3.20803998884947e-05, "loss": 0.0969, "step": 75630 }, { "epoch": 2.748746275165346, "grad_norm": 0.46038514375686646, "learning_rate": 3.2075422888185645e-05, "loss": 0.1183, "step": 75640 }, { "epoch": 2.7491096736681446, "grad_norm": 0.6400403380393982, "learning_rate": 3.207044558302179e-05, "loss": 0.0962, "step": 75650 }, { "epoch": 2.7494730721709426, "grad_norm": 0.637139618396759, "learning_rate": 
3.206596574790073e-05, "loss": 1.817, "step": 75660 }, { "epoch": 2.749836470673741, "grad_norm": 1.1132330894470215, "learning_rate": 3.206098786410359e-05, "loss": 0.1059, "step": 75670 }, { "epoch": 2.750199869176539, "grad_norm": 0.6702316999435425, "learning_rate": 3.2056009676073615e-05, "loss": 0.0735, "step": 75680 }, { "epoch": 2.750563267679337, "grad_norm": 0.7996656894683838, "learning_rate": 3.205103118402528e-05, "loss": 0.1147, "step": 75690 }, { "epoch": 2.7509266661821354, "grad_norm": 0.6503117680549622, "learning_rate": 3.204605238817311e-05, "loss": 0.0837, "step": 75700 }, { "epoch": 2.7512900646849334, "grad_norm": 0.7380549907684326, "learning_rate": 3.204107328873161e-05, "loss": 0.0742, "step": 75710 }, { "epoch": 2.751653463187732, "grad_norm": 1.3897452354431152, "learning_rate": 3.203609388591531e-05, "loss": 0.1324, "step": 75720 }, { "epoch": 2.75201686169053, "grad_norm": 0.6932911276817322, "learning_rate": 3.203111417993876e-05, "loss": 0.0847, "step": 75730 }, { "epoch": 2.752380260193328, "grad_norm": 0.9964193105697632, "learning_rate": 3.2026134171016516e-05, "loss": 0.098, "step": 75740 }, { "epoch": 2.7527436586961262, "grad_norm": 2.219566822052002, "learning_rate": 3.2021153859363154e-05, "loss": 0.0866, "step": 75750 }, { "epoch": 2.7531070571989242, "grad_norm": 0.4468567967414856, "learning_rate": 3.201617324519325e-05, "loss": 0.0779, "step": 75760 }, { "epoch": 2.7534704557017227, "grad_norm": 1.1400572061538696, "learning_rate": 3.2011192328721406e-05, "loss": 0.1104, "step": 75770 }, { "epoch": 2.7538338542045206, "grad_norm": 0.7320595383644104, "learning_rate": 3.2006211110162234e-05, "loss": 0.0955, "step": 75780 }, { "epoch": 2.7541972527073186, "grad_norm": 1.0638219118118286, "learning_rate": 3.200122958973034e-05, "loss": 0.1557, "step": 75790 }, { "epoch": 2.754560651210117, "grad_norm": 0.8229318261146545, "learning_rate": 3.1996247767640385e-05, "loss": 0.0807, "step": 75800 }, { "epoch": 2.754924049712915, 
"grad_norm": 0.5131879448890686, "learning_rate": 3.1991265644107005e-05, "loss": 0.0859, "step": 75810 }, { "epoch": 2.7552874482157135, "grad_norm": 0.7201241850852966, "learning_rate": 3.198628321934486e-05, "loss": 0.1255, "step": 75820 }, { "epoch": 2.7556508467185115, "grad_norm": 0.8084592819213867, "learning_rate": 3.198130049356863e-05, "loss": 0.0959, "step": 75830 }, { "epoch": 2.7560142452213094, "grad_norm": 0.9655843377113342, "learning_rate": 3.197631746699301e-05, "loss": 0.1137, "step": 75840 }, { "epoch": 2.756377643724108, "grad_norm": 0.6856592893600464, "learning_rate": 3.197133413983268e-05, "loss": 0.0953, "step": 75850 }, { "epoch": 2.7567410422269063, "grad_norm": 0.8975215554237366, "learning_rate": 3.196635051230237e-05, "loss": 0.2702, "step": 75860 }, { "epoch": 2.7571044407297043, "grad_norm": 1.0239101648330688, "learning_rate": 3.19613665846168e-05, "loss": 0.8354, "step": 75870 }, { "epoch": 2.7574678392325023, "grad_norm": 1.5141791105270386, "learning_rate": 3.195638235699072e-05, "loss": 0.1123, "step": 75880 }, { "epoch": 2.7578312377353003, "grad_norm": 0.787190318107605, "learning_rate": 3.195139782963887e-05, "loss": 0.1198, "step": 75890 }, { "epoch": 2.7581946362380987, "grad_norm": 0.6342429518699646, "learning_rate": 3.1946413002776024e-05, "loss": 0.0838, "step": 75900 }, { "epoch": 2.758558034740897, "grad_norm": 2.490267753601074, "learning_rate": 3.194142787661695e-05, "loss": 0.0861, "step": 75910 }, { "epoch": 2.758921433243695, "grad_norm": 0.7890759110450745, "learning_rate": 3.1936442451376454e-05, "loss": 0.1203, "step": 75920 }, { "epoch": 2.759284831746493, "grad_norm": 1.0969079732894897, "learning_rate": 3.193145672726933e-05, "loss": 0.1042, "step": 75930 }, { "epoch": 2.7596482302492915, "grad_norm": 0.42303451895713806, "learning_rate": 3.1926470704510395e-05, "loss": 0.0979, "step": 75940 }, { "epoch": 2.7600116287520895, "grad_norm": 0.7856914401054382, "learning_rate": 3.192148438331448e-05, "loss": 
0.081, "step": 75950 }, { "epoch": 2.760375027254888, "grad_norm": 0.7306569814682007, "learning_rate": 3.191649776389644e-05, "loss": 0.0688, "step": 75960 }, { "epoch": 2.760738425757686, "grad_norm": 0.43717941641807556, "learning_rate": 3.1911510846471115e-05, "loss": 0.1219, "step": 75970 }, { "epoch": 2.761101824260484, "grad_norm": 0.6672983169555664, "learning_rate": 3.190652363125337e-05, "loss": 0.1003, "step": 75980 }, { "epoch": 2.7614652227632823, "grad_norm": 2.3338167667388916, "learning_rate": 3.190153611845811e-05, "loss": 0.117, "step": 75990 }, { "epoch": 2.7618286212660803, "grad_norm": 0.893578052520752, "learning_rate": 3.1896548308300206e-05, "loss": 0.0922, "step": 76000 }, { "epoch": 2.7621920197688787, "grad_norm": 1.831598162651062, "learning_rate": 3.189156020099458e-05, "loss": 0.0921, "step": 76010 }, { "epoch": 2.7625554182716767, "grad_norm": 0.4112573564052582, "learning_rate": 3.1886571796756136e-05, "loss": 0.1237, "step": 76020 }, { "epoch": 2.7629188167744747, "grad_norm": 2.476116418838501, "learning_rate": 3.1881583095799816e-05, "loss": 0.097, "step": 76030 }, { "epoch": 2.763282215277273, "grad_norm": 0.5987531542778015, "learning_rate": 3.1876594098340575e-05, "loss": 0.117, "step": 76040 }, { "epoch": 2.763645613780071, "grad_norm": 0.4382152855396271, "learning_rate": 3.187160480459335e-05, "loss": 0.0773, "step": 76050 }, { "epoch": 2.7640090122828695, "grad_norm": 0.4360668659210205, "learning_rate": 3.186661521477313e-05, "loss": 0.0785, "step": 76060 }, { "epoch": 2.7643724107856675, "grad_norm": 2.2010788917541504, "learning_rate": 3.1861625329094894e-05, "loss": 0.1354, "step": 76070 }, { "epoch": 2.7647358092884655, "grad_norm": 0.8447809815406799, "learning_rate": 3.185663514777363e-05, "loss": 0.0982, "step": 76080 }, { "epoch": 2.765099207791264, "grad_norm": 1.7606275081634521, "learning_rate": 3.185164467102436e-05, "loss": 0.1429, "step": 76090 }, { "epoch": 2.765462606294062, "grad_norm": 0.8025608062744141, 
"learning_rate": 3.1846653899062094e-05, "loss": 0.0833, "step": 76100 }, { "epoch": 2.7658260047968604, "grad_norm": 1.0630611181259155, "learning_rate": 3.184166283210188e-05, "loss": 0.0663, "step": 76110 }, { "epoch": 2.7661894032996583, "grad_norm": 0.47696417570114136, "learning_rate": 3.1836671470358744e-05, "loss": 0.1282, "step": 76120 }, { "epoch": 2.7665528018024563, "grad_norm": 4.783881187438965, "learning_rate": 3.183167981404777e-05, "loss": 0.1214, "step": 76130 }, { "epoch": 2.7669162003052548, "grad_norm": 1.0744116306304932, "learning_rate": 3.1826687863384006e-05, "loss": 0.1492, "step": 76140 }, { "epoch": 2.767279598808053, "grad_norm": 1.5960917472839355, "learning_rate": 3.182169561858257e-05, "loss": 0.0879, "step": 76150 }, { "epoch": 2.767642997310851, "grad_norm": 0.9969580769538879, "learning_rate": 3.1816703079858535e-05, "loss": 0.0919, "step": 76160 }, { "epoch": 2.768006395813649, "grad_norm": 0.3668254613876343, "learning_rate": 3.181171024742701e-05, "loss": 0.1718, "step": 76170 }, { "epoch": 2.768369794316447, "grad_norm": 0.7729851603507996, "learning_rate": 3.180671712150314e-05, "loss": 0.0842, "step": 76180 }, { "epoch": 2.7687331928192456, "grad_norm": 0.6386042833328247, "learning_rate": 3.1801723702302034e-05, "loss": 0.1199, "step": 76190 }, { "epoch": 2.769096591322044, "grad_norm": 2.096891164779663, "learning_rate": 3.179672999003887e-05, "loss": 0.1033, "step": 76200 }, { "epoch": 2.769096591322044, "eval_loss": 0.31721433997154236, "eval_runtime": 179.2645, "eval_samples_per_second": 41.358, "eval_steps_per_second": 5.171, "eval_wer": 0.14798409788153286, "step": 76200 }, { "epoch": 2.769459989824842, "grad_norm": 2.548231601715088, "learning_rate": 3.1791735984928784e-05, "loss": 0.1199, "step": 76210 }, { "epoch": 2.76982338832764, "grad_norm": 1.2589582204818726, "learning_rate": 3.178674168718696e-05, "loss": 0.1165, "step": 76220 }, { "epoch": 2.7701867868304384, "grad_norm": 1.0916184186935425, 
"learning_rate": 3.178174709702858e-05, "loss": 0.09, "step": 76230 }, { "epoch": 2.7705501853332364, "grad_norm": 0.47342580556869507, "learning_rate": 3.177675221466885e-05, "loss": 0.0816, "step": 76240 }, { "epoch": 2.770913583836035, "grad_norm": 1.0653049945831299, "learning_rate": 3.177175704032298e-05, "loss": 0.1169, "step": 76250 }, { "epoch": 2.771276982338833, "grad_norm": 0.5227024555206299, "learning_rate": 3.176676157420619e-05, "loss": 0.0903, "step": 76260 }, { "epoch": 2.771640380841631, "grad_norm": 1.8630784749984741, "learning_rate": 3.1761765816533726e-05, "loss": 0.1136, "step": 76270 }, { "epoch": 2.772003779344429, "grad_norm": 1.7517484426498413, "learning_rate": 3.175676976752083e-05, "loss": 0.0897, "step": 76280 }, { "epoch": 2.772367177847227, "grad_norm": 0.7945340871810913, "learning_rate": 3.175177342738276e-05, "loss": 0.1031, "step": 76290 }, { "epoch": 2.7727305763500256, "grad_norm": 0.6702117919921875, "learning_rate": 3.174677679633481e-05, "loss": 0.3309, "step": 76300 }, { "epoch": 2.7730939748528236, "grad_norm": 0.8197999000549316, "learning_rate": 3.174177987459223e-05, "loss": 0.1137, "step": 76310 }, { "epoch": 2.7734573733556216, "grad_norm": 0.35699373483657837, "learning_rate": 3.1736782662370354e-05, "loss": 0.092, "step": 76320 }, { "epoch": 2.77382077185842, "grad_norm": 0.566719114780426, "learning_rate": 3.173178515988449e-05, "loss": 0.1059, "step": 76330 }, { "epoch": 2.774184170361218, "grad_norm": 0.6718754172325134, "learning_rate": 3.172678736734995e-05, "loss": 0.1299, "step": 76340 }, { "epoch": 2.7745475688640164, "grad_norm": 0.5272148847579956, "learning_rate": 3.1721789284982075e-05, "loss": 0.1943, "step": 76350 }, { "epoch": 2.7749109673668144, "grad_norm": 0.7239329218864441, "learning_rate": 3.1716790912996214e-05, "loss": 0.0834, "step": 76360 }, { "epoch": 2.7752743658696124, "grad_norm": 1.7969343662261963, "learning_rate": 3.171179225160774e-05, "loss": 0.1131, "step": 76370 }, { "epoch": 
2.775637764372411, "grad_norm": 1.348568320274353, "learning_rate": 3.1706793301032e-05, "loss": 0.1328, "step": 76380 }, { "epoch": 2.776001162875209, "grad_norm": 0.6011419892311096, "learning_rate": 3.170179406148441e-05, "loss": 0.1409, "step": 76390 }, { "epoch": 2.7763645613780072, "grad_norm": 1.8177915811538696, "learning_rate": 3.169679453318036e-05, "loss": 0.0737, "step": 76400 }, { "epoch": 2.7767279598808052, "grad_norm": 0.592851996421814, "learning_rate": 3.1691794716335266e-05, "loss": 0.0998, "step": 76410 }, { "epoch": 2.777091358383603, "grad_norm": 1.3811548948287964, "learning_rate": 3.168679461116454e-05, "loss": 0.0754, "step": 76420 }, { "epoch": 2.7774547568864016, "grad_norm": 2.7338156700134277, "learning_rate": 3.168179421788363e-05, "loss": 0.1265, "step": 76430 }, { "epoch": 2.7778181553892, "grad_norm": 0.604120135307312, "learning_rate": 3.167679353670798e-05, "loss": 0.1086, "step": 76440 }, { "epoch": 2.778181553891998, "grad_norm": 0.8220155239105225, "learning_rate": 3.1671792567853045e-05, "loss": 1.4637, "step": 76450 }, { "epoch": 2.778544952394796, "grad_norm": 0.8582079410552979, "learning_rate": 3.166679131153432e-05, "loss": 0.1023, "step": 76460 }, { "epoch": 2.778908350897594, "grad_norm": 0.5946437120437622, "learning_rate": 3.166178976796727e-05, "loss": 0.1233, "step": 76470 }, { "epoch": 2.7792717494003925, "grad_norm": 1.113297700881958, "learning_rate": 3.165678793736741e-05, "loss": 0.1044, "step": 76480 }, { "epoch": 2.779635147903191, "grad_norm": 96.74727630615234, "learning_rate": 3.165178581995023e-05, "loss": 1.901, "step": 76490 }, { "epoch": 2.779998546405989, "grad_norm": 0.4184577465057373, "learning_rate": 3.164678341593127e-05, "loss": 0.0768, "step": 76500 }, { "epoch": 2.780361944908787, "grad_norm": 0.558016300201416, "learning_rate": 3.164178072552606e-05, "loss": 0.2095, "step": 76510 }, { "epoch": 2.7807253434115853, "grad_norm": 0.7608421444892883, "learning_rate": 3.1636777748950156e-05, 
"loss": 0.0934, "step": 76520 }, { "epoch": 2.7810887419143833, "grad_norm": 0.43345919251441956, "learning_rate": 3.163177448641911e-05, "loss": 0.0852, "step": 76530 }, { "epoch": 2.7814521404171817, "grad_norm": 3.386565923690796, "learning_rate": 3.1626770938148496e-05, "loss": 0.4973, "step": 76540 }, { "epoch": 2.7818155389199797, "grad_norm": Infinity, "learning_rate": 3.1622267500575804e-05, "loss": 2.2209, "step": 76550 }, { "epoch": 2.7821789374227777, "grad_norm": 1.2026207447052002, "learning_rate": 3.161726340999396e-05, "loss": 0.091, "step": 76560 }, { "epoch": 2.782542335925576, "grad_norm": 0.6033660769462585, "learning_rate": 3.1612259034297784e-05, "loss": 0.1076, "step": 76570 }, { "epoch": 2.782905734428374, "grad_norm": 0.6164398193359375, "learning_rate": 3.1607254373702885e-05, "loss": 0.1115, "step": 76580 }, { "epoch": 2.7832691329311725, "grad_norm": 0.4211709201335907, "learning_rate": 3.1602249428424916e-05, "loss": 0.1061, "step": 76590 }, { "epoch": 2.7836325314339705, "grad_norm": 0.8229207396507263, "learning_rate": 3.1597244198679496e-05, "loss": 0.1703, "step": 76600 }, { "epoch": 2.7839959299367685, "grad_norm": 0.2660597860813141, "learning_rate": 3.159223868468231e-05, "loss": 0.0873, "step": 76610 }, { "epoch": 2.784359328439567, "grad_norm": 0.8367421627044678, "learning_rate": 3.1587232886649006e-05, "loss": 0.0906, "step": 76620 }, { "epoch": 2.784722726942365, "grad_norm": 0.4646151661872864, "learning_rate": 3.158222680479527e-05, "loss": 0.0922, "step": 76630 }, { "epoch": 2.7850861254451633, "grad_norm": 1.0064074993133545, "learning_rate": 3.1577220439336814e-05, "loss": 0.0827, "step": 76640 }, { "epoch": 2.7854495239479613, "grad_norm": 0.7360056638717651, "learning_rate": 3.157221379048932e-05, "loss": 0.1526, "step": 76650 }, { "epoch": 2.7858129224507593, "grad_norm": 0.4394819736480713, "learning_rate": 3.1567206858468524e-05, "loss": 0.0802, "step": 76660 }, { "epoch": 2.7861763209535577, "grad_norm": 
2.16237735748291, "learning_rate": 3.1562199643490156e-05, "loss": 0.1079, "step": 76670 }, { "epoch": 2.7865397194563557, "grad_norm": 1.211832046508789, "learning_rate": 3.155719214576994e-05, "loss": 0.0896, "step": 76680 }, { "epoch": 2.786903117959154, "grad_norm": 0.561252772808075, "learning_rate": 3.1552184365523654e-05, "loss": 0.1002, "step": 76690 }, { "epoch": 2.787266516461952, "grad_norm": 0.947999894618988, "learning_rate": 3.1547176302967046e-05, "loss": 0.075, "step": 76700 }, { "epoch": 2.78762991496475, "grad_norm": 0.3279600739479065, "learning_rate": 3.154216795831591e-05, "loss": 0.0808, "step": 76710 }, { "epoch": 2.7879933134675485, "grad_norm": 0.4340432584285736, "learning_rate": 3.1537159331786046e-05, "loss": 0.1208, "step": 76720 }, { "epoch": 2.788356711970347, "grad_norm": 1.4407846927642822, "learning_rate": 3.1532150423593234e-05, "loss": 0.0973, "step": 76730 }, { "epoch": 2.788720110473145, "grad_norm": 1.0775196552276611, "learning_rate": 3.152714123395331e-05, "loss": 0.1013, "step": 76740 }, { "epoch": 2.789083508975943, "grad_norm": 1.0762931108474731, "learning_rate": 3.152213176308209e-05, "loss": 0.0797, "step": 76750 }, { "epoch": 2.789446907478741, "grad_norm": 0.645371675491333, "learning_rate": 3.1517122011195414e-05, "loss": 0.0918, "step": 76760 }, { "epoch": 2.7898103059815393, "grad_norm": 0.4548865556716919, "learning_rate": 3.151211197850914e-05, "loss": 0.1365, "step": 76770 }, { "epoch": 2.7901737044843378, "grad_norm": 0.9523658156394958, "learning_rate": 3.1507101665239136e-05, "loss": 0.103, "step": 76780 }, { "epoch": 2.7905371029871358, "grad_norm": 0.6093083024024963, "learning_rate": 3.150209107160127e-05, "loss": 0.0966, "step": 76790 }, { "epoch": 2.7909005014899337, "grad_norm": 1.1917424201965332, "learning_rate": 3.149708019781143e-05, "loss": 0.0723, "step": 76800 }, { "epoch": 2.7909005014899337, "eval_loss": 0.33958899974823, "eval_runtime": 180.4871, "eval_samples_per_second": 41.078, 
"eval_steps_per_second": 5.136, "eval_wer": 0.14761195926443627, "step": 76800 }, { "epoch": 2.791263899992732, "grad_norm": 0.5231362581253052, "learning_rate": 3.149206904408553e-05, "loss": 0.0774, "step": 76810 }, { "epoch": 2.79162729849553, "grad_norm": 0.3892790973186493, "learning_rate": 3.148705761063947e-05, "loss": 0.1298, "step": 76820 }, { "epoch": 2.7919906969983286, "grad_norm": 1.2342190742492676, "learning_rate": 3.1482045897689174e-05, "loss": 0.0784, "step": 76830 }, { "epoch": 2.7923540955011266, "grad_norm": 0.6379334926605225, "learning_rate": 3.147703390545059e-05, "loss": 0.1218, "step": 76840 }, { "epoch": 2.7927174940039245, "grad_norm": 4.186805248260498, "learning_rate": 3.1472021634139656e-05, "loss": 0.1091, "step": 76850 }, { "epoch": 2.793080892506723, "grad_norm": 0.9693048596382141, "learning_rate": 3.146700908397234e-05, "loss": 0.081, "step": 76860 }, { "epoch": 2.793444291009521, "grad_norm": 0.41208523511886597, "learning_rate": 3.146199625516461e-05, "loss": 0.1323, "step": 76870 }, { "epoch": 2.7938076895123194, "grad_norm": 0.6204960346221924, "learning_rate": 3.145698314793245e-05, "loss": 0.0843, "step": 76880 }, { "epoch": 2.7941710880151174, "grad_norm": 0.9049692153930664, "learning_rate": 3.145196976249187e-05, "loss": 0.1092, "step": 76890 }, { "epoch": 2.7945344865179154, "grad_norm": 0.9340922832489014, "learning_rate": 3.144695609905887e-05, "loss": 0.1021, "step": 76900 }, { "epoch": 2.794897885020714, "grad_norm": 0.40798988938331604, "learning_rate": 3.144194215784946e-05, "loss": 0.1096, "step": 76910 }, { "epoch": 2.7952612835235118, "grad_norm": 0.4020129144191742, "learning_rate": 3.143692793907968e-05, "loss": 0.0928, "step": 76920 }, { "epoch": 2.79562468202631, "grad_norm": 0.621026873588562, "learning_rate": 3.1431913442965585e-05, "loss": 0.0978, "step": 76930 }, { "epoch": 2.795988080529108, "grad_norm": 0.428535133600235, "learning_rate": 3.1426898669723225e-05, "loss": 0.1034, "step": 76940 }, { 
"epoch": 2.796351479031906, "grad_norm": 1.3301656246185303, "learning_rate": 3.1421883619568665e-05, "loss": 0.0813, "step": 76950 }, { "epoch": 2.7967148775347046, "grad_norm": 0.4475337266921997, "learning_rate": 3.141686829271799e-05, "loss": 0.0865, "step": 76960 }, { "epoch": 2.7970782760375026, "grad_norm": 2.5869007110595703, "learning_rate": 3.1411852689387294e-05, "loss": 0.1085, "step": 76970 }, { "epoch": 2.797441674540301, "grad_norm": 1.98558509349823, "learning_rate": 3.140683680979268e-05, "loss": 0.0887, "step": 76980 }, { "epoch": 2.797805073043099, "grad_norm": 0.4044126570224762, "learning_rate": 3.1401820654150267e-05, "loss": 0.1273, "step": 76990 }, { "epoch": 2.798168471545897, "grad_norm": 0.5848196148872375, "learning_rate": 3.139680422267617e-05, "loss": 0.9749, "step": 77000 }, { "epoch": 2.7985318700486954, "grad_norm": 1.0990394353866577, "learning_rate": 3.139178751558655e-05, "loss": 0.0864, "step": 77010 }, { "epoch": 2.798895268551494, "grad_norm": 0.5997377634048462, "learning_rate": 3.138677053309753e-05, "loss": 0.1654, "step": 77020 }, { "epoch": 2.799258667054292, "grad_norm": 2.957549571990967, "learning_rate": 3.138175327542531e-05, "loss": 0.0771, "step": 77030 }, { "epoch": 2.79962206555709, "grad_norm": 1.3711345195770264, "learning_rate": 3.137673574278604e-05, "loss": 0.1682, "step": 77040 }, { "epoch": 2.799985464059888, "grad_norm": 1.1909111738204956, "learning_rate": 3.137171793539591e-05, "loss": 0.1002, "step": 77050 }, { "epoch": 2.8003488625626862, "grad_norm": 0.622058093547821, "learning_rate": 3.136669985347113e-05, "loss": 0.0842, "step": 77060 }, { "epoch": 2.8007122610654847, "grad_norm": 1.0402450561523438, "learning_rate": 3.136168149722791e-05, "loss": 0.1096, "step": 77070 }, { "epoch": 2.8010756595682826, "grad_norm": 4.3762969970703125, "learning_rate": 3.135666286688247e-05, "loss": 0.0888, "step": 77080 }, { "epoch": 2.8014390580710806, "grad_norm": 1.1344795227050781, "learning_rate": 
3.135164396265103e-05, "loss": 0.1097, "step": 77090 }, { "epoch": 2.801802456573879, "grad_norm": 0.5318688154220581, "learning_rate": 3.134662478474987e-05, "loss": 0.0926, "step": 77100 }, { "epoch": 2.802165855076677, "grad_norm": 1.101820468902588, "learning_rate": 3.1341605333395216e-05, "loss": 0.0989, "step": 77110 }, { "epoch": 2.8025292535794755, "grad_norm": 0.3960217237472534, "learning_rate": 3.133658560880336e-05, "loss": 0.1064, "step": 77120 }, { "epoch": 2.8028926520822735, "grad_norm": 0.6371271014213562, "learning_rate": 3.133156561119057e-05, "loss": 0.0957, "step": 77130 }, { "epoch": 2.8032560505850714, "grad_norm": 0.6579133868217468, "learning_rate": 3.132654534077315e-05, "loss": 0.0861, "step": 77140 }, { "epoch": 2.80361944908787, "grad_norm": 0.3245817720890045, "learning_rate": 3.13215247977674e-05, "loss": 2.9585, "step": 77150 }, { "epoch": 2.803982847590668, "grad_norm": 0.4539554715156555, "learning_rate": 3.131650398238963e-05, "loss": 3.7161, "step": 77160 }, { "epoch": 2.8043462460934663, "grad_norm": 0.5258297324180603, "learning_rate": 3.1311482894856194e-05, "loss": 0.0938, "step": 77170 }, { "epoch": 2.8047096445962643, "grad_norm": 1.8863434791564941, "learning_rate": 3.13064615353834e-05, "loss": 0.1022, "step": 77180 }, { "epoch": 2.8050730430990622, "grad_norm": 0.4523390531539917, "learning_rate": 3.130143990418763e-05, "loss": 0.1094, "step": 77190 }, { "epoch": 2.8054364416018607, "grad_norm": 1.1494712829589844, "learning_rate": 3.1296418001485225e-05, "loss": 0.0911, "step": 77200 }, { "epoch": 2.8057998401046587, "grad_norm": 1.3108868598937988, "learning_rate": 3.129139582749258e-05, "loss": 0.1123, "step": 77210 }, { "epoch": 2.806163238607457, "grad_norm": 0.6619325280189514, "learning_rate": 3.128637338242607e-05, "loss": 0.1178, "step": 77220 }, { "epoch": 2.806526637110255, "grad_norm": 0.9496577978134155, "learning_rate": 3.128135066650209e-05, "loss": 0.0903, "step": 77230 }, { "epoch": 2.806890035613053, 
"grad_norm": 1.0447412729263306, "learning_rate": 3.127632767993707e-05, "loss": 0.1962, "step": 77240 }, { "epoch": 2.8072534341158515, "grad_norm": 0.6359366774559021, "learning_rate": 3.127130442294742e-05, "loss": 0.0872, "step": 77250 }, { "epoch": 2.8076168326186495, "grad_norm": 0.36711859703063965, "learning_rate": 3.126628089574957e-05, "loss": 0.1117, "step": 77260 }, { "epoch": 2.807980231121448, "grad_norm": 0.46006709337234497, "learning_rate": 3.1261257098559975e-05, "loss": 0.1075, "step": 77270 }, { "epoch": 2.808343629624246, "grad_norm": 0.4880903959274292, "learning_rate": 3.125623303159509e-05, "loss": 0.0933, "step": 77280 }, { "epoch": 2.808707028127044, "grad_norm": 0.6547648906707764, "learning_rate": 3.125120869507138e-05, "loss": 0.0927, "step": 77290 }, { "epoch": 2.8090704266298423, "grad_norm": 0.9286164045333862, "learning_rate": 3.124618408920533e-05, "loss": 0.0701, "step": 77300 }, { "epoch": 2.8094338251326407, "grad_norm": 0.7774373292922974, "learning_rate": 3.1241159214213436e-05, "loss": 0.0885, "step": 77310 }, { "epoch": 2.8097972236354387, "grad_norm": 0.6464956402778625, "learning_rate": 3.12361340703122e-05, "loss": 0.1133, "step": 77320 }, { "epoch": 2.8101606221382367, "grad_norm": 0.6208813190460205, "learning_rate": 3.123110865771813e-05, "loss": 0.1143, "step": 77330 }, { "epoch": 2.8105240206410347, "grad_norm": 0.5069448351860046, "learning_rate": 3.122608297664776e-05, "loss": 0.0991, "step": 77340 }, { "epoch": 2.810887419143833, "grad_norm": 0.6329632997512817, "learning_rate": 3.122105702731762e-05, "loss": 0.0925, "step": 77350 }, { "epoch": 2.8112508176466315, "grad_norm": 0.8677617311477661, "learning_rate": 3.121603080994428e-05, "loss": 0.0819, "step": 77360 }, { "epoch": 2.8116142161494295, "grad_norm": 0.7725453972816467, "learning_rate": 3.1211004324744274e-05, "loss": 0.1081, "step": 77370 }, { "epoch": 2.8119776146522275, "grad_norm": 0.7517724633216858, "learning_rate": 3.12059775719342e-05, "loss": 
0.0873, "step": 77380 }, { "epoch": 2.812341013155026, "grad_norm": 0.8779316544532776, "learning_rate": 3.1200950551730636e-05, "loss": 0.1038, "step": 77390 }, { "epoch": 2.812704411657824, "grad_norm": 0.4052380323410034, "learning_rate": 3.119592326435016e-05, "loss": 0.0791, "step": 77400 }, { "epoch": 2.812704411657824, "eval_loss": 0.3295031487941742, "eval_runtime": 180.5257, "eval_samples_per_second": 41.069, "eval_steps_per_second": 5.135, "eval_wer": 0.14710367237279212, "step": 77400 }, { "epoch": 2.8130678101606224, "grad_norm": 0.4020283818244934, "learning_rate": 3.1190895710009416e-05, "loss": 0.1089, "step": 77410 }, { "epoch": 2.8134312086634203, "grad_norm": 0.852902889251709, "learning_rate": 3.118586788892499e-05, "loss": 0.0949, "step": 77420 }, { "epoch": 2.8137946071662183, "grad_norm": 0.7513383030891418, "learning_rate": 3.1180839801313536e-05, "loss": 0.11, "step": 77430 }, { "epoch": 2.8141580056690167, "grad_norm": 0.7311908006668091, "learning_rate": 3.117581144739168e-05, "loss": 0.1077, "step": 77440 }, { "epoch": 2.8145214041718147, "grad_norm": 0.7238545417785645, "learning_rate": 3.117078282737608e-05, "loss": 0.0743, "step": 77450 }, { "epoch": 2.814884802674613, "grad_norm": 0.6685813069343567, "learning_rate": 3.116575394148341e-05, "loss": 0.0895, "step": 77460 }, { "epoch": 2.815248201177411, "grad_norm": 0.6721900105476379, "learning_rate": 3.116072478993034e-05, "loss": 2.3187, "step": 77470 }, { "epoch": 2.815611599680209, "grad_norm": 0.5871604084968567, "learning_rate": 3.1155695372933553e-05, "loss": 0.0879, "step": 77480 }, { "epoch": 2.8159749981830076, "grad_norm": 0.6194286942481995, "learning_rate": 3.1150665690709755e-05, "loss": 0.1139, "step": 77490 }, { "epoch": 2.8163383966858055, "grad_norm": 2.4753482341766357, "learning_rate": 3.114563574347566e-05, "loss": 0.0882, "step": 77500 }, { "epoch": 2.816701795188604, "grad_norm": 0.6420596241950989, "learning_rate": 3.1140605531447985e-05, "loss": 0.0677, "step": 
77510 }, { "epoch": 2.817065193691402, "grad_norm": 0.7851647734642029, "learning_rate": 3.1135575054843464e-05, "loss": 0.0943, "step": 77520 }, { "epoch": 2.8174285921942, "grad_norm": 0.5447911024093628, "learning_rate": 3.113054431387885e-05, "loss": 0.0936, "step": 77530 }, { "epoch": 2.8177919906969984, "grad_norm": 1.930080771446228, "learning_rate": 3.1125513308770886e-05, "loss": 0.0979, "step": 77540 }, { "epoch": 2.8181553891997964, "grad_norm": 1.2513458728790283, "learning_rate": 3.112048203973636e-05, "loss": 0.0968, "step": 77550 }, { "epoch": 2.818518787702595, "grad_norm": 0.7506178617477417, "learning_rate": 3.1115450506992025e-05, "loss": 0.0754, "step": 77560 }, { "epoch": 2.8188821862053928, "grad_norm": 2.6080875396728516, "learning_rate": 3.11104187107547e-05, "loss": 0.902, "step": 77570 }, { "epoch": 2.8192455847081908, "grad_norm": 1.5008831024169922, "learning_rate": 3.110538665124117e-05, "loss": 0.1221, "step": 77580 }, { "epoch": 2.819608983210989, "grad_norm": 0.6769622564315796, "learning_rate": 3.1100354328668244e-05, "loss": 0.1255, "step": 77590 }, { "epoch": 2.8199723817137876, "grad_norm": 1.557826280593872, "learning_rate": 3.109532174325277e-05, "loss": 0.4934, "step": 77600 }, { "epoch": 2.8203357802165856, "grad_norm": 0.48424941301345825, "learning_rate": 3.1090288895211554e-05, "loss": 0.1016, "step": 77610 }, { "epoch": 2.8206991787193836, "grad_norm": 0.5457457900047302, "learning_rate": 3.1085255784761466e-05, "loss": 0.1036, "step": 77620 }, { "epoch": 2.821062577222182, "grad_norm": 0.729720950126648, "learning_rate": 3.108022241211936e-05, "loss": 0.0936, "step": 77630 }, { "epoch": 2.82142597572498, "grad_norm": 3.1648247241973877, "learning_rate": 3.1075188777502104e-05, "loss": 0.1059, "step": 77640 }, { "epoch": 2.8217893742277784, "grad_norm": 2.022939920425415, "learning_rate": 3.107015488112658e-05, "loss": 2.3178, "step": 77650 }, { "epoch": 2.8221527727305764, "grad_norm": 0.5269419550895691, 
"learning_rate": 3.1065120723209676e-05, "loss": 0.1035, "step": 77660 }, { "epoch": 2.8225161712333744, "grad_norm": 2.0776426792144775, "learning_rate": 3.10600863039683e-05, "loss": 0.1209, "step": 77670 }, { "epoch": 2.822879569736173, "grad_norm": 1.718711495399475, "learning_rate": 3.105505162361936e-05, "loss": 0.0931, "step": 77680 }, { "epoch": 2.823242968238971, "grad_norm": 0.9783419370651245, "learning_rate": 3.10500166823798e-05, "loss": 0.1366, "step": 77690 }, { "epoch": 2.8236063667417692, "grad_norm": 0.3474352955818176, "learning_rate": 3.1044981480466544e-05, "loss": 0.0823, "step": 77700 }, { "epoch": 2.823969765244567, "grad_norm": 0.4555971622467041, "learning_rate": 3.103994601809655e-05, "loss": 0.0809, "step": 77710 }, { "epoch": 2.824333163747365, "grad_norm": 0.5148143768310547, "learning_rate": 3.103491029548676e-05, "loss": 0.1185, "step": 77720 }, { "epoch": 2.8246965622501636, "grad_norm": 0.7520084381103516, "learning_rate": 3.102987431285416e-05, "loss": 0.0865, "step": 77730 }, { "epoch": 2.8250599607529616, "grad_norm": 2.1148874759674072, "learning_rate": 3.102483807041574e-05, "loss": 0.0923, "step": 77740 }, { "epoch": 2.82542335925576, "grad_norm": 0.8044182658195496, "learning_rate": 3.1019801568388476e-05, "loss": 0.0848, "step": 77750 }, { "epoch": 2.825786757758558, "grad_norm": 0.5779685974121094, "learning_rate": 3.1014764806989385e-05, "loss": 0.0902, "step": 77760 }, { "epoch": 2.826150156261356, "grad_norm": 0.9560330510139465, "learning_rate": 3.1009727786435474e-05, "loss": 1.5009, "step": 77770 }, { "epoch": 2.8265135547641544, "grad_norm": 0.7544772624969482, "learning_rate": 3.100469050694378e-05, "loss": 0.1239, "step": 77780 }, { "epoch": 2.8268769532669524, "grad_norm": 0.824269711971283, "learning_rate": 3.099965296873134e-05, "loss": 0.0934, "step": 77790 }, { "epoch": 2.827240351769751, "grad_norm": 0.6971185803413391, "learning_rate": 3.099461517201519e-05, "loss": 0.0923, "step": 77800 }, { "epoch": 
2.827603750272549, "grad_norm": 0.81715989112854, "learning_rate": 3.098957711701241e-05, "loss": 0.1041, "step": 77810 }, { "epoch": 2.827967148775347, "grad_norm": 0.5701466798782349, "learning_rate": 3.098453880394006e-05, "loss": 0.1016, "step": 77820 }, { "epoch": 2.8283305472781453, "grad_norm": 6.4445881843566895, "learning_rate": 3.0979500233015224e-05, "loss": 0.0902, "step": 77830 }, { "epoch": 2.8286939457809432, "grad_norm": 2.981534004211426, "learning_rate": 3.0974461404455e-05, "loss": 0.1224, "step": 77840 }, { "epoch": 2.8290573442837417, "grad_norm": 0.878025472164154, "learning_rate": 3.096942231847649e-05, "loss": 0.0965, "step": 77850 }, { "epoch": 2.8294207427865397, "grad_norm": 0.45175373554229736, "learning_rate": 3.096438297529681e-05, "loss": 0.0821, "step": 77860 }, { "epoch": 2.8297841412893376, "grad_norm": 1.207901954650879, "learning_rate": 3.0959343375133096e-05, "loss": 0.1296, "step": 77870 }, { "epoch": 2.830147539792136, "grad_norm": 2.8622663021087646, "learning_rate": 3.0954303518202476e-05, "loss": 0.0949, "step": 77880 }, { "epoch": 2.8305109382949345, "grad_norm": 1.028940200805664, "learning_rate": 3.0949263404722104e-05, "loss": 0.1271, "step": 77890 }, { "epoch": 2.8308743367977325, "grad_norm": 0.8942374587059021, "learning_rate": 3.094422303490913e-05, "loss": 1.3726, "step": 77900 }, { "epoch": 2.8312377353005305, "grad_norm": 1.3904191255569458, "learning_rate": 3.093918240898075e-05, "loss": 0.0858, "step": 77910 }, { "epoch": 2.831601133803329, "grad_norm": 3.551011562347412, "learning_rate": 3.093414152715412e-05, "loss": 0.106, "step": 77920 }, { "epoch": 2.831964532306127, "grad_norm": 1.3634971380233765, "learning_rate": 3.092910038964645e-05, "loss": 0.1102, "step": 77930 }, { "epoch": 2.8323279308089253, "grad_norm": 0.9616494178771973, "learning_rate": 3.092405899667494e-05, "loss": 0.0946, "step": 77940 }, { "epoch": 2.8326913293117233, "grad_norm": 1.1661938428878784, "learning_rate": 
3.0919017348456805e-05, "loss": 0.0848, "step": 77950 }, { "epoch": 2.8330547278145213, "grad_norm": 1.016788125038147, "learning_rate": 3.091397544520927e-05, "loss": 0.0958, "step": 77960 }, { "epoch": 2.8334181263173197, "grad_norm": 0.3791126012802124, "learning_rate": 3.090893328714958e-05, "loss": 0.0954, "step": 77970 }, { "epoch": 2.8337815248201177, "grad_norm": 1.1772645711898804, "learning_rate": 3.0903890874494975e-05, "loss": 0.117, "step": 77980 }, { "epoch": 2.834144923322916, "grad_norm": 0.7023350596427917, "learning_rate": 3.089884820746272e-05, "loss": 0.0857, "step": 77990 }, { "epoch": 2.834508321825714, "grad_norm": 0.8230845332145691, "learning_rate": 3.0893805286270085e-05, "loss": 0.085, "step": 78000 }, { "epoch": 2.834508321825714, "eval_loss": 0.34080591797828674, "eval_runtime": 179.5232, "eval_samples_per_second": 41.298, "eval_steps_per_second": 5.164, "eval_wer": 0.1498084847604698, "step": 78000 }, { "epoch": 2.834871720328512, "grad_norm": 0.7763819098472595, "learning_rate": 3.088876211113435e-05, "loss": 0.0876, "step": 78010 }, { "epoch": 2.8352351188313105, "grad_norm": 0.9296404719352722, "learning_rate": 3.088371868227281e-05, "loss": 0.115, "step": 78020 }, { "epoch": 2.8355985173341085, "grad_norm": 0.7724182605743408, "learning_rate": 3.087867499990276e-05, "loss": 0.1133, "step": 78030 }, { "epoch": 2.835961915836907, "grad_norm": 0.6081514954566956, "learning_rate": 3.087363106424152e-05, "loss": 0.1185, "step": 78040 }, { "epoch": 2.836325314339705, "grad_norm": 0.692362904548645, "learning_rate": 3.086858687550642e-05, "loss": 0.0796, "step": 78050 }, { "epoch": 2.836688712842503, "grad_norm": 0.7458900213241577, "learning_rate": 3.0863542433914794e-05, "loss": 0.0985, "step": 78060 }, { "epoch": 2.8370521113453013, "grad_norm": 0.2584981322288513, "learning_rate": 3.0858497739683984e-05, "loss": 0.102, "step": 78070 }, { "epoch": 2.8374155098480993, "grad_norm": 1.8372403383255005, "learning_rate": 
3.085345279303136e-05, "loss": 0.1121, "step": 78080 }, { "epoch": 2.8377789083508977, "grad_norm": 2.2560524940490723, "learning_rate": 3.0848407594174266e-05, "loss": 0.1086, "step": 78090 }, { "epoch": 2.8381423068536957, "grad_norm": 1.1488791704177856, "learning_rate": 3.0843362143330104e-05, "loss": 1.6247, "step": 78100 }, { "epoch": 2.8385057053564937, "grad_norm": 1.206886649131775, "learning_rate": 3.083831644071626e-05, "loss": 0.0846, "step": 78110 }, { "epoch": 2.838869103859292, "grad_norm": 0.6960283517837524, "learning_rate": 3.083327048655013e-05, "loss": 0.1261, "step": 78120 }, { "epoch": 2.83923250236209, "grad_norm": 0.6910631656646729, "learning_rate": 3.082822428104914e-05, "loss": 0.0979, "step": 78130 }, { "epoch": 2.8395959008648886, "grad_norm": 1.3443272113800049, "learning_rate": 3.082317782443069e-05, "loss": 0.1082, "step": 78140 }, { "epoch": 2.8399592993676865, "grad_norm": 0.290623277425766, "learning_rate": 3.081813111691223e-05, "loss": 0.0773, "step": 78150 }, { "epoch": 2.8403226978704845, "grad_norm": 0.5689085721969604, "learning_rate": 3.08130841587112e-05, "loss": 0.0972, "step": 78160 }, { "epoch": 2.840686096373283, "grad_norm": 0.45181599259376526, "learning_rate": 3.080803695004506e-05, "loss": 0.1002, "step": 78170 }, { "epoch": 2.8410494948760814, "grad_norm": 0.31175151467323303, "learning_rate": 3.080298949113127e-05, "loss": 0.0951, "step": 78180 }, { "epoch": 2.8414128933788794, "grad_norm": 0.6608039140701294, "learning_rate": 3.0797941782187314e-05, "loss": 0.1207, "step": 78190 }, { "epoch": 2.8417762918816774, "grad_norm": 0.9375587701797485, "learning_rate": 3.079289382343068e-05, "loss": 0.0875, "step": 78200 }, { "epoch": 2.842139690384476, "grad_norm": 0.581164538860321, "learning_rate": 3.078784561507885e-05, "loss": 0.204, "step": 78210 }, { "epoch": 2.8425030888872738, "grad_norm": 0.8400561809539795, "learning_rate": 3.078279715734935e-05, "loss": 0.106, "step": 78220 }, { "epoch": 2.842866487390072, 
"grad_norm": 0.5431386232376099, "learning_rate": 3.07777484504597e-05, "loss": 0.1134, "step": 78230 }, { "epoch": 2.84322988589287, "grad_norm": 0.5342890620231628, "learning_rate": 3.077269949462742e-05, "loss": 0.1102, "step": 78240 }, { "epoch": 2.843593284395668, "grad_norm": 0.6654142737388611, "learning_rate": 3.076765029007006e-05, "loss": 0.0818, "step": 78250 }, { "epoch": 2.8439566828984666, "grad_norm": 0.49340054392814636, "learning_rate": 3.076260083700518e-05, "loss": 0.0899, "step": 78260 }, { "epoch": 2.8443200814012646, "grad_norm": 0.5866402387619019, "learning_rate": 3.0757551135650325e-05, "loss": 0.1185, "step": 78270 }, { "epoch": 2.844683479904063, "grad_norm": 0.45987945795059204, "learning_rate": 3.075250118622308e-05, "loss": 0.1405, "step": 78280 }, { "epoch": 2.845046878406861, "grad_norm": 1.2310110330581665, "learning_rate": 3.0747450988941025e-05, "loss": 0.1569, "step": 78290 }, { "epoch": 2.845410276909659, "grad_norm": 1.251125693321228, "learning_rate": 3.074240054402175e-05, "loss": 0.0897, "step": 78300 }, { "epoch": 2.8457736754124574, "grad_norm": 0.369094580411911, "learning_rate": 3.0737349851682876e-05, "loss": 0.0805, "step": 78310 }, { "epoch": 2.8461370739152554, "grad_norm": 0.8887357711791992, "learning_rate": 3.0732298912142e-05, "loss": 0.1093, "step": 78320 }, { "epoch": 2.846500472418054, "grad_norm": 0.629465639591217, "learning_rate": 3.072724772561677e-05, "loss": 0.1114, "step": 78330 }, { "epoch": 2.846863870920852, "grad_norm": 1.0231704711914062, "learning_rate": 3.072219629232481e-05, "loss": 0.1372, "step": 78340 }, { "epoch": 2.84722726942365, "grad_norm": 0.8119713664054871, "learning_rate": 3.071714461248377e-05, "loss": 0.0918, "step": 78350 }, { "epoch": 2.847590667926448, "grad_norm": 0.886022686958313, "learning_rate": 3.071209268631131e-05, "loss": 0.1005, "step": 78360 }, { "epoch": 2.847954066429246, "grad_norm": 0.7678380012512207, "learning_rate": 3.07070405140251e-05, "loss": 0.1497, "step": 
78370 }, { "epoch": 2.8483174649320446, "grad_norm": 1.3270221948623657, "learning_rate": 3.070198809584283e-05, "loss": 0.1596, "step": 78380 }, { "epoch": 2.8486808634348426, "grad_norm": 0.3739996552467346, "learning_rate": 3.0696935431982165e-05, "loss": 0.1301, "step": 78390 }, { "epoch": 2.8490442619376406, "grad_norm": 1.079307198524475, "learning_rate": 3.0691882522660834e-05, "loss": 0.0879, "step": 78400 }, { "epoch": 2.849407660440439, "grad_norm": 2.8571724891662598, "learning_rate": 3.068682936809652e-05, "loss": 0.0914, "step": 78410 }, { "epoch": 2.849771058943237, "grad_norm": 0.8942508697509766, "learning_rate": 3.068177596850698e-05, "loss": 0.1367, "step": 78420 }, { "epoch": 2.8501344574460354, "grad_norm": 0.7308377027511597, "learning_rate": 3.0676722324109924e-05, "loss": 0.0927, "step": 78430 }, { "epoch": 2.8504978559488334, "grad_norm": 0.5024714469909668, "learning_rate": 3.06716684351231e-05, "loss": 0.114, "step": 78440 }, { "epoch": 2.8508612544516314, "grad_norm": 0.8501279354095459, "learning_rate": 3.066661430176426e-05, "loss": 0.0976, "step": 78450 }, { "epoch": 2.85122465295443, "grad_norm": 1.5030112266540527, "learning_rate": 3.066155992425118e-05, "loss": 0.0904, "step": 78460 }, { "epoch": 2.8515880514572283, "grad_norm": 0.9223312139511108, "learning_rate": 3.065650530280162e-05, "loss": 0.088, "step": 78470 }, { "epoch": 2.8519514499600263, "grad_norm": 0.8321495652198792, "learning_rate": 3.0651450437633375e-05, "loss": 0.0871, "step": 78480 }, { "epoch": 2.8523148484628242, "grad_norm": 0.5586594343185425, "learning_rate": 3.064639532896423e-05, "loss": 0.0893, "step": 78490 }, { "epoch": 2.8526782469656227, "grad_norm": 2.804748296737671, "learning_rate": 3.064133997701201e-05, "loss": 0.0791, "step": 78500 }, { "epoch": 2.8530416454684207, "grad_norm": 0.7210208177566528, "learning_rate": 3.063628438199453e-05, "loss": 0.0806, "step": 78510 }, { "epoch": 2.853405043971219, "grad_norm": 0.3784767687320709, 
"learning_rate": 3.063122854412959e-05, "loss": 0.1137, "step": 78520 }, { "epoch": 2.853768442474017, "grad_norm": 0.754591703414917, "learning_rate": 3.062617246363506e-05, "loss": 0.0763, "step": 78530 }, { "epoch": 2.854131840976815, "grad_norm": 0.5262603759765625, "learning_rate": 3.062111614072877e-05, "loss": 0.1768, "step": 78540 }, { "epoch": 2.8544952394796135, "grad_norm": 0.6021800637245178, "learning_rate": 3.0616059575628596e-05, "loss": 0.0806, "step": 78550 }, { "epoch": 2.8548586379824115, "grad_norm": 0.5301395654678345, "learning_rate": 3.061100276855239e-05, "loss": 0.1013, "step": 78560 }, { "epoch": 2.85522203648521, "grad_norm": 0.6743770837783813, "learning_rate": 3.060594571971804e-05, "loss": 0.0996, "step": 78570 }, { "epoch": 2.855585434988008, "grad_norm": 0.4729292094707489, "learning_rate": 3.060088842934344e-05, "loss": 0.1166, "step": 78580 }, { "epoch": 2.855948833490806, "grad_norm": 1.35919189453125, "learning_rate": 3.059583089764648e-05, "loss": 0.0929, "step": 78590 }, { "epoch": 2.8563122319936043, "grad_norm": 1.0586267709732056, "learning_rate": 3.059077312484507e-05, "loss": 0.1016, "step": 78600 }, { "epoch": 2.8563122319936043, "eval_loss": 0.3411506116390228, "eval_runtime": 179.4197, "eval_samples_per_second": 41.322, "eval_steps_per_second": 5.167, "eval_wer": 0.1493818868335542, "step": 78600 }, { "epoch": 2.8566756304964023, "grad_norm": 1.362656593322754, "learning_rate": 3.0585715111157145e-05, "loss": 0.0997, "step": 78610 }, { "epoch": 2.8570390289992007, "grad_norm": 0.3799169361591339, "learning_rate": 3.058065685680063e-05, "loss": 0.0939, "step": 78620 }, { "epoch": 2.8574024275019987, "grad_norm": 0.5108311772346497, "learning_rate": 3.0575598361993476e-05, "loss": 0.1104, "step": 78630 }, { "epoch": 2.8577658260047967, "grad_norm": 0.7714293599128723, "learning_rate": 3.057053962695361e-05, "loss": 0.1018, "step": 78640 }, { "epoch": 2.858129224507595, "grad_norm": 0.46769365668296814, "learning_rate": 
3.056548065189902e-05, "loss": 0.0867, "step": 78650 }, { "epoch": 2.858492623010393, "grad_norm": 0.6245691180229187, "learning_rate": 3.056042143704767e-05, "loss": 0.1026, "step": 78660 }, { "epoch": 2.8588560215131915, "grad_norm": 0.45852673053741455, "learning_rate": 3.055536198261755e-05, "loss": 0.3827, "step": 78670 }, { "epoch": 2.8592194200159895, "grad_norm": 1.1499156951904297, "learning_rate": 3.055030228882663e-05, "loss": 0.1123, "step": 78680 }, { "epoch": 2.8595828185187875, "grad_norm": 0.426826536655426, "learning_rate": 3.054524235589295e-05, "loss": 0.095, "step": 78690 }, { "epoch": 2.859946217021586, "grad_norm": 0.8750178217887878, "learning_rate": 3.05401821840345e-05, "loss": 0.0875, "step": 78700 }, { "epoch": 2.860309615524384, "grad_norm": 0.49052000045776367, "learning_rate": 3.053512177346932e-05, "loss": 0.0902, "step": 78710 }, { "epoch": 2.8606730140271823, "grad_norm": 0.41709479689598083, "learning_rate": 3.0530061124415426e-05, "loss": 0.1248, "step": 78720 }, { "epoch": 2.8610364125299803, "grad_norm": 1.0585765838623047, "learning_rate": 3.052500023709088e-05, "loss": 0.0996, "step": 78730 }, { "epoch": 2.8613998110327783, "grad_norm": 1.3335462808609009, "learning_rate": 3.051993911171373e-05, "loss": 0.1188, "step": 78740 }, { "epoch": 2.8617632095355767, "grad_norm": 4.254768371582031, "learning_rate": 3.051487774850204e-05, "loss": 1.4491, "step": 78750 }, { "epoch": 2.862126608038375, "grad_norm": 0.6429024934768677, "learning_rate": 3.0509816147673897e-05, "loss": 0.0981, "step": 78760 }, { "epoch": 2.862490006541173, "grad_norm": 0.5360010266304016, "learning_rate": 3.050475430944738e-05, "loss": 0.1222, "step": 78770 }, { "epoch": 2.862853405043971, "grad_norm": 0.7741981148719788, "learning_rate": 3.0499692234040577e-05, "loss": 0.0939, "step": 78780 }, { "epoch": 2.8632168035467696, "grad_norm": 0.3562833368778229, "learning_rate": 3.0494629921671614e-05, "loss": 0.1132, "step": 78790 }, { "epoch": 
2.8635802020495675, "grad_norm": 0.3981534242630005, "learning_rate": 3.0489567372558585e-05, "loss": 0.1176, "step": 78800 }, { "epoch": 2.863943600552366, "grad_norm": 0.6364398002624512, "learning_rate": 3.0484504586919643e-05, "loss": 0.1225, "step": 78810 }, { "epoch": 2.864306999055164, "grad_norm": 0.9307785034179688, "learning_rate": 3.047944156497291e-05, "loss": 0.1134, "step": 78820 }, { "epoch": 2.864670397557962, "grad_norm": 0.3588425815105438, "learning_rate": 3.047437830693653e-05, "loss": 0.1112, "step": 78830 }, { "epoch": 2.8650337960607604, "grad_norm": 2.3050026893615723, "learning_rate": 3.0469314813028672e-05, "loss": 0.1121, "step": 78840 }, { "epoch": 2.8653971945635583, "grad_norm": 3.113504648208618, "learning_rate": 3.0464251083467492e-05, "loss": 0.085, "step": 78850 }, { "epoch": 2.8657605930663568, "grad_norm": 1.4952332973480225, "learning_rate": 3.0459187118471177e-05, "loss": 0.0785, "step": 78860 }, { "epoch": 2.8661239915691548, "grad_norm": 3.147885322570801, "learning_rate": 3.0454122918257915e-05, "loss": 0.1065, "step": 78870 }, { "epoch": 2.8664873900719527, "grad_norm": 1.8741129636764526, "learning_rate": 3.0449058483045906e-05, "loss": 0.1066, "step": 78880 }, { "epoch": 2.866850788574751, "grad_norm": 0.5779256224632263, "learning_rate": 3.044399381305335e-05, "loss": 0.1182, "step": 78890 }, { "epoch": 2.867214187077549, "grad_norm": 0.8008689880371094, "learning_rate": 3.043892890849847e-05, "loss": 0.0872, "step": 78900 }, { "epoch": 2.8675775855803476, "grad_norm": 0.38459739089012146, "learning_rate": 3.0433863769599498e-05, "loss": 0.0808, "step": 78910 }, { "epoch": 2.8679409840831456, "grad_norm": 0.524728536605835, "learning_rate": 3.0428798396574663e-05, "loss": 0.0871, "step": 78920 }, { "epoch": 2.8683043825859436, "grad_norm": 0.9773525595664978, "learning_rate": 3.042373278964223e-05, "loss": 0.0752, "step": 78930 }, { "epoch": 2.868667781088742, "grad_norm": 2.2767083644866943, "learning_rate": 
3.041866694902045e-05, "loss": 0.164, "step": 78940 }, { "epoch": 2.86903117959154, "grad_norm": 1.0827412605285645, "learning_rate": 3.0413600874927578e-05, "loss": 0.0826, "step": 78950 }, { "epoch": 2.8693945780943384, "grad_norm": 0.5473418831825256, "learning_rate": 3.040853456758192e-05, "loss": 0.0948, "step": 78960 }, { "epoch": 2.8697579765971364, "grad_norm": 1.4309738874435425, "learning_rate": 3.0403468027201742e-05, "loss": 0.1169, "step": 78970 }, { "epoch": 2.8701213750999344, "grad_norm": 0.6939014196395874, "learning_rate": 3.0398401254005353e-05, "loss": 0.0897, "step": 78980 }, { "epoch": 2.870484773602733, "grad_norm": 1.1855500936508179, "learning_rate": 3.0393334248211064e-05, "loss": 0.115, "step": 78990 }, { "epoch": 2.870848172105531, "grad_norm": 0.3316340446472168, "learning_rate": 3.0388267010037193e-05, "loss": 0.0837, "step": 79000 }, { "epoch": 2.871211570608329, "grad_norm": 0.5338824987411499, "learning_rate": 3.0383199539702067e-05, "loss": 1.6163, "step": 79010 }, { "epoch": 2.871574969111127, "grad_norm": 0.6817033290863037, "learning_rate": 3.0378131837424024e-05, "loss": 0.0984, "step": 79020 }, { "epoch": 2.871938367613925, "grad_norm": 1.228437066078186, "learning_rate": 3.0373063903421416e-05, "loss": 0.088, "step": 79030 }, { "epoch": 2.8723017661167236, "grad_norm": 1.157533884048462, "learning_rate": 3.0367995737912604e-05, "loss": 0.1136, "step": 79040 }, { "epoch": 2.872665164619522, "grad_norm": 0.695222795009613, "learning_rate": 3.0362927341115954e-05, "loss": 0.074, "step": 79050 }, { "epoch": 2.87302856312232, "grad_norm": 0.658486008644104, "learning_rate": 3.0357858713249844e-05, "loss": 0.9512, "step": 79060 }, { "epoch": 2.873391961625118, "grad_norm": 0.7332690358161926, "learning_rate": 3.035278985453267e-05, "loss": 0.12, "step": 79070 }, { "epoch": 2.8737553601279164, "grad_norm": 0.8998427987098694, "learning_rate": 3.034772076518283e-05, "loss": 0.087, "step": 79080 }, { "epoch": 2.8741187586307144, 
"grad_norm": 0.6652089953422546, "learning_rate": 3.034265144541872e-05, "loss": 0.1126, "step": 79090 }, { "epoch": 2.874482157133513, "grad_norm": 0.5899316668510437, "learning_rate": 3.0337581895458773e-05, "loss": 0.097, "step": 79100 }, { "epoch": 2.874845555636311, "grad_norm": 2.4437944889068604, "learning_rate": 3.0332512115521407e-05, "loss": 0.0967, "step": 79110 }, { "epoch": 2.875208954139109, "grad_norm": 1.678469181060791, "learning_rate": 3.0327442105825076e-05, "loss": 0.1091, "step": 79120 }, { "epoch": 2.8755723526419072, "grad_norm": 2.5215954780578613, "learning_rate": 3.032237186658821e-05, "loss": 0.0954, "step": 79130 }, { "epoch": 2.8759357511447052, "grad_norm": 0.5362206697463989, "learning_rate": 3.031730139802929e-05, "loss": 0.0925, "step": 79140 }, { "epoch": 2.8762991496475037, "grad_norm": 0.8719716668128967, "learning_rate": 3.0312230700366766e-05, "loss": 0.0721, "step": 79150 }, { "epoch": 2.8766625481503016, "grad_norm": 0.5796521306037903, "learning_rate": 3.030715977381912e-05, "loss": 0.085, "step": 79160 }, { "epoch": 2.8770259466530996, "grad_norm": 0.5834314227104187, "learning_rate": 3.0302088618604844e-05, "loss": 0.0967, "step": 79170 }, { "epoch": 2.877389345155898, "grad_norm": 0.9963647127151489, "learning_rate": 3.029701723494243e-05, "loss": 0.1186, "step": 79180 }, { "epoch": 2.877752743658696, "grad_norm": 0.852916419506073, "learning_rate": 3.02919456230504e-05, "loss": 0.1209, "step": 79190 }, { "epoch": 2.8781161421614945, "grad_norm": 0.6775915622711182, "learning_rate": 3.028687378314725e-05, "loss": 0.093, "step": 79200 }, { "epoch": 2.8781161421614945, "eval_loss": 0.3184477686882019, "eval_runtime": 179.9919, "eval_samples_per_second": 41.191, "eval_steps_per_second": 5.15, "eval_wer": 0.14663169168769402, "step": 79200 }, { "epoch": 2.8784795406642925, "grad_norm": 1.629595160484314, "learning_rate": 3.028180171545153e-05, "loss": 0.0897, "step": 79210 }, { "epoch": 2.8788429391670904, "grad_norm": 
0.8693638443946838, "learning_rate": 3.027672942018176e-05, "loss": 0.8562, "step": 79220 }, { "epoch": 2.879206337669889, "grad_norm": 0.8512022495269775, "learning_rate": 3.02716568975565e-05, "loss": 0.0919, "step": 79230 }, { "epoch": 2.879569736172687, "grad_norm": 0.3746062219142914, "learning_rate": 3.0266584147794295e-05, "loss": 0.1221, "step": 79240 }, { "epoch": 2.8799331346754853, "grad_norm": 0.9358767867088318, "learning_rate": 3.026151117111372e-05, "loss": 0.1888, "step": 79250 }, { "epoch": 2.8802965331782833, "grad_norm": 0.5931565165519714, "learning_rate": 3.025643796773335e-05, "loss": 0.1583, "step": 79260 }, { "epoch": 2.8806599316810813, "grad_norm": 0.6608014702796936, "learning_rate": 3.0251364537871767e-05, "loss": 0.1107, "step": 79270 }, { "epoch": 2.8810233301838797, "grad_norm": 0.8677799105644226, "learning_rate": 3.0246290881747574e-05, "loss": 0.1313, "step": 79280 }, { "epoch": 2.8813867286866777, "grad_norm": 1.321589469909668, "learning_rate": 3.0241216999579368e-05, "loss": 0.1476, "step": 79290 }, { "epoch": 2.881750127189476, "grad_norm": 1.2019727230072021, "learning_rate": 3.0236142891585777e-05, "loss": 0.0955, "step": 79300 }, { "epoch": 2.882113525692274, "grad_norm": 0.7772002220153809, "learning_rate": 3.023106855798542e-05, "loss": 0.0799, "step": 79310 }, { "epoch": 2.882476924195072, "grad_norm": 0.4837106764316559, "learning_rate": 3.0225993998996925e-05, "loss": 0.1102, "step": 79320 }, { "epoch": 2.8828403226978705, "grad_norm": 1.1968311071395874, "learning_rate": 3.0220919214838946e-05, "loss": 0.0921, "step": 79330 }, { "epoch": 2.883203721200669, "grad_norm": 0.2687516510486603, "learning_rate": 3.021584420573013e-05, "loss": 0.4854, "step": 79340 }, { "epoch": 2.883567119703467, "grad_norm": 0.661100447177887, "learning_rate": 3.0210768971889152e-05, "loss": 0.0993, "step": 79350 }, { "epoch": 2.883930518206265, "grad_norm": 0.4699995219707489, "learning_rate": 3.0205693513534672e-05, "loss": 0.1043, "step": 
79360 }, { "epoch": 2.8842939167090633, "grad_norm": 0.5575037598609924, "learning_rate": 3.0200617830885386e-05, "loss": 0.113, "step": 79370 }, { "epoch": 2.8846573152118613, "grad_norm": 0.5147402286529541, "learning_rate": 3.0195541924159974e-05, "loss": 0.0983, "step": 79380 }, { "epoch": 2.8850207137146597, "grad_norm": 0.8548463582992554, "learning_rate": 3.0190465793577155e-05, "loss": 0.0788, "step": 79390 }, { "epoch": 2.8853841122174577, "grad_norm": 0.46500858664512634, "learning_rate": 3.018538943935563e-05, "loss": 0.0641, "step": 79400 }, { "epoch": 2.8857475107202557, "grad_norm": 0.479408860206604, "learning_rate": 3.018031286171412e-05, "loss": 0.1539, "step": 79410 }, { "epoch": 2.886110909223054, "grad_norm": 0.9867441654205322, "learning_rate": 3.0175236060871366e-05, "loss": 0.0999, "step": 79420 }, { "epoch": 2.886474307725852, "grad_norm": 0.8628416061401367, "learning_rate": 3.0170159037046096e-05, "loss": 0.1166, "step": 79430 }, { "epoch": 2.8868377062286505, "grad_norm": 1.9563437700271606, "learning_rate": 3.0165081790457077e-05, "loss": 0.1211, "step": 79440 }, { "epoch": 2.8872011047314485, "grad_norm": 0.6726937890052795, "learning_rate": 3.0160004321323053e-05, "loss": 0.0926, "step": 79450 }, { "epoch": 2.8875645032342465, "grad_norm": 0.509483814239502, "learning_rate": 3.0154926629862813e-05, "loss": 0.0803, "step": 79460 }, { "epoch": 2.887927901737045, "grad_norm": 0.7619544863700867, "learning_rate": 3.014984871629512e-05, "loss": 0.1251, "step": 79470 }, { "epoch": 2.888291300239843, "grad_norm": 1.0748878717422485, "learning_rate": 3.0144770580838767e-05, "loss": 0.1073, "step": 79480 }, { "epoch": 2.8886546987426414, "grad_norm": 0.7058316469192505, "learning_rate": 3.0139692223712555e-05, "loss": 0.1058, "step": 79490 }, { "epoch": 2.8890180972454393, "grad_norm": 1.453240990638733, "learning_rate": 3.013461364513529e-05, "loss": 0.105, "step": 79500 }, { "epoch": 2.8893814957482373, "grad_norm": 1.7408169507980347, 
"learning_rate": 3.0129534845325803e-05, "loss": 1.6102, "step": 79510 }, { "epoch": 2.8897448942510358, "grad_norm": 0.4194059669971466, "learning_rate": 3.0124455824502894e-05, "loss": 0.0936, "step": 79520 }, { "epoch": 2.8901082927538337, "grad_norm": 7.034887790679932, "learning_rate": 3.0119376582885427e-05, "loss": 0.1072, "step": 79530 }, { "epoch": 2.890471691256632, "grad_norm": 1.7293283939361572, "learning_rate": 3.0114297120692236e-05, "loss": 0.1205, "step": 79540 }, { "epoch": 2.89083508975943, "grad_norm": 3.1167123317718506, "learning_rate": 3.0109217438142184e-05, "loss": 0.0817, "step": 79550 }, { "epoch": 2.891198488262228, "grad_norm": 1.0231302976608276, "learning_rate": 3.0104137535454124e-05, "loss": 0.1408, "step": 79560 }, { "epoch": 2.8915618867650266, "grad_norm": 0.3560333251953125, "learning_rate": 3.0099057412846942e-05, "loss": 0.127, "step": 79570 }, { "epoch": 2.8919252852678246, "grad_norm": 0.43482980132102966, "learning_rate": 3.009397707053952e-05, "loss": 0.079, "step": 79580 }, { "epoch": 2.892288683770623, "grad_norm": 0.8485931158065796, "learning_rate": 3.008889650875074e-05, "loss": 0.1206, "step": 79590 }, { "epoch": 2.892652082273421, "grad_norm": 2.297492504119873, "learning_rate": 3.0083815727699526e-05, "loss": 0.0991, "step": 79600 }, { "epoch": 2.893015480776219, "grad_norm": 0.5528286695480347, "learning_rate": 3.0078734727604775e-05, "loss": 0.0757, "step": 79610 }, { "epoch": 2.8933788792790174, "grad_norm": 0.5743618607521057, "learning_rate": 3.0073653508685424e-05, "loss": 0.1121, "step": 79620 }, { "epoch": 2.893742277781816, "grad_norm": 0.8158531785011292, "learning_rate": 3.006857207116039e-05, "loss": 0.1045, "step": 79630 }, { "epoch": 2.894105676284614, "grad_norm": 0.7049798369407654, "learning_rate": 3.0063490415248613e-05, "loss": 0.1012, "step": 79640 }, { "epoch": 2.8944690747874118, "grad_norm": 0.3521101772785187, "learning_rate": 3.0058408541169057e-05, "loss": 0.0765, "step": 79650 }, { 
"epoch": 2.89483247329021, "grad_norm": 1.0171892642974854, "learning_rate": 3.0053326449140674e-05, "loss": 0.1011, "step": 79660 }, { "epoch": 2.895195871793008, "grad_norm": 0.5262839794158936, "learning_rate": 3.0048244139382438e-05, "loss": 0.1322, "step": 79670 }, { "epoch": 2.8955592702958066, "grad_norm": 1.3580801486968994, "learning_rate": 3.0043161612113313e-05, "loss": 0.0946, "step": 79680 }, { "epoch": 2.8959226687986046, "grad_norm": 1.4954004287719727, "learning_rate": 3.0038078867552306e-05, "loss": 0.0929, "step": 79690 }, { "epoch": 2.8962860673014026, "grad_norm": 0.6515393257141113, "learning_rate": 3.0032995905918405e-05, "loss": 0.1044, "step": 79700 }, { "epoch": 2.896649465804201, "grad_norm": 0.647331953048706, "learning_rate": 3.002791272743061e-05, "loss": 0.0747, "step": 79710 }, { "epoch": 2.897012864306999, "grad_norm": 0.9838120341300964, "learning_rate": 3.0022829332307962e-05, "loss": 0.1169, "step": 79720 }, { "epoch": 2.8973762628097974, "grad_norm": 0.6499975919723511, "learning_rate": 3.001774572076945e-05, "loss": 0.1144, "step": 79730 }, { "epoch": 2.8977396613125954, "grad_norm": 0.8443338871002197, "learning_rate": 3.0012661893034143e-05, "loss": 0.1184, "step": 79740 }, { "epoch": 2.8981030598153934, "grad_norm": 0.49458226561546326, "learning_rate": 3.0007577849321062e-05, "loss": 0.0847, "step": 79750 }, { "epoch": 2.898466458318192, "grad_norm": 0.5407196283340454, "learning_rate": 3.0002493589849272e-05, "loss": 0.0981, "step": 79760 }, { "epoch": 2.89882985682099, "grad_norm": 0.4729011654853821, "learning_rate": 2.9997409114837833e-05, "loss": 0.2098, "step": 79770 }, { "epoch": 2.8991932553237882, "grad_norm": 0.9962542653083801, "learning_rate": 2.9992324424505815e-05, "loss": 0.1022, "step": 79780 }, { "epoch": 2.8995566538265862, "grad_norm": 0.4466484487056732, "learning_rate": 2.9987239519072296e-05, "loss": 0.0995, "step": 79790 }, { "epoch": 2.899920052329384, "grad_norm": 1.0443413257598877, "learning_rate": 
2.9982154398756372e-05, "loss": 0.0956, "step": 79800 }, { "epoch": 2.899920052329384, "eval_loss": 0.33830875158309937, "eval_runtime": 179.6694, "eval_samples_per_second": 41.265, "eval_steps_per_second": 5.159, "eval_wer": 0.14741227512843322, "step": 79800 }, { "epoch": 2.9002834508321826, "grad_norm": 0.4343029856681824, "learning_rate": 2.9977577606928674e-05, "loss": 3.598, "step": 79810 }, { "epoch": 2.9006468493349806, "grad_norm": 0.4609208405017853, "learning_rate": 2.9972492078939808e-05, "loss": 0.3644, "step": 79820 }, { "epoch": 2.901010247837779, "grad_norm": 0.8700145483016968, "learning_rate": 2.9967406336703952e-05, "loss": 0.1192, "step": 79830 }, { "epoch": 2.901373646340577, "grad_norm": 0.6192082762718201, "learning_rate": 2.9962320380440228e-05, "loss": 0.1008, "step": 79840 }, { "epoch": 2.901737044843375, "grad_norm": 0.41174137592315674, "learning_rate": 2.995723421036778e-05, "loss": 0.1181, "step": 79850 }, { "epoch": 2.9021004433461735, "grad_norm": 3.049891710281372, "learning_rate": 2.9952147826705745e-05, "loss": 0.0981, "step": 79860 }, { "epoch": 2.9024638418489714, "grad_norm": 0.4502425193786621, "learning_rate": 2.9947061229673275e-05, "loss": 0.1306, "step": 79870 }, { "epoch": 2.90282724035177, "grad_norm": 0.5543062686920166, "learning_rate": 2.9941974419489545e-05, "loss": 0.0982, "step": 79880 }, { "epoch": 2.903190638854568, "grad_norm": 0.9228424429893494, "learning_rate": 2.9936887396373715e-05, "loss": 0.1304, "step": 79890 }, { "epoch": 2.903554037357366, "grad_norm": 0.6268784999847412, "learning_rate": 2.9931800160544975e-05, "loss": 0.0823, "step": 79900 }, { "epoch": 2.9039174358601643, "grad_norm": 0.6508156657218933, "learning_rate": 2.9926712712222516e-05, "loss": 0.0938, "step": 79910 }, { "epoch": 2.9042808343629627, "grad_norm": 0.45321550965309143, "learning_rate": 2.9921625051625533e-05, "loss": 0.1246, "step": 79920 }, { "epoch": 2.9046442328657607, "grad_norm": 0.6320390105247498, "learning_rate": 
2.9916537178973242e-05, "loss": 0.0835, "step": 79930 }, { "epoch": 2.9050076313685587, "grad_norm": 0.7819294929504395, "learning_rate": 2.9911449094484852e-05, "loss": 0.1086, "step": 79940 }, { "epoch": 2.905371029871357, "grad_norm": 0.7224891185760498, "learning_rate": 2.9906360798379594e-05, "loss": 0.0825, "step": 79950 }, { "epoch": 2.905734428374155, "grad_norm": 0.7941370606422424, "learning_rate": 2.990127229087671e-05, "loss": 0.0839, "step": 79960 }, { "epoch": 2.9060978268769535, "grad_norm": 0.5782437920570374, "learning_rate": 2.9896183572195442e-05, "loss": 0.097, "step": 79970 }, { "epoch": 2.9064612253797515, "grad_norm": 1.7239668369293213, "learning_rate": 2.989109464255504e-05, "loss": 0.0919, "step": 79980 }, { "epoch": 2.9068246238825495, "grad_norm": 1.2387197017669678, "learning_rate": 2.988600550217478e-05, "loss": 0.0851, "step": 79990 }, { "epoch": 2.907188022385348, "grad_norm": 2.639697313308716, "learning_rate": 2.9880916151273926e-05, "loss": 0.0829, "step": 80000 }, { "epoch": 2.907551420888146, "grad_norm": 0.9989453554153442, "learning_rate": 2.9875826590071754e-05, "loss": 0.0789, "step": 80010 }, { "epoch": 2.9079148193909443, "grad_norm": 1.7626229524612427, "learning_rate": 2.987073681878757e-05, "loss": 0.0905, "step": 80020 }, { "epoch": 2.9082782178937423, "grad_norm": 2.506680488586426, "learning_rate": 2.986564683764066e-05, "loss": 0.1006, "step": 80030 }, { "epoch": 2.9086416163965403, "grad_norm": 1.299718976020813, "learning_rate": 2.9860556646850347e-05, "loss": 0.1024, "step": 80040 }, { "epoch": 2.9090050148993387, "grad_norm": 1.0177974700927734, "learning_rate": 2.9855466246635943e-05, "loss": 0.0788, "step": 80050 }, { "epoch": 2.9093684134021367, "grad_norm": 0.6795012354850769, "learning_rate": 2.9850375637216767e-05, "loss": 0.1015, "step": 80060 }, { "epoch": 2.909731811904935, "grad_norm": 0.24664345383644104, "learning_rate": 2.9845284818812164e-05, "loss": 0.0944, "step": 80070 }, { "epoch": 
2.910095210407733, "grad_norm": 0.5156140923500061, "learning_rate": 2.984019379164148e-05, "loss": 0.0917, "step": 80080 }, { "epoch": 2.910458608910531, "grad_norm": 1.331092357635498, "learning_rate": 2.9835102555924065e-05, "loss": 0.1069, "step": 80090 }, { "epoch": 2.9108220074133295, "grad_norm": 0.8283532857894897, "learning_rate": 2.983001111187928e-05, "loss": 0.0973, "step": 80100 }, { "epoch": 2.9111854059161275, "grad_norm": 0.8295063972473145, "learning_rate": 2.9824919459726507e-05, "loss": 0.0805, "step": 80110 }, { "epoch": 2.911548804418926, "grad_norm": 1.1419576406478882, "learning_rate": 2.981982759968513e-05, "loss": 0.0974, "step": 80120 }, { "epoch": 2.911912202921724, "grad_norm": 0.5481380224227905, "learning_rate": 2.9814735531974513e-05, "loss": 0.1007, "step": 80130 }, { "epoch": 2.912275601424522, "grad_norm": 1.488004207611084, "learning_rate": 2.9809643256814092e-05, "loss": 0.1599, "step": 80140 }, { "epoch": 2.9126389999273203, "grad_norm": 1.346227765083313, "learning_rate": 2.980455077442324e-05, "loss": 0.0826, "step": 80150 }, { "epoch": 2.9130023984301183, "grad_norm": 0.6229421496391296, "learning_rate": 2.9799458085021396e-05, "loss": 0.1042, "step": 80160 }, { "epoch": 2.9133657969329168, "grad_norm": 0.7724307179450989, "learning_rate": 2.979436518882798e-05, "loss": 0.1068, "step": 80170 }, { "epoch": 2.9137291954357147, "grad_norm": 0.4120637774467468, "learning_rate": 2.9789272086062426e-05, "loss": 0.0949, "step": 80180 }, { "epoch": 2.9140925939385127, "grad_norm": 0.8677302598953247, "learning_rate": 2.9784178776944178e-05, "loss": 0.1253, "step": 80190 }, { "epoch": 2.914455992441311, "grad_norm": 0.5817800760269165, "learning_rate": 2.9779085261692686e-05, "loss": 0.0754, "step": 80200 }, { "epoch": 2.9148193909441096, "grad_norm": 2.1507725715637207, "learning_rate": 2.977399154052742e-05, "loss": 0.2402, "step": 80210 }, { "epoch": 2.9151827894469076, "grad_norm": 0.4526066184043884, "learning_rate": 
2.9768897613667836e-05, "loss": 0.1243, "step": 80220 }, { "epoch": 2.9155461879497055, "grad_norm": 1.391575574874878, "learning_rate": 2.976380348133343e-05, "loss": 0.1129, "step": 80230 }, { "epoch": 2.915909586452504, "grad_norm": 0.7592736482620239, "learning_rate": 2.9758709143743678e-05, "loss": 0.1376, "step": 80240 }, { "epoch": 2.916272984955302, "grad_norm": 0.671796441078186, "learning_rate": 2.975361460111808e-05, "loss": 0.0608, "step": 80250 }, { "epoch": 2.9166363834581004, "grad_norm": 2.7102513313293457, "learning_rate": 2.9748519853676142e-05, "loss": 0.0919, "step": 80260 }, { "epoch": 2.9169997819608984, "grad_norm": 1.3529448509216309, "learning_rate": 2.974342490163738e-05, "loss": 0.1079, "step": 80270 }, { "epoch": 2.9173631804636964, "grad_norm": 0.8166323900222778, "learning_rate": 2.9738329745221317e-05, "loss": 0.076, "step": 80280 }, { "epoch": 2.917726578966495, "grad_norm": 0.824936032295227, "learning_rate": 2.973323438464748e-05, "loss": 0.1155, "step": 80290 }, { "epoch": 2.9180899774692928, "grad_norm": 0.4837649166584015, "learning_rate": 2.972813882013542e-05, "loss": 0.0775, "step": 80300 }, { "epoch": 2.918453375972091, "grad_norm": 1.6542714834213257, "learning_rate": 2.9723043051904676e-05, "loss": 0.1013, "step": 80310 }, { "epoch": 2.918816774474889, "grad_norm": 0.2745031416416168, "learning_rate": 2.9717947080174813e-05, "loss": 0.0914, "step": 80320 }, { "epoch": 2.919180172977687, "grad_norm": 1.0925464630126953, "learning_rate": 2.9712850905165402e-05, "loss": 0.1577, "step": 80330 }, { "epoch": 2.9195435714804856, "grad_norm": 0.6618020534515381, "learning_rate": 2.9707754527096004e-05, "loss": 0.1105, "step": 80340 }, { "epoch": 2.9199069699832836, "grad_norm": 0.9204813838005066, "learning_rate": 2.9702657946186223e-05, "loss": 0.0855, "step": 80350 }, { "epoch": 2.920270368486082, "grad_norm": 0.6669716835021973, "learning_rate": 2.9697561162655634e-05, "loss": 0.1003, "step": 80360 }, { "epoch": 
2.92063376698888, "grad_norm": 0.4183257818222046, "learning_rate": 2.9692464176723855e-05, "loss": 0.0998, "step": 80370 }, { "epoch": 2.920997165491678, "grad_norm": 6.260075569152832, "learning_rate": 2.9687366988610493e-05, "loss": 0.1007, "step": 80380 }, { "epoch": 2.9213605639944764, "grad_norm": 2.819657325744629, "learning_rate": 2.9682269598535162e-05, "loss": 0.0982, "step": 80390 }, { "epoch": 2.9217239624972744, "grad_norm": 0.5224602818489075, "learning_rate": 2.967717200671749e-05, "loss": 0.1009, "step": 80400 }, { "epoch": 2.9217239624972744, "eval_loss": 0.3313393294811249, "eval_runtime": 180.6766, "eval_samples_per_second": 41.035, "eval_steps_per_second": 5.131, "eval_wer": 0.14564234755931527, "step": 80400 }, { "epoch": 2.922087361000073, "grad_norm": 0.504077672958374, "learning_rate": 2.9672074213377122e-05, "loss": 0.0892, "step": 80410 }, { "epoch": 2.922450759502871, "grad_norm": 0.34101128578186035, "learning_rate": 2.96669762187337e-05, "loss": 0.1225, "step": 80420 }, { "epoch": 2.922814158005669, "grad_norm": 0.7131312489509583, "learning_rate": 2.966187802300688e-05, "loss": 0.0804, "step": 80430 }, { "epoch": 2.9231775565084672, "grad_norm": 1.1708521842956543, "learning_rate": 2.9656779626416314e-05, "loss": 0.1506, "step": 80440 }, { "epoch": 2.923540955011265, "grad_norm": 1.453748106956482, "learning_rate": 2.965168102918169e-05, "loss": 0.1353, "step": 80450 }, { "epoch": 2.9239043535140636, "grad_norm": 0.7086220979690552, "learning_rate": 2.964658223152268e-05, "loss": 0.0892, "step": 80460 }, { "epoch": 2.9242677520168616, "grad_norm": 0.8955743312835693, "learning_rate": 2.9641483233658973e-05, "loss": 0.0989, "step": 80470 }, { "epoch": 2.9246311505196596, "grad_norm": 0.5515048503875732, "learning_rate": 2.9636384035810265e-05, "loss": 0.1187, "step": 80480 }, { "epoch": 2.924994549022458, "grad_norm": 1.164106011390686, "learning_rate": 2.963128463819627e-05, "loss": 0.1057, "step": 80490 }, { "epoch": 
2.9253579475252565, "grad_norm": 0.8334591388702393, "learning_rate": 2.9626185041036696e-05, "loss": 0.4326, "step": 80500 }, { "epoch": 2.9257213460280544, "grad_norm": 0.49100545048713684, "learning_rate": 2.9621085244551272e-05, "loss": 0.087, "step": 80510 }, { "epoch": 2.9260847445308524, "grad_norm": 0.5234443545341492, "learning_rate": 2.9615985248959722e-05, "loss": 0.0982, "step": 80520 }, { "epoch": 2.926448143033651, "grad_norm": 1.0264884233474731, "learning_rate": 2.961088505448179e-05, "loss": 0.0965, "step": 80530 }, { "epoch": 2.926811541536449, "grad_norm": 0.7391953468322754, "learning_rate": 2.9605784661337233e-05, "loss": 0.4112, "step": 80540 }, { "epoch": 2.9271749400392473, "grad_norm": 4.123349666595459, "learning_rate": 2.9600684069745803e-05, "loss": 0.0957, "step": 80550 }, { "epoch": 2.9275383385420453, "grad_norm": 2.021984338760376, "learning_rate": 2.9595583279927257e-05, "loss": 0.0952, "step": 80560 }, { "epoch": 2.9279017370448432, "grad_norm": 0.546364426612854, "learning_rate": 2.9590482292101383e-05, "loss": 0.1167, "step": 80570 }, { "epoch": 2.9282651355476417, "grad_norm": 0.6582480669021606, "learning_rate": 2.9585381106487963e-05, "loss": 0.1149, "step": 80580 }, { "epoch": 2.9286285340504397, "grad_norm": 0.6442203521728516, "learning_rate": 2.958027972330678e-05, "loss": 0.114, "step": 80590 }, { "epoch": 2.928991932553238, "grad_norm": 0.36104217171669006, "learning_rate": 2.9575178142777643e-05, "loss": 0.1073, "step": 80600 }, { "epoch": 2.929355331056036, "grad_norm": 1.933703064918518, "learning_rate": 2.9570076365120363e-05, "loss": 0.1025, "step": 80610 }, { "epoch": 2.929718729558834, "grad_norm": 0.6946010589599609, "learning_rate": 2.956497439055475e-05, "loss": 0.098, "step": 80620 }, { "epoch": 2.9300821280616325, "grad_norm": 0.6851116418838501, "learning_rate": 2.9559872219300633e-05, "loss": 0.089, "step": 80630 }, { "epoch": 2.9304455265644305, "grad_norm": 0.7841261625289917, "learning_rate": 
2.9554769851577847e-05, "loss": 0.0956, "step": 80640 }, { "epoch": 2.930808925067229, "grad_norm": 0.7110610008239746, "learning_rate": 2.9549667287606225e-05, "loss": 3.0796, "step": 80650 }, { "epoch": 2.931172323570027, "grad_norm": 1.172564148902893, "learning_rate": 2.954456452760564e-05, "loss": 0.1005, "step": 80660 }, { "epoch": 2.931535722072825, "grad_norm": 0.44309452176094055, "learning_rate": 2.9539461571795928e-05, "loss": 0.1204, "step": 80670 }, { "epoch": 2.9318991205756233, "grad_norm": 1.936556339263916, "learning_rate": 2.9534358420396978e-05, "loss": 0.1174, "step": 80680 }, { "epoch": 2.9322625190784213, "grad_norm": 0.7564278841018677, "learning_rate": 2.9529255073628653e-05, "loss": 0.1195, "step": 80690 }, { "epoch": 2.9326259175812197, "grad_norm": 0.443446546792984, "learning_rate": 2.952415153171084e-05, "loss": 0.081, "step": 80700 }, { "epoch": 2.9329893160840177, "grad_norm": 1.2691240310668945, "learning_rate": 2.9519047794863434e-05, "loss": 0.0897, "step": 80710 }, { "epoch": 2.9333527145868157, "grad_norm": 0.9182204008102417, "learning_rate": 2.9513943863306337e-05, "loss": 0.1097, "step": 80720 }, { "epoch": 2.933716113089614, "grad_norm": 0.8882256150245667, "learning_rate": 2.950883973725947e-05, "loss": 0.0928, "step": 80730 }, { "epoch": 2.934079511592412, "grad_norm": 0.7944663166999817, "learning_rate": 2.9503735416942735e-05, "loss": 0.0942, "step": 80740 }, { "epoch": 2.9344429100952105, "grad_norm": 0.4034799337387085, "learning_rate": 2.9498630902576057e-05, "loss": 0.0881, "step": 80750 }, { "epoch": 2.9348063085980085, "grad_norm": 0.37826791405677795, "learning_rate": 2.949352619437939e-05, "loss": 0.0925, "step": 80760 }, { "epoch": 2.9351697071008065, "grad_norm": 0.5798398852348328, "learning_rate": 2.948842129257266e-05, "loss": 0.2732, "step": 80770 }, { "epoch": 2.935533105603605, "grad_norm": 1.879731297492981, "learning_rate": 2.948331619737583e-05, "loss": 0.1367, "step": 80780 }, { "epoch": 
2.9358965041064033, "grad_norm": 0.9150714874267578, "learning_rate": 2.9478210909008856e-05, "loss": 0.1096, "step": 80790 }, { "epoch": 2.9362599026092013, "grad_norm": 1.1447314023971558, "learning_rate": 2.947310542769171e-05, "loss": 0.0902, "step": 80800 }, { "epoch": 2.9366233011119993, "grad_norm": 4.874231338500977, "learning_rate": 2.9467999753644367e-05, "loss": 0.1206, "step": 80810 }, { "epoch": 2.9369866996147977, "grad_norm": 0.6774386763572693, "learning_rate": 2.9462893887086807e-05, "loss": 1.3499, "step": 80820 }, { "epoch": 2.9373500981175957, "grad_norm": 1.529350996017456, "learning_rate": 2.945778782823903e-05, "loss": 0.0881, "step": 80830 }, { "epoch": 2.937713496620394, "grad_norm": 1.6456125974655151, "learning_rate": 2.945268157732104e-05, "loss": 0.1505, "step": 80840 }, { "epoch": 2.938076895123192, "grad_norm": 0.8206561207771301, "learning_rate": 2.944757513455284e-05, "loss": 0.0873, "step": 80850 }, { "epoch": 2.93844029362599, "grad_norm": 1.830176830291748, "learning_rate": 2.9442468500154453e-05, "loss": 0.1002, "step": 80860 }, { "epoch": 2.9388036921287886, "grad_norm": 1.3282181024551392, "learning_rate": 2.9437361674345905e-05, "loss": 0.113, "step": 80870 }, { "epoch": 2.9391670906315865, "grad_norm": 1.0826009511947632, "learning_rate": 2.943225465734723e-05, "loss": 0.0938, "step": 80880 }, { "epoch": 2.939530489134385, "grad_norm": 0.4472619891166687, "learning_rate": 2.9427147449378467e-05, "loss": 0.0999, "step": 80890 }, { "epoch": 2.939893887637183, "grad_norm": 0.5009836554527283, "learning_rate": 2.9422040050659682e-05, "loss": 0.0867, "step": 80900 }, { "epoch": 2.940257286139981, "grad_norm": 1.1963528394699097, "learning_rate": 2.941693246141092e-05, "loss": 0.1074, "step": 80910 }, { "epoch": 2.9406206846427794, "grad_norm": 0.3960399627685547, "learning_rate": 2.941182468185225e-05, "loss": 0.1298, "step": 80920 }, { "epoch": 2.9409840831455774, "grad_norm": 0.7249006032943726, "learning_rate": 
2.940671671220376e-05, "loss": 0.093, "step": 80930 }, { "epoch": 2.941347481648376, "grad_norm": 0.5996330976486206, "learning_rate": 2.9401608552685517e-05, "loss": 0.1213, "step": 80940 }, { "epoch": 2.9417108801511738, "grad_norm": 1.9566066265106201, "learning_rate": 2.9396500203517628e-05, "loss": 0.0795, "step": 80950 }, { "epoch": 2.9420742786539718, "grad_norm": 1.3713960647583008, "learning_rate": 2.9391391664920186e-05, "loss": 0.0879, "step": 80960 }, { "epoch": 2.94243767715677, "grad_norm": 0.639777660369873, "learning_rate": 2.9386282937113306e-05, "loss": 0.121, "step": 80970 }, { "epoch": 2.942801075659568, "grad_norm": 0.6077538728713989, "learning_rate": 2.9381174020317093e-05, "loss": 0.1197, "step": 80980 }, { "epoch": 2.9431644741623666, "grad_norm": 1.1664655208587646, "learning_rate": 2.937606491475169e-05, "loss": 0.1132, "step": 80990 }, { "epoch": 2.9435278726651646, "grad_norm": 0.44675686955451965, "learning_rate": 2.9370955620637213e-05, "loss": 0.0951, "step": 81000 }, { "epoch": 2.9435278726651646, "eval_loss": 0.3302467167377472, "eval_runtime": 180.9443, "eval_samples_per_second": 40.974, "eval_steps_per_second": 5.123, "eval_wer": 0.14665892134260353, "step": 81000 }, { "epoch": 2.9438912711679626, "grad_norm": 0.5193939208984375, "learning_rate": 2.936584613819381e-05, "loss": 0.0736, "step": 81010 }, { "epoch": 2.944254669670761, "grad_norm": 4.043830394744873, "learning_rate": 2.9360736467641632e-05, "loss": 0.146, "step": 81020 }, { "epoch": 2.944618068173559, "grad_norm": 0.8960684537887573, "learning_rate": 2.9355626609200832e-05, "loss": 0.0933, "step": 81030 }, { "epoch": 2.9449814666763574, "grad_norm": 0.4079131782054901, "learning_rate": 2.9350516563091586e-05, "loss": 0.1555, "step": 81040 }, { "epoch": 2.9453448651791554, "grad_norm": 0.5445089936256409, "learning_rate": 2.934540632953406e-05, "loss": 0.1163, "step": 81050 }, { "epoch": 2.9457082636819534, "grad_norm": 4.034743309020996, "learning_rate": 
2.934029590874843e-05, "loss": 0.0817, "step": 81060 }, { "epoch": 2.946071662184752, "grad_norm": 1.0341869592666626, "learning_rate": 2.933518530095489e-05, "loss": 0.1143, "step": 81070 }, { "epoch": 2.9464350606875502, "grad_norm": 1.2305265665054321, "learning_rate": 2.9330074506373644e-05, "loss": 0.0945, "step": 81080 }, { "epoch": 2.946798459190348, "grad_norm": 1.0462018251419067, "learning_rate": 2.9324963525224897e-05, "loss": 0.1181, "step": 81090 }, { "epoch": 2.947161857693146, "grad_norm": 1.0071947574615479, "learning_rate": 2.9319852357728857e-05, "loss": 0.0935, "step": 81100 }, { "epoch": 2.9475252561959446, "grad_norm": 0.3526189625263214, "learning_rate": 2.931474100410575e-05, "loss": 0.0801, "step": 81110 }, { "epoch": 2.9478886546987426, "grad_norm": 0.22515632212162018, "learning_rate": 2.9309629464575804e-05, "loss": 0.0955, "step": 81120 }, { "epoch": 2.948252053201541, "grad_norm": 1.0929369926452637, "learning_rate": 2.930451773935926e-05, "loss": 0.1307, "step": 81130 }, { "epoch": 2.948615451704339, "grad_norm": 1.116426944732666, "learning_rate": 2.9299405828676364e-05, "loss": 0.1067, "step": 81140 }, { "epoch": 2.948978850207137, "grad_norm": 1.081275224685669, "learning_rate": 2.9294293732747358e-05, "loss": 0.09, "step": 81150 }, { "epoch": 2.9493422487099354, "grad_norm": 0.4264758825302124, "learning_rate": 2.9289181451792526e-05, "loss": 0.0715, "step": 81160 }, { "epoch": 2.9497056472127334, "grad_norm": 0.48512741923332214, "learning_rate": 2.928406898603212e-05, "loss": 0.0862, "step": 81170 }, { "epoch": 2.950069045715532, "grad_norm": 1.373490571975708, "learning_rate": 2.9278956335686426e-05, "loss": 0.0913, "step": 81180 }, { "epoch": 2.95043244421833, "grad_norm": 0.8015252351760864, "learning_rate": 2.9273843500975728e-05, "loss": 0.1061, "step": 81190 }, { "epoch": 2.950795842721128, "grad_norm": 1.1844451427459717, "learning_rate": 2.926873048212032e-05, "loss": 0.0869, "step": 81200 }, { "epoch": 
2.9511592412239263, "grad_norm": 0.6692541241645813, "learning_rate": 2.9263617279340504e-05, "loss": 0.0701, "step": 81210 }, { "epoch": 2.9515226397267242, "grad_norm": 0.5705549120903015, "learning_rate": 2.9258503892856585e-05, "loss": 0.0923, "step": 81220 }, { "epoch": 2.9518860382295227, "grad_norm": 3.464637041091919, "learning_rate": 2.925339032288889e-05, "loss": 0.0887, "step": 81230 }, { "epoch": 2.9522494367323207, "grad_norm": 1.3816555738449097, "learning_rate": 2.9248276569657745e-05, "loss": 0.1061, "step": 81240 }, { "epoch": 2.9526128352351186, "grad_norm": 0.42636001110076904, "learning_rate": 2.9243162633383463e-05, "loss": 0.0889, "step": 81250 }, { "epoch": 2.952976233737917, "grad_norm": 1.3730337619781494, "learning_rate": 2.9238048514286413e-05, "loss": 0.0846, "step": 81260 }, { "epoch": 2.953339632240715, "grad_norm": 0.708302915096283, "learning_rate": 2.9232934212586925e-05, "loss": 0.1031, "step": 81270 }, { "epoch": 2.9537030307435135, "grad_norm": 0.8678138852119446, "learning_rate": 2.9227819728505367e-05, "loss": 0.1032, "step": 81280 }, { "epoch": 2.9540664292463115, "grad_norm": 1.3964858055114746, "learning_rate": 2.9222705062262096e-05, "loss": 0.0804, "step": 81290 }, { "epoch": 2.9544298277491094, "grad_norm": 0.6818703413009644, "learning_rate": 2.921759021407749e-05, "loss": 0.0871, "step": 81300 }, { "epoch": 2.954793226251908, "grad_norm": 1.078911542892456, "learning_rate": 2.9212475184171932e-05, "loss": 0.084, "step": 81310 }, { "epoch": 2.955156624754706, "grad_norm": 9.535181999206543, "learning_rate": 2.9207359972765795e-05, "loss": 0.0994, "step": 81320 }, { "epoch": 2.9555200232575043, "grad_norm": 0.8145607113838196, "learning_rate": 2.9202244580079496e-05, "loss": 0.0932, "step": 81330 }, { "epoch": 2.9558834217603023, "grad_norm": 0.7624212503433228, "learning_rate": 2.919712900633343e-05, "loss": 0.0864, "step": 81340 }, { "epoch": 2.9562468202631003, "grad_norm": 0.8702255487442017, "learning_rate": 
2.9192013251748007e-05, "loss": 0.0634, "step": 81350 }, { "epoch": 2.9566102187658987, "grad_norm": 0.5847821235656738, "learning_rate": 2.9186897316543644e-05, "loss": 0.0777, "step": 81360 }, { "epoch": 2.956973617268697, "grad_norm": 0.6588742136955261, "learning_rate": 2.9181781200940776e-05, "loss": 0.1158, "step": 81370 }, { "epoch": 2.957337015771495, "grad_norm": 0.8579931855201721, "learning_rate": 2.917666490515984e-05, "loss": 0.0941, "step": 81380 }, { "epoch": 2.957700414274293, "grad_norm": 0.9896190166473389, "learning_rate": 2.9171548429421264e-05, "loss": 0.1239, "step": 81390 }, { "epoch": 2.9580638127770915, "grad_norm": 3.1119399070739746, "learning_rate": 2.9166431773945514e-05, "loss": 0.0973, "step": 81400 }, { "epoch": 2.9584272112798895, "grad_norm": 0.6430637240409851, "learning_rate": 2.9161314938953037e-05, "loss": 0.0964, "step": 81410 }, { "epoch": 2.958790609782688, "grad_norm": 0.16759249567985535, "learning_rate": 2.9156197924664312e-05, "loss": 0.0869, "step": 81420 }, { "epoch": 2.959154008285486, "grad_norm": 0.5628185272216797, "learning_rate": 2.915108073129981e-05, "loss": 0.0885, "step": 81430 }, { "epoch": 2.959517406788284, "grad_norm": 1.6805976629257202, "learning_rate": 2.914596335908e-05, "loss": 0.1402, "step": 81440 }, { "epoch": 2.9598808052910823, "grad_norm": 0.996425986289978, "learning_rate": 2.9140845808225388e-05, "loss": 0.0823, "step": 81450 }, { "epoch": 2.9602442037938803, "grad_norm": 0.6585590243339539, "learning_rate": 2.9135728078956453e-05, "loss": 0.0735, "step": 81460 }, { "epoch": 2.9606076022966787, "grad_norm": 12.562392234802246, "learning_rate": 2.9130610171493722e-05, "loss": 0.1128, "step": 81470 }, { "epoch": 2.9609710007994767, "grad_norm": 0.5774283409118652, "learning_rate": 2.9125492086057682e-05, "loss": 0.0844, "step": 81480 }, { "epoch": 2.9613343993022747, "grad_norm": 0.5809444785118103, "learning_rate": 2.912037382286888e-05, "loss": 0.1085, "step": 81490 }, { "epoch": 
2.961697797805073, "grad_norm": 0.5010820627212524, "learning_rate": 2.9115255382147827e-05, "loss": 0.0803, "step": 81500 }, { "epoch": 2.962061196307871, "grad_norm": 1.3801108598709106, "learning_rate": 2.9110136764115055e-05, "loss": 0.0661, "step": 81510 }, { "epoch": 2.9624245948106696, "grad_norm": 1.027056336402893, "learning_rate": 2.9105017968991123e-05, "loss": 0.109, "step": 81520 }, { "epoch": 2.9627879933134675, "grad_norm": 0.978444516658783, "learning_rate": 2.909989899699656e-05, "loss": 0.0935, "step": 81530 }, { "epoch": 2.9631513918162655, "grad_norm": 2.2912325859069824, "learning_rate": 2.909477984835195e-05, "loss": 0.0905, "step": 81540 }, { "epoch": 2.963514790319064, "grad_norm": 1.8258506059646606, "learning_rate": 2.9089660523277833e-05, "loss": 0.0858, "step": 81550 }, { "epoch": 2.963878188821862, "grad_norm": 1.961013674736023, "learning_rate": 2.9084541021994804e-05, "loss": 0.0858, "step": 81560 }, { "epoch": 2.9642415873246604, "grad_norm": 0.25523585081100464, "learning_rate": 2.9079421344723435e-05, "loss": 0.0943, "step": 81570 }, { "epoch": 2.9646049858274583, "grad_norm": 0.8947268724441528, "learning_rate": 2.9074301491684313e-05, "loss": 0.1359, "step": 81580 }, { "epoch": 2.9649683843302563, "grad_norm": 0.5048563480377197, "learning_rate": 2.906918146309804e-05, "loss": 0.101, "step": 81590 }, { "epoch": 2.9653317828330548, "grad_norm": 0.5647857785224915, "learning_rate": 2.9064061259185206e-05, "loss": 0.0856, "step": 81600 }, { "epoch": 2.9653317828330548, "eval_loss": 0.3227691650390625, "eval_runtime": 180.1665, "eval_samples_per_second": 41.151, "eval_steps_per_second": 5.145, "eval_wer": 0.14830177718881044, "step": 81600 }, { "epoch": 2.9656951813358527, "grad_norm": 0.39782488346099854, "learning_rate": 2.905894088016644e-05, "loss": 0.0894, "step": 81610 }, { "epoch": 2.966058579838651, "grad_norm": 0.8222696185112, "learning_rate": 2.9053820326262354e-05, "loss": 0.1044, "step": 81620 }, { "epoch": 
2.966421978341449, "grad_norm": 1.1362643241882324, "learning_rate": 2.904869959769357e-05, "loss": 0.0877, "step": 81630 }, { "epoch": 2.966785376844247, "grad_norm": 1.2552587985992432, "learning_rate": 2.904357869468073e-05, "loss": 0.1026, "step": 81640 }, { "epoch": 2.9671487753470456, "grad_norm": 0.7745124101638794, "learning_rate": 2.9038457617444466e-05, "loss": 0.0873, "step": 81650 }, { "epoch": 2.967512173849844, "grad_norm": 2.436732769012451, "learning_rate": 2.9033336366205432e-05, "loss": 0.0781, "step": 81660 }, { "epoch": 2.967875572352642, "grad_norm": 0.5839672684669495, "learning_rate": 2.9028214941184284e-05, "loss": 0.1152, "step": 81670 }, { "epoch": 2.96823897085544, "grad_norm": 0.9086770415306091, "learning_rate": 2.9023093342601694e-05, "loss": 0.0919, "step": 81680 }, { "epoch": 2.9686023693582384, "grad_norm": 0.9452347159385681, "learning_rate": 2.901797157067832e-05, "loss": 0.1022, "step": 81690 }, { "epoch": 2.9689657678610364, "grad_norm": 0.7514773607254028, "learning_rate": 2.9012849625634847e-05, "loss": 1.7501, "step": 81700 }, { "epoch": 2.969329166363835, "grad_norm": 1.142943263053894, "learning_rate": 2.9007727507691966e-05, "loss": 0.0906, "step": 81710 }, { "epoch": 2.969692564866633, "grad_norm": 1.344696283340454, "learning_rate": 2.9002605217070363e-05, "loss": 0.106, "step": 81720 }, { "epoch": 2.970055963369431, "grad_norm": 0.7408128976821899, "learning_rate": 2.899748275399074e-05, "loss": 0.1039, "step": 81730 }, { "epoch": 2.970419361872229, "grad_norm": 1.7010142803192139, "learning_rate": 2.8992360118673816e-05, "loss": 0.1071, "step": 81740 }, { "epoch": 2.970782760375027, "grad_norm": 1.1163867712020874, "learning_rate": 2.8987237311340286e-05, "loss": 0.0827, "step": 81750 }, { "epoch": 2.9711461588778256, "grad_norm": 2.118901491165161, "learning_rate": 2.8982114332210903e-05, "loss": 0.1033, "step": 81760 }, { "epoch": 2.9715095573806236, "grad_norm": 0.469307541847229, "learning_rate": 
2.897699118150637e-05, "loss": 0.1066, "step": 81770 }, { "epoch": 2.9718729558834216, "grad_norm": 0.7060539722442627, "learning_rate": 2.8971867859447444e-05, "loss": 0.1095, "step": 81780 }, { "epoch": 2.97223635438622, "grad_norm": 1.0039600133895874, "learning_rate": 2.8966744366254856e-05, "loss": 0.1412, "step": 81790 }, { "epoch": 2.972599752889018, "grad_norm": 1.0242499113082886, "learning_rate": 2.8961620702149373e-05, "loss": 0.0833, "step": 81800 }, { "epoch": 2.9729631513918164, "grad_norm": 0.5664736032485962, "learning_rate": 2.8956496867351752e-05, "loss": 0.0793, "step": 81810 }, { "epoch": 2.9733265498946144, "grad_norm": 0.5009458065032959, "learning_rate": 2.8951372862082753e-05, "loss": 0.0925, "step": 81820 }, { "epoch": 2.9736899483974124, "grad_norm": 1.5864497423171997, "learning_rate": 2.894624868656316e-05, "loss": 0.09, "step": 81830 }, { "epoch": 2.974053346900211, "grad_norm": 4.366128444671631, "learning_rate": 2.894112434101375e-05, "loss": 0.2078, "step": 81840 }, { "epoch": 2.974416745403009, "grad_norm": 0.7028205394744873, "learning_rate": 2.8935999825655312e-05, "loss": 0.2251, "step": 81850 }, { "epoch": 2.9747801439058073, "grad_norm": 0.5308458805084229, "learning_rate": 2.8930875140708645e-05, "loss": 0.0903, "step": 81860 }, { "epoch": 2.9751435424086052, "grad_norm": 3.357011318206787, "learning_rate": 2.892575028639456e-05, "loss": 0.1105, "step": 81870 }, { "epoch": 2.975506940911403, "grad_norm": 0.9237788319587708, "learning_rate": 2.8920625262933864e-05, "loss": 0.0774, "step": 81880 }, { "epoch": 2.9758703394142016, "grad_norm": 0.480029821395874, "learning_rate": 2.891550007054737e-05, "loss": 0.1154, "step": 81890 }, { "epoch": 2.9762337379169996, "grad_norm": 0.6338282823562622, "learning_rate": 2.891037470945591e-05, "loss": 4.1728, "step": 81900 }, { "epoch": 2.976597136419798, "grad_norm": 0.628488302230835, "learning_rate": 2.8905249179880318e-05, "loss": 0.078, "step": 81910 }, { "epoch": 2.976960534922596, 
"grad_norm": 1.6177397966384888, "learning_rate": 2.8900123482041437e-05, "loss": 0.089, "step": 81920 }, { "epoch": 2.977323933425394, "grad_norm": 1.245584487915039, "learning_rate": 2.8894997616160103e-05, "loss": 0.1122, "step": 81930 }, { "epoch": 2.9776873319281925, "grad_norm": 0.6176816821098328, "learning_rate": 2.888987158245719e-05, "loss": 0.0911, "step": 81940 }, { "epoch": 2.978050730430991, "grad_norm": 0.7683790326118469, "learning_rate": 2.888474538115355e-05, "loss": 2.4532, "step": 81950 }, { "epoch": 2.978414128933789, "grad_norm": 0.7443512082099915, "learning_rate": 2.8879619012470045e-05, "loss": 0.109, "step": 81960 }, { "epoch": 2.978777527436587, "grad_norm": 1.6451610326766968, "learning_rate": 2.8874492476627568e-05, "loss": 0.1023, "step": 81970 }, { "epoch": 2.9791409259393853, "grad_norm": 0.39093294739723206, "learning_rate": 2.886936577384699e-05, "loss": 0.0989, "step": 81980 }, { "epoch": 2.9795043244421833, "grad_norm": 0.4320976436138153, "learning_rate": 2.886423890434922e-05, "loss": 0.1103, "step": 81990 }, { "epoch": 2.9798677229449817, "grad_norm": 0.45375579595565796, "learning_rate": 2.8859111868355128e-05, "loss": 0.1922, "step": 82000 }, { "epoch": 2.9802311214477797, "grad_norm": 1.53719162940979, "learning_rate": 2.8853984666085644e-05, "loss": 0.5349, "step": 82010 }, { "epoch": 2.9805945199505777, "grad_norm": 0.5638075470924377, "learning_rate": 2.8848857297761676e-05, "loss": 0.1004, "step": 82020 }, { "epoch": 2.980957918453376, "grad_norm": 0.7393288612365723, "learning_rate": 2.8843729763604138e-05, "loss": 0.1003, "step": 82030 }, { "epoch": 2.981321316956174, "grad_norm": 0.5188916325569153, "learning_rate": 2.8838602063833962e-05, "loss": 0.109, "step": 82040 }, { "epoch": 2.9816847154589725, "grad_norm": 1.9435770511627197, "learning_rate": 2.883347419867208e-05, "loss": 0.0678, "step": 82050 }, { "epoch": 2.9820481139617705, "grad_norm": 0.795820415019989, "learning_rate": 2.882834616833944e-05, "loss": 
0.0863, "step": 82060 }, { "epoch": 2.9824115124645685, "grad_norm": 0.5096336007118225, "learning_rate": 2.882321797305697e-05, "loss": 0.0888, "step": 82070 }, { "epoch": 2.982774910967367, "grad_norm": 1.0155811309814453, "learning_rate": 2.881808961304565e-05, "loss": 0.0809, "step": 82080 }, { "epoch": 2.983138309470165, "grad_norm": 0.9700034856796265, "learning_rate": 2.8812961088526436e-05, "loss": 0.1124, "step": 82090 }, { "epoch": 2.9835017079729633, "grad_norm": 1.9299287796020508, "learning_rate": 2.8807832399720292e-05, "loss": 0.0942, "step": 82100 }, { "epoch": 2.9838651064757613, "grad_norm": 0.7534053325653076, "learning_rate": 2.8802703546848204e-05, "loss": 0.1195, "step": 82110 }, { "epoch": 2.9842285049785593, "grad_norm": 1.0273375511169434, "learning_rate": 2.8797574530131138e-05, "loss": 0.1052, "step": 82120 }, { "epoch": 2.9845919034813577, "grad_norm": 0.5971968173980713, "learning_rate": 2.8792445349790108e-05, "loss": 0.0975, "step": 82130 }, { "epoch": 2.9849553019841557, "grad_norm": 1.0438076257705688, "learning_rate": 2.8787316006046096e-05, "loss": 0.1066, "step": 82140 }, { "epoch": 2.985318700486954, "grad_norm": 1.0743767023086548, "learning_rate": 2.8782186499120116e-05, "loss": 0.0779, "step": 82150 }, { "epoch": 2.985682098989752, "grad_norm": 0.9644399285316467, "learning_rate": 2.8777056829233172e-05, "loss": 0.085, "step": 82160 }, { "epoch": 2.98604549749255, "grad_norm": 0.2966119647026062, "learning_rate": 2.8771926996606297e-05, "loss": 0.0879, "step": 82170 }, { "epoch": 2.9864088959953485, "grad_norm": 0.8855867981910706, "learning_rate": 2.87667970014605e-05, "loss": 0.1638, "step": 82180 }, { "epoch": 2.9867722944981465, "grad_norm": 0.5556718111038208, "learning_rate": 2.8761666844016822e-05, "loss": 0.1023, "step": 82190 }, { "epoch": 2.987135693000945, "grad_norm": 0.35973209142684937, "learning_rate": 2.8756536524496313e-05, "loss": 0.0726, "step": 82200 }, { "epoch": 2.987135693000945, "eval_loss": 
0.32470783591270447, "eval_runtime": 179.3561, "eval_samples_per_second": 41.337, "eval_steps_per_second": 5.168, "eval_wer": 0.1449252999800316, "step": 82200 }, { "epoch": 2.987499091503743, "grad_norm": 0.5686795711517334, "learning_rate": 2.8751406043119998e-05, "loss": 0.0811, "step": 82210 }, { "epoch": 2.987862490006541, "grad_norm": 0.5881648659706116, "learning_rate": 2.8746275400108956e-05, "loss": 0.1118, "step": 82220 }, { "epoch": 2.9882258885093393, "grad_norm": 0.4261440336704254, "learning_rate": 2.8741144595684227e-05, "loss": 0.0932, "step": 82230 }, { "epoch": 2.9885892870121378, "grad_norm": 0.6575589776039124, "learning_rate": 2.8736013630066894e-05, "loss": 0.1028, "step": 82240 }, { "epoch": 2.9889526855149358, "grad_norm": 0.4109443426132202, "learning_rate": 2.8730882503478024e-05, "loss": 0.0768, "step": 82250 }, { "epoch": 2.9893160840177337, "grad_norm": 0.4477255046367645, "learning_rate": 2.8725751216138706e-05, "loss": 0.0736, "step": 82260 }, { "epoch": 2.989679482520532, "grad_norm": 0.5772513747215271, "learning_rate": 2.8720619768270023e-05, "loss": 0.0978, "step": 82270 }, { "epoch": 2.99004288102333, "grad_norm": 0.8295323252677917, "learning_rate": 2.871548816009307e-05, "loss": 0.0975, "step": 82280 }, { "epoch": 2.9904062795261286, "grad_norm": 1.1587345600128174, "learning_rate": 2.8710356391828953e-05, "loss": 0.2043, "step": 82290 }, { "epoch": 2.9907696780289266, "grad_norm": 0.5980029702186584, "learning_rate": 2.8705224463698778e-05, "loss": 0.0955, "step": 82300 }, { "epoch": 2.9911330765317246, "grad_norm": 0.8250631093978882, "learning_rate": 2.8700092375923666e-05, "loss": 0.0931, "step": 82310 }, { "epoch": 2.991496475034523, "grad_norm": 0.9287375211715698, "learning_rate": 2.8694960128724735e-05, "loss": 0.1066, "step": 82320 }, { "epoch": 2.991859873537321, "grad_norm": 1.5283560752868652, "learning_rate": 2.868982772232312e-05, "loss": 0.0921, "step": 82330 }, { "epoch": 2.9922232720401194, "grad_norm": 
1.8111027479171753, "learning_rate": 2.8684695156939955e-05, "loss": 0.1164, "step": 82340 }, { "epoch": 2.9925866705429174, "grad_norm": 0.3591112494468689, "learning_rate": 2.86795624327964e-05, "loss": 0.0832, "step": 82350 }, { "epoch": 2.9929500690457154, "grad_norm": 1.000104308128357, "learning_rate": 2.8674429550113578e-05, "loss": 0.0898, "step": 82360 }, { "epoch": 2.993313467548514, "grad_norm": 3.1828064918518066, "learning_rate": 2.8669296509112666e-05, "loss": 0.144, "step": 82370 }, { "epoch": 2.993676866051312, "grad_norm": 0.5351777076721191, "learning_rate": 2.866416331001482e-05, "loss": 0.0879, "step": 82380 }, { "epoch": 2.99404026455411, "grad_norm": 0.941906213760376, "learning_rate": 2.865902995304121e-05, "loss": 0.1301, "step": 82390 }, { "epoch": 2.994403663056908, "grad_norm": 2.9968576431274414, "learning_rate": 2.8653896438413024e-05, "loss": 0.0856, "step": 82400 }, { "epoch": 2.994767061559706, "grad_norm": 0.454728364944458, "learning_rate": 2.8648762766351438e-05, "loss": 0.0795, "step": 82410 }, { "epoch": 2.9951304600625046, "grad_norm": 0.9574378728866577, "learning_rate": 2.864362893707765e-05, "loss": 0.0924, "step": 82420 }, { "epoch": 2.9954938585653026, "grad_norm": 0.537486732006073, "learning_rate": 2.8638494950812854e-05, "loss": 0.0825, "step": 82430 }, { "epoch": 2.995857257068101, "grad_norm": 0.4603738486766815, "learning_rate": 2.863336080777826e-05, "loss": 0.1493, "step": 82440 }, { "epoch": 2.996220655570899, "grad_norm": 1.2842907905578613, "learning_rate": 2.862822650819507e-05, "loss": 0.0693, "step": 82450 }, { "epoch": 2.996584054073697, "grad_norm": 1.0182005167007446, "learning_rate": 2.862309205228451e-05, "loss": 0.1398, "step": 82460 }, { "epoch": 2.9969474525764954, "grad_norm": 0.6332042217254639, "learning_rate": 2.8617957440267806e-05, "loss": 0.1113, "step": 82470 }, { "epoch": 2.9973108510792934, "grad_norm": 0.7729670405387878, "learning_rate": 2.861282267236619e-05, "loss": 0.0994, "step": 82480 
}, { "epoch": 2.997674249582092, "grad_norm": 1.1133567094802856, "learning_rate": 2.8607687748800898e-05, "loss": 0.1356, "step": 82490 }, { "epoch": 2.99803764808489, "grad_norm": 0.6192561388015747, "learning_rate": 2.860255266979318e-05, "loss": 0.0742, "step": 82500 }, { "epoch": 2.998401046587688, "grad_norm": 0.7631018757820129, "learning_rate": 2.8597417435564282e-05, "loss": 0.0794, "step": 82510 }, { "epoch": 2.9987644450904862, "grad_norm": 0.49496951699256897, "learning_rate": 2.859228204633547e-05, "loss": 0.1244, "step": 82520 }, { "epoch": 2.9991278435932847, "grad_norm": 0.5484241247177124, "learning_rate": 2.8587146502328e-05, "loss": 0.0882, "step": 82530 }, { "epoch": 2.9994912420960826, "grad_norm": 0.8066346049308777, "learning_rate": 2.8582010803763153e-05, "loss": 0.1345, "step": 82540 }, { "epoch": 2.9998546405988806, "grad_norm": 0.8499393463134766, "learning_rate": 2.8576874950862205e-05, "loss": 0.0929, "step": 82550 }, { "epoch": 3.000218039101679, "grad_norm": 0.6916410326957703, "learning_rate": 2.8571738943846445e-05, "loss": 0.0913, "step": 82560 }, { "epoch": 3.000581437604477, "grad_norm": 0.7126309871673584, "learning_rate": 2.8566602782937162e-05, "loss": 0.0894, "step": 82570 }, { "epoch": 3.000944836107275, "grad_norm": 0.8689286708831787, "learning_rate": 2.8561466468355648e-05, "loss": 0.2882, "step": 82580 }, { "epoch": 3.0013082346100735, "grad_norm": 0.8167956471443176, "learning_rate": 2.8556330000323223e-05, "loss": 0.0969, "step": 82590 }, { "epoch": 3.0016716331128714, "grad_norm": 1.8583896160125732, "learning_rate": 2.8551193379061186e-05, "loss": 0.0697, "step": 82600 }, { "epoch": 3.00203503161567, "grad_norm": 0.6422175168991089, "learning_rate": 2.8546056604790866e-05, "loss": 0.0794, "step": 82610 }, { "epoch": 3.002398430118468, "grad_norm": 0.8441867232322693, "learning_rate": 2.8540919677733584e-05, "loss": 0.0738, "step": 82620 }, { "epoch": 3.0027618286212663, "grad_norm": 0.6802086234092712, 
"learning_rate": 2.8535782598110672e-05, "loss": 0.0804, "step": 82630 }, { "epoch": 3.0031252271240643, "grad_norm": 1.3518569469451904, "learning_rate": 2.8530645366143467e-05, "loss": 0.0901, "step": 82640 }, { "epoch": 3.0034886256268623, "grad_norm": 0.32762405276298523, "learning_rate": 2.852550798205331e-05, "loss": 0.0784, "step": 82650 }, { "epoch": 3.0038520241296607, "grad_norm": 0.43205514550209045, "learning_rate": 2.8520370446061567e-05, "loss": 0.0897, "step": 82660 }, { "epoch": 3.0042154226324587, "grad_norm": 0.6062584519386292, "learning_rate": 2.8515232758389582e-05, "loss": 0.0806, "step": 82670 }, { "epoch": 3.004578821135257, "grad_norm": 0.8513742089271545, "learning_rate": 2.8510094919258722e-05, "loss": 0.0741, "step": 82680 }, { "epoch": 3.004942219638055, "grad_norm": 0.43795377016067505, "learning_rate": 2.850495692889037e-05, "loss": 0.0878, "step": 82690 }, { "epoch": 3.005305618140853, "grad_norm": 1.4670331478118896, "learning_rate": 2.8499818787505884e-05, "loss": 0.0879, "step": 82700 }, { "epoch": 3.0056690166436515, "grad_norm": 0.5501256585121155, "learning_rate": 2.8494680495326665e-05, "loss": 0.073, "step": 82710 }, { "epoch": 3.0060324151464495, "grad_norm": 0.9265702962875366, "learning_rate": 2.84895420525741e-05, "loss": 0.0781, "step": 82720 }, { "epoch": 3.006395813649248, "grad_norm": 0.5779339075088501, "learning_rate": 2.8484403459469576e-05, "loss": 0.0829, "step": 82730 }, { "epoch": 3.006759212152046, "grad_norm": 0.31466546654701233, "learning_rate": 2.8479264716234504e-05, "loss": 0.0862, "step": 82740 }, { "epoch": 3.0071226106548443, "grad_norm": 2.829972505569458, "learning_rate": 2.84741258230903e-05, "loss": 0.0945, "step": 82750 }, { "epoch": 3.0074860091576423, "grad_norm": 0.4769342243671417, "learning_rate": 2.8468986780258382e-05, "loss": 0.0896, "step": 82760 }, { "epoch": 3.0078494076604403, "grad_norm": 0.9461230635643005, "learning_rate": 2.8463847587960157e-05, "loss": 0.0749, "step": 82770 }, { 
"epoch": 3.0082128061632387, "grad_norm": 0.4310390055179596, "learning_rate": 2.8458708246417064e-05, "loss": 0.1645, "step": 82780 }, { "epoch": 3.0085762046660367, "grad_norm": 0.46994635462760925, "learning_rate": 2.845356875585054e-05, "loss": 0.0835, "step": 82790 }, { "epoch": 3.008939603168835, "grad_norm": 1.9111509323120117, "learning_rate": 2.844842911648203e-05, "loss": 0.0781, "step": 82800 }, { "epoch": 3.008939603168835, "eval_loss": 0.33264267444610596, "eval_runtime": 179.8864, "eval_samples_per_second": 41.215, "eval_steps_per_second": 5.153, "eval_wer": 0.14235663586690145, "step": 82800 }, { "epoch": 3.009303001671633, "grad_norm": 0.5043010711669922, "learning_rate": 2.8443289328532973e-05, "loss": 0.0914, "step": 82810 }, { "epoch": 3.009666400174431, "grad_norm": 1.3565329313278198, "learning_rate": 2.8438149392224832e-05, "loss": 0.0642, "step": 82820 }, { "epoch": 3.0100297986772295, "grad_norm": 0.710573136806488, "learning_rate": 2.843300930777907e-05, "loss": 0.0714, "step": 82830 }, { "epoch": 3.0103931971800275, "grad_norm": 0.28599199652671814, "learning_rate": 2.8427869075417145e-05, "loss": 0.0736, "step": 82840 }, { "epoch": 3.010756595682826, "grad_norm": 1.063194751739502, "learning_rate": 2.8422728695360546e-05, "loss": 0.087, "step": 82850 }, { "epoch": 3.011119994185624, "grad_norm": 1.3806395530700684, "learning_rate": 2.841758816783074e-05, "loss": 0.1306, "step": 82860 }, { "epoch": 3.011483392688422, "grad_norm": 0.4422304332256317, "learning_rate": 2.841244749304922e-05, "loss": 0.0665, "step": 82870 }, { "epoch": 3.0118467911912203, "grad_norm": 0.5602436661720276, "learning_rate": 2.840730667123748e-05, "loss": 0.0796, "step": 82880 }, { "epoch": 3.0122101896940183, "grad_norm": 0.39872846007347107, "learning_rate": 2.8402165702617016e-05, "loss": 0.0937, "step": 82890 }, { "epoch": 3.0125735881968168, "grad_norm": 0.5337156057357788, "learning_rate": 2.8397024587409344e-05, "loss": 0.08, "step": 82900 }, { "epoch": 
3.0129369866996147, "grad_norm": 0.46881571412086487, "learning_rate": 2.8391883325835962e-05, "loss": 0.0913, "step": 82910 }, { "epoch": 3.013300385202413, "grad_norm": 2.061661720275879, "learning_rate": 2.8386741918118404e-05, "loss": 0.0662, "step": 82920 }, { "epoch": 3.013663783705211, "grad_norm": 1.4211331605911255, "learning_rate": 2.838160036447818e-05, "loss": 0.0677, "step": 82930 }, { "epoch": 3.014027182208009, "grad_norm": 1.1780022382736206, "learning_rate": 2.837645866513683e-05, "loss": 0.1062, "step": 82940 }, { "epoch": 3.0143905807108076, "grad_norm": 1.3207381963729858, "learning_rate": 2.837131682031589e-05, "loss": 0.1001, "step": 82950 }, { "epoch": 3.0147539792136056, "grad_norm": 0.9408227801322937, "learning_rate": 2.8366174830236896e-05, "loss": 0.0934, "step": 82960 }, { "epoch": 3.015117377716404, "grad_norm": 10.08785629272461, "learning_rate": 2.8361032695121416e-05, "loss": 0.0665, "step": 82970 }, { "epoch": 3.015480776219202, "grad_norm": 0.6198272705078125, "learning_rate": 2.835589041519099e-05, "loss": 0.0794, "step": 82980 }, { "epoch": 3.015844174722, "grad_norm": 0.44829145073890686, "learning_rate": 2.8350747990667192e-05, "loss": 0.0853, "step": 82990 }, { "epoch": 3.0162075732247984, "grad_norm": 0.6157195568084717, "learning_rate": 2.834560542177158e-05, "loss": 0.0791, "step": 83000 }, { "epoch": 3.0165709717275964, "grad_norm": 0.40746578574180603, "learning_rate": 2.8340462708725735e-05, "loss": 0.076, "step": 83010 }, { "epoch": 3.016934370230395, "grad_norm": 5.672430038452148, "learning_rate": 2.833531985175124e-05, "loss": 0.0816, "step": 83020 }, { "epoch": 3.0172977687331928, "grad_norm": 1.2975281476974487, "learning_rate": 2.8330176851069672e-05, "loss": 0.0738, "step": 83030 }, { "epoch": 3.017661167235991, "grad_norm": 0.22850178182125092, "learning_rate": 2.8325033706902643e-05, "loss": 0.1293, "step": 83040 }, { "epoch": 3.018024565738789, "grad_norm": 1.8061107397079468, "learning_rate": 
2.8319890419471728e-05, "loss": 0.0873, "step": 83050 }, { "epoch": 3.018387964241587, "grad_norm": 0.5460423827171326, "learning_rate": 2.8314746988998564e-05, "loss": 0.0753, "step": 83060 }, { "epoch": 3.0187513627443856, "grad_norm": 0.9149671196937561, "learning_rate": 2.8310117779454733e-05, "loss": 3.5039, "step": 83070 }, { "epoch": 3.0191147612471836, "grad_norm": 1.0777734518051147, "learning_rate": 2.83049740778118e-05, "loss": 0.0834, "step": 83080 }, { "epoch": 3.019478159749982, "grad_norm": 1.6478363275527954, "learning_rate": 2.829983023376929e-05, "loss": 0.1151, "step": 83090 }, { "epoch": 3.01984155825278, "grad_norm": 0.5667656660079956, "learning_rate": 2.8294686247548845e-05, "loss": 0.099, "step": 83100 }, { "epoch": 3.020204956755578, "grad_norm": 0.7810095548629761, "learning_rate": 2.828954211937209e-05, "loss": 0.0826, "step": 83110 }, { "epoch": 3.0205683552583764, "grad_norm": 0.45061448216438293, "learning_rate": 2.8284397849460664e-05, "loss": 0.0758, "step": 83120 }, { "epoch": 3.0209317537611744, "grad_norm": 2.199354887008667, "learning_rate": 2.8279253438036228e-05, "loss": 0.0767, "step": 83130 }, { "epoch": 3.021295152263973, "grad_norm": 0.7062342166900635, "learning_rate": 2.827410888532042e-05, "loss": 0.0847, "step": 83140 }, { "epoch": 3.021658550766771, "grad_norm": 1.1551927328109741, "learning_rate": 2.826896419153492e-05, "loss": 0.0789, "step": 83150 }, { "epoch": 3.022021949269569, "grad_norm": 2.4032013416290283, "learning_rate": 2.826381935690137e-05, "loss": 0.1152, "step": 83160 }, { "epoch": 3.0223853477723672, "grad_norm": 1.0286635160446167, "learning_rate": 2.825867438164147e-05, "loss": 0.0724, "step": 83170 }, { "epoch": 3.022748746275165, "grad_norm": 0.9156087636947632, "learning_rate": 2.8253529265976886e-05, "loss": 0.0808, "step": 83180 }, { "epoch": 3.0231121447779636, "grad_norm": 0.3878247141838074, "learning_rate": 2.8248384010129293e-05, "loss": 0.0736, "step": 83190 }, { "epoch": 
3.0234755432807616, "grad_norm": 0.7282688021659851, "learning_rate": 2.8243238614320394e-05, "loss": 0.0954, "step": 83200 }, { "epoch": 3.02383894178356, "grad_norm": 0.40208378434181213, "learning_rate": 2.8238093078771876e-05, "loss": 0.0893, "step": 83210 }, { "epoch": 3.024202340286358, "grad_norm": 4.144686698913574, "learning_rate": 2.823294740370546e-05, "loss": 0.079, "step": 83220 }, { "epoch": 3.024565738789156, "grad_norm": 1.0858464241027832, "learning_rate": 2.8227801589342838e-05, "loss": 0.0808, "step": 83230 }, { "epoch": 3.0249291372919545, "grad_norm": 1.1500715017318726, "learning_rate": 2.822265563590573e-05, "loss": 0.0714, "step": 83240 }, { "epoch": 3.0252925357947524, "grad_norm": 2.532526731491089, "learning_rate": 2.8217509543615862e-05, "loss": 0.0838, "step": 83250 }, { "epoch": 3.025655934297551, "grad_norm": 0.6312755942344666, "learning_rate": 2.8212363312694945e-05, "loss": 0.0853, "step": 83260 }, { "epoch": 3.026019332800349, "grad_norm": 0.5501546859741211, "learning_rate": 2.8207216943364734e-05, "loss": 0.0705, "step": 83270 }, { "epoch": 3.026382731303147, "grad_norm": 6.8282060623168945, "learning_rate": 2.8202070435846946e-05, "loss": 0.076, "step": 83280 }, { "epoch": 3.0267461298059453, "grad_norm": 0.48019352555274963, "learning_rate": 2.8196923790363344e-05, "loss": 0.0919, "step": 83290 }, { "epoch": 3.0271095283087432, "grad_norm": 0.48283651471138, "learning_rate": 2.8191777007135667e-05, "loss": 0.0932, "step": 83300 }, { "epoch": 3.0274729268115417, "grad_norm": 0.7604033350944519, "learning_rate": 2.8186630086385672e-05, "loss": 0.1037, "step": 83310 }, { "epoch": 3.0278363253143397, "grad_norm": 1.1358963251113892, "learning_rate": 2.818148302833513e-05, "loss": 0.0804, "step": 83320 }, { "epoch": 3.028199723817138, "grad_norm": 1.2052534818649292, "learning_rate": 2.81763358332058e-05, "loss": 0.0753, "step": 83330 }, { "epoch": 3.028563122319936, "grad_norm": 0.5695772767066956, "learning_rate": 
2.8171188501219464e-05, "loss": 0.0833, "step": 83340 }, { "epoch": 3.028926520822734, "grad_norm": 0.7455033659934998, "learning_rate": 2.81660410325979e-05, "loss": 0.0772, "step": 83350 }, { "epoch": 3.0292899193255325, "grad_norm": 0.7446203231811523, "learning_rate": 2.816089342756289e-05, "loss": 0.1103, "step": 83360 }, { "epoch": 3.0296533178283305, "grad_norm": 2.548922061920166, "learning_rate": 2.8155745686336234e-05, "loss": 0.0707, "step": 83370 }, { "epoch": 3.030016716331129, "grad_norm": 1.2039028406143188, "learning_rate": 2.815059780913971e-05, "loss": 0.0767, "step": 83380 }, { "epoch": 3.030380114833927, "grad_norm": 0.5070446729660034, "learning_rate": 2.8145449796195152e-05, "loss": 0.1082, "step": 83390 }, { "epoch": 3.030743513336725, "grad_norm": 3.70131778717041, "learning_rate": 2.814030164772434e-05, "loss": 0.0848, "step": 83400 }, { "epoch": 3.030743513336725, "eval_loss": 0.3220129907131195, "eval_runtime": 179.9233, "eval_samples_per_second": 41.206, "eval_steps_per_second": 5.152, "eval_wer": 0.14291938206836458, "step": 83400 }, { "epoch": 3.0311069118395233, "grad_norm": 0.40809836983680725, "learning_rate": 2.813515336394911e-05, "loss": 0.0843, "step": 83410 }, { "epoch": 3.0314703103423213, "grad_norm": 0.9957777261734009, "learning_rate": 2.8130004945091283e-05, "loss": 3.893, "step": 83420 }, { "epoch": 3.0318337088451197, "grad_norm": 0.45973557233810425, "learning_rate": 2.8124856391372673e-05, "loss": 0.0776, "step": 83430 }, { "epoch": 3.0321971073479177, "grad_norm": 0.7182989120483398, "learning_rate": 2.811970770301512e-05, "loss": 0.0772, "step": 83440 }, { "epoch": 3.0325605058507157, "grad_norm": 0.5667139291763306, "learning_rate": 2.811455888024046e-05, "loss": 0.0785, "step": 83450 }, { "epoch": 3.032923904353514, "grad_norm": 0.8791239857673645, "learning_rate": 2.810940992327054e-05, "loss": 0.0729, "step": 83460 }, { "epoch": 3.033287302856312, "grad_norm": 1.0834791660308838, "learning_rate": 
2.810426083232721e-05, "loss": 0.0763, "step": 83470 }, { "epoch": 3.0336507013591105, "grad_norm": 1.9368480443954468, "learning_rate": 2.809911160763233e-05, "loss": 0.0822, "step": 83480 }, { "epoch": 3.0340140998619085, "grad_norm": 1.531395673751831, "learning_rate": 2.8093962249407755e-05, "loss": 0.1231, "step": 83490 }, { "epoch": 3.034377498364707, "grad_norm": 1.5916883945465088, "learning_rate": 2.8088812757875347e-05, "loss": 0.236, "step": 83500 }, { "epoch": 3.034740896867505, "grad_norm": 0.34502482414245605, "learning_rate": 2.8083663133256998e-05, "loss": 0.0986, "step": 83510 }, { "epoch": 3.035104295370303, "grad_norm": 1.0849754810333252, "learning_rate": 2.8078513375774574e-05, "loss": 0.0704, "step": 83520 }, { "epoch": 3.0354676938731013, "grad_norm": 0.6940005421638489, "learning_rate": 2.8073363485649962e-05, "loss": 0.078, "step": 83530 }, { "epoch": 3.0358310923758993, "grad_norm": 0.4291388988494873, "learning_rate": 2.8068213463105054e-05, "loss": 0.0762, "step": 83540 }, { "epoch": 3.0361944908786977, "grad_norm": 1.4870105981826782, "learning_rate": 2.8063063308361736e-05, "loss": 0.0967, "step": 83550 }, { "epoch": 3.0365578893814957, "grad_norm": 0.8705921769142151, "learning_rate": 2.805791302164193e-05, "loss": 0.069, "step": 83560 }, { "epoch": 3.0369212878842937, "grad_norm": 1.3815371990203857, "learning_rate": 2.805276260316752e-05, "loss": 0.0682, "step": 83570 }, { "epoch": 3.037284686387092, "grad_norm": 0.4686858654022217, "learning_rate": 2.8047612053160443e-05, "loss": 0.0705, "step": 83580 }, { "epoch": 3.03764808488989, "grad_norm": 1.2113518714904785, "learning_rate": 2.80424613718426e-05, "loss": 0.3061, "step": 83590 }, { "epoch": 3.0380114833926886, "grad_norm": 1.3406014442443848, "learning_rate": 2.803731055943592e-05, "loss": 0.0936, "step": 83600 }, { "epoch": 3.0383748818954865, "grad_norm": 2.528625726699829, "learning_rate": 2.803215961616234e-05, "loss": 0.0922, "step": 83610 }, { "epoch": 
3.038738280398285, "grad_norm": 0.6336283683776855, "learning_rate": 2.8027008542243784e-05, "loss": 0.086, "step": 83620 }, { "epoch": 3.039101678901083, "grad_norm": 1.0377708673477173, "learning_rate": 2.8021857337902208e-05, "loss": 0.0734, "step": 83630 }, { "epoch": 3.039465077403881, "grad_norm": 0.5187166929244995, "learning_rate": 2.8016706003359545e-05, "loss": 0.0859, "step": 83640 }, { "epoch": 3.0398284759066794, "grad_norm": 0.650485634803772, "learning_rate": 2.801155453883775e-05, "loss": 0.091, "step": 83650 }, { "epoch": 3.0401918744094774, "grad_norm": 1.6549148559570312, "learning_rate": 2.800640294455879e-05, "loss": 0.0801, "step": 83660 }, { "epoch": 3.040555272912276, "grad_norm": 0.75333172082901, "learning_rate": 2.8001251220744628e-05, "loss": 0.0705, "step": 83670 }, { "epoch": 3.0409186714150738, "grad_norm": 1.4784330129623413, "learning_rate": 2.799609936761723e-05, "loss": 0.0881, "step": 83680 }, { "epoch": 3.0412820699178718, "grad_norm": 0.5964809656143188, "learning_rate": 2.7990947385398563e-05, "loss": 0.0899, "step": 83690 }, { "epoch": 3.04164546842067, "grad_norm": 0.6587772369384766, "learning_rate": 2.7985795274310622e-05, "loss": 0.081, "step": 83700 }, { "epoch": 3.042008866923468, "grad_norm": 0.6639897227287292, "learning_rate": 2.7980643034575375e-05, "loss": 0.1012, "step": 83710 }, { "epoch": 3.0423722654262666, "grad_norm": 0.4024466872215271, "learning_rate": 2.797549066641484e-05, "loss": 0.0641, "step": 83720 }, { "epoch": 3.0427356639290646, "grad_norm": 0.6157781481742859, "learning_rate": 2.797033817005099e-05, "loss": 0.074, "step": 83730 }, { "epoch": 3.0430990624318626, "grad_norm": 0.43719834089279175, "learning_rate": 2.7965185545705835e-05, "loss": 0.0881, "step": 83740 }, { "epoch": 3.043462460934661, "grad_norm": 1.6078770160675049, "learning_rate": 2.796003279360139e-05, "loss": 0.0706, "step": 83750 }, { "epoch": 3.043825859437459, "grad_norm": 0.7965851426124573, "learning_rate": 
2.7954879913959658e-05, "loss": 0.0649, "step": 83760 }, { "epoch": 3.0441892579402574, "grad_norm": 0.8984066843986511, "learning_rate": 2.7949726907002672e-05, "loss": 0.0769, "step": 83770 }, { "epoch": 3.0445526564430554, "grad_norm": 2.9094557762145996, "learning_rate": 2.794457377295244e-05, "loss": 0.0899, "step": 83780 }, { "epoch": 3.044916054945854, "grad_norm": 0.49150341749191284, "learning_rate": 2.793942051203101e-05, "loss": 1.4083, "step": 83790 }, { "epoch": 3.045279453448652, "grad_norm": 2.3720130920410156, "learning_rate": 2.7934267124460407e-05, "loss": 0.0894, "step": 83800 }, { "epoch": 3.04564285195145, "grad_norm": 0.31627193093299866, "learning_rate": 2.7929113610462665e-05, "loss": 0.0815, "step": 83810 }, { "epoch": 3.046006250454248, "grad_norm": 0.43195366859436035, "learning_rate": 2.792395997025985e-05, "loss": 0.0851, "step": 83820 }, { "epoch": 3.046369648957046, "grad_norm": 0.6193608641624451, "learning_rate": 2.7918806204073995e-05, "loss": 0.0718, "step": 83830 }, { "epoch": 3.0467330474598446, "grad_norm": 0.6569747924804688, "learning_rate": 2.791365231212717e-05, "loss": 0.097, "step": 83840 }, { "epoch": 3.0470964459626426, "grad_norm": 2.10140323638916, "learning_rate": 2.7908498294641437e-05, "loss": 0.0959, "step": 83850 }, { "epoch": 3.0474598444654406, "grad_norm": 0.7795642018318176, "learning_rate": 2.7903344151838856e-05, "loss": 0.0731, "step": 83860 }, { "epoch": 3.047823242968239, "grad_norm": 0.37401872873306274, "learning_rate": 2.7898189883941507e-05, "loss": 0.6129, "step": 83870 }, { "epoch": 3.048186641471037, "grad_norm": 0.9135899543762207, "learning_rate": 2.7893035491171466e-05, "loss": 0.0889, "step": 83880 }, { "epoch": 3.0485500399738354, "grad_norm": 0.572894275188446, "learning_rate": 2.7887880973750817e-05, "loss": 0.0953, "step": 83890 }, { "epoch": 3.0489134384766334, "grad_norm": 0.4888230264186859, "learning_rate": 2.7882726331901655e-05, "loss": 0.0821, "step": 83900 }, { "epoch": 
3.049276836979432, "grad_norm": 0.5517320036888123, "learning_rate": 2.787757156584608e-05, "loss": 0.0905, "step": 83910 }, { "epoch": 3.04964023548223, "grad_norm": 0.5624126195907593, "learning_rate": 2.7872416675806174e-05, "loss": 0.0658, "step": 83920 }, { "epoch": 3.050003633985028, "grad_norm": 0.606499969959259, "learning_rate": 2.7867261662004058e-05, "loss": 0.0772, "step": 83930 }, { "epoch": 3.0503670324878263, "grad_norm": 0.30192533135414124, "learning_rate": 2.7862106524661835e-05, "loss": 0.0782, "step": 83940 }, { "epoch": 3.0507304309906242, "grad_norm": 0.5810015797615051, "learning_rate": 2.7856951264001623e-05, "loss": 0.0724, "step": 83950 }, { "epoch": 3.0510938294934227, "grad_norm": 0.8986351490020752, "learning_rate": 2.7851795880245556e-05, "loss": 0.082, "step": 83960 }, { "epoch": 3.0514572279962207, "grad_norm": 0.655348539352417, "learning_rate": 2.7846640373615745e-05, "loss": 0.0694, "step": 83970 }, { "epoch": 3.0518206264990186, "grad_norm": 0.9153086543083191, "learning_rate": 2.784148474433433e-05, "loss": 0.0749, "step": 83980 }, { "epoch": 3.052184025001817, "grad_norm": 0.4834425151348114, "learning_rate": 2.783632899262345e-05, "loss": 0.072, "step": 83990 }, { "epoch": 3.052547423504615, "grad_norm": 0.8401370644569397, "learning_rate": 2.783117311870524e-05, "loss": 0.0781, "step": 84000 }, { "epoch": 3.052547423504615, "eval_loss": 0.3286387026309967, "eval_runtime": 180.0952, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.14074100967560405, "step": 84000 }, { "epoch": 3.0529108220074135, "grad_norm": 0.7353171706199646, "learning_rate": 2.7826017122801855e-05, "loss": 0.0685, "step": 84010 }, { "epoch": 3.0532742205102115, "grad_norm": 0.4934822916984558, "learning_rate": 2.7820861005135445e-05, "loss": 0.0677, "step": 84020 }, { "epoch": 3.0536376190130095, "grad_norm": 0.6039226651191711, "learning_rate": 2.781570476592818e-05, "loss": 0.071, "step": 84030 }, { "epoch": 
3.054001017515808, "grad_norm": 0.7619723677635193, "learning_rate": 2.7810548405402215e-05, "loss": 0.0811, "step": 84040 }, { "epoch": 3.054364416018606, "grad_norm": 0.9137590527534485, "learning_rate": 2.7805391923779704e-05, "loss": 0.104, "step": 84050 }, { "epoch": 3.0547278145214043, "grad_norm": 0.40205273032188416, "learning_rate": 2.7800235321282854e-05, "loss": 0.0756, "step": 84060 }, { "epoch": 3.0550912130242023, "grad_norm": 0.5475990772247314, "learning_rate": 2.779507859813381e-05, "loss": 0.0918, "step": 84070 }, { "epoch": 3.0554546115270007, "grad_norm": 0.5701851844787598, "learning_rate": 2.778992175455479e-05, "loss": 0.0744, "step": 84080 }, { "epoch": 3.0558180100297987, "grad_norm": 0.2219076156616211, "learning_rate": 2.7784764790767957e-05, "loss": 0.2494, "step": 84090 }, { "epoch": 3.0561814085325967, "grad_norm": 4.648733615875244, "learning_rate": 2.7779607706995524e-05, "loss": 0.1106, "step": 84100 }, { "epoch": 3.056544807035395, "grad_norm": 0.6483043432235718, "learning_rate": 2.7774450503459687e-05, "loss": 0.0938, "step": 84110 }, { "epoch": 3.056908205538193, "grad_norm": 0.7879406213760376, "learning_rate": 2.7769293180382645e-05, "loss": 0.0727, "step": 84120 }, { "epoch": 3.0572716040409915, "grad_norm": 0.540431022644043, "learning_rate": 2.7764135737986613e-05, "loss": 0.0692, "step": 84130 }, { "epoch": 3.0576350025437895, "grad_norm": 0.498374879360199, "learning_rate": 2.7758978176493805e-05, "loss": 0.0985, "step": 84140 }, { "epoch": 3.0579984010465875, "grad_norm": 2.0039825439453125, "learning_rate": 2.775382049612645e-05, "loss": 0.1214, "step": 84150 }, { "epoch": 3.058361799549386, "grad_norm": 0.6101000905036926, "learning_rate": 2.7748662697106757e-05, "loss": 0.0777, "step": 84160 }, { "epoch": 3.058725198052184, "grad_norm": 1.092410683631897, "learning_rate": 2.7743504779656976e-05, "loss": 0.0606, "step": 84170 }, { "epoch": 3.0590885965549823, "grad_norm": 0.5803566575050354, "learning_rate": 
2.773834674399934e-05, "loss": 0.0756, "step": 84180 }, { "epoch": 3.0594519950577803, "grad_norm": 0.4680107831954956, "learning_rate": 2.7733188590356073e-05, "loss": 0.0878, "step": 84190 }, { "epoch": 3.0598153935605787, "grad_norm": 4.1969380378723145, "learning_rate": 2.7728030318949448e-05, "loss": 0.0911, "step": 84200 }, { "epoch": 3.0601787920633767, "grad_norm": 0.45293834805488586, "learning_rate": 2.7722871930001694e-05, "loss": 0.1119, "step": 84210 }, { "epoch": 3.0605421905661747, "grad_norm": 1.130631685256958, "learning_rate": 2.7718229279634755e-05, "loss": 2.6943, "step": 84220 }, { "epoch": 3.060905589068973, "grad_norm": 0.3156570494174957, "learning_rate": 2.7713070667971197e-05, "loss": 0.0641, "step": 84230 }, { "epoch": 3.061268987571771, "grad_norm": 0.860851526260376, "learning_rate": 2.7707911939411078e-05, "loss": 0.093, "step": 84240 }, { "epoch": 3.0616323860745696, "grad_norm": 1.0920404195785522, "learning_rate": 2.7702753094176664e-05, "loss": 0.1426, "step": 84250 }, { "epoch": 3.0619957845773675, "grad_norm": 0.4944891333580017, "learning_rate": 2.769759413249024e-05, "loss": 0.1016, "step": 84260 }, { "epoch": 3.0623591830801655, "grad_norm": 0.503200113773346, "learning_rate": 2.7692435054574084e-05, "loss": 0.0753, "step": 84270 }, { "epoch": 3.062722581582964, "grad_norm": 0.9437207579612732, "learning_rate": 2.7687275860650476e-05, "loss": 0.0665, "step": 84280 }, { "epoch": 3.063085980085762, "grad_norm": 0.91017085313797, "learning_rate": 2.7682116550941716e-05, "loss": 0.1268, "step": 84290 }, { "epoch": 3.0634493785885604, "grad_norm": 1.9001520872116089, "learning_rate": 2.7676957125670096e-05, "loss": 0.0818, "step": 84300 }, { "epoch": 3.0638127770913584, "grad_norm": 0.6754614114761353, "learning_rate": 2.767179758505792e-05, "loss": 0.0902, "step": 84310 }, { "epoch": 3.0641761755941563, "grad_norm": 1.6156865358352661, "learning_rate": 2.766663792932749e-05, "loss": 0.0744, "step": 84320 }, { "epoch": 
3.0645395740969548, "grad_norm": 0.5703209042549133, "learning_rate": 2.7661478158701122e-05, "loss": 0.082, "step": 84330 }, { "epoch": 3.0649029725997528, "grad_norm": 0.4021599590778351, "learning_rate": 2.7656318273401128e-05, "loss": 0.0933, "step": 84340 }, { "epoch": 3.065266371102551, "grad_norm": 0.5536443591117859, "learning_rate": 2.7651158273649825e-05, "loss": 0.0867, "step": 84350 }, { "epoch": 3.065629769605349, "grad_norm": 0.4906123876571655, "learning_rate": 2.7645998159669557e-05, "loss": 0.0784, "step": 84360 }, { "epoch": 3.0659931681081476, "grad_norm": 1.6186575889587402, "learning_rate": 2.7640837931682627e-05, "loss": 0.1294, "step": 84370 }, { "epoch": 3.0663565666109456, "grad_norm": 1.145196557044983, "learning_rate": 2.76356775899114e-05, "loss": 0.0663, "step": 84380 }, { "epoch": 3.0667199651137436, "grad_norm": 0.4692659378051758, "learning_rate": 2.76305171345782e-05, "loss": 0.091, "step": 84390 }, { "epoch": 3.067083363616542, "grad_norm": 3.6405162811279297, "learning_rate": 2.7625356565905376e-05, "loss": 0.0835, "step": 84400 }, { "epoch": 3.06744676211934, "grad_norm": 1.138875126838684, "learning_rate": 2.762019588411528e-05, "loss": 0.1015, "step": 84410 }, { "epoch": 3.0678101606221384, "grad_norm": 0.8834431767463684, "learning_rate": 2.7615035089430262e-05, "loss": 0.0729, "step": 84420 }, { "epoch": 3.0681735591249364, "grad_norm": 0.36101579666137695, "learning_rate": 2.7609874182072693e-05, "loss": 0.0694, "step": 84430 }, { "epoch": 3.0685369576277344, "grad_norm": 0.5472438931465149, "learning_rate": 2.760471316226492e-05, "loss": 0.0904, "step": 84440 }, { "epoch": 3.068900356130533, "grad_norm": 0.7334174513816833, "learning_rate": 2.7599552030229337e-05, "loss": 0.1055, "step": 84450 }, { "epoch": 3.069263754633331, "grad_norm": 3.81465220451355, "learning_rate": 2.75943907861883e-05, "loss": 0.0912, "step": 84460 }, { "epoch": 3.069627153136129, "grad_norm": 0.5324450135231018, "learning_rate": 
2.7589229430364196e-05, "loss": 0.3292, "step": 84470 }, { "epoch": 3.069990551638927, "grad_norm": 0.7580345273017883, "learning_rate": 2.758406796297941e-05, "loss": 0.1129, "step": 84480 }, { "epoch": 3.0703539501417256, "grad_norm": 0.8137314915657043, "learning_rate": 2.757890638425632e-05, "loss": 0.0886, "step": 84490 }, { "epoch": 3.0707173486445236, "grad_norm": 1.109212875366211, "learning_rate": 2.7573744694417343e-05, "loss": 0.0996, "step": 84500 }, { "epoch": 3.0710807471473216, "grad_norm": 0.991165816783905, "learning_rate": 2.7568582893684852e-05, "loss": 0.1194, "step": 84510 }, { "epoch": 3.07144414565012, "grad_norm": 0.8121623396873474, "learning_rate": 2.7563420982281275e-05, "loss": 0.0811, "step": 84520 }, { "epoch": 3.071807544152918, "grad_norm": 0.5951569080352783, "learning_rate": 2.7558258960429007e-05, "loss": 0.0876, "step": 84530 }, { "epoch": 3.0721709426557164, "grad_norm": 0.7901829481124878, "learning_rate": 2.7553096828350454e-05, "loss": 0.0911, "step": 84540 }, { "epoch": 3.0725343411585144, "grad_norm": 0.6474528312683105, "learning_rate": 2.7547934586268048e-05, "loss": 0.1048, "step": 84550 }, { "epoch": 3.0728977396613124, "grad_norm": 0.7994125485420227, "learning_rate": 2.75427722344042e-05, "loss": 0.0767, "step": 84560 }, { "epoch": 3.073261138164111, "grad_norm": 0.5310266017913818, "learning_rate": 2.7537609772981353e-05, "loss": 0.0821, "step": 84570 }, { "epoch": 3.073624536666909, "grad_norm": 0.7052491307258606, "learning_rate": 2.753244720222193e-05, "loss": 0.0586, "step": 84580 }, { "epoch": 3.0739879351697073, "grad_norm": 0.32586991786956787, "learning_rate": 2.7527284522348362e-05, "loss": 0.096, "step": 84590 }, { "epoch": 3.0743513336725052, "grad_norm": 1.7913528680801392, "learning_rate": 2.7522121733583102e-05, "loss": 0.0981, "step": 84600 }, { "epoch": 3.0743513336725052, "eval_loss": 0.33099600672721863, "eval_runtime": 180.5075, "eval_samples_per_second": 41.073, "eval_steps_per_second": 5.136, 
"eval_wer": 0.14155789932288926, "step": 84600 }, { "epoch": 3.0747147321753037, "grad_norm": 0.8068686723709106, "learning_rate": 2.7516958836148583e-05, "loss": 0.121, "step": 84610 }, { "epoch": 3.0750781306781017, "grad_norm": 2.0272300243377686, "learning_rate": 2.7511795830267268e-05, "loss": 0.2779, "step": 84620 }, { "epoch": 3.0754415291808996, "grad_norm": 0.571723997592926, "learning_rate": 2.7506632716161603e-05, "loss": 0.0819, "step": 84630 }, { "epoch": 3.075804927683698, "grad_norm": 0.5370312929153442, "learning_rate": 2.7501469494054056e-05, "loss": 0.1944, "step": 84640 }, { "epoch": 3.076168326186496, "grad_norm": 0.9437683820724487, "learning_rate": 2.749630616416709e-05, "loss": 0.0791, "step": 84650 }, { "epoch": 3.0765317246892945, "grad_norm": 0.29546335339546204, "learning_rate": 2.749114272672317e-05, "loss": 0.1011, "step": 84660 }, { "epoch": 3.0768951231920925, "grad_norm": 1.31322181224823, "learning_rate": 2.7485979181944778e-05, "loss": 0.0709, "step": 84670 }, { "epoch": 3.0772585216948904, "grad_norm": 0.3159250319004059, "learning_rate": 2.748081553005438e-05, "loss": 0.0715, "step": 84680 }, { "epoch": 3.077621920197689, "grad_norm": 0.9355630278587341, "learning_rate": 2.747565177127448e-05, "loss": 0.5317, "step": 84690 }, { "epoch": 3.077985318700487, "grad_norm": 0.462240993976593, "learning_rate": 2.7470487905827542e-05, "loss": 0.0819, "step": 84700 }, { "epoch": 3.0783487172032853, "grad_norm": 1.28692626953125, "learning_rate": 2.7465323933936077e-05, "loss": 0.0753, "step": 84710 }, { "epoch": 3.0787121157060833, "grad_norm": 0.413011372089386, "learning_rate": 2.7460159855822577e-05, "loss": 0.0573, "step": 84720 }, { "epoch": 3.0790755142088813, "grad_norm": 0.4972487986087799, "learning_rate": 2.745499567170953e-05, "loss": 0.7906, "step": 84730 }, { "epoch": 3.0794389127116797, "grad_norm": 0.48013266921043396, "learning_rate": 2.744983138181947e-05, "loss": 0.0815, "step": 84740 }, { "epoch": 3.0798023112144777, 
"grad_norm": 0.597145676612854, "learning_rate": 2.7444666986374877e-05, "loss": 0.0945, "step": 84750 }, { "epoch": 3.080165709717276, "grad_norm": 1.169415831565857, "learning_rate": 2.7439502485598296e-05, "loss": 0.0948, "step": 84760 }, { "epoch": 3.080529108220074, "grad_norm": 0.42434608936309814, "learning_rate": 2.7434337879712217e-05, "loss": 0.937, "step": 84770 }, { "epoch": 3.0808925067228725, "grad_norm": 1.248987078666687, "learning_rate": 2.7429173168939194e-05, "loss": 0.0861, "step": 84780 }, { "epoch": 3.0812559052256705, "grad_norm": 0.3468557894229889, "learning_rate": 2.742400835350174e-05, "loss": 0.1311, "step": 84790 }, { "epoch": 3.0816193037284685, "grad_norm": 0.5820382237434387, "learning_rate": 2.7418843433622387e-05, "loss": 0.0835, "step": 84800 }, { "epoch": 3.081982702231267, "grad_norm": 0.7529087066650391, "learning_rate": 2.741367840952368e-05, "loss": 0.0695, "step": 84810 }, { "epoch": 3.082346100734065, "grad_norm": 0.4199211895465851, "learning_rate": 2.7408513281428156e-05, "loss": 0.0736, "step": 84820 }, { "epoch": 3.0827094992368633, "grad_norm": 0.3389349579811096, "learning_rate": 2.7403348049558363e-05, "loss": 0.0825, "step": 84830 }, { "epoch": 3.0830728977396613, "grad_norm": 0.3386542797088623, "learning_rate": 2.739818271413686e-05, "loss": 0.0791, "step": 84840 }, { "epoch": 3.0834362962424593, "grad_norm": 1.0355015993118286, "learning_rate": 2.7393017275386186e-05, "loss": 0.0781, "step": 84850 }, { "epoch": 3.0837996947452577, "grad_norm": 0.6449688673019409, "learning_rate": 2.7387851733528918e-05, "loss": 0.078, "step": 84860 }, { "epoch": 3.0841630932480557, "grad_norm": 0.6027774214744568, "learning_rate": 2.7382686088787613e-05, "loss": 0.0954, "step": 84870 }, { "epoch": 3.084526491750854, "grad_norm": 0.3841930627822876, "learning_rate": 2.737752034138485e-05, "loss": 0.0701, "step": 84880 }, { "epoch": 3.084889890253652, "grad_norm": 0.4170621633529663, "learning_rate": 2.7372354491543184e-05, "loss": 
0.0901, "step": 84890 }, { "epoch": 3.0852532887564506, "grad_norm": 0.8120031356811523, "learning_rate": 2.736718853948521e-05, "loss": 0.0809, "step": 84900 }, { "epoch": 3.0856166872592485, "grad_norm": 0.36227869987487793, "learning_rate": 2.73620224854335e-05, "loss": 0.1081, "step": 84910 }, { "epoch": 3.0859800857620465, "grad_norm": 0.6383411884307861, "learning_rate": 2.7356856329610646e-05, "loss": 0.0825, "step": 84920 }, { "epoch": 3.086343484264845, "grad_norm": 0.4589132070541382, "learning_rate": 2.7351690072239246e-05, "loss": 0.0631, "step": 84930 }, { "epoch": 3.086706882767643, "grad_norm": 0.5622755885124207, "learning_rate": 2.734652371354188e-05, "loss": 0.0708, "step": 84940 }, { "epoch": 3.0870702812704414, "grad_norm": 2.5855283737182617, "learning_rate": 2.7341357253741155e-05, "loss": 0.0931, "step": 84950 }, { "epoch": 3.0874336797732393, "grad_norm": 0.4554671049118042, "learning_rate": 2.7336190693059688e-05, "loss": 0.0817, "step": 84960 }, { "epoch": 3.0877970782760373, "grad_norm": 0.824824869632721, "learning_rate": 2.7331024031720065e-05, "loss": 0.0966, "step": 84970 }, { "epoch": 3.0881604767788358, "grad_norm": 1.0286164283752441, "learning_rate": 2.7325857269944916e-05, "loss": 0.0838, "step": 84980 }, { "epoch": 3.0885238752816337, "grad_norm": 0.5027440190315247, "learning_rate": 2.7320690407956844e-05, "loss": 0.0819, "step": 84990 }, { "epoch": 3.088887273784432, "grad_norm": 0.5407890677452087, "learning_rate": 2.7315523445978486e-05, "loss": 0.1, "step": 85000 }, { "epoch": 3.08925067228723, "grad_norm": 0.7916487455368042, "learning_rate": 2.731035638423246e-05, "loss": 0.0742, "step": 85010 }, { "epoch": 3.089614070790028, "grad_norm": 189.06448364257812, "learning_rate": 2.7305189222941403e-05, "loss": 3.2542, "step": 85020 }, { "epoch": 3.0899774692928266, "grad_norm": 1.7703773975372314, "learning_rate": 2.730002196232794e-05, "loss": 0.0749, "step": 85030 }, { "epoch": 3.0903408677956246, "grad_norm": 
0.6317950487136841, "learning_rate": 2.7294854602614712e-05, "loss": 0.0938, "step": 85040 }, { "epoch": 3.090704266298423, "grad_norm": 1.0689524412155151, "learning_rate": 2.7289687144024368e-05, "loss": 0.0934, "step": 85050 }, { "epoch": 3.091067664801221, "grad_norm": 0.41926833987236023, "learning_rate": 2.7284519586779545e-05, "loss": 0.0997, "step": 85060 }, { "epoch": 3.0914310633040194, "grad_norm": 0.8323477506637573, "learning_rate": 2.727935193110291e-05, "loss": 0.0701, "step": 85070 }, { "epoch": 3.0917944618068174, "grad_norm": 1.0824620723724365, "learning_rate": 2.7274184177217104e-05, "loss": 0.0732, "step": 85080 }, { "epoch": 3.0921578603096154, "grad_norm": 0.6392784118652344, "learning_rate": 2.72690163253448e-05, "loss": 0.1011, "step": 85090 }, { "epoch": 3.092521258812414, "grad_norm": 1.3124550580978394, "learning_rate": 2.7263848375708646e-05, "loss": 0.0908, "step": 85100 }, { "epoch": 3.092884657315212, "grad_norm": 0.42463892698287964, "learning_rate": 2.7258680328531326e-05, "loss": 0.0768, "step": 85110 }, { "epoch": 3.09324805581801, "grad_norm": 0.8673943281173706, "learning_rate": 2.725351218403551e-05, "loss": 0.101, "step": 85120 }, { "epoch": 3.093611454320808, "grad_norm": 0.26485633850097656, "learning_rate": 2.7248343942443866e-05, "loss": 0.0684, "step": 85130 }, { "epoch": 3.093974852823606, "grad_norm": 1.3009984493255615, "learning_rate": 2.7243175603979088e-05, "loss": 0.1174, "step": 85140 }, { "epoch": 3.0943382513264046, "grad_norm": 1.3685442209243774, "learning_rate": 2.723800716886385e-05, "loss": 0.0942, "step": 85150 }, { "epoch": 3.0947016498292026, "grad_norm": 1.106117844581604, "learning_rate": 2.7232838637320846e-05, "loss": 0.1083, "step": 85160 }, { "epoch": 3.095065048332001, "grad_norm": 0.48407310247421265, "learning_rate": 2.722767000957277e-05, "loss": 0.0596, "step": 85170 }, { "epoch": 3.095428446834799, "grad_norm": 0.6600604057312012, "learning_rate": 2.7222501285842315e-05, "loss": 0.0893, 
"step": 85180 }, { "epoch": 3.0957918453375974, "grad_norm": 0.9965558648109436, "learning_rate": 2.7217332466352192e-05, "loss": 0.1042, "step": 85190 }, { "epoch": 3.0961552438403954, "grad_norm": 0.9833908081054688, "learning_rate": 2.7212163551325105e-05, "loss": 0.0964, "step": 85200 }, { "epoch": 3.0961552438403954, "eval_loss": 0.3095722794532776, "eval_runtime": 179.4671, "eval_samples_per_second": 41.311, "eval_steps_per_second": 5.165, "eval_wer": 0.14271969793236153, "step": 85200 }, { "epoch": 3.0965186423431934, "grad_norm": 2.14083194732666, "learning_rate": 2.720699454098376e-05, "loss": 0.087, "step": 85210 }, { "epoch": 3.096882040845992, "grad_norm": 1.5555180311203003, "learning_rate": 2.720182543555087e-05, "loss": 0.0806, "step": 85220 }, { "epoch": 3.09724543934879, "grad_norm": 0.7186011075973511, "learning_rate": 2.7196656235249157e-05, "loss": 0.0788, "step": 85230 }, { "epoch": 3.0976088378515882, "grad_norm": 0.28633779287338257, "learning_rate": 2.719148694030134e-05, "loss": 0.1674, "step": 85240 }, { "epoch": 3.0979722363543862, "grad_norm": 0.8628358244895935, "learning_rate": 2.718631755093016e-05, "loss": 0.0893, "step": 85250 }, { "epoch": 3.098335634857184, "grad_norm": 0.41827306151390076, "learning_rate": 2.718114806735832e-05, "loss": 0.0989, "step": 85260 }, { "epoch": 3.0986990333599826, "grad_norm": 0.5456732511520386, "learning_rate": 2.7175978489808577e-05, "loss": 0.0693, "step": 85270 }, { "epoch": 3.0990624318627806, "grad_norm": 0.7632073760032654, "learning_rate": 2.7170808818503667e-05, "loss": 0.1152, "step": 85280 }, { "epoch": 3.099425830365579, "grad_norm": 0.419622540473938, "learning_rate": 2.716563905366632e-05, "loss": 0.0821, "step": 85290 }, { "epoch": 3.099789228868377, "grad_norm": 0.8410579562187195, "learning_rate": 2.71604691955193e-05, "loss": 0.0906, "step": 85300 }, { "epoch": 3.100152627371175, "grad_norm": 1.2106753587722778, "learning_rate": 2.7155299244285343e-05, "loss": 0.0966, "step": 85310 
}, { "epoch": 3.1005160258739735, "grad_norm": 1.0297480821609497, "learning_rate": 2.7150129200187213e-05, "loss": 0.0814, "step": 85320 }, { "epoch": 3.1008794243767714, "grad_norm": 0.5016379356384277, "learning_rate": 2.7144959063447667e-05, "loss": 0.0808, "step": 85330 }, { "epoch": 3.10124282287957, "grad_norm": 0.8718425035476685, "learning_rate": 2.7139788834289463e-05, "loss": 0.094, "step": 85340 }, { "epoch": 3.101606221382368, "grad_norm": 1.8094639778137207, "learning_rate": 2.7134618512935372e-05, "loss": 0.097, "step": 85350 }, { "epoch": 3.1019696198851663, "grad_norm": 0.45816463232040405, "learning_rate": 2.7129448099608167e-05, "loss": 0.0649, "step": 85360 }, { "epoch": 3.1023330183879643, "grad_norm": 5.5397868156433105, "learning_rate": 2.7124277594530624e-05, "loss": 0.0717, "step": 85370 }, { "epoch": 3.1026964168907623, "grad_norm": 0.3643113076686859, "learning_rate": 2.7119106997925513e-05, "loss": 0.0649, "step": 85380 }, { "epoch": 3.1030598153935607, "grad_norm": 1.1016013622283936, "learning_rate": 2.711393631001562e-05, "loss": 0.0926, "step": 85390 }, { "epoch": 3.1034232138963587, "grad_norm": 2.1157453060150146, "learning_rate": 2.7108765531023733e-05, "loss": 0.0879, "step": 85400 }, { "epoch": 3.103786612399157, "grad_norm": 0.9492454528808594, "learning_rate": 2.7103594661172644e-05, "loss": 0.0777, "step": 85410 }, { "epoch": 3.104150010901955, "grad_norm": 0.3126516342163086, "learning_rate": 2.7098423700685143e-05, "loss": 0.0567, "step": 85420 }, { "epoch": 3.104513409404753, "grad_norm": 0.3263763189315796, "learning_rate": 2.7093252649784035e-05, "loss": 0.067, "step": 85430 }, { "epoch": 3.1048768079075515, "grad_norm": 0.8375474214553833, "learning_rate": 2.7088081508692115e-05, "loss": 0.0907, "step": 85440 }, { "epoch": 3.1052402064103495, "grad_norm": 0.8583451509475708, "learning_rate": 2.7082910277632194e-05, "loss": 0.1196, "step": 85450 }, { "epoch": 3.105603604913148, "grad_norm": 1.2481297254562378, 
"learning_rate": 2.707773895682708e-05, "loss": 0.0821, "step": 85460 }, { "epoch": 3.105967003415946, "grad_norm": 0.3148916959762573, "learning_rate": 2.7072567546499585e-05, "loss": 0.0726, "step": 85470 }, { "epoch": 3.1063304019187443, "grad_norm": 0.3623039126396179, "learning_rate": 2.7067396046872533e-05, "loss": 0.0876, "step": 85480 }, { "epoch": 3.1066938004215423, "grad_norm": 0.5890102982521057, "learning_rate": 2.7062224458168733e-05, "loss": 0.1002, "step": 85490 }, { "epoch": 3.1070571989243403, "grad_norm": 1.0585196018218994, "learning_rate": 2.705705278061103e-05, "loss": 0.0956, "step": 85500 }, { "epoch": 3.1074205974271387, "grad_norm": 0.3192172944545746, "learning_rate": 2.7051881014422232e-05, "loss": 0.1025, "step": 85510 }, { "epoch": 3.1077839959299367, "grad_norm": 0.47871172428131104, "learning_rate": 2.7046709159825184e-05, "loss": 0.092, "step": 85520 }, { "epoch": 3.108147394432735, "grad_norm": 0.516779899597168, "learning_rate": 2.704153721704273e-05, "loss": 0.0672, "step": 85530 }, { "epoch": 3.108510792935533, "grad_norm": 0.5075317621231079, "learning_rate": 2.7036365186297685e-05, "loss": 0.0803, "step": 85540 }, { "epoch": 3.108874191438331, "grad_norm": 0.664806604385376, "learning_rate": 2.703119306781292e-05, "loss": 0.0831, "step": 85550 }, { "epoch": 3.1092375899411295, "grad_norm": 0.8135377168655396, "learning_rate": 2.702602086181127e-05, "loss": 0.1029, "step": 85560 }, { "epoch": 3.1096009884439275, "grad_norm": 1.3879525661468506, "learning_rate": 2.702084856851559e-05, "loss": 0.0779, "step": 85570 }, { "epoch": 3.109964386946726, "grad_norm": 0.43086570501327515, "learning_rate": 2.7015676188148732e-05, "loss": 0.0809, "step": 85580 }, { "epoch": 3.110327785449524, "grad_norm": 0.5117005705833435, "learning_rate": 2.7010503720933567e-05, "loss": 0.103, "step": 85590 }, { "epoch": 3.110691183952322, "grad_norm": 1.2577043771743774, "learning_rate": 2.7005331167092945e-05, "loss": 0.1145, "step": 85600 }, { 
"epoch": 3.1110545824551203, "grad_norm": 0.43926846981048584, "learning_rate": 2.7000158526849734e-05, "loss": 0.5229, "step": 85610 }, { "epoch": 3.1114179809579183, "grad_norm": 0.6876174807548523, "learning_rate": 2.6994985800426813e-05, "loss": 0.0711, "step": 85620 }, { "epoch": 3.1117813794607168, "grad_norm": 0.5573021769523621, "learning_rate": 2.6989812988047053e-05, "loss": 0.0801, "step": 85630 }, { "epoch": 3.1121447779635147, "grad_norm": 0.8710441589355469, "learning_rate": 2.6984640089933332e-05, "loss": 0.0774, "step": 85640 }, { "epoch": 3.112508176466313, "grad_norm": 0.9022230505943298, "learning_rate": 2.697946710630853e-05, "loss": 0.0983, "step": 85650 }, { "epoch": 3.112871574969111, "grad_norm": 1.3726192712783813, "learning_rate": 2.6974294037395533e-05, "loss": 0.1686, "step": 85660 }, { "epoch": 3.113234973471909, "grad_norm": 0.6950212121009827, "learning_rate": 2.6969120883417228e-05, "loss": 0.0732, "step": 85670 }, { "epoch": 3.1135983719747076, "grad_norm": 0.5118197202682495, "learning_rate": 2.696394764459651e-05, "loss": 0.1856, "step": 85680 }, { "epoch": 3.1139617704775056, "grad_norm": 0.7940520644187927, "learning_rate": 2.6958774321156278e-05, "loss": 0.0907, "step": 85690 }, { "epoch": 3.114325168980304, "grad_norm": 4.099023342132568, "learning_rate": 2.6953600913319427e-05, "loss": 0.0699, "step": 85700 }, { "epoch": 3.114688567483102, "grad_norm": 0.6201004385948181, "learning_rate": 2.694842742130887e-05, "loss": 0.0649, "step": 85710 }, { "epoch": 3.1150519659859, "grad_norm": 1.4649277925491333, "learning_rate": 2.6943253845347506e-05, "loss": 0.0812, "step": 85720 }, { "epoch": 3.1154153644886984, "grad_norm": 1.2290292978286743, "learning_rate": 2.6938080185658242e-05, "loss": 0.088, "step": 85730 }, { "epoch": 3.1157787629914964, "grad_norm": 3.397437334060669, "learning_rate": 2.6932906442464005e-05, "loss": 0.1178, "step": 85740 }, { "epoch": 3.116142161494295, "grad_norm": 1.0771185159683228, "learning_rate": 
2.69277326159877e-05, "loss": 0.086, "step": 85750 }, { "epoch": 3.116505559997093, "grad_norm": 0.9583094716072083, "learning_rate": 2.692255870645226e-05, "loss": 0.0721, "step": 85760 }, { "epoch": 3.116868958499891, "grad_norm": 0.3439558744430542, "learning_rate": 2.6917384714080606e-05, "loss": 0.0629, "step": 85770 }, { "epoch": 3.117232357002689, "grad_norm": 0.909960150718689, "learning_rate": 2.691221063909567e-05, "loss": 0.0839, "step": 85780 }, { "epoch": 3.117595755505487, "grad_norm": 0.5029392242431641, "learning_rate": 2.6907036481720377e-05, "loss": 0.09, "step": 85790 }, { "epoch": 3.1179591540082856, "grad_norm": 0.8775815963745117, "learning_rate": 2.6901862242177667e-05, "loss": 0.0989, "step": 85800 }, { "epoch": 3.1179591540082856, "eval_loss": 0.33487364649772644, "eval_runtime": 179.4409, "eval_samples_per_second": 41.317, "eval_steps_per_second": 5.166, "eval_wer": 0.14279231034545356, "step": 85800 }, { "epoch": 3.1183225525110836, "grad_norm": 0.7273783683776855, "learning_rate": 2.689668792069048e-05, "loss": 0.0869, "step": 85810 }, { "epoch": 3.118685951013882, "grad_norm": 0.24581924080848694, "learning_rate": 2.689151351748176e-05, "loss": 0.0647, "step": 85820 }, { "epoch": 3.11904934951668, "grad_norm": 0.9781889915466309, "learning_rate": 2.688633903277445e-05, "loss": 0.0781, "step": 85830 }, { "epoch": 3.119412748019478, "grad_norm": 0.4281591773033142, "learning_rate": 2.688116446679151e-05, "loss": 0.1053, "step": 85840 }, { "epoch": 3.1197761465222764, "grad_norm": 1.24199378490448, "learning_rate": 2.6875989819755876e-05, "loss": 0.0792, "step": 85850 }, { "epoch": 3.1201395450250744, "grad_norm": 1.8490883111953735, "learning_rate": 2.6870815091890523e-05, "loss": 0.1136, "step": 85860 }, { "epoch": 3.120502943527873, "grad_norm": 0.7298690676689148, "learning_rate": 2.6865640283418398e-05, "loss": 0.082, "step": 85870 }, { "epoch": 3.120866342030671, "grad_norm": 0.5065509080886841, "learning_rate": 
2.6860465394562478e-05, "loss": 0.0723, "step": 85880 }, { "epoch": 3.121229740533469, "grad_norm": 0.9484612941741943, "learning_rate": 2.6855290425545713e-05, "loss": 0.1147, "step": 85890 }, { "epoch": 3.1215931390362672, "grad_norm": 0.5501681566238403, "learning_rate": 2.68501153765911e-05, "loss": 0.1155, "step": 85900 }, { "epoch": 3.121956537539065, "grad_norm": 1.5682904720306396, "learning_rate": 2.684494024792159e-05, "loss": 0.1119, "step": 85910 }, { "epoch": 3.1223199360418636, "grad_norm": 0.8644644618034363, "learning_rate": 2.6839765039760168e-05, "loss": 0.0715, "step": 85920 }, { "epoch": 3.1226833345446616, "grad_norm": 0.5618588924407959, "learning_rate": 2.683458975232982e-05, "loss": 0.0808, "step": 85930 }, { "epoch": 3.12304673304746, "grad_norm": 0.45507627725601196, "learning_rate": 2.682941438585352e-05, "loss": 0.0747, "step": 85940 }, { "epoch": 3.123410131550258, "grad_norm": 0.4180305302143097, "learning_rate": 2.682423894055427e-05, "loss": 0.0642, "step": 85950 }, { "epoch": 3.123773530053056, "grad_norm": 1.2814691066741943, "learning_rate": 2.6819063416655054e-05, "loss": 0.1001, "step": 85960 }, { "epoch": 3.1241369285558545, "grad_norm": 0.5419327020645142, "learning_rate": 2.6813887814378864e-05, "loss": 1.3428, "step": 85970 }, { "epoch": 3.1245003270586524, "grad_norm": 0.9683062434196472, "learning_rate": 2.6808712133948705e-05, "loss": 0.0685, "step": 85980 }, { "epoch": 3.124863725561451, "grad_norm": 1.1266988515853882, "learning_rate": 2.6803536375587572e-05, "loss": 0.103, "step": 85990 }, { "epoch": 3.125227124064249, "grad_norm": 0.9783998727798462, "learning_rate": 2.679836053951848e-05, "loss": 0.0788, "step": 86000 }, { "epoch": 3.125590522567047, "grad_norm": 2.445962905883789, "learning_rate": 2.6793184625964425e-05, "loss": 0.0841, "step": 86010 }, { "epoch": 3.1259539210698453, "grad_norm": 0.41320154070854187, "learning_rate": 2.678800863514843e-05, "loss": 0.0663, "step": 86020 }, { "epoch": 
3.1263173195726432, "grad_norm": 0.4643288850784302, "learning_rate": 2.6782832567293504e-05, "loss": 0.0801, "step": 86030 }, { "epoch": 3.1266807180754417, "grad_norm": 0.5545969605445862, "learning_rate": 2.677765642262266e-05, "loss": 0.0976, "step": 86040 }, { "epoch": 3.1270441165782397, "grad_norm": 0.8363248109817505, "learning_rate": 2.677248020135893e-05, "loss": 0.0715, "step": 86050 }, { "epoch": 3.127407515081038, "grad_norm": 1.5308492183685303, "learning_rate": 2.6767303903725332e-05, "loss": 0.1622, "step": 86060 }, { "epoch": 3.127770913583836, "grad_norm": 2.161001682281494, "learning_rate": 2.6762127529944903e-05, "loss": 0.081, "step": 86070 }, { "epoch": 3.128134312086634, "grad_norm": 0.6405352354049683, "learning_rate": 2.6756951080240662e-05, "loss": 0.0689, "step": 86080 }, { "epoch": 3.1284977105894325, "grad_norm": 12.528480529785156, "learning_rate": 2.675177455483565e-05, "loss": 0.9386, "step": 86090 }, { "epoch": 3.1288611090922305, "grad_norm": 3.0616352558135986, "learning_rate": 2.674659795395291e-05, "loss": 0.0755, "step": 86100 }, { "epoch": 3.129224507595029, "grad_norm": 0.49330297112464905, "learning_rate": 2.6741421277815475e-05, "loss": 0.0792, "step": 86110 }, { "epoch": 3.129587906097827, "grad_norm": 7.257319450378418, "learning_rate": 2.6736244526646398e-05, "loss": 0.0865, "step": 86120 }, { "epoch": 3.129951304600625, "grad_norm": 1.1979786157608032, "learning_rate": 2.6731067700668712e-05, "loss": 0.0974, "step": 86130 }, { "epoch": 3.1303147031034233, "grad_norm": 0.2251403033733368, "learning_rate": 2.6725890800105486e-05, "loss": 0.0755, "step": 86140 }, { "epoch": 3.1306781016062213, "grad_norm": 0.9207643270492554, "learning_rate": 2.6720713825179767e-05, "loss": 0.1739, "step": 86150 }, { "epoch": 3.1310415001090197, "grad_norm": 0.5833568572998047, "learning_rate": 2.671553677611461e-05, "loss": 0.0747, "step": 86160 }, { "epoch": 3.1314048986118177, "grad_norm": 0.5385452508926392, "learning_rate": 
2.6710359653133078e-05, "loss": 2.187, "step": 86170 }, { "epoch": 3.1317682971146157, "grad_norm": 0.7017218470573425, "learning_rate": 2.670518245645823e-05, "loss": 0.0838, "step": 86180 }, { "epoch": 3.132131695617414, "grad_norm": 0.682905375957489, "learning_rate": 2.670000518631314e-05, "loss": 0.0946, "step": 86190 }, { "epoch": 3.132495094120212, "grad_norm": 0.5854523181915283, "learning_rate": 2.669482784292087e-05, "loss": 0.1085, "step": 86200 }, { "epoch": 3.1328584926230105, "grad_norm": 0.4975448548793793, "learning_rate": 2.6689650426504504e-05, "loss": 0.0851, "step": 86210 }, { "epoch": 3.1332218911258085, "grad_norm": 3.5613021850585938, "learning_rate": 2.6684472937287115e-05, "loss": 0.0911, "step": 86220 }, { "epoch": 3.1335852896286065, "grad_norm": 1.3843308687210083, "learning_rate": 2.6679295375491776e-05, "loss": 0.0627, "step": 86230 }, { "epoch": 3.133948688131405, "grad_norm": 3.7349839210510254, "learning_rate": 2.6674117741341575e-05, "loss": 0.0982, "step": 86240 }, { "epoch": 3.134312086634203, "grad_norm": 0.9458445906639099, "learning_rate": 2.66689400350596e-05, "loss": 0.1023, "step": 86250 }, { "epoch": 3.1346754851370013, "grad_norm": 0.45296090841293335, "learning_rate": 2.6663762256868928e-05, "loss": 0.0625, "step": 86260 }, { "epoch": 3.1350388836397993, "grad_norm": 0.7236283421516418, "learning_rate": 2.665858440699267e-05, "loss": 0.0708, "step": 86270 }, { "epoch": 3.1354022821425978, "grad_norm": 2.072012424468994, "learning_rate": 2.665340648565391e-05, "loss": 0.0683, "step": 86280 }, { "epoch": 3.1357656806453957, "grad_norm": 0.8755659461021423, "learning_rate": 2.6648228493075744e-05, "loss": 0.0866, "step": 86290 }, { "epoch": 3.1361290791481937, "grad_norm": 0.7825614213943481, "learning_rate": 2.6643050429481275e-05, "loss": 0.0887, "step": 86300 }, { "epoch": 3.136492477650992, "grad_norm": 0.9423545002937317, "learning_rate": 2.6637872295093612e-05, "loss": 0.093, "step": 86310 }, { "epoch": 
3.13685587615379, "grad_norm": 0.37489980459213257, "learning_rate": 2.6632694090135856e-05, "loss": 0.0797, "step": 86320 }, { "epoch": 3.1372192746565886, "grad_norm": 0.4927680790424347, "learning_rate": 2.6627515814831126e-05, "loss": 0.0708, "step": 86330 }, { "epoch": 3.1375826731593865, "grad_norm": 0.7253931760787964, "learning_rate": 2.6622337469402537e-05, "loss": 0.0974, "step": 86340 }, { "epoch": 3.137946071662185, "grad_norm": 0.8817862272262573, "learning_rate": 2.6617159054073182e-05, "loss": 0.0893, "step": 86350 }, { "epoch": 3.138309470164983, "grad_norm": 0.9500261545181274, "learning_rate": 2.6611980569066208e-05, "loss": 0.0715, "step": 86360 }, { "epoch": 3.138672868667781, "grad_norm": 0.5115967988967896, "learning_rate": 2.660680201460472e-05, "loss": 0.0713, "step": 86370 }, { "epoch": 3.1390362671705794, "grad_norm": 0.752263069152832, "learning_rate": 2.6601623390911857e-05, "loss": 0.0685, "step": 86380 }, { "epoch": 3.1393996656733774, "grad_norm": 0.39566388726234436, "learning_rate": 2.6596444698210738e-05, "loss": 1.6043, "step": 86390 }, { "epoch": 3.139763064176176, "grad_norm": 0.3248102068901062, "learning_rate": 2.6591265936724495e-05, "loss": 0.2024, "step": 86400 }, { "epoch": 3.139763064176176, "eval_loss": 0.30238452553749084, "eval_runtime": 179.688, "eval_samples_per_second": 41.26, "eval_steps_per_second": 5.159, "eval_wer": 0.142565396554541, "step": 86400 }, { "epoch": 3.1401264626789738, "grad_norm": 0.7450832724571228, "learning_rate": 2.6586087106676272e-05, "loss": 0.0831, "step": 86410 }, { "epoch": 3.1404898611817718, "grad_norm": 0.35402756929397583, "learning_rate": 2.658090820828919e-05, "loss": 0.0802, "step": 86420 }, { "epoch": 3.14085325968457, "grad_norm": 0.7039386034011841, "learning_rate": 2.65757292417864e-05, "loss": 0.0693, "step": 86430 }, { "epoch": 3.141216658187368, "grad_norm": 0.5584103465080261, "learning_rate": 2.6570550207391043e-05, "loss": 0.0747, "step": 86440 }, { "epoch": 
3.1415800566901666, "grad_norm": 0.5645958185195923, "learning_rate": 2.656537110532627e-05, "loss": 0.0887, "step": 86450 }, { "epoch": 3.1419434551929646, "grad_norm": 0.7481971979141235, "learning_rate": 2.656019193581522e-05, "loss": 0.1027, "step": 86460 }, { "epoch": 3.1423068536957626, "grad_norm": 0.48159149289131165, "learning_rate": 2.6555012699081057e-05, "loss": 0.0642, "step": 86470 }, { "epoch": 3.142670252198561, "grad_norm": 0.6558395028114319, "learning_rate": 2.654983339534693e-05, "loss": 0.0767, "step": 86480 }, { "epoch": 3.143033650701359, "grad_norm": 0.8240875601768494, "learning_rate": 2.654465402483599e-05, "loss": 0.1036, "step": 86490 }, { "epoch": 3.1433970492041574, "grad_norm": 1.9741828441619873, "learning_rate": 2.6539474587771406e-05, "loss": 0.0872, "step": 86500 }, { "epoch": 3.1437604477069554, "grad_norm": 0.4783799350261688, "learning_rate": 2.6534295084376337e-05, "loss": 3.1495, "step": 86510 }, { "epoch": 3.1441238462097534, "grad_norm": 0.5813152194023132, "learning_rate": 2.652911551487396e-05, "loss": 0.078, "step": 86520 }, { "epoch": 3.144487244712552, "grad_norm": 1.1872795820236206, "learning_rate": 2.6523935879487432e-05, "loss": 0.0817, "step": 86530 }, { "epoch": 3.14485064321535, "grad_norm": 0.5647823214530945, "learning_rate": 2.6518756178439925e-05, "loss": 1.1158, "step": 86540 }, { "epoch": 3.1452140417181482, "grad_norm": 1.1907984018325806, "learning_rate": 2.6513576411954627e-05, "loss": 0.1104, "step": 86550 }, { "epoch": 3.145577440220946, "grad_norm": 0.5368396043777466, "learning_rate": 2.6508396580254697e-05, "loss": 0.0929, "step": 86560 }, { "epoch": 3.1459408387237446, "grad_norm": 0.34880682826042175, "learning_rate": 2.650321668356333e-05, "loss": 0.0714, "step": 86570 }, { "epoch": 3.1463042372265426, "grad_norm": 0.4075338542461395, "learning_rate": 2.6498036722103703e-05, "loss": 0.0836, "step": 86580 }, { "epoch": 3.1466676357293406, "grad_norm": 5.59214973449707, "learning_rate": 
2.6492856696099006e-05, "loss": 0.0829, "step": 86590 }, { "epoch": 3.147031034232139, "grad_norm": 0.9753894209861755, "learning_rate": 2.6487676605772426e-05, "loss": 0.0899, "step": 86600 }, { "epoch": 3.147394432734937, "grad_norm": 0.3571523129940033, "learning_rate": 2.648249645134715e-05, "loss": 0.0722, "step": 86610 }, { "epoch": 3.1477578312377354, "grad_norm": 0.5043341517448425, "learning_rate": 2.647731623304638e-05, "loss": 0.0663, "step": 86620 }, { "epoch": 3.1481212297405334, "grad_norm": 5.329759120941162, "learning_rate": 2.647213595109331e-05, "loss": 0.0844, "step": 86630 }, { "epoch": 3.148484628243332, "grad_norm": 0.3437232971191406, "learning_rate": 2.6466955605711136e-05, "loss": 0.1001, "step": 86640 }, { "epoch": 3.14884802674613, "grad_norm": 2.181110143661499, "learning_rate": 2.646177519712307e-05, "loss": 0.0972, "step": 86650 }, { "epoch": 3.149211425248928, "grad_norm": 0.8751040101051331, "learning_rate": 2.6456594725552302e-05, "loss": 0.0811, "step": 86660 }, { "epoch": 3.1495748237517263, "grad_norm": 0.36447498202323914, "learning_rate": 2.6451414191222062e-05, "loss": 0.0606, "step": 86670 }, { "epoch": 3.1499382222545242, "grad_norm": 0.47651347517967224, "learning_rate": 2.644623359435554e-05, "loss": 0.0665, "step": 86680 }, { "epoch": 3.1503016207573227, "grad_norm": 0.4340047836303711, "learning_rate": 2.6441052935175964e-05, "loss": 0.0787, "step": 86690 }, { "epoch": 3.1506650192601207, "grad_norm": 6.734339714050293, "learning_rate": 2.6435872213906538e-05, "loss": 0.0808, "step": 86700 }, { "epoch": 3.1510284177629186, "grad_norm": 0.3784515857696533, "learning_rate": 2.6430691430770494e-05, "loss": 0.0739, "step": 86710 }, { "epoch": 3.151391816265717, "grad_norm": 1.0528838634490967, "learning_rate": 2.6425510585991047e-05, "loss": 0.0704, "step": 86720 }, { "epoch": 3.151755214768515, "grad_norm": 0.8241010308265686, "learning_rate": 2.6420329679791412e-05, "loss": 0.0657, "step": 86730 }, { "epoch": 
3.1521186132713135, "grad_norm": 0.5599542260169983, "learning_rate": 2.6415148712394833e-05, "loss": 0.0743, "step": 86740 }, { "epoch": 3.1524820117741115, "grad_norm": 1.3045130968093872, "learning_rate": 2.6409967684024522e-05, "loss": 0.0808, "step": 86750 }, { "epoch": 3.1528454102769095, "grad_norm": 0.8639160394668579, "learning_rate": 2.640478659490373e-05, "loss": 0.0784, "step": 86760 }, { "epoch": 3.153208808779708, "grad_norm": 0.4310116767883301, "learning_rate": 2.6399605445255677e-05, "loss": 0.0792, "step": 86770 }, { "epoch": 3.153572207282506, "grad_norm": 0.40688809752464294, "learning_rate": 2.6394424235303606e-05, "loss": 0.069, "step": 86780 }, { "epoch": 3.1539356057853043, "grad_norm": 0.7720523476600647, "learning_rate": 2.638924296527076e-05, "loss": 0.0791, "step": 86790 }, { "epoch": 3.1542990042881023, "grad_norm": 0.520601212978363, "learning_rate": 2.638406163538037e-05, "loss": 0.0901, "step": 86800 }, { "epoch": 3.1546624027909003, "grad_norm": 0.37290602922439575, "learning_rate": 2.6378880245855698e-05, "loss": 0.1093, "step": 86810 }, { "epoch": 3.1550258012936987, "grad_norm": 0.9572335481643677, "learning_rate": 2.637369879691997e-05, "loss": 0.6554, "step": 86820 }, { "epoch": 3.1553891997964967, "grad_norm": 0.8544325232505798, "learning_rate": 2.6368517288796456e-05, "loss": 0.0846, "step": 86830 }, { "epoch": 3.155752598299295, "grad_norm": 1.4652429819107056, "learning_rate": 2.6363335721708403e-05, "loss": 0.1105, "step": 86840 }, { "epoch": 3.156115996802093, "grad_norm": 3.825007438659668, "learning_rate": 2.6358154095879063e-05, "loss": 0.095, "step": 86850 }, { "epoch": 3.1564793953048915, "grad_norm": 1.822260856628418, "learning_rate": 2.6352972411531696e-05, "loss": 0.0825, "step": 86860 }, { "epoch": 3.1568427938076895, "grad_norm": 0.46824485063552856, "learning_rate": 2.6347790668889553e-05, "loss": 0.0716, "step": 86870 }, { "epoch": 3.1572061923104875, "grad_norm": 1.1813397407531738, "learning_rate": 
2.6342608868175916e-05, "loss": 0.0891, "step": 86880 }, { "epoch": 3.157569590813286, "grad_norm": 0.7336288690567017, "learning_rate": 2.6337427009614034e-05, "loss": 0.1021, "step": 86890 }, { "epoch": 3.157932989316084, "grad_norm": 0.4944972097873688, "learning_rate": 2.6332245093427187e-05, "loss": 0.0873, "step": 86900 }, { "epoch": 3.1582963878188823, "grad_norm": 0.5169403553009033, "learning_rate": 2.6327063119838634e-05, "loss": 0.0966, "step": 86910 }, { "epoch": 3.1586597863216803, "grad_norm": 0.6052831411361694, "learning_rate": 2.6321881089071655e-05, "loss": 0.0665, "step": 86920 }, { "epoch": 3.1590231848244787, "grad_norm": 0.4833454489707947, "learning_rate": 2.6316699001349526e-05, "loss": 0.0732, "step": 86930 }, { "epoch": 3.1593865833272767, "grad_norm": 0.6335532069206238, "learning_rate": 2.6311516856895512e-05, "loss": 0.0874, "step": 86940 }, { "epoch": 3.1597499818300747, "grad_norm": 1.6006765365600586, "learning_rate": 2.630633465593291e-05, "loss": 0.1257, "step": 86950 }, { "epoch": 3.160113380332873, "grad_norm": 0.3963083028793335, "learning_rate": 2.6301152398684998e-05, "loss": 0.0822, "step": 86960 }, { "epoch": 3.160476778835671, "grad_norm": 0.44627153873443604, "learning_rate": 2.6295970085375054e-05, "loss": 0.0622, "step": 86970 }, { "epoch": 3.1608401773384696, "grad_norm": 0.7904446125030518, "learning_rate": 2.629078771622637e-05, "loss": 0.0753, "step": 86980 }, { "epoch": 3.1612035758412675, "grad_norm": 0.5941157937049866, "learning_rate": 2.628560529146224e-05, "loss": 0.0884, "step": 86990 }, { "epoch": 3.1615669743440655, "grad_norm": 0.6121344566345215, "learning_rate": 2.6280422811305948e-05, "loss": 0.0678, "step": 87000 }, { "epoch": 3.1615669743440655, "eval_loss": 0.33216938376426697, "eval_runtime": 180.0386, "eval_samples_per_second": 41.18, "eval_steps_per_second": 5.149, "eval_wer": 0.1409860765697896, "step": 87000 }, { "epoch": 3.161930372846864, "grad_norm": 1.1521292924880981, "learning_rate": 
2.6275240275980795e-05, "loss": 0.0904, "step": 87010 }, { "epoch": 3.162293771349662, "grad_norm": 0.48100772500038147, "learning_rate": 2.6270057685710074e-05, "loss": 0.0725, "step": 87020 }, { "epoch": 3.1626571698524604, "grad_norm": 0.4358821511268616, "learning_rate": 2.6264875040717092e-05, "loss": 0.0714, "step": 87030 }, { "epoch": 3.1630205683552584, "grad_norm": 0.907632052898407, "learning_rate": 2.625969234122514e-05, "loss": 0.0949, "step": 87040 }, { "epoch": 3.1633839668580563, "grad_norm": 0.6242339015007019, "learning_rate": 2.6254509587457527e-05, "loss": 0.0856, "step": 87050 }, { "epoch": 3.1637473653608548, "grad_norm": 0.8153877854347229, "learning_rate": 2.6249326779637555e-05, "loss": 0.1291, "step": 87060 }, { "epoch": 3.1641107638636528, "grad_norm": 1.0637160539627075, "learning_rate": 2.6244143917988544e-05, "loss": 0.0709, "step": 87070 }, { "epoch": 3.164474162366451, "grad_norm": 0.33303794264793396, "learning_rate": 2.6238961002733796e-05, "loss": 0.0828, "step": 87080 }, { "epoch": 3.164837560869249, "grad_norm": 0.676785945892334, "learning_rate": 2.623377803409663e-05, "loss": 0.1132, "step": 87090 }, { "epoch": 3.165200959372047, "grad_norm": 1.7492424249649048, "learning_rate": 2.6228595012300356e-05, "loss": 0.0876, "step": 87100 }, { "epoch": 3.1655643578748456, "grad_norm": 0.5225327610969543, "learning_rate": 2.622341193756829e-05, "loss": 0.1208, "step": 87110 }, { "epoch": 3.1659277563776436, "grad_norm": 0.5629643201828003, "learning_rate": 2.6218228810123763e-05, "loss": 0.0862, "step": 87120 }, { "epoch": 3.166291154880442, "grad_norm": 1.335174560546875, "learning_rate": 2.6213045630190084e-05, "loss": 0.0746, "step": 87130 }, { "epoch": 3.16665455338324, "grad_norm": 0.6635318398475647, "learning_rate": 2.6207862397990597e-05, "loss": 0.0903, "step": 87140 }, { "epoch": 3.1670179518860384, "grad_norm": 0.9929222464561462, "learning_rate": 2.620267911374861e-05, "loss": 0.1024, "step": 87150 }, { "epoch": 
3.1673813503888364, "grad_norm": 0.5913928151130676, "learning_rate": 2.619749577768745e-05, "loss": 0.114, "step": 87160 }, { "epoch": 3.1677447488916344, "grad_norm": 0.9411669373512268, "learning_rate": 2.6192312390030472e-05, "loss": 0.0878, "step": 87170 }, { "epoch": 3.168108147394433, "grad_norm": 2.4124414920806885, "learning_rate": 2.6187128951000982e-05, "loss": 0.0937, "step": 87180 }, { "epoch": 3.168471545897231, "grad_norm": 0.3180709481239319, "learning_rate": 2.6181945460822343e-05, "loss": 0.0956, "step": 87190 }, { "epoch": 3.168834944400029, "grad_norm": 0.9713842272758484, "learning_rate": 2.6176761919717867e-05, "loss": 0.111, "step": 87200 }, { "epoch": 3.169198342902827, "grad_norm": 0.7940172553062439, "learning_rate": 2.6171578327910918e-05, "loss": 0.0794, "step": 87210 }, { "epoch": 3.1695617414056256, "grad_norm": 0.7418053150177002, "learning_rate": 2.6166394685624823e-05, "loss": 0.0692, "step": 87220 }, { "epoch": 3.1699251399084236, "grad_norm": 0.4988052546977997, "learning_rate": 2.6161210993082925e-05, "loss": 0.0702, "step": 87230 }, { "epoch": 3.1702885384112216, "grad_norm": 0.41006624698638916, "learning_rate": 2.6156027250508587e-05, "loss": 0.1156, "step": 87240 }, { "epoch": 3.17065193691402, "grad_norm": 0.7204731702804565, "learning_rate": 2.615084345812514e-05, "loss": 0.0822, "step": 87250 }, { "epoch": 3.171015335416818, "grad_norm": 0.5459019541740417, "learning_rate": 2.6145659616155948e-05, "loss": 0.076, "step": 87260 }, { "epoch": 3.1713787339196164, "grad_norm": 0.5608823299407959, "learning_rate": 2.6140475724824355e-05, "loss": 0.076, "step": 87270 }, { "epoch": 3.1717421324224144, "grad_norm": 0.6672724485397339, "learning_rate": 2.613529178435372e-05, "loss": 0.0617, "step": 87280 }, { "epoch": 3.1721055309252124, "grad_norm": 0.4979981482028961, "learning_rate": 2.6130107794967412e-05, "loss": 0.0852, "step": 87290 }, { "epoch": 3.172468929428011, "grad_norm": 4.004956245422363, "learning_rate": 
2.612492375688877e-05, "loss": 0.1044, "step": 87300 }, { "epoch": 3.172832327930809, "grad_norm": 0.3019026517868042, "learning_rate": 2.611973967034117e-05, "loss": 0.1081, "step": 87310 }, { "epoch": 3.1731957264336073, "grad_norm": 0.5312141180038452, "learning_rate": 2.6114555535547964e-05, "loss": 0.072, "step": 87320 }, { "epoch": 3.1735591249364052, "grad_norm": 0.5587911605834961, "learning_rate": 2.6109371352732537e-05, "loss": 0.0663, "step": 87330 }, { "epoch": 3.1739225234392032, "grad_norm": 0.7223249077796936, "learning_rate": 2.610418712211824e-05, "loss": 0.0689, "step": 87340 }, { "epoch": 3.1742859219420017, "grad_norm": 0.9574065208435059, "learning_rate": 2.6099002843928444e-05, "loss": 0.1149, "step": 87350 }, { "epoch": 3.1746493204447996, "grad_norm": 1.298269510269165, "learning_rate": 2.6093818518386535e-05, "loss": 0.0749, "step": 87360 }, { "epoch": 3.175012718947598, "grad_norm": 215.23536682128906, "learning_rate": 2.6088634145715867e-05, "loss": 3.8479, "step": 87370 }, { "epoch": 3.175376117450396, "grad_norm": 0.8245293498039246, "learning_rate": 2.608344972613984e-05, "loss": 0.0947, "step": 87380 }, { "epoch": 3.1757395159531945, "grad_norm": 0.462643027305603, "learning_rate": 2.6078265259881805e-05, "loss": 0.0876, "step": 87390 }, { "epoch": 3.1761029144559925, "grad_norm": 1.069300651550293, "learning_rate": 2.607308074716517e-05, "loss": 0.0977, "step": 87400 }, { "epoch": 3.1764663129587904, "grad_norm": 0.46647369861602783, "learning_rate": 2.6067896188213296e-05, "loss": 0.1076, "step": 87410 }, { "epoch": 3.176829711461589, "grad_norm": 0.46599775552749634, "learning_rate": 2.6062711583249578e-05, "loss": 0.1663, "step": 87420 }, { "epoch": 3.177193109964387, "grad_norm": 0.6090976595878601, "learning_rate": 2.60575269324974e-05, "loss": 0.0798, "step": 87430 }, { "epoch": 3.1775565084671853, "grad_norm": 3.0198044776916504, "learning_rate": 2.6052342236180144e-05, "loss": 0.0952, "step": 87440 }, { "epoch": 
3.1779199069699833, "grad_norm": 0.8521358370780945, "learning_rate": 2.604715749452121e-05, "loss": 0.0942, "step": 87450 }, { "epoch": 3.1782833054727813, "grad_norm": 1.1309806108474731, "learning_rate": 2.604197270774398e-05, "loss": 0.0858, "step": 87460 }, { "epoch": 3.1786467039755797, "grad_norm": 2.8067383766174316, "learning_rate": 2.603678787607186e-05, "loss": 0.0718, "step": 87470 }, { "epoch": 3.1790101024783777, "grad_norm": 1.6722538471221924, "learning_rate": 2.603160299972824e-05, "loss": 0.0787, "step": 87480 }, { "epoch": 3.179373500981176, "grad_norm": 5.742082595825195, "learning_rate": 2.602641807893651e-05, "loss": 0.1014, "step": 87490 }, { "epoch": 3.179736899483974, "grad_norm": 1.0743470191955566, "learning_rate": 2.6021233113920078e-05, "loss": 0.1036, "step": 87500 }, { "epoch": 3.1801002979867725, "grad_norm": 2.879175901412964, "learning_rate": 2.6016048104902345e-05, "loss": 0.1001, "step": 87510 }, { "epoch": 3.1804636964895705, "grad_norm": 0.37631672620773315, "learning_rate": 2.601086305210672e-05, "loss": 0.0747, "step": 87520 }, { "epoch": 3.1808270949923685, "grad_norm": 0.38719940185546875, "learning_rate": 2.6005677955756603e-05, "loss": 0.8272, "step": 87530 }, { "epoch": 3.181190493495167, "grad_norm": 0.6106435060501099, "learning_rate": 2.6000492816075395e-05, "loss": 0.085, "step": 87540 }, { "epoch": 3.181553891997965, "grad_norm": 0.6239719986915588, "learning_rate": 2.5995307633286515e-05, "loss": 0.079, "step": 87550 }, { "epoch": 3.1819172905007633, "grad_norm": 0.8448728919029236, "learning_rate": 2.5990122407613366e-05, "loss": 0.0744, "step": 87560 }, { "epoch": 3.1822806890035613, "grad_norm": 0.7143642902374268, "learning_rate": 2.598493713927937e-05, "loss": 0.0686, "step": 87570 }, { "epoch": 3.1826440875063593, "grad_norm": 0.44550129771232605, "learning_rate": 2.597975182850793e-05, "loss": 0.0585, "step": 87580 }, { "epoch": 3.1830074860091577, "grad_norm": 0.43901458382606506, "learning_rate": 
2.5974566475522478e-05, "loss": 0.0831, "step": 87590 }, { "epoch": 3.1833708845119557, "grad_norm": 1.0826005935668945, "learning_rate": 2.5969381080546417e-05, "loss": 0.0793, "step": 87600 }, { "epoch": 3.1833708845119557, "eval_loss": 0.3327307105064392, "eval_runtime": 180.1166, "eval_samples_per_second": 41.162, "eval_steps_per_second": 5.147, "eval_wer": 0.1424837075898125, "step": 87600 }, { "epoch": 3.183734283014754, "grad_norm": 0.43947404623031616, "learning_rate": 2.596419564380318e-05, "loss": 0.1618, "step": 87610 }, { "epoch": 3.184097681517552, "grad_norm": 1.2502785921096802, "learning_rate": 2.5959010165516185e-05, "loss": 0.081, "step": 87620 }, { "epoch": 3.18446108002035, "grad_norm": 0.45034274458885193, "learning_rate": 2.5953824645908848e-05, "loss": 0.0821, "step": 87630 }, { "epoch": 3.1848244785231485, "grad_norm": 0.5315303206443787, "learning_rate": 2.5948639085204607e-05, "loss": 0.1093, "step": 87640 }, { "epoch": 3.1851878770259465, "grad_norm": 1.181911826133728, "learning_rate": 2.5943453483626873e-05, "loss": 0.0766, "step": 87650 }, { "epoch": 3.185551275528745, "grad_norm": 0.4337679147720337, "learning_rate": 2.5938267841399096e-05, "loss": 0.0886, "step": 87660 }, { "epoch": 3.185914674031543, "grad_norm": 4.992573261260986, "learning_rate": 2.5933082158744686e-05, "loss": 0.0955, "step": 87670 }, { "epoch": 3.1862780725343414, "grad_norm": 0.401584267616272, "learning_rate": 2.5927896435887094e-05, "loss": 0.0802, "step": 87680 }, { "epoch": 3.1866414710371394, "grad_norm": 0.39341244101524353, "learning_rate": 2.592271067304975e-05, "loss": 0.092, "step": 87690 }, { "epoch": 3.1870048695399373, "grad_norm": 0.8146529197692871, "learning_rate": 2.5917524870456074e-05, "loss": 0.0875, "step": 87700 }, { "epoch": 3.1873682680427358, "grad_norm": 0.4977721869945526, "learning_rate": 2.5912339028329524e-05, "loss": 0.0709, "step": 87710 }, { "epoch": 3.1877316665455337, "grad_norm": 0.9400126338005066, "learning_rate": 
2.5907153146893527e-05, "loss": 0.0756, "step": 87720 }, { "epoch": 3.188095065048332, "grad_norm": 1.8934528827667236, "learning_rate": 2.590196722637153e-05, "loss": 0.1116, "step": 87730 }, { "epoch": 3.18845846355113, "grad_norm": 0.3290179669857025, "learning_rate": 2.5896781266986974e-05, "loss": 0.0794, "step": 87740 }, { "epoch": 3.188821862053928, "grad_norm": 0.42296847701072693, "learning_rate": 2.589159526896331e-05, "loss": 0.0917, "step": 87750 }, { "epoch": 3.1891852605567266, "grad_norm": 1.1522185802459717, "learning_rate": 2.5886409232523966e-05, "loss": 0.0855, "step": 87760 }, { "epoch": 3.1895486590595246, "grad_norm": 0.9515447616577148, "learning_rate": 2.5881223157892415e-05, "loss": 0.0809, "step": 87770 }, { "epoch": 3.189912057562323, "grad_norm": 0.8773075342178345, "learning_rate": 2.587603704529209e-05, "loss": 0.0754, "step": 87780 }, { "epoch": 3.190275456065121, "grad_norm": 0.6473771333694458, "learning_rate": 2.587085089494644e-05, "loss": 0.0887, "step": 87790 }, { "epoch": 3.1906388545679194, "grad_norm": 1.6101555824279785, "learning_rate": 2.586566470707893e-05, "loss": 0.0901, "step": 87800 }, { "epoch": 3.1910022530707174, "grad_norm": 0.25950807332992554, "learning_rate": 2.5860478481912996e-05, "loss": 0.1501, "step": 87810 }, { "epoch": 3.1913656515735154, "grad_norm": 1.339440941810608, "learning_rate": 2.585529221967212e-05, "loss": 0.0645, "step": 87820 }, { "epoch": 3.191729050076314, "grad_norm": 0.8550190329551697, "learning_rate": 2.5850105920579736e-05, "loss": 0.0686, "step": 87830 }, { "epoch": 3.192092448579112, "grad_norm": 0.5492476224899292, "learning_rate": 2.584491958485932e-05, "loss": 0.0913, "step": 87840 }, { "epoch": 3.19245584708191, "grad_norm": 0.8055387735366821, "learning_rate": 2.5839733212734323e-05, "loss": 0.0886, "step": 87850 }, { "epoch": 3.192819245584708, "grad_norm": 0.5844281315803528, "learning_rate": 2.5834546804428207e-05, "loss": 0.0589, "step": 87860 }, { "epoch": 
3.193182644087506, "grad_norm": 0.47842904925346375, "learning_rate": 2.582936036016444e-05, "loss": 0.0664, "step": 87870 }, { "epoch": 3.1935460425903046, "grad_norm": 0.5088633894920349, "learning_rate": 2.5824173880166486e-05, "loss": 0.0858, "step": 87880 }, { "epoch": 3.1939094410931026, "grad_norm": 0.4572700262069702, "learning_rate": 2.581898736465781e-05, "loss": 0.0813, "step": 87890 }, { "epoch": 3.194272839595901, "grad_norm": 1.1615513563156128, "learning_rate": 2.5813800813861884e-05, "loss": 0.1006, "step": 87900 }, { "epoch": 3.194636238098699, "grad_norm": 0.410319060087204, "learning_rate": 2.5808614228002174e-05, "loss": 0.0918, "step": 87910 }, { "epoch": 3.194999636601497, "grad_norm": 0.28735676407814026, "learning_rate": 2.5803427607302154e-05, "loss": 0.0691, "step": 87920 }, { "epoch": 3.1953630351042954, "grad_norm": 0.9969823956489563, "learning_rate": 2.5798240951985302e-05, "loss": 0.0749, "step": 87930 }, { "epoch": 3.1957264336070934, "grad_norm": 1.1910429000854492, "learning_rate": 2.579305426227509e-05, "loss": 0.1894, "step": 87940 }, { "epoch": 3.196089832109892, "grad_norm": 0.9283407926559448, "learning_rate": 2.5787867538394982e-05, "loss": 0.0854, "step": 87950 }, { "epoch": 3.19645323061269, "grad_norm": 0.5121541619300842, "learning_rate": 2.5782680780568476e-05, "loss": 0.4152, "step": 87960 }, { "epoch": 3.1968166291154883, "grad_norm": 0.6104360222816467, "learning_rate": 2.577749398901903e-05, "loss": 0.0793, "step": 87970 }, { "epoch": 3.1971800276182862, "grad_norm": 0.3884080648422241, "learning_rate": 2.577230716397014e-05, "loss": 0.0808, "step": 87980 }, { "epoch": 3.197543426121084, "grad_norm": 1.0838251113891602, "learning_rate": 2.5767120305645277e-05, "loss": 0.0883, "step": 87990 }, { "epoch": 3.1979068246238826, "grad_norm": 0.7806040048599243, "learning_rate": 2.5761933414267936e-05, "loss": 0.0721, "step": 88000 }, { "epoch": 3.1982702231266806, "grad_norm": 0.4943070709705353, "learning_rate": 
2.575674649006159e-05, "loss": 0.0751, "step": 88010 }, { "epoch": 3.198633621629479, "grad_norm": 1.1027365922927856, "learning_rate": 2.575155953324973e-05, "loss": 0.0714, "step": 88020 }, { "epoch": 3.198997020132277, "grad_norm": 0.5191725492477417, "learning_rate": 2.5746372544055847e-05, "loss": 0.0699, "step": 88030 }, { "epoch": 3.199360418635075, "grad_norm": 0.9866086840629578, "learning_rate": 2.5741185522703424e-05, "loss": 0.1314, "step": 88040 }, { "epoch": 3.1997238171378735, "grad_norm": 2.110804319381714, "learning_rate": 2.5735998469415956e-05, "loss": 0.0873, "step": 88050 }, { "epoch": 3.2000872156406714, "grad_norm": 0.9099189639091492, "learning_rate": 2.573081138441693e-05, "loss": 0.1216, "step": 88060 }, { "epoch": 3.20045061414347, "grad_norm": 0.6158716082572937, "learning_rate": 2.572562426792985e-05, "loss": 0.0683, "step": 88070 }, { "epoch": 3.200814012646268, "grad_norm": 0.6630299091339111, "learning_rate": 2.5720437120178186e-05, "loss": 0.089, "step": 88080 }, { "epoch": 3.2011774111490663, "grad_norm": 0.4174249768257141, "learning_rate": 2.5715249941385467e-05, "loss": 0.0949, "step": 88090 }, { "epoch": 3.2015408096518643, "grad_norm": 0.7170994877815247, "learning_rate": 2.5710062731775164e-05, "loss": 0.0864, "step": 88100 }, { "epoch": 3.2019042081546623, "grad_norm": 0.5610363483428955, "learning_rate": 2.5704875491570784e-05, "loss": 0.0878, "step": 88110 }, { "epoch": 3.2022676066574607, "grad_norm": 0.41790106892585754, "learning_rate": 2.5699688220995834e-05, "loss": 3.9644, "step": 88120 }, { "epoch": 3.2026310051602587, "grad_norm": 0.8751020431518555, "learning_rate": 2.5694500920273795e-05, "loss": 0.0729, "step": 88130 }, { "epoch": 3.202994403663057, "grad_norm": 0.5966963768005371, "learning_rate": 2.5689313589628193e-05, "loss": 0.187, "step": 88140 }, { "epoch": 3.203357802165855, "grad_norm": 0.8189564943313599, "learning_rate": 2.5684126229282516e-05, "loss": 0.0749, "step": 88150 }, { "epoch": 
3.203721200668653, "grad_norm": 1.0785588026046753, "learning_rate": 2.5678938839460283e-05, "loss": 0.0835, "step": 88160 }, { "epoch": 3.2040845991714515, "grad_norm": 1.5373785495758057, "learning_rate": 2.5673751420384983e-05, "loss": 0.0789, "step": 88170 }, { "epoch": 3.2044479976742495, "grad_norm": 0.6006679534912109, "learning_rate": 2.566856397228013e-05, "loss": 0.0694, "step": 88180 }, { "epoch": 3.204811396177048, "grad_norm": 0.9026358127593994, "learning_rate": 2.5663376495369242e-05, "loss": 0.0847, "step": 88190 }, { "epoch": 3.205174794679846, "grad_norm": 1.896236777305603, "learning_rate": 2.565818898987581e-05, "loss": 0.0814, "step": 88200 }, { "epoch": 3.205174794679846, "eval_loss": 0.3309068977832794, "eval_runtime": 180.8021, "eval_samples_per_second": 41.006, "eval_steps_per_second": 5.127, "eval_wer": 0.142601702761087, "step": 88200 }, { "epoch": 3.205538193182644, "grad_norm": 0.5485680103302002, "learning_rate": 2.565300145602336e-05, "loss": 0.0943, "step": 88210 }, { "epoch": 3.2059015916854423, "grad_norm": 0.5330252647399902, "learning_rate": 2.564833265149393e-05, "loss": 0.8654, "step": 88220 }, { "epoch": 3.2062649901882403, "grad_norm": 1.7009663581848145, "learning_rate": 2.564314506437512e-05, "loss": 0.0911, "step": 88230 }, { "epoch": 3.2066283886910387, "grad_norm": 0.3746320903301239, "learning_rate": 2.563795744954548e-05, "loss": 0.0656, "step": 88240 }, { "epoch": 3.2069917871938367, "grad_norm": 1.1512928009033203, "learning_rate": 2.5632769807228512e-05, "loss": 0.0897, "step": 88250 }, { "epoch": 3.207355185696635, "grad_norm": 3.660646915435791, "learning_rate": 2.562758213764776e-05, "loss": 0.0859, "step": 88260 }, { "epoch": 3.207718584199433, "grad_norm": 0.3683335781097412, "learning_rate": 2.5622394441026716e-05, "loss": 0.1157, "step": 88270 }, { "epoch": 3.208081982702231, "grad_norm": 0.9600638151168823, "learning_rate": 2.561720671758891e-05, "loss": 0.077, "step": 88280 }, { "epoch": 3.2084453812050295, 
"grad_norm": 1.4279944896697998, "learning_rate": 2.5612018967557866e-05, "loss": 0.0889, "step": 88290 }, { "epoch": 3.2088087797078275, "grad_norm": 0.37430229783058167, "learning_rate": 2.5606831191157103e-05, "loss": 0.0924, "step": 88300 }, { "epoch": 3.209172178210626, "grad_norm": 0.4506620764732361, "learning_rate": 2.5601643388610137e-05, "loss": 0.095, "step": 88310 }, { "epoch": 3.209535576713424, "grad_norm": 0.5026054382324219, "learning_rate": 2.5596455560140504e-05, "loss": 0.0703, "step": 88320 }, { "epoch": 3.209898975216222, "grad_norm": 0.2740514278411865, "learning_rate": 2.559126770597173e-05, "loss": 0.081, "step": 88330 }, { "epoch": 3.2102623737190203, "grad_norm": 0.4658343493938446, "learning_rate": 2.558607982632732e-05, "loss": 0.1092, "step": 88340 }, { "epoch": 3.2106257722218183, "grad_norm": 0.7769015431404114, "learning_rate": 2.5580891921430822e-05, "loss": 0.0985, "step": 88350 }, { "epoch": 3.2109891707246168, "grad_norm": 0.4521372616291046, "learning_rate": 2.5575703991505758e-05, "loss": 0.0765, "step": 88360 }, { "epoch": 3.2113525692274147, "grad_norm": 0.41286054253578186, "learning_rate": 2.5570516036775656e-05, "loss": 0.0758, "step": 88370 }, { "epoch": 3.211715967730213, "grad_norm": 0.5163076519966125, "learning_rate": 2.5565328057464044e-05, "loss": 0.0822, "step": 88380 }, { "epoch": 3.212079366233011, "grad_norm": 0.8043489456176758, "learning_rate": 2.556014005379447e-05, "loss": 0.0742, "step": 88390 }, { "epoch": 3.212442764735809, "grad_norm": 5.606634616851807, "learning_rate": 2.555495202599044e-05, "loss": 0.0879, "step": 88400 }, { "epoch": 3.2128061632386076, "grad_norm": 0.7967556118965149, "learning_rate": 2.554976397427551e-05, "loss": 0.0851, "step": 88410 }, { "epoch": 3.2131695617414056, "grad_norm": 0.8802252411842346, "learning_rate": 2.5544575898873208e-05, "loss": 0.067, "step": 88420 }, { "epoch": 3.213532960244204, "grad_norm": 1.1108994483947754, "learning_rate": 2.553938780000706e-05, "loss": 
0.092, "step": 88430 }, { "epoch": 3.213896358747002, "grad_norm": 0.37617167830467224, "learning_rate": 2.5534199677900618e-05, "loss": 0.1041, "step": 88440 }, { "epoch": 3.2142597572498, "grad_norm": 0.6323724389076233, "learning_rate": 2.5529011532777407e-05, "loss": 0.0805, "step": 88450 }, { "epoch": 3.2146231557525984, "grad_norm": 1.353440284729004, "learning_rate": 2.5523823364860978e-05, "loss": 0.0927, "step": 88460 }, { "epoch": 3.2149865542553964, "grad_norm": 0.5883220434188843, "learning_rate": 2.551863517437486e-05, "loss": 0.0714, "step": 88470 }, { "epoch": 3.215349952758195, "grad_norm": 0.545572817325592, "learning_rate": 2.551344696154259e-05, "loss": 0.063, "step": 88480 }, { "epoch": 3.215713351260993, "grad_norm": 1.073169469833374, "learning_rate": 2.550825872658773e-05, "loss": 0.1216, "step": 88490 }, { "epoch": 3.2160767497637908, "grad_norm": 0.45323723554611206, "learning_rate": 2.5503070469733804e-05, "loss": 0.0881, "step": 88500 }, { "epoch": 3.216440148266589, "grad_norm": 0.47871801257133484, "learning_rate": 2.5497882191204365e-05, "loss": 0.1577, "step": 88510 }, { "epoch": 3.216803546769387, "grad_norm": 0.21129682660102844, "learning_rate": 2.5492693891222952e-05, "loss": 0.0599, "step": 88520 }, { "epoch": 3.2171669452721856, "grad_norm": 1.4315932989120483, "learning_rate": 2.548750557001311e-05, "loss": 0.1047, "step": 88530 }, { "epoch": 3.2175303437749836, "grad_norm": 0.4861602187156677, "learning_rate": 2.5482317227798393e-05, "loss": 0.0807, "step": 88540 }, { "epoch": 3.217893742277782, "grad_norm": 0.9959172010421753, "learning_rate": 2.547712886480233e-05, "loss": 0.0929, "step": 88550 }, { "epoch": 3.21825714078058, "grad_norm": 1.7969993352890015, "learning_rate": 2.5471940481248496e-05, "loss": 0.0884, "step": 88560 }, { "epoch": 3.218620539283378, "grad_norm": 0.49897995591163635, "learning_rate": 2.5466752077360417e-05, "loss": 0.069, "step": 88570 }, { "epoch": 3.2189839377861764, "grad_norm": 
1.5426241159439087, "learning_rate": 2.5461563653361658e-05, "loss": 0.0803, "step": 88580 }, { "epoch": 3.2193473362889744, "grad_norm": 0.3128204345703125, "learning_rate": 2.5456375209475765e-05, "loss": 0.08, "step": 88590 }, { "epoch": 3.219710734791773, "grad_norm": 1.4503716230392456, "learning_rate": 2.5451186745926276e-05, "loss": 0.0908, "step": 88600 }, { "epoch": 3.220074133294571, "grad_norm": 1.7355505228042603, "learning_rate": 2.5445998262936765e-05, "loss": 0.0784, "step": 88610 }, { "epoch": 3.220437531797369, "grad_norm": 0.8922900557518005, "learning_rate": 2.5440809760730773e-05, "loss": 0.1035, "step": 88620 }, { "epoch": 3.2208009303001672, "grad_norm": 0.41246405243873596, "learning_rate": 2.543562123953186e-05, "loss": 0.0966, "step": 88630 }, { "epoch": 3.221164328802965, "grad_norm": 0.5685162544250488, "learning_rate": 2.5430432699563577e-05, "loss": 0.0869, "step": 88640 }, { "epoch": 3.2215277273057636, "grad_norm": 1.1315709352493286, "learning_rate": 2.5425244141049477e-05, "loss": 0.096, "step": 88650 }, { "epoch": 3.2218911258085616, "grad_norm": 0.5966361165046692, "learning_rate": 2.542005556421312e-05, "loss": 0.0861, "step": 88660 }, { "epoch": 3.22225452431136, "grad_norm": 0.6581082344055176, "learning_rate": 2.541486696927806e-05, "loss": 0.0604, "step": 88670 }, { "epoch": 3.222617922814158, "grad_norm": 0.4752594530582428, "learning_rate": 2.540967835646787e-05, "loss": 0.2735, "step": 88680 }, { "epoch": 3.222981321316956, "grad_norm": 1.1282252073287964, "learning_rate": 2.5404489726006085e-05, "loss": 0.0928, "step": 88690 }, { "epoch": 3.2233447198197545, "grad_norm": 0.7483789324760437, "learning_rate": 2.5399301078116287e-05, "loss": 0.1104, "step": 88700 }, { "epoch": 3.2237081183225524, "grad_norm": 0.338220477104187, "learning_rate": 2.5394112413022024e-05, "loss": 0.0564, "step": 88710 }, { "epoch": 3.224071516825351, "grad_norm": 0.5026964545249939, "learning_rate": 2.538892373094685e-05, "loss": 0.0771, "step": 
88720 }, { "epoch": 3.224434915328149, "grad_norm": 0.5595789551734924, "learning_rate": 2.5383735032114348e-05, "loss": 0.0752, "step": 88730 }, { "epoch": 3.224798313830947, "grad_norm": 0.37049752473831177, "learning_rate": 2.5378546316748066e-05, "loss": 0.0846, "step": 88740 }, { "epoch": 3.2251617123337453, "grad_norm": 0.7598865628242493, "learning_rate": 2.5373357585071568e-05, "loss": 0.0657, "step": 88750 }, { "epoch": 3.2255251108365433, "grad_norm": 2.472294569015503, "learning_rate": 2.536816883730842e-05, "loss": 0.1018, "step": 88760 }, { "epoch": 3.2258885093393417, "grad_norm": 0.44734901189804077, "learning_rate": 2.5362980073682186e-05, "loss": 0.08, "step": 88770 }, { "epoch": 3.2262519078421397, "grad_norm": 0.9893713593482971, "learning_rate": 2.5357791294416433e-05, "loss": 0.097, "step": 88780 }, { "epoch": 3.2266153063449377, "grad_norm": 0.5891297459602356, "learning_rate": 2.5352602499734725e-05, "loss": 0.0738, "step": 88790 }, { "epoch": 3.226978704847736, "grad_norm": 1.1289516687393188, "learning_rate": 2.5347413689860634e-05, "loss": 0.0851, "step": 88800 }, { "epoch": 3.226978704847736, "eval_loss": 0.32641828060150146, "eval_runtime": 181.2974, "eval_samples_per_second": 40.894, "eval_steps_per_second": 5.113, "eval_wer": 0.13833572349193093, "step": 88800 }, { "epoch": 3.227342103350534, "grad_norm": 1.0237905979156494, "learning_rate": 2.5342224865017715e-05, "loss": 0.0795, "step": 88810 }, { "epoch": 3.2277055018533325, "grad_norm": 1.2821190357208252, "learning_rate": 2.5337036025429555e-05, "loss": 0.0745, "step": 88820 }, { "epoch": 3.2280689003561305, "grad_norm": 0.4639264941215515, "learning_rate": 2.5331847171319707e-05, "loss": 0.0825, "step": 88830 }, { "epoch": 3.228432298858929, "grad_norm": 0.4527970254421234, "learning_rate": 2.5327177190389585e-05, "loss": 1.9595, "step": 88840 }, { "epoch": 3.228795697361727, "grad_norm": 4.547738552093506, "learning_rate": 2.532198830930447e-05, "loss": 0.0768, "step": 88850 }, 
{ "epoch": 3.229159095864525, "grad_norm": 1.0073570013046265, "learning_rate": 2.5316799414346026e-05, "loss": 0.0892, "step": 88860 }, { "epoch": 3.2295224943673233, "grad_norm": 0.5932965874671936, "learning_rate": 2.5311610505737827e-05, "loss": 0.064, "step": 88870 }, { "epoch": 3.2298858928701213, "grad_norm": 0.6178048253059387, "learning_rate": 2.530642158370343e-05, "loss": 0.0592, "step": 88880 }, { "epoch": 3.2302492913729197, "grad_norm": 0.31980982422828674, "learning_rate": 2.5301232648466427e-05, "loss": 0.1194, "step": 88890 }, { "epoch": 3.2306126898757177, "grad_norm": 0.910460889339447, "learning_rate": 2.529604370025037e-05, "loss": 0.084, "step": 88900 }, { "epoch": 3.2309760883785157, "grad_norm": 0.4696938991546631, "learning_rate": 2.5290854739278857e-05, "loss": 0.1013, "step": 88910 }, { "epoch": 3.231339486881314, "grad_norm": 0.5140429139137268, "learning_rate": 2.5286184663683343e-05, "loss": 1.1616, "step": 88920 }, { "epoch": 3.231702885384112, "grad_norm": 1.7606545686721802, "learning_rate": 2.5280995679092382e-05, "loss": 0.0786, "step": 88930 }, { "epoch": 3.2320662838869105, "grad_norm": 1.0249334573745728, "learning_rate": 2.527580668239431e-05, "loss": 0.0797, "step": 88940 }, { "epoch": 3.2324296823897085, "grad_norm": 0.7938006520271301, "learning_rate": 2.5270617673812712e-05, "loss": 0.0956, "step": 88950 }, { "epoch": 3.232793080892507, "grad_norm": 0.998557984828949, "learning_rate": 2.526542865357116e-05, "loss": 0.0837, "step": 88960 }, { "epoch": 3.233156479395305, "grad_norm": 3.8933629989624023, "learning_rate": 2.5260239621893232e-05, "loss": 0.0633, "step": 88970 }, { "epoch": 3.233519877898103, "grad_norm": 0.6665277481079102, "learning_rate": 2.525556948378978e-05, "loss": 2.7581, "step": 88980 }, { "epoch": 3.2338832764009013, "grad_norm": 3.3742940425872803, "learning_rate": 2.52503804309987e-05, "loss": 0.0782, "step": 88990 }, { "epoch": 3.2342466749036993, "grad_norm": 0.37812352180480957, "learning_rate": 
2.524519136741961e-05, "loss": 0.0937, "step": 89000 }, { "epoch": 3.2346100734064978, "grad_norm": 0.42430004477500916, "learning_rate": 2.5240002293276092e-05, "loss": 0.067, "step": 89010 }, { "epoch": 3.2349734719092957, "grad_norm": 0.669116735458374, "learning_rate": 2.523481320879174e-05, "loss": 0.0901, "step": 89020 }, { "epoch": 3.2353368704120937, "grad_norm": 0.8977360725402832, "learning_rate": 2.5229624114190113e-05, "loss": 0.0802, "step": 89030 }, { "epoch": 3.235700268914892, "grad_norm": 0.6342353224754333, "learning_rate": 2.5224435009694808e-05, "loss": 0.316, "step": 89040 }, { "epoch": 3.23606366741769, "grad_norm": 1.2511061429977417, "learning_rate": 2.5219245895529385e-05, "loss": 0.0862, "step": 89050 }, { "epoch": 3.2364270659204886, "grad_norm": 0.7103098034858704, "learning_rate": 2.5214056771917448e-05, "loss": 0.0743, "step": 89060 }, { "epoch": 3.2367904644232866, "grad_norm": 1.1459332704544067, "learning_rate": 2.5208867639082562e-05, "loss": 0.0808, "step": 89070 }, { "epoch": 3.2371538629260845, "grad_norm": 0.4795287251472473, "learning_rate": 2.5203678497248314e-05, "loss": 0.0596, "step": 89080 }, { "epoch": 3.237517261428883, "grad_norm": 1.7111159563064575, "learning_rate": 2.5198489346638282e-05, "loss": 0.0803, "step": 89090 }, { "epoch": 3.237880659931681, "grad_norm": 0.9549298286437988, "learning_rate": 2.519330018747605e-05, "loss": 0.0808, "step": 89100 }, { "epoch": 3.2382440584344794, "grad_norm": 1.708971381187439, "learning_rate": 2.5188111019985204e-05, "loss": 0.072, "step": 89110 }, { "epoch": 3.2386074569372774, "grad_norm": 1.0324857234954834, "learning_rate": 2.5182921844389317e-05, "loss": 0.0593, "step": 89120 }, { "epoch": 3.238970855440076, "grad_norm": 0.7414169311523438, "learning_rate": 2.517773266091199e-05, "loss": 0.067, "step": 89130 }, { "epoch": 3.2393342539428738, "grad_norm": 0.739587128162384, "learning_rate": 2.5172543469776783e-05, "loss": 0.0832, "step": 89140 }, { "epoch": 
3.2396976524456718, "grad_norm": 0.6406792402267456, "learning_rate": 2.5167354271207298e-05, "loss": 0.0713, "step": 89150 }, { "epoch": 3.24006105094847, "grad_norm": 0.40510469675064087, "learning_rate": 2.5162165065427114e-05, "loss": 0.0698, "step": 89160 }, { "epoch": 3.240424449451268, "grad_norm": 0.508594810962677, "learning_rate": 2.515697585265981e-05, "loss": 0.0691, "step": 89170 }, { "epoch": 3.2407878479540666, "grad_norm": 0.5306017994880676, "learning_rate": 2.5151786633128983e-05, "loss": 0.0975, "step": 89180 }, { "epoch": 3.2411512464568646, "grad_norm": 1.0172330141067505, "learning_rate": 2.51465974070582e-05, "loss": 0.0897, "step": 89190 }, { "epoch": 3.2415146449596626, "grad_norm": 1.291803240776062, "learning_rate": 2.514140817467106e-05, "loss": 0.0812, "step": 89200 }, { "epoch": 3.241878043462461, "grad_norm": 0.5256122946739197, "learning_rate": 2.513621893619115e-05, "loss": 0.1297, "step": 89210 }, { "epoch": 3.242241441965259, "grad_norm": 1.4141759872436523, "learning_rate": 2.513102969184204e-05, "loss": 0.0578, "step": 89220 }, { "epoch": 3.2426048404680574, "grad_norm": 0.35552000999450684, "learning_rate": 2.5125840441847336e-05, "loss": 0.0774, "step": 89230 }, { "epoch": 3.2429682389708554, "grad_norm": 0.5986491441726685, "learning_rate": 2.512065118643061e-05, "loss": 0.096, "step": 89240 }, { "epoch": 3.243331637473654, "grad_norm": 0.6903038024902344, "learning_rate": 2.5115461925815458e-05, "loss": 0.0827, "step": 89250 }, { "epoch": 3.243695035976452, "grad_norm": 0.6188151240348816, "learning_rate": 2.5110272660225454e-05, "loss": 0.0887, "step": 89260 }, { "epoch": 3.24405843447925, "grad_norm": 0.27323612570762634, "learning_rate": 2.5105083389884205e-05, "loss": 1.8973, "step": 89270 }, { "epoch": 3.2444218329820482, "grad_norm": 0.4321115016937256, "learning_rate": 2.5099894115015284e-05, "loss": 0.091, "step": 89280 }, { "epoch": 3.244785231484846, "grad_norm": 0.5513198375701904, "learning_rate": 
2.5094704835842276e-05, "loss": 0.0713, "step": 89290 }, { "epoch": 3.2451486299876446, "grad_norm": 0.5108364224433899, "learning_rate": 2.5089515552588776e-05, "loss": 0.0746, "step": 89300 }, { "epoch": 3.2455120284904426, "grad_norm": 0.5740257501602173, "learning_rate": 2.5084326265478365e-05, "loss": 0.0713, "step": 89310 }, { "epoch": 3.2458754269932406, "grad_norm": 0.46265411376953125, "learning_rate": 2.507913697473464e-05, "loss": 0.0768, "step": 89320 }, { "epoch": 3.246238825496039, "grad_norm": 0.573864758014679, "learning_rate": 2.5073947680581184e-05, "loss": 0.0928, "step": 89330 }, { "epoch": 3.246602223998837, "grad_norm": 0.34501180052757263, "learning_rate": 2.5068758383241586e-05, "loss": 0.0843, "step": 89340 }, { "epoch": 3.2469656225016355, "grad_norm": 1.3891141414642334, "learning_rate": 2.5063569082939437e-05, "loss": 0.0825, "step": 89350 }, { "epoch": 3.2473290210044334, "grad_norm": 0.40869995951652527, "learning_rate": 2.505837977989831e-05, "loss": 2.1322, "step": 89360 }, { "epoch": 3.2476924195072314, "grad_norm": 0.3573366403579712, "learning_rate": 2.505319047434182e-05, "loss": 0.0659, "step": 89370 }, { "epoch": 3.24805581801003, "grad_norm": 1.208050012588501, "learning_rate": 2.5048001166493536e-05, "loss": 0.0684, "step": 89380 }, { "epoch": 3.248419216512828, "grad_norm": 2.167421340942383, "learning_rate": 2.5042811856577054e-05, "loss": 0.5461, "step": 89390 }, { "epoch": 3.2487826150156263, "grad_norm": 0.7320494055747986, "learning_rate": 2.5037622544815958e-05, "loss": 0.0968, "step": 89400 }, { "epoch": 3.2487826150156263, "eval_loss": 0.28867700695991516, "eval_runtime": 180.0941, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.1410223827763356, "step": 89400 }, { "epoch": 3.2491460135184242, "grad_norm": 0.4674376845359802, "learning_rate": 2.5032433231433845e-05, "loss": 0.1201, "step": 89410 }, { "epoch": 3.2495094120212227, "grad_norm": 0.5038551688194275, "learning_rate": 
2.5027243916654296e-05, "loss": 0.0698, "step": 89420 }, { "epoch": 3.2498728105240207, "grad_norm": 0.5582560896873474, "learning_rate": 2.5022054600700902e-05, "loss": 0.0666, "step": 89430 }, { "epoch": 3.2502362090268186, "grad_norm": 3.2635886669158936, "learning_rate": 2.501686528379726e-05, "loss": 0.0858, "step": 89440 }, { "epoch": 3.250599607529617, "grad_norm": 6.544740676879883, "learning_rate": 2.5011675966166947e-05, "loss": 0.0792, "step": 89450 }, { "epoch": 3.250963006032415, "grad_norm": 0.33596140146255493, "learning_rate": 2.500648664803356e-05, "loss": 0.0678, "step": 89460 }, { "epoch": 3.2513264045352135, "grad_norm": 0.7997293472290039, "learning_rate": 2.500129732962069e-05, "loss": 0.0636, "step": 89470 }, { "epoch": 3.2516898030380115, "grad_norm": 0.2458629459142685, "learning_rate": 2.4996108011151922e-05, "loss": 0.0711, "step": 89480 }, { "epoch": 3.25205320154081, "grad_norm": 0.5491876006126404, "learning_rate": 2.4990918692850834e-05, "loss": 0.0982, "step": 89490 }, { "epoch": 3.252416600043608, "grad_norm": 1.1274678707122803, "learning_rate": 2.4985729374941038e-05, "loss": 0.1037, "step": 89500 }, { "epoch": 3.252779998546406, "grad_norm": 0.576329231262207, "learning_rate": 2.4980540057646117e-05, "loss": 0.0813, "step": 89510 }, { "epoch": 3.2531433970492043, "grad_norm": 2.285304069519043, "learning_rate": 2.497535074118965e-05, "loss": 0.0752, "step": 89520 }, { "epoch": 3.2535067955520023, "grad_norm": 1.1110707521438599, "learning_rate": 2.4970161425795233e-05, "loss": 0.0663, "step": 89530 }, { "epoch": 3.2538701940548007, "grad_norm": 0.6601670980453491, "learning_rate": 2.4964972111686443e-05, "loss": 0.0782, "step": 89540 }, { "epoch": 3.2542335925575987, "grad_norm": 1.520925521850586, "learning_rate": 2.4959782799086897e-05, "loss": 0.0842, "step": 89550 }, { "epoch": 3.2545969910603967, "grad_norm": 0.5269960165023804, "learning_rate": 2.495459348822016e-05, "loss": 0.0606, "step": 89560 }, { "epoch": 
3.254960389563195, "grad_norm": 2.8767483234405518, "learning_rate": 2.494940417930983e-05, "loss": 1.3044, "step": 89570 }, { "epoch": 3.255323788065993, "grad_norm": 0.3898687958717346, "learning_rate": 2.4944214872579484e-05, "loss": 0.1111, "step": 89580 }, { "epoch": 3.2556871865687915, "grad_norm": 0.7580005526542664, "learning_rate": 2.4939025568252732e-05, "loss": 0.1151, "step": 89590 }, { "epoch": 3.2560505850715895, "grad_norm": 1.3324828147888184, "learning_rate": 2.4933836266553147e-05, "loss": 0.0821, "step": 89600 }, { "epoch": 3.2564139835743875, "grad_norm": 0.6260429620742798, "learning_rate": 2.4928646967704323e-05, "loss": 0.0501, "step": 89610 }, { "epoch": 3.256777382077186, "grad_norm": 0.3619268238544464, "learning_rate": 2.492345767192985e-05, "loss": 0.0667, "step": 89620 }, { "epoch": 3.257140780579984, "grad_norm": 1.3483206033706665, "learning_rate": 2.4918268379453302e-05, "loss": 0.062, "step": 89630 }, { "epoch": 3.2575041790827823, "grad_norm": 31.592674255371094, "learning_rate": 2.491307909049829e-05, "loss": 0.2416, "step": 89640 }, { "epoch": 3.2578675775855803, "grad_norm": 0.5905751585960388, "learning_rate": 2.490788980528839e-05, "loss": 0.0816, "step": 89650 }, { "epoch": 3.2582309760883783, "grad_norm": 1.89798903465271, "learning_rate": 2.4902700524047194e-05, "loss": 0.0913, "step": 89660 }, { "epoch": 3.2585943745911767, "grad_norm": 0.883533239364624, "learning_rate": 2.489751124699828e-05, "loss": 0.0588, "step": 89670 }, { "epoch": 3.2589577730939747, "grad_norm": 0.48883989453315735, "learning_rate": 2.4892321974365235e-05, "loss": 0.0794, "step": 89680 }, { "epoch": 3.259321171596773, "grad_norm": 0.4255521595478058, "learning_rate": 2.4887132706371663e-05, "loss": 0.0727, "step": 89690 }, { "epoch": 3.259684570099571, "grad_norm": 1.9494534730911255, "learning_rate": 2.488194344324114e-05, "loss": 0.0798, "step": 89700 }, { "epoch": 3.260047968602369, "grad_norm": 0.4608819782733917, "learning_rate": 
2.4876754185197255e-05, "loss": 0.1128, "step": 89710 }, { "epoch": 3.2604113671051675, "grad_norm": 0.2869090139865875, "learning_rate": 2.487156493246359e-05, "loss": 0.0729, "step": 89720 }, { "epoch": 3.2607747656079655, "grad_norm": 0.4189178943634033, "learning_rate": 2.4866375685263733e-05, "loss": 0.0711, "step": 89730 }, { "epoch": 3.261138164110764, "grad_norm": 0.8501338958740234, "learning_rate": 2.4861186443821273e-05, "loss": 0.0915, "step": 89740 }, { "epoch": 3.261501562613562, "grad_norm": 1.5623184442520142, "learning_rate": 2.48559972083598e-05, "loss": 0.0871, "step": 89750 }, { "epoch": 3.2618649611163604, "grad_norm": 0.3259228765964508, "learning_rate": 2.4850807979102895e-05, "loss": 0.0864, "step": 89760 }, { "epoch": 3.2622283596191584, "grad_norm": 1.2263481616973877, "learning_rate": 2.4845618756274147e-05, "loss": 0.0599, "step": 89770 }, { "epoch": 3.262591758121957, "grad_norm": 4.90409517288208, "learning_rate": 2.484042954009712e-05, "loss": 0.0895, "step": 89780 }, { "epoch": 3.2629551566247548, "grad_norm": 0.4526398479938507, "learning_rate": 2.483524033079543e-05, "loss": 0.0734, "step": 89790 }, { "epoch": 3.2633185551275528, "grad_norm": 0.7492665648460388, "learning_rate": 2.483005112859265e-05, "loss": 0.0904, "step": 89800 }, { "epoch": 3.263681953630351, "grad_norm": 0.5449044108390808, "learning_rate": 2.482486193371236e-05, "loss": 0.0764, "step": 89810 }, { "epoch": 3.264045352133149, "grad_norm": 0.5052310824394226, "learning_rate": 2.4819672746378137e-05, "loss": 0.0617, "step": 89820 }, { "epoch": 3.2644087506359476, "grad_norm": 0.5267361998558044, "learning_rate": 2.4814483566813582e-05, "loss": 0.077, "step": 89830 }, { "epoch": 3.2647721491387456, "grad_norm": 1.3752968311309814, "learning_rate": 2.4809294395242273e-05, "loss": 0.0719, "step": 89840 }, { "epoch": 3.2651355476415436, "grad_norm": 1.6243940591812134, "learning_rate": 2.4804105231887787e-05, "loss": 0.1557, "step": 89850 }, { "epoch": 
3.265498946144342, "grad_norm": 1.946386694908142, "learning_rate": 2.479891607697371e-05, "loss": 0.0984, "step": 89860 }, { "epoch": 3.26586234464714, "grad_norm": 1.5876291990280151, "learning_rate": 2.4793726930723615e-05, "loss": 0.0768, "step": 89870 }, { "epoch": 3.2662257431499384, "grad_norm": 0.359464555978775, "learning_rate": 2.4788537793361104e-05, "loss": 0.0734, "step": 89880 }, { "epoch": 3.2665891416527364, "grad_norm": 1.2184336185455322, "learning_rate": 2.4783348665109743e-05, "loss": 0.0963, "step": 89890 }, { "epoch": 3.2669525401555344, "grad_norm": 0.5689897537231445, "learning_rate": 2.477815954619312e-05, "loss": 0.0818, "step": 89900 }, { "epoch": 3.267315938658333, "grad_norm": 0.8023828268051147, "learning_rate": 2.4773489347334143e-05, "loss": 0.0947, "step": 89910 }, { "epoch": 3.267679337161131, "grad_norm": 0.41300055384635925, "learning_rate": 2.476830024676948e-05, "loss": 0.0646, "step": 89920 }, { "epoch": 3.268042735663929, "grad_norm": 1.4045326709747314, "learning_rate": 2.4763111156187922e-05, "loss": 0.0702, "step": 89930 }, { "epoch": 3.268406134166727, "grad_norm": 0.2815437614917755, "learning_rate": 2.4757922075813077e-05, "loss": 0.0844, "step": 89940 }, { "epoch": 3.268769532669525, "grad_norm": 0.8915033340454102, "learning_rate": 2.475273300586851e-05, "loss": 0.1112, "step": 89950 }, { "epoch": 3.2691329311723236, "grad_norm": 0.7407945394515991, "learning_rate": 2.474754394657779e-05, "loss": 0.0744, "step": 89960 }, { "epoch": 3.2694963296751216, "grad_norm": 0.6766940951347351, "learning_rate": 2.474235489816451e-05, "loss": 0.771, "step": 89970 }, { "epoch": 3.26985972817792, "grad_norm": 0.9781772494316101, "learning_rate": 2.4737165860852227e-05, "loss": 0.5104, "step": 89980 }, { "epoch": 3.270223126680718, "grad_norm": 0.4817918539047241, "learning_rate": 2.4731976834864544e-05, "loss": 0.105, "step": 89990 }, { "epoch": 3.270586525183516, "grad_norm": 0.5263279676437378, "learning_rate": 
2.4726787820425023e-05, "loss": 0.0872, "step": 90000 }, { "epoch": 3.270586525183516, "eval_loss": 0.29889926314353943, "eval_runtime": 179.1872, "eval_samples_per_second": 41.376, "eval_steps_per_second": 5.173, "eval_wer": 0.14040517726505347, "step": 90000 }, { "epoch": 3.2709499236863144, "grad_norm": 21.08318328857422, "learning_rate": 2.4721598817757247e-05, "loss": 0.3013, "step": 90010 }, { "epoch": 3.2713133221891124, "grad_norm": 0.4224644601345062, "learning_rate": 2.4716409827084783e-05, "loss": 0.079, "step": 90020 }, { "epoch": 3.271676720691911, "grad_norm": 0.4048929214477539, "learning_rate": 2.4711220848631203e-05, "loss": 0.0684, "step": 90030 }, { "epoch": 3.272040119194709, "grad_norm": 0.522287905216217, "learning_rate": 2.47060318826201e-05, "loss": 0.1021, "step": 90040 }, { "epoch": 3.2724035176975073, "grad_norm": 1.5194038152694702, "learning_rate": 2.4700842929275033e-05, "loss": 0.0878, "step": 90050 }, { "epoch": 3.2727669162003052, "grad_norm": 0.3342646062374115, "learning_rate": 2.4695653988819584e-05, "loss": 0.0697, "step": 90060 }, { "epoch": 3.2731303147031037, "grad_norm": 0.7479965686798096, "learning_rate": 2.469046506147732e-05, "loss": 0.0573, "step": 90070 }, { "epoch": 3.2734937132059017, "grad_norm": 0.4107944071292877, "learning_rate": 2.4685276147471804e-05, "loss": 0.0553, "step": 90080 }, { "epoch": 3.2738571117086996, "grad_norm": 0.8151494860649109, "learning_rate": 2.468008724702663e-05, "loss": 2.1021, "step": 90090 }, { "epoch": 3.274220510211498, "grad_norm": 0.6547772884368896, "learning_rate": 2.4674898360365357e-05, "loss": 0.0704, "step": 90100 }, { "epoch": 3.274583908714296, "grad_norm": 0.7743812203407288, "learning_rate": 2.4669709487711562e-05, "loss": 0.0689, "step": 90110 }, { "epoch": 3.2749473072170945, "grad_norm": 0.546514630317688, "learning_rate": 2.4664520629288795e-05, "loss": 0.0693, "step": 90120 }, { "epoch": 3.2753107057198925, "grad_norm": 1.609788179397583, "learning_rate": 
2.4659331785320652e-05, "loss": 0.0814, "step": 90130 }, { "epoch": 3.2756741042226905, "grad_norm": 0.8801899552345276, "learning_rate": 2.4654142956030692e-05, "loss": 0.0745, "step": 90140 }, { "epoch": 3.276037502725489, "grad_norm": 0.5829627513885498, "learning_rate": 2.464895414164248e-05, "loss": 0.0597, "step": 90150 }, { "epoch": 3.276400901228287, "grad_norm": 0.7144574522972107, "learning_rate": 2.4643765342379584e-05, "loss": 0.1288, "step": 90160 }, { "epoch": 3.2767642997310853, "grad_norm": 0.4121115505695343, "learning_rate": 2.4638576558465562e-05, "loss": 0.0911, "step": 90170 }, { "epoch": 3.2771276982338833, "grad_norm": 0.4146971106529236, "learning_rate": 2.4633387790124003e-05, "loss": 0.066, "step": 90180 }, { "epoch": 3.2774910967366813, "grad_norm": 0.6736780405044556, "learning_rate": 2.4628199037578457e-05, "loss": 0.0708, "step": 90190 }, { "epoch": 3.2778544952394797, "grad_norm": 2.1393494606018066, "learning_rate": 2.462301030105249e-05, "loss": 0.0841, "step": 90200 }, { "epoch": 3.2782178937422777, "grad_norm": 1.2542799711227417, "learning_rate": 2.461782158076967e-05, "loss": 0.0797, "step": 90210 }, { "epoch": 3.278581292245076, "grad_norm": 1.3703997135162354, "learning_rate": 2.461263287695355e-05, "loss": 0.0578, "step": 90220 }, { "epoch": 3.278944690747874, "grad_norm": 0.32917872071266174, "learning_rate": 2.4607444189827708e-05, "loss": 0.0609, "step": 90230 }, { "epoch": 3.279308089250672, "grad_norm": 0.6270015835762024, "learning_rate": 2.4602255519615702e-05, "loss": 0.0925, "step": 90240 }, { "epoch": 3.2796714877534705, "grad_norm": 0.564975917339325, "learning_rate": 2.4597066866541087e-05, "loss": 0.0892, "step": 90250 }, { "epoch": 3.2800348862562685, "grad_norm": 0.6385096311569214, "learning_rate": 2.4591878230827433e-05, "loss": 0.0876, "step": 90260 }, { "epoch": 3.280398284759067, "grad_norm": 1.597822666168213, "learning_rate": 2.4586689612698277e-05, "loss": 0.1932, "step": 90270 }, { "epoch": 
3.280761683261865, "grad_norm": 0.7112919688224792, "learning_rate": 2.458150101237721e-05, "loss": 0.0928, "step": 90280 }, { "epoch": 3.281125081764663, "grad_norm": 0.4111541509628296, "learning_rate": 2.4576312430087772e-05, "loss": 0.068, "step": 90290 }, { "epoch": 3.2814884802674613, "grad_norm": 0.9870650172233582, "learning_rate": 2.4571123866053525e-05, "loss": 0.08, "step": 90300 }, { "epoch": 3.2818518787702593, "grad_norm": 2.479681968688965, "learning_rate": 2.4565935320498016e-05, "loss": 4.4447, "step": 90310 }, { "epoch": 3.2822152772730577, "grad_norm": 0.686183512210846, "learning_rate": 2.456074679364482e-05, "loss": 0.0719, "step": 90320 }, { "epoch": 3.2825786757758557, "grad_norm": 0.5197970271110535, "learning_rate": 2.455555828571748e-05, "loss": 0.0548, "step": 90330 }, { "epoch": 3.282942074278654, "grad_norm": 1.1354804039001465, "learning_rate": 2.4550369796939547e-05, "loss": 0.0908, "step": 90340 }, { "epoch": 3.283305472781452, "grad_norm": 0.7994649410247803, "learning_rate": 2.4545181327534588e-05, "loss": 0.0862, "step": 90350 }, { "epoch": 3.2836688712842506, "grad_norm": 1.2239160537719727, "learning_rate": 2.4539992877726135e-05, "loss": 0.2734, "step": 90360 }, { "epoch": 3.2840322697870485, "grad_norm": 2.116908311843872, "learning_rate": 2.4534804447737762e-05, "loss": 0.0638, "step": 90370 }, { "epoch": 3.2843956682898465, "grad_norm": 0.5922090411186218, "learning_rate": 2.452961603779301e-05, "loss": 0.0883, "step": 90380 }, { "epoch": 3.284759066792645, "grad_norm": 0.7629362940788269, "learning_rate": 2.4524427648115427e-05, "loss": 0.07, "step": 90390 }, { "epoch": 3.285122465295443, "grad_norm": 0.5463785529136658, "learning_rate": 2.451923927892857e-05, "loss": 0.0813, "step": 90400 }, { "epoch": 3.2854858637982414, "grad_norm": 0.6801964044570923, "learning_rate": 2.4514050930455965e-05, "loss": 0.0673, "step": 90410 }, { "epoch": 3.2858492623010394, "grad_norm": 1.022977590560913, "learning_rate": 
2.450886260292119e-05, "loss": 0.0685, "step": 90420 }, { "epoch": 3.2862126608038373, "grad_norm": 0.8831082582473755, "learning_rate": 2.450367429654778e-05, "loss": 0.0728, "step": 90430 }, { "epoch": 3.2865760593066358, "grad_norm": 0.6400480270385742, "learning_rate": 2.4498486011559277e-05, "loss": 0.0967, "step": 90440 }, { "epoch": 3.2869394578094338, "grad_norm": 0.8099062442779541, "learning_rate": 2.4493297748179225e-05, "loss": 0.066, "step": 90450 }, { "epoch": 3.287302856312232, "grad_norm": 0.3613360524177551, "learning_rate": 2.4488109506631163e-05, "loss": 0.1143, "step": 90460 }, { "epoch": 3.28766625481503, "grad_norm": 0.5503920316696167, "learning_rate": 2.448292128713865e-05, "loss": 0.0591, "step": 90470 }, { "epoch": 3.288029653317828, "grad_norm": 0.34772297739982605, "learning_rate": 2.447773308992522e-05, "loss": 0.0738, "step": 90480 }, { "epoch": 3.2883930518206266, "grad_norm": 1.3450576066970825, "learning_rate": 2.4472544915214414e-05, "loss": 0.0832, "step": 90490 }, { "epoch": 3.2887564503234246, "grad_norm": 0.5817814469337463, "learning_rate": 2.4467356763229756e-05, "loss": 0.0962, "step": 90500 }, { "epoch": 3.289119848826223, "grad_norm": 0.669734001159668, "learning_rate": 2.446216863419481e-05, "loss": 0.0779, "step": 90510 }, { "epoch": 3.289483247329021, "grad_norm": 1.3286347389221191, "learning_rate": 2.445698052833311e-05, "loss": 0.07, "step": 90520 }, { "epoch": 3.289846645831819, "grad_norm": 1.7495746612548828, "learning_rate": 2.4451792445868186e-05, "loss": 0.07, "step": 90530 }, { "epoch": 3.2902100443346174, "grad_norm": 0.6228033304214478, "learning_rate": 2.4446604387023572e-05, "loss": 0.1081, "step": 90540 }, { "epoch": 3.2905734428374154, "grad_norm": 0.8681657910346985, "learning_rate": 2.4441416352022793e-05, "loss": 0.0826, "step": 90550 }, { "epoch": 3.290936841340214, "grad_norm": 1.1657065153121948, "learning_rate": 2.4436228341089412e-05, "loss": 0.0801, "step": 90560 }, { "epoch": 3.291300239843012, 
"grad_norm": 1.196022391319275, "learning_rate": 2.4431040354446944e-05, "loss": 0.0665, "step": 90570 }, { "epoch": 3.2916636383458098, "grad_norm": 0.8635385632514954, "learning_rate": 2.442585239231892e-05, "loss": 0.0694, "step": 90580 }, { "epoch": 3.292027036848608, "grad_norm": 0.664546549320221, "learning_rate": 2.442066445492888e-05, "loss": 0.0938, "step": 90590 }, { "epoch": 3.292390435351406, "grad_norm": 0.6976110339164734, "learning_rate": 2.441547654250033e-05, "loss": 0.0818, "step": 90600 }, { "epoch": 3.292390435351406, "eval_loss": 0.33689337968826294, "eval_runtime": 179.0908, "eval_samples_per_second": 41.398, "eval_steps_per_second": 5.176, "eval_wer": 0.1381995752173834, "step": 90600 }, { "epoch": 3.2927538338542046, "grad_norm": 0.48501139879226685, "learning_rate": 2.441028865525683e-05, "loss": 0.0915, "step": 90610 }, { "epoch": 3.2931172323570026, "grad_norm": 0.7468106746673584, "learning_rate": 2.4405100793421892e-05, "loss": 0.1325, "step": 90620 }, { "epoch": 3.293480630859801, "grad_norm": 0.38419654965400696, "learning_rate": 2.4399912957219048e-05, "loss": 0.071, "step": 90630 }, { "epoch": 3.293844029362599, "grad_norm": 0.44008463621139526, "learning_rate": 2.4394725146871817e-05, "loss": 0.0885, "step": 90640 }, { "epoch": 3.2942074278653974, "grad_norm": 4.322512149810791, "learning_rate": 2.4389537362603714e-05, "loss": 0.0892, "step": 90650 }, { "epoch": 3.2945708263681954, "grad_norm": 0.5549167990684509, "learning_rate": 2.4384349604638285e-05, "loss": 0.0908, "step": 90660 }, { "epoch": 3.2949342248709934, "grad_norm": 0.5810611248016357, "learning_rate": 2.437916187319904e-05, "loss": 0.0805, "step": 90670 }, { "epoch": 3.295297623373792, "grad_norm": 0.5657238364219666, "learning_rate": 2.4373974168509507e-05, "loss": 0.0607, "step": 90680 }, { "epoch": 3.29566102187659, "grad_norm": 0.6547468304634094, "learning_rate": 2.436878649079318e-05, "loss": 0.0987, "step": 90690 }, { "epoch": 3.2960244203793883, "grad_norm": 
1.354154109954834, "learning_rate": 2.4363598840273617e-05, "loss": 0.0898, "step": 90700 }, { "epoch": 3.2963878188821862, "grad_norm": 3.738417387008667, "learning_rate": 2.4358411217174308e-05, "loss": 0.0716, "step": 90710 }, { "epoch": 3.2967512173849842, "grad_norm": 1.3510026931762695, "learning_rate": 2.435322362171878e-05, "loss": 0.0855, "step": 90720 }, { "epoch": 3.2971146158877827, "grad_norm": 1.4833030700683594, "learning_rate": 2.4348036054130545e-05, "loss": 0.0859, "step": 90730 }, { "epoch": 3.2974780143905806, "grad_norm": 0.327749639749527, "learning_rate": 2.4342848514633107e-05, "loss": 0.0798, "step": 90740 }, { "epoch": 3.297841412893379, "grad_norm": 2.8632781505584717, "learning_rate": 2.4337661003449998e-05, "loss": 0.0958, "step": 90750 }, { "epoch": 3.298204811396177, "grad_norm": 1.3341703414916992, "learning_rate": 2.433247352080472e-05, "loss": 2.2308, "step": 90760 }, { "epoch": 3.298568209898975, "grad_norm": 1.0177751779556274, "learning_rate": 2.4327286066920785e-05, "loss": 0.0721, "step": 90770 }, { "epoch": 3.2989316084017735, "grad_norm": 0.5713348388671875, "learning_rate": 2.4322098642021697e-05, "loss": 0.0657, "step": 90780 }, { "epoch": 3.2992950069045714, "grad_norm": 0.32323822379112244, "learning_rate": 2.4316911246330955e-05, "loss": 0.2474, "step": 90790 }, { "epoch": 3.29965840540737, "grad_norm": 0.4968611001968384, "learning_rate": 2.431172388007209e-05, "loss": 0.0831, "step": 90800 }, { "epoch": 3.300021803910168, "grad_norm": 0.9985103607177734, "learning_rate": 2.430653654346859e-05, "loss": 0.0802, "step": 90810 }, { "epoch": 3.300385202412966, "grad_norm": 0.5295760631561279, "learning_rate": 2.430134923674397e-05, "loss": 0.0702, "step": 90820 }, { "epoch": 3.3007486009157643, "grad_norm": 0.6976117491722107, "learning_rate": 2.429616196012172e-05, "loss": 0.0797, "step": 90830 }, { "epoch": 3.3011119994185623, "grad_norm": 0.5984934568405151, "learning_rate": 2.4290974713825338e-05, "loss": 0.3996, 
"step": 90840 }, { "epoch": 3.3014753979213607, "grad_norm": 0.9891178607940674, "learning_rate": 2.4285787498078343e-05, "loss": 0.0631, "step": 90850 }, { "epoch": 3.3018387964241587, "grad_norm": 0.42125028371810913, "learning_rate": 2.4280600313104227e-05, "loss": 0.0814, "step": 90860 }, { "epoch": 3.3022021949269567, "grad_norm": 0.25509488582611084, "learning_rate": 2.427541315912648e-05, "loss": 0.0648, "step": 90870 }, { "epoch": 3.302565593429755, "grad_norm": 0.5609679222106934, "learning_rate": 2.42702260363686e-05, "loss": 0.0945, "step": 90880 }, { "epoch": 3.302928991932553, "grad_norm": 1.2793158292770386, "learning_rate": 2.4265038945054076e-05, "loss": 0.0853, "step": 90890 }, { "epoch": 3.3032923904353515, "grad_norm": 0.8435468673706055, "learning_rate": 2.425985188540642e-05, "loss": 0.1439, "step": 90900 }, { "epoch": 3.3036557889381495, "grad_norm": 1.7698181867599487, "learning_rate": 2.425466485764911e-05, "loss": 0.073, "step": 90910 }, { "epoch": 3.304019187440948, "grad_norm": 0.8225585222244263, "learning_rate": 2.4249477862005646e-05, "loss": 0.0733, "step": 90920 }, { "epoch": 3.304382585943746, "grad_norm": 1.2218375205993652, "learning_rate": 2.4244290898699494e-05, "loss": 0.0723, "step": 90930 }, { "epoch": 3.3047459844465443, "grad_norm": 2.20505952835083, "learning_rate": 2.423910396795417e-05, "loss": 0.0815, "step": 90940 }, { "epoch": 3.3051093829493423, "grad_norm": 0.4661131501197815, "learning_rate": 2.423391706999315e-05, "loss": 0.0872, "step": 90950 }, { "epoch": 3.3054727814521403, "grad_norm": 0.6685827970504761, "learning_rate": 2.4228730205039916e-05, "loss": 0.066, "step": 90960 }, { "epoch": 3.3058361799549387, "grad_norm": 0.89743572473526, "learning_rate": 2.4223543373317954e-05, "loss": 0.0665, "step": 90970 }, { "epoch": 3.3061995784577367, "grad_norm": 0.9550055265426636, "learning_rate": 2.4218356575050734e-05, "loss": 0.0722, "step": 90980 }, { "epoch": 3.306562976960535, "grad_norm": 0.4779164791107178, 
"learning_rate": 2.421316981046176e-05, "loss": 0.0724, "step": 90990 }, { "epoch": 3.306926375463333, "grad_norm": 1.0296430587768555, "learning_rate": 2.4207983079774498e-05, "loss": 0.0718, "step": 91000 }, { "epoch": 3.307289773966131, "grad_norm": 0.6730550527572632, "learning_rate": 2.4202796383212427e-05, "loss": 0.0622, "step": 91010 }, { "epoch": 3.3076531724689295, "grad_norm": 0.5089064240455627, "learning_rate": 2.4197609720999027e-05, "loss": 0.056, "step": 91020 }, { "epoch": 3.3080165709717275, "grad_norm": 1.3337105512619019, "learning_rate": 2.4192423093357754e-05, "loss": 0.0799, "step": 91030 }, { "epoch": 3.308379969474526, "grad_norm": 0.7988773584365845, "learning_rate": 2.418723650051211e-05, "loss": 0.094, "step": 91040 }, { "epoch": 3.308743367977324, "grad_norm": 2.004700183868408, "learning_rate": 2.4182049942685553e-05, "loss": 0.073, "step": 91050 }, { "epoch": 3.309106766480122, "grad_norm": 0.5091060996055603, "learning_rate": 2.4176863420101553e-05, "loss": 0.0869, "step": 91060 }, { "epoch": 3.3094701649829203, "grad_norm": 1.5232765674591064, "learning_rate": 2.417167693298358e-05, "loss": 0.0926, "step": 91070 }, { "epoch": 3.3098335634857183, "grad_norm": 2.1751208305358887, "learning_rate": 2.416649048155509e-05, "loss": 0.0679, "step": 91080 }, { "epoch": 3.3101969619885168, "grad_norm": 0.7265444397926331, "learning_rate": 2.416130406603957e-05, "loss": 0.6392, "step": 91090 }, { "epoch": 3.3105603604913147, "grad_norm": 1.931279182434082, "learning_rate": 2.4156117686660473e-05, "loss": 0.097, "step": 91100 }, { "epoch": 3.3109237589941127, "grad_norm": 0.5885264277458191, "learning_rate": 2.4150931343641264e-05, "loss": 0.087, "step": 91110 }, { "epoch": 3.311287157496911, "grad_norm": 1.2313237190246582, "learning_rate": 2.4145745037205388e-05, "loss": 0.0718, "step": 91120 }, { "epoch": 3.311650555999709, "grad_norm": 0.4470736086368561, "learning_rate": 2.4140558767576336e-05, "loss": 0.0614, "step": 91130 }, { "epoch": 
3.3120139545025076, "grad_norm": 0.9219857454299927, "learning_rate": 2.4135372534977542e-05, "loss": 0.1004, "step": 91140 }, { "epoch": 3.3123773530053056, "grad_norm": 0.5581304430961609, "learning_rate": 2.4130186339632473e-05, "loss": 0.0861, "step": 91150 }, { "epoch": 3.3127407515081035, "grad_norm": 0.6495917439460754, "learning_rate": 2.412500018176458e-05, "loss": 0.0892, "step": 91160 }, { "epoch": 3.313104150010902, "grad_norm": 0.8792677521705627, "learning_rate": 2.4119814061597304e-05, "loss": 0.0804, "step": 91170 }, { "epoch": 3.3134675485137, "grad_norm": 0.7889745235443115, "learning_rate": 2.411462797935412e-05, "loss": 0.068, "step": 91180 }, { "epoch": 3.3138309470164984, "grad_norm": 0.883449137210846, "learning_rate": 2.4109441935258465e-05, "loss": 0.1013, "step": 91190 }, { "epoch": 3.3141943455192964, "grad_norm": 0.7440003156661987, "learning_rate": 2.410425592953379e-05, "loss": 0.0737, "step": 91200 }, { "epoch": 3.3141943455192964, "eval_loss": 0.3343234956264496, "eval_runtime": 179.7554, "eval_samples_per_second": 41.245, "eval_steps_per_second": 5.157, "eval_wer": 0.13966997658249677, "step": 91200 }, { "epoch": 3.314557744022095, "grad_norm": 0.6900414228439331, "learning_rate": 2.409906996240353e-05, "loss": 0.0944, "step": 91210 }, { "epoch": 3.314921142524893, "grad_norm": 1.4145492315292358, "learning_rate": 2.4093884034091148e-05, "loss": 0.0583, "step": 91220 }, { "epoch": 3.315284541027691, "grad_norm": 1.502017855644226, "learning_rate": 2.4088698144820076e-05, "loss": 0.073, "step": 91230 }, { "epoch": 3.315647939530489, "grad_norm": 0.4413808584213257, "learning_rate": 2.4083512294813765e-05, "loss": 0.0817, "step": 91240 }, { "epoch": 3.316011338033287, "grad_norm": 4.0073957443237305, "learning_rate": 2.4078326484295642e-05, "loss": 0.0913, "step": 91250 }, { "epoch": 3.3163747365360856, "grad_norm": 1.807396650314331, "learning_rate": 2.407314071348915e-05, "loss": 0.0713, "step": 91260 }, { "epoch": 
3.3167381350388836, "grad_norm": 0.6919573545455933, "learning_rate": 2.4067954982617726e-05, "loss": 0.0856, "step": 91270 }, { "epoch": 3.317101533541682, "grad_norm": 0.31657874584198, "learning_rate": 2.4062769291904814e-05, "loss": 0.0806, "step": 91280 }, { "epoch": 3.31746493204448, "grad_norm": 2.0308666229248047, "learning_rate": 2.4057583641573835e-05, "loss": 0.0987, "step": 91290 }, { "epoch": 3.317828330547278, "grad_norm": 1.023868441581726, "learning_rate": 2.4052398031848224e-05, "loss": 0.0692, "step": 91300 }, { "epoch": 3.3181917290500764, "grad_norm": 0.5633085370063782, "learning_rate": 2.4047212462951402e-05, "loss": 0.0779, "step": 91310 }, { "epoch": 3.3185551275528744, "grad_norm": 0.48355531692504883, "learning_rate": 2.4042026935106812e-05, "loss": 0.0692, "step": 91320 }, { "epoch": 3.318918526055673, "grad_norm": 1.3398791551589966, "learning_rate": 2.4036841448537876e-05, "loss": 0.0805, "step": 91330 }, { "epoch": 3.319281924558471, "grad_norm": 0.36602282524108887, "learning_rate": 2.4031656003468016e-05, "loss": 0.0789, "step": 91340 }, { "epoch": 3.319645323061269, "grad_norm": 0.6087961196899414, "learning_rate": 2.4026470600120643e-05, "loss": 0.0816, "step": 91350 }, { "epoch": 3.3200087215640672, "grad_norm": 0.5523571372032166, "learning_rate": 2.402128523871919e-05, "loss": 0.0945, "step": 91360 }, { "epoch": 3.320372120066865, "grad_norm": 0.5356813669204712, "learning_rate": 2.401609991948708e-05, "loss": 0.0543, "step": 91370 }, { "epoch": 3.3207355185696636, "grad_norm": 0.7490220665931702, "learning_rate": 2.4010914642647725e-05, "loss": 0.0802, "step": 91380 }, { "epoch": 3.3210989170724616, "grad_norm": 0.3810897469520569, "learning_rate": 2.4005729408424538e-05, "loss": 0.0986, "step": 91390 }, { "epoch": 3.3214623155752596, "grad_norm": 2.465951919555664, "learning_rate": 2.4000544217040926e-05, "loss": 0.0977, "step": 91400 }, { "epoch": 3.321825714078058, "grad_norm": 0.69112628698349, "learning_rate": 
2.399535906872031e-05, "loss": 0.0679, "step": 91410 }, { "epoch": 3.322189112580856, "grad_norm": 0.594582200050354, "learning_rate": 2.3990173963686103e-05, "loss": 0.0622, "step": 91420 }, { "epoch": 3.3225525110836545, "grad_norm": 0.5401979088783264, "learning_rate": 2.3984988902161706e-05, "loss": 0.0557, "step": 91430 }, { "epoch": 3.3229159095864524, "grad_norm": 0.34108448028564453, "learning_rate": 2.397980388437052e-05, "loss": 0.0941, "step": 91440 }, { "epoch": 3.3232793080892504, "grad_norm": 0.8815538287162781, "learning_rate": 2.3974618910535958e-05, "loss": 0.0491, "step": 91450 }, { "epoch": 3.323642706592049, "grad_norm": 1.5742990970611572, "learning_rate": 2.3969433980881417e-05, "loss": 0.151, "step": 91460 }, { "epoch": 3.324006105094847, "grad_norm": 0.6522462368011475, "learning_rate": 2.3964249095630302e-05, "loss": 0.0642, "step": 91470 }, { "epoch": 3.3243695035976453, "grad_norm": 2.5433623790740967, "learning_rate": 2.3959064255006012e-05, "loss": 0.0702, "step": 91480 }, { "epoch": 3.3247329021004433, "grad_norm": 0.345841646194458, "learning_rate": 2.395387945923193e-05, "loss": 0.1386, "step": 91490 }, { "epoch": 3.3250963006032417, "grad_norm": 0.6606214642524719, "learning_rate": 2.394869470853146e-05, "loss": 0.3363, "step": 91500 }, { "epoch": 3.3254596991060397, "grad_norm": 0.638674795627594, "learning_rate": 2.3943510003128004e-05, "loss": 0.0987, "step": 91510 }, { "epoch": 3.325823097608838, "grad_norm": 0.4758436381816864, "learning_rate": 2.393832534324494e-05, "loss": 0.061, "step": 91520 }, { "epoch": 3.326186496111636, "grad_norm": 4.208878040313721, "learning_rate": 2.393314072910565e-05, "loss": 0.0707, "step": 91530 }, { "epoch": 3.326549894614434, "grad_norm": 0.49438315629959106, "learning_rate": 2.3927956160933544e-05, "loss": 0.0792, "step": 91540 }, { "epoch": 3.3269132931172325, "grad_norm": 1.3179932832717896, "learning_rate": 2.3922771638951983e-05, "loss": 0.0857, "step": 91550 }, { "epoch": 
3.3272766916200305, "grad_norm": 0.5983903408050537, "learning_rate": 2.3917587163384365e-05, "loss": 0.0743, "step": 91560 }, { "epoch": 3.327640090122829, "grad_norm": 0.30007684230804443, "learning_rate": 2.3912402734454063e-05, "loss": 0.0538, "step": 91570 }, { "epoch": 3.328003488625627, "grad_norm": 2.637645959854126, "learning_rate": 2.3907218352384452e-05, "loss": 0.066, "step": 91580 }, { "epoch": 3.328366887128425, "grad_norm": 0.9720343351364136, "learning_rate": 2.3902034017398923e-05, "loss": 0.0899, "step": 91590 }, { "epoch": 3.3287302856312233, "grad_norm": 0.5020787715911865, "learning_rate": 2.3896849729720834e-05, "loss": 0.0858, "step": 91600 }, { "epoch": 3.3290936841340213, "grad_norm": 0.7373344898223877, "learning_rate": 2.3891665489573573e-05, "loss": 0.0704, "step": 91610 }, { "epoch": 3.3294570826368197, "grad_norm": 0.8118528127670288, "learning_rate": 2.3886481297180494e-05, "loss": 0.0735, "step": 91620 }, { "epoch": 3.3298204811396177, "grad_norm": 0.37041768431663513, "learning_rate": 2.388129715276498e-05, "loss": 0.0747, "step": 91630 }, { "epoch": 3.3301838796424157, "grad_norm": 0.9788088798522949, "learning_rate": 2.3876113056550392e-05, "loss": 0.0728, "step": 91640 }, { "epoch": 3.330547278145214, "grad_norm": 1.582607626914978, "learning_rate": 2.3870929008760087e-05, "loss": 0.0798, "step": 91650 }, { "epoch": 3.330910676648012, "grad_norm": 4.744373321533203, "learning_rate": 2.386574500961744e-05, "loss": 0.0922, "step": 91660 }, { "epoch": 3.3312740751508105, "grad_norm": 0.6684284806251526, "learning_rate": 2.38605610593458e-05, "loss": 0.0615, "step": 91670 }, { "epoch": 3.3316374736536085, "grad_norm": 0.49481749534606934, "learning_rate": 2.3855377158168535e-05, "loss": 0.0838, "step": 91680 }, { "epoch": 3.3320008721564065, "grad_norm": 0.9517963528633118, "learning_rate": 2.385019330630899e-05, "loss": 0.0822, "step": 91690 }, { "epoch": 3.332364270659205, "grad_norm": 1.185115098953247, "learning_rate": 
2.384500950399053e-05, "loss": 0.0902, "step": 91700 }, { "epoch": 3.332727669162003, "grad_norm": 0.7998439073562622, "learning_rate": 2.38398257514365e-05, "loss": 0.0977, "step": 91710 }, { "epoch": 3.3330910676648013, "grad_norm": 0.7033588886260986, "learning_rate": 2.383464204887025e-05, "loss": 4.2273, "step": 91720 }, { "epoch": 3.3334544661675993, "grad_norm": 0.37441256642341614, "learning_rate": 2.3829458396515128e-05, "loss": 0.0806, "step": 91730 }, { "epoch": 3.3338178646703973, "grad_norm": 1.9060165882110596, "learning_rate": 2.3824274794594473e-05, "loss": 1.8747, "step": 91740 }, { "epoch": 3.3341812631731957, "grad_norm": 1.7355316877365112, "learning_rate": 2.3819091243331643e-05, "loss": 0.0927, "step": 91750 }, { "epoch": 3.3345446616759937, "grad_norm": 0.8641614317893982, "learning_rate": 2.381390774294996e-05, "loss": 0.1018, "step": 91760 }, { "epoch": 3.334908060178792, "grad_norm": 1.4142506122589111, "learning_rate": 2.380872429367278e-05, "loss": 0.0714, "step": 91770 }, { "epoch": 3.33527145868159, "grad_norm": 0.9375418424606323, "learning_rate": 2.3803540895723433e-05, "loss": 0.0922, "step": 91780 }, { "epoch": 3.3356348571843886, "grad_norm": 0.28046151995658875, "learning_rate": 2.3798357549325245e-05, "loss": 0.0794, "step": 91790 }, { "epoch": 3.3359982556871866, "grad_norm": 0.9596878290176392, "learning_rate": 2.3793174254701557e-05, "loss": 0.0864, "step": 91800 }, { "epoch": 3.3359982556871866, "eval_loss": 0.3040144741535187, "eval_runtime": 179.6763, "eval_samples_per_second": 41.263, "eval_steps_per_second": 5.159, "eval_wer": 0.1398333545119538, "step": 91800 }, { "epoch": 3.336361654189985, "grad_norm": 1.36545729637146, "learning_rate": 2.3787991012075697e-05, "loss": 2.5138, "step": 91810 }, { "epoch": 3.336725052692783, "grad_norm": 1.0148299932479858, "learning_rate": 2.3782807821670993e-05, "loss": 0.0763, "step": 91820 }, { "epoch": 3.337088451195581, "grad_norm": 1.4392248392105103, "learning_rate": 
2.3777624683710768e-05, "loss": 0.0623, "step": 91830 }, { "epoch": 3.3374518496983794, "grad_norm": 0.4060908854007721, "learning_rate": 2.3772441598418347e-05, "loss": 0.1011, "step": 91840 }, { "epoch": 3.3378152482011774, "grad_norm": 2.786815643310547, "learning_rate": 2.3767258566017045e-05, "loss": 0.0724, "step": 91850 }, { "epoch": 3.338178646703976, "grad_norm": 0.658647894859314, "learning_rate": 2.3762075586730194e-05, "loss": 0.0953, "step": 91860 }, { "epoch": 3.338542045206774, "grad_norm": 0.6986158490180969, "learning_rate": 2.3756892660781096e-05, "loss": 0.0742, "step": 91870 }, { "epoch": 3.3389054437095718, "grad_norm": 0.4815951883792877, "learning_rate": 2.375170978839307e-05, "loss": 0.0624, "step": 91880 }, { "epoch": 3.33926884221237, "grad_norm": 0.5518103241920471, "learning_rate": 2.3746526969789432e-05, "loss": 0.0692, "step": 91890 }, { "epoch": 3.339632240715168, "grad_norm": 0.5782762765884399, "learning_rate": 2.374134420519348e-05, "loss": 0.0755, "step": 91900 }, { "epoch": 3.3399956392179666, "grad_norm": 0.35663267970085144, "learning_rate": 2.3736161494828535e-05, "loss": 0.0877, "step": 91910 }, { "epoch": 3.3403590377207646, "grad_norm": 0.7183496952056885, "learning_rate": 2.373097883891789e-05, "loss": 0.0501, "step": 91920 }, { "epoch": 3.3407224362235626, "grad_norm": 0.6491569876670837, "learning_rate": 2.3725796237684853e-05, "loss": 0.1178, "step": 91930 }, { "epoch": 3.341085834726361, "grad_norm": 0.3948687016963959, "learning_rate": 2.372061369135272e-05, "loss": 0.0991, "step": 91940 }, { "epoch": 3.341449233229159, "grad_norm": 0.5275573134422302, "learning_rate": 2.3715431200144793e-05, "loss": 0.1027, "step": 91950 }, { "epoch": 3.3418126317319574, "grad_norm": 0.5675976872444153, "learning_rate": 2.371024876428437e-05, "loss": 0.0734, "step": 91960 }, { "epoch": 3.3421760302347554, "grad_norm": 2.3037304878234863, "learning_rate": 2.3705066383994738e-05, "loss": 0.0692, "step": 91970 }, { "epoch": 
3.3425394287375534, "grad_norm": 0.8419054746627808, "learning_rate": 2.369988405949918e-05, "loss": 0.0693, "step": 91980 }, { "epoch": 3.342902827240352, "grad_norm": 0.7216833829879761, "learning_rate": 2.3694701791020994e-05, "loss": 0.9264, "step": 91990 }, { "epoch": 3.34326622574315, "grad_norm": 0.7542405724525452, "learning_rate": 2.3689519578783467e-05, "loss": 0.0725, "step": 92000 }, { "epoch": 3.3436296242459482, "grad_norm": 1.4204015731811523, "learning_rate": 2.368433742300988e-05, "loss": 0.0826, "step": 92010 }, { "epoch": 3.343993022748746, "grad_norm": 3.0590789318084717, "learning_rate": 2.3679155323923514e-05, "loss": 0.0686, "step": 92020 }, { "epoch": 3.344356421251544, "grad_norm": 0.6067277789115906, "learning_rate": 2.3673973281747634e-05, "loss": 0.0887, "step": 92030 }, { "epoch": 3.3447198197543426, "grad_norm": 0.8979749083518982, "learning_rate": 2.3668791296705533e-05, "loss": 0.0774, "step": 92040 }, { "epoch": 3.3450832182571406, "grad_norm": 2.3768551349639893, "learning_rate": 2.3663609369020484e-05, "loss": 0.0991, "step": 92050 }, { "epoch": 3.345446616759939, "grad_norm": 0.9666934609413147, "learning_rate": 2.365842749891575e-05, "loss": 0.0693, "step": 92060 }, { "epoch": 3.345810015262737, "grad_norm": 0.48444709181785583, "learning_rate": 2.3653245686614603e-05, "loss": 0.0599, "step": 92070 }, { "epoch": 3.3461734137655355, "grad_norm": 0.5083462595939636, "learning_rate": 2.36480639323403e-05, "loss": 0.0561, "step": 92080 }, { "epoch": 3.3465368122683334, "grad_norm": 0.41698160767555237, "learning_rate": 2.3642882236316115e-05, "loss": 0.0903, "step": 92090 }, { "epoch": 3.346900210771132, "grad_norm": 0.29507341980934143, "learning_rate": 2.3637700598765313e-05, "loss": 0.0729, "step": 92100 }, { "epoch": 3.34726360927393, "grad_norm": 0.414693146944046, "learning_rate": 2.3632519019911142e-05, "loss": 0.1065, "step": 92110 }, { "epoch": 3.347627007776728, "grad_norm": 0.46231353282928467, "learning_rate": 
2.3627337499976855e-05, "loss": 0.5836, "step": 92120 }, { "epoch": 3.3479904062795263, "grad_norm": 0.6952545046806335, "learning_rate": 2.362215603918571e-05, "loss": 0.0865, "step": 92130 }, { "epoch": 3.3483538047823243, "grad_norm": 0.5076987743377686, "learning_rate": 2.361697463776097e-05, "loss": 0.0992, "step": 92140 }, { "epoch": 3.3487172032851227, "grad_norm": 0.7665526866912842, "learning_rate": 2.3611793295925865e-05, "loss": 0.0715, "step": 92150 }, { "epoch": 3.3490806017879207, "grad_norm": 0.5041813254356384, "learning_rate": 2.360661201390365e-05, "loss": 0.0631, "step": 92160 }, { "epoch": 3.3494440002907186, "grad_norm": 0.6250981092453003, "learning_rate": 2.360143079191756e-05, "loss": 0.0595, "step": 92170 }, { "epoch": 3.349807398793517, "grad_norm": 0.5737594962120056, "learning_rate": 2.3596249630190846e-05, "loss": 0.0698, "step": 92180 }, { "epoch": 3.350170797296315, "grad_norm": 0.33314749598503113, "learning_rate": 2.359106852894674e-05, "loss": 0.058, "step": 92190 }, { "epoch": 3.3505341957991135, "grad_norm": 1.2376643419265747, "learning_rate": 2.3585887488408483e-05, "loss": 0.0721, "step": 92200 }, { "epoch": 3.3508975943019115, "grad_norm": 6.422807216644287, "learning_rate": 2.35807065087993e-05, "loss": 0.1657, "step": 92210 }, { "epoch": 3.3512609928047095, "grad_norm": 1.1383922100067139, "learning_rate": 2.357552559034241e-05, "loss": 0.0738, "step": 92220 }, { "epoch": 3.351624391307508, "grad_norm": 0.5397285223007202, "learning_rate": 2.357034473326107e-05, "loss": 0.0631, "step": 92230 }, { "epoch": 3.351987789810306, "grad_norm": 0.4672096073627472, "learning_rate": 2.3565163937778485e-05, "loss": 0.0836, "step": 92240 }, { "epoch": 3.3523511883131043, "grad_norm": 0.714462161064148, "learning_rate": 2.3559983204117886e-05, "loss": 0.0699, "step": 92250 }, { "epoch": 3.3527145868159023, "grad_norm": 0.5036824941635132, "learning_rate": 2.355480253250248e-05, "loss": 0.6057, "step": 92260 }, { "epoch": 
3.3530779853187003, "grad_norm": 0.6620817184448242, "learning_rate": 2.3549621923155486e-05, "loss": 0.0665, "step": 92270 }, { "epoch": 3.3534413838214987, "grad_norm": 0.5807569622993469, "learning_rate": 2.354444137630013e-05, "loss": 0.0691, "step": 92280 }, { "epoch": 3.3538047823242967, "grad_norm": 0.5693409442901611, "learning_rate": 2.3539260892159618e-05, "loss": 0.0721, "step": 92290 }, { "epoch": 3.354168180827095, "grad_norm": 0.7940452098846436, "learning_rate": 2.3534080470957157e-05, "loss": 0.0749, "step": 92300 }, { "epoch": 3.354531579329893, "grad_norm": 0.3544544577598572, "learning_rate": 2.352890011291594e-05, "loss": 0.0782, "step": 92310 }, { "epoch": 3.354894977832691, "grad_norm": 0.5041877031326294, "learning_rate": 2.3523719818259196e-05, "loss": 0.0592, "step": 92320 }, { "epoch": 3.3552583763354895, "grad_norm": 0.4897719919681549, "learning_rate": 2.3518539587210112e-05, "loss": 0.0678, "step": 92330 }, { "epoch": 3.3556217748382875, "grad_norm": 1.1827727556228638, "learning_rate": 2.3513359419991884e-05, "loss": 0.0838, "step": 92340 }, { "epoch": 3.355985173341086, "grad_norm": 0.646219789981842, "learning_rate": 2.3508179316827713e-05, "loss": 0.068, "step": 92350 }, { "epoch": 3.356348571843884, "grad_norm": 0.49982723593711853, "learning_rate": 2.3502999277940772e-05, "loss": 0.0703, "step": 92360 }, { "epoch": 3.3567119703466823, "grad_norm": 0.8181835412979126, "learning_rate": 2.3497819303554276e-05, "loss": 0.0847, "step": 92370 }, { "epoch": 3.3570753688494803, "grad_norm": 1.0289931297302246, "learning_rate": 2.3492639393891408e-05, "loss": 0.0778, "step": 92380 }, { "epoch": 3.3574387673522788, "grad_norm": 0.6994947791099548, "learning_rate": 2.348745954917534e-05, "loss": 0.0865, "step": 92390 }, { "epoch": 3.3578021658550767, "grad_norm": 0.9630132913589478, "learning_rate": 2.348227976962926e-05, "loss": 0.0974, "step": 92400 }, { "epoch": 3.3578021658550767, "eval_loss": 0.31169602274894714, "eval_runtime": 
178.7618, "eval_samples_per_second": 41.474, "eval_steps_per_second": 5.186, "eval_wer": 0.14020549312905042, "step": 92400 }, { "epoch": 3.3581655643578747, "grad_norm": 0.2899853587150574, "learning_rate": 2.3477100055476334e-05, "loss": 0.0749, "step": 92410 }, { "epoch": 3.358528962860673, "grad_norm": 0.3742106854915619, "learning_rate": 2.347192040693976e-05, "loss": 0.0738, "step": 92420 }, { "epoch": 3.358892361363471, "grad_norm": 0.5455346703529358, "learning_rate": 2.3466740824242695e-05, "loss": 0.063, "step": 92430 }, { "epoch": 3.3592557598662696, "grad_norm": 0.29540014266967773, "learning_rate": 2.3461561307608315e-05, "loss": 0.0981, "step": 92440 }, { "epoch": 3.3596191583690675, "grad_norm": 1.146060824394226, "learning_rate": 2.3456381857259785e-05, "loss": 0.0866, "step": 92450 }, { "epoch": 3.3599825568718655, "grad_norm": 0.8182836174964905, "learning_rate": 2.345120247342026e-05, "loss": 0.1022, "step": 92460 }, { "epoch": 3.360345955374664, "grad_norm": 0.33180689811706543, "learning_rate": 2.3446023156312915e-05, "loss": 0.0737, "step": 92470 }, { "epoch": 3.360709353877462, "grad_norm": 0.6751521229743958, "learning_rate": 2.3440843906160907e-05, "loss": 0.0652, "step": 92480 }, { "epoch": 3.3610727523802604, "grad_norm": 0.37743857502937317, "learning_rate": 2.3435664723187384e-05, "loss": 0.0711, "step": 92490 }, { "epoch": 3.3614361508830584, "grad_norm": 0.6196742057800293, "learning_rate": 2.3430485607615494e-05, "loss": 0.0684, "step": 92500 }, { "epoch": 3.3617995493858563, "grad_norm": 0.6973705291748047, "learning_rate": 2.3425306559668404e-05, "loss": 0.1036, "step": 92510 }, { "epoch": 3.3621629478886548, "grad_norm": 0.2943952679634094, "learning_rate": 2.3420127579569257e-05, "loss": 0.131, "step": 92520 }, { "epoch": 3.3625263463914528, "grad_norm": 0.7704665660858154, "learning_rate": 2.3414948667541187e-05, "loss": 0.062, "step": 92530 }, { "epoch": 3.362889744894251, "grad_norm": 0.4366964101791382, "learning_rate": 
2.3409769823807337e-05, "loss": 0.2334, "step": 92540 }, { "epoch": 3.363253143397049, "grad_norm": 1.0481702089309692, "learning_rate": 2.340459104859084e-05, "loss": 0.0911, "step": 92550 }, { "epoch": 3.363616541899847, "grad_norm": 0.21111765503883362, "learning_rate": 2.339941234211485e-05, "loss": 0.0729, "step": 92560 }, { "epoch": 3.3639799404026456, "grad_norm": 0.42436930537223816, "learning_rate": 2.3394233704602484e-05, "loss": 0.6901, "step": 92570 }, { "epoch": 3.3643433389054436, "grad_norm": 1.4946538209915161, "learning_rate": 2.3389055136276874e-05, "loss": 0.0645, "step": 92580 }, { "epoch": 3.364706737408242, "grad_norm": 0.7017802000045776, "learning_rate": 2.3383876637361148e-05, "loss": 0.0914, "step": 92590 }, { "epoch": 3.36507013591104, "grad_norm": 0.6439974904060364, "learning_rate": 2.337869820807842e-05, "loss": 0.0786, "step": 92600 }, { "epoch": 3.365433534413838, "grad_norm": 0.9561066627502441, "learning_rate": 2.337351984865182e-05, "loss": 0.0977, "step": 92610 }, { "epoch": 3.3657969329166364, "grad_norm": 0.6108697652816772, "learning_rate": 2.336834155930447e-05, "loss": 0.0608, "step": 92620 }, { "epoch": 3.3661603314194344, "grad_norm": 0.5634490251541138, "learning_rate": 2.3363163340259476e-05, "loss": 0.2946, "step": 92630 }, { "epoch": 3.366523729922233, "grad_norm": 0.5420652031898499, "learning_rate": 2.335798519173995e-05, "loss": 0.0797, "step": 92640 }, { "epoch": 3.366887128425031, "grad_norm": 0.5687423944473267, "learning_rate": 2.3352807113968985e-05, "loss": 0.069, "step": 92650 }, { "epoch": 3.3672505269278292, "grad_norm": 0.5029795169830322, "learning_rate": 2.3347629107169715e-05, "loss": 0.0894, "step": 92660 }, { "epoch": 3.367613925430627, "grad_norm": 0.4098545014858246, "learning_rate": 2.3342451171565227e-05, "loss": 0.0689, "step": 92670 }, { "epoch": 3.3679773239334256, "grad_norm": 0.4281129539012909, "learning_rate": 2.333727330737862e-05, "loss": 0.0883, "step": 92680 }, { "epoch": 
3.3683407224362236, "grad_norm": 1.2608349323272705, "learning_rate": 2.333209551483298e-05, "loss": 0.0812, "step": 92690 }, { "epoch": 3.3687041209390216, "grad_norm": 0.6354079842567444, "learning_rate": 2.332691779415142e-05, "loss": 0.1057, "step": 92700 }, { "epoch": 3.36906751944182, "grad_norm": 0.5533850193023682, "learning_rate": 2.3321740145557018e-05, "loss": 0.081, "step": 92710 }, { "epoch": 3.369430917944618, "grad_norm": 0.7266316413879395, "learning_rate": 2.3316562569272865e-05, "loss": 0.0744, "step": 92720 }, { "epoch": 3.3697943164474164, "grad_norm": 0.7804214358329773, "learning_rate": 2.3311385065522038e-05, "loss": 0.0609, "step": 92730 }, { "epoch": 3.3701577149502144, "grad_norm": 1.9746454954147339, "learning_rate": 2.330620763452761e-05, "loss": 0.09, "step": 92740 }, { "epoch": 3.3705211134530124, "grad_norm": 0.7240809202194214, "learning_rate": 2.330103027651268e-05, "loss": 0.2196, "step": 92750 }, { "epoch": 3.370884511955811, "grad_norm": 0.37045255303382874, "learning_rate": 2.3295852991700314e-05, "loss": 0.0786, "step": 92760 }, { "epoch": 3.371247910458609, "grad_norm": 0.32502445578575134, "learning_rate": 2.3290675780313577e-05, "loss": 0.0598, "step": 92770 }, { "epoch": 3.3716113089614073, "grad_norm": 0.5612372159957886, "learning_rate": 2.3285498642575535e-05, "loss": 0.0646, "step": 92780 }, { "epoch": 3.3719747074642052, "grad_norm": 0.3779931664466858, "learning_rate": 2.328032157870925e-05, "loss": 0.0859, "step": 92790 }, { "epoch": 3.3723381059670032, "grad_norm": 1.5073649883270264, "learning_rate": 2.3275144588937797e-05, "loss": 0.0893, "step": 92800 }, { "epoch": 3.3727015044698017, "grad_norm": 2.095792293548584, "learning_rate": 2.3269967673484227e-05, "loss": 0.0537, "step": 92810 }, { "epoch": 3.3730649029725996, "grad_norm": 1.275193452835083, "learning_rate": 2.32647908325716e-05, "loss": 0.0813, "step": 92820 }, { "epoch": 3.373428301475398, "grad_norm": 0.4080447256565094, "learning_rate": 
2.3259614066422957e-05, "loss": 0.0716, "step": 92830 }, { "epoch": 3.373791699978196, "grad_norm": 0.6313503980636597, "learning_rate": 2.325443737526134e-05, "loss": 0.0792, "step": 92840 }, { "epoch": 3.374155098480994, "grad_norm": 0.5047944188117981, "learning_rate": 2.324926075930982e-05, "loss": 0.0532, "step": 92850 }, { "epoch": 3.3745184969837925, "grad_norm": 1.3697469234466553, "learning_rate": 2.3244084218791422e-05, "loss": 0.0832, "step": 92860 }, { "epoch": 3.3748818954865905, "grad_norm": 0.7875816226005554, "learning_rate": 2.3238907753929188e-05, "loss": 0.0736, "step": 92870 }, { "epoch": 3.375245293989389, "grad_norm": 0.43678218126296997, "learning_rate": 2.3233731364946143e-05, "loss": 0.5897, "step": 92880 }, { "epoch": 3.375608692492187, "grad_norm": 0.36059579253196716, "learning_rate": 2.322855505206534e-05, "loss": 0.1002, "step": 92890 }, { "epoch": 3.375972090994985, "grad_norm": 0.7359516620635986, "learning_rate": 2.3223378815509795e-05, "loss": 0.1078, "step": 92900 }, { "epoch": 3.3763354894977833, "grad_norm": 0.4600794017314911, "learning_rate": 2.3218202655502538e-05, "loss": 0.0678, "step": 92910 }, { "epoch": 3.3766988880005813, "grad_norm": 0.5198982357978821, "learning_rate": 2.321302657226659e-05, "loss": 0.0595, "step": 92920 }, { "epoch": 3.3770622865033797, "grad_norm": 1.273722529411316, "learning_rate": 2.320785056602495e-05, "loss": 0.0675, "step": 92930 }, { "epoch": 3.3774256850061777, "grad_norm": 0.33449608087539673, "learning_rate": 2.3202674637000675e-05, "loss": 0.0971, "step": 92940 }, { "epoch": 3.377789083508976, "grad_norm": 0.9987308382987976, "learning_rate": 2.3197498785416746e-05, "loss": 0.127, "step": 92950 }, { "epoch": 3.378152482011774, "grad_norm": 0.3726591467857361, "learning_rate": 2.3192323011496186e-05, "loss": 0.0732, "step": 92960 }, { "epoch": 3.3785158805145725, "grad_norm": 1.3593406677246094, "learning_rate": 2.3187147315461994e-05, "loss": 0.0619, "step": 92970 }, { "epoch": 
3.3788792790173705, "grad_norm": 0.44492968916893005, "learning_rate": 2.3181971697537165e-05, "loss": 0.0628, "step": 92980 }, { "epoch": 3.3792426775201685, "grad_norm": 0.7739204168319702, "learning_rate": 2.3176796157944713e-05, "loss": 0.0881, "step": 92990 }, { "epoch": 3.379606076022967, "grad_norm": 0.6815133690834045, "learning_rate": 2.317162069690763e-05, "loss": 0.0798, "step": 93000 }, { "epoch": 3.379606076022967, "eval_loss": 0.3195069134235382, "eval_runtime": 179.6087, "eval_samples_per_second": 41.279, "eval_steps_per_second": 5.161, "eval_wer": 0.13676548005881606, "step": 93000 }, { "epoch": 3.379969474525765, "grad_norm": 0.5755239725112915, "learning_rate": 2.316644531464891e-05, "loss": 0.072, "step": 93010 }, { "epoch": 3.3803328730285633, "grad_norm": 0.6259827017784119, "learning_rate": 2.3161270011391535e-05, "loss": 0.0585, "step": 93020 }, { "epoch": 3.3806962715313613, "grad_norm": 0.35530751943588257, "learning_rate": 2.315609478735848e-05, "loss": 0.0735, "step": 93030 }, { "epoch": 3.3810596700341593, "grad_norm": 0.5004699230194092, "learning_rate": 2.3150919642772752e-05, "loss": 0.0884, "step": 93040 }, { "epoch": 3.3814230685369577, "grad_norm": 0.8541852235794067, "learning_rate": 2.3145744577857316e-05, "loss": 0.0844, "step": 93050 }, { "epoch": 3.3817864670397557, "grad_norm": 1.4642248153686523, "learning_rate": 2.314056959283515e-05, "loss": 0.0759, "step": 93060 }, { "epoch": 3.382149865542554, "grad_norm": 1.696931004524231, "learning_rate": 2.3135394687929225e-05, "loss": 0.0605, "step": 93070 }, { "epoch": 3.382513264045352, "grad_norm": 2.729449987411499, "learning_rate": 2.31302198633625e-05, "loss": 0.068, "step": 93080 }, { "epoch": 3.38287666254815, "grad_norm": 0.6647607684135437, "learning_rate": 2.3125045119357953e-05, "loss": 0.0832, "step": 93090 }, { "epoch": 3.3832400610509485, "grad_norm": 0.4802834093570709, "learning_rate": 2.3119870456138545e-05, "loss": 0.0764, "step": 93100 }, { "epoch": 
3.3836034595537465, "grad_norm": 1.4223417043685913, "learning_rate": 2.311469587392723e-05, "loss": 0.0824, "step": 93110 }, { "epoch": 3.383966858056545, "grad_norm": 1.8741366863250732, "learning_rate": 2.310952137294695e-05, "loss": 0.0715, "step": 93120 }, { "epoch": 3.384330256559343, "grad_norm": 0.6077579855918884, "learning_rate": 2.3104346953420676e-05, "loss": 0.0657, "step": 93130 }, { "epoch": 3.384693655062141, "grad_norm": 0.47796136140823364, "learning_rate": 2.3099172615571353e-05, "loss": 0.0814, "step": 93140 }, { "epoch": 3.3850570535649394, "grad_norm": 1.0871938467025757, "learning_rate": 2.3093998359621916e-05, "loss": 0.0979, "step": 93150 }, { "epoch": 3.3854204520677373, "grad_norm": 0.46470001339912415, "learning_rate": 2.3088824185795305e-05, "loss": 0.0796, "step": 93160 }, { "epoch": 3.3857838505705358, "grad_norm": 0.7592546343803406, "learning_rate": 2.3083650094314453e-05, "loss": 0.0859, "step": 93170 }, { "epoch": 3.3861472490733338, "grad_norm": 0.29902932047843933, "learning_rate": 2.307847608540231e-05, "loss": 0.0703, "step": 93180 }, { "epoch": 3.3865106475761317, "grad_norm": 0.5462153553962708, "learning_rate": 2.307330215928179e-05, "loss": 0.069, "step": 93190 }, { "epoch": 3.38687404607893, "grad_norm": 1.0044230222702026, "learning_rate": 2.3068128316175834e-05, "loss": 0.0714, "step": 93200 }, { "epoch": 3.387237444581728, "grad_norm": 0.9057084918022156, "learning_rate": 2.306295455630735e-05, "loss": 0.0883, "step": 93210 }, { "epoch": 3.3876008430845266, "grad_norm": 0.3063741624355316, "learning_rate": 2.3057780879899252e-05, "loss": 0.0672, "step": 93220 }, { "epoch": 3.3879642415873246, "grad_norm": 0.4650433361530304, "learning_rate": 2.3052607287174475e-05, "loss": 0.0631, "step": 93230 }, { "epoch": 3.388327640090123, "grad_norm": 0.5927343368530273, "learning_rate": 2.3047433778355925e-05, "loss": 0.1448, "step": 93240 }, { "epoch": 3.388691038592921, "grad_norm": 0.8953503370285034, "learning_rate": 
2.3042260353666503e-05, "loss": 0.0677, "step": 93250 }, { "epoch": 3.3890544370957194, "grad_norm": 1.3331018686294556, "learning_rate": 2.303708701332912e-05, "loss": 0.0651, "step": 93260 }, { "epoch": 3.3894178355985174, "grad_norm": 0.5606054067611694, "learning_rate": 2.303191375756666e-05, "loss": 0.1045, "step": 93270 }, { "epoch": 3.3897812341013154, "grad_norm": 0.4888154864311218, "learning_rate": 2.3026740586602043e-05, "loss": 0.0742, "step": 93280 }, { "epoch": 3.390144632604114, "grad_norm": 0.45617663860321045, "learning_rate": 2.3021567500658156e-05, "loss": 0.0822, "step": 93290 }, { "epoch": 3.390508031106912, "grad_norm": 0.9321984052658081, "learning_rate": 2.3016394499957886e-05, "loss": 0.0726, "step": 93300 }, { "epoch": 3.39087142960971, "grad_norm": 0.3022707402706146, "learning_rate": 2.3011221584724108e-05, "loss": 0.0992, "step": 93310 }, { "epoch": 3.391234828112508, "grad_norm": 1.342934250831604, "learning_rate": 2.3006048755179723e-05, "loss": 0.0897, "step": 93320 }, { "epoch": 3.391598226615306, "grad_norm": 0.9461100101470947, "learning_rate": 2.3000876011547607e-05, "loss": 0.0695, "step": 93330 }, { "epoch": 3.3919616251181046, "grad_norm": 0.7840179204940796, "learning_rate": 2.299570335405063e-05, "loss": 0.0822, "step": 93340 }, { "epoch": 3.3923250236209026, "grad_norm": 0.6755959391593933, "learning_rate": 2.2990530782911664e-05, "loss": 0.1182, "step": 93350 }, { "epoch": 3.392688422123701, "grad_norm": 0.8153521418571472, "learning_rate": 2.2985358298353566e-05, "loss": 0.0852, "step": 93360 }, { "epoch": 3.393051820626499, "grad_norm": 0.6111595630645752, "learning_rate": 2.2980185900599222e-05, "loss": 0.0635, "step": 93370 }, { "epoch": 3.393415219129297, "grad_norm": 0.3639895021915436, "learning_rate": 2.297501358987148e-05, "loss": 0.1101, "step": 93380 }, { "epoch": 3.3937786176320954, "grad_norm": 0.7763181924819946, "learning_rate": 2.2969841366393195e-05, "loss": 0.088, "step": 93390 }, { "epoch": 
3.3941420161348934, "grad_norm": 1.595831274986267, "learning_rate": 2.2964669230387228e-05, "loss": 0.1059, "step": 93400 }, { "epoch": 3.394505414637692, "grad_norm": 0.5875428915023804, "learning_rate": 2.2959497182076408e-05, "loss": 0.0743, "step": 93410 }, { "epoch": 3.39486881314049, "grad_norm": 1.4419046640396118, "learning_rate": 2.2954325221683606e-05, "loss": 0.085, "step": 93420 }, { "epoch": 3.395232211643288, "grad_norm": 0.7197487354278564, "learning_rate": 2.294915334943165e-05, "loss": 0.0828, "step": 93430 }, { "epoch": 3.3955956101460862, "grad_norm": 0.403689444065094, "learning_rate": 2.294398156554338e-05, "loss": 0.0969, "step": 93440 }, { "epoch": 3.3959590086488842, "grad_norm": 0.3563007712364197, "learning_rate": 2.2938809870241632e-05, "loss": 0.1025, "step": 93450 }, { "epoch": 3.3963224071516827, "grad_norm": 0.41774362325668335, "learning_rate": 2.2933638263749218e-05, "loss": 0.09, "step": 93460 }, { "epoch": 3.3966858056544806, "grad_norm": 1.4661532640457153, "learning_rate": 2.2928466746288993e-05, "loss": 0.5754, "step": 93470 }, { "epoch": 3.3970492041572786, "grad_norm": 0.6082340478897095, "learning_rate": 2.2923295318083766e-05, "loss": 0.0627, "step": 93480 }, { "epoch": 3.397412602660077, "grad_norm": 1.2264482975006104, "learning_rate": 2.2918123979356353e-05, "loss": 0.0981, "step": 93490 }, { "epoch": 3.397776001162875, "grad_norm": 1.3761318922042847, "learning_rate": 2.2912952730329555e-05, "loss": 0.0723, "step": 93500 }, { "epoch": 3.3981393996656735, "grad_norm": 0.6967355608940125, "learning_rate": 2.290778157122622e-05, "loss": 0.0643, "step": 93510 }, { "epoch": 3.3985027981684715, "grad_norm": 0.5545636415481567, "learning_rate": 2.2902610502269122e-05, "loss": 0.0619, "step": 93520 }, { "epoch": 3.39886619667127, "grad_norm": 0.4898998737335205, "learning_rate": 2.289743952368108e-05, "loss": 1.7187, "step": 93530 }, { "epoch": 3.399229595174068, "grad_norm": 0.593694806098938, "learning_rate": 
2.2892268635684885e-05, "loss": 0.0942, "step": 93540 }, { "epoch": 3.3995929936768663, "grad_norm": 0.8465686440467834, "learning_rate": 2.2887097838503327e-05, "loss": 0.0883, "step": 93550 }, { "epoch": 3.3999563921796643, "grad_norm": 0.9347935318946838, "learning_rate": 2.2881927132359214e-05, "loss": 0.0872, "step": 93560 }, { "epoch": 3.4003197906824623, "grad_norm": 0.5704132914543152, "learning_rate": 2.287675651747533e-05, "loss": 1.4816, "step": 93570 }, { "epoch": 3.4006831891852607, "grad_norm": 0.25210240483283997, "learning_rate": 2.287158599407445e-05, "loss": 0.0696, "step": 93580 }, { "epoch": 3.4010465876880587, "grad_norm": 0.7095610499382019, "learning_rate": 2.2866415562379356e-05, "loss": 0.0818, "step": 93590 }, { "epoch": 3.401409986190857, "grad_norm": 1.3713339567184448, "learning_rate": 2.2861245222612812e-05, "loss": 0.088, "step": 93600 }, { "epoch": 3.401409986190857, "eval_loss": 0.29974231123924255, "eval_runtime": 178.5228, "eval_samples_per_second": 41.53, "eval_steps_per_second": 5.193, "eval_wer": 0.13912538348430664, "step": 93600 }, { "epoch": 3.401773384693655, "grad_norm": 1.1637344360351562, "learning_rate": 2.285659199560597e-05, "loss": 3.4301, "step": 93610 }, { "epoch": 3.402136783196453, "grad_norm": 0.5053747296333313, "learning_rate": 2.285142183111744e-05, "loss": 0.0693, "step": 93620 }, { "epoch": 3.4025001816992515, "grad_norm": 0.740875780582428, "learning_rate": 2.2846251759203496e-05, "loss": 0.0629, "step": 93630 }, { "epoch": 3.4028635802020495, "grad_norm": 5.731196880340576, "learning_rate": 2.2841081780086904e-05, "loss": 0.0998, "step": 93640 }, { "epoch": 3.403226978704848, "grad_norm": 0.7567720413208008, "learning_rate": 2.2835911893990414e-05, "loss": 0.0844, "step": 93650 }, { "epoch": 3.403590377207646, "grad_norm": 0.4909075200557709, "learning_rate": 2.283074210113677e-05, "loss": 0.0724, "step": 93660 }, { "epoch": 3.403953775710444, "grad_norm": 0.5206305980682373, "learning_rate": 
2.282557240174874e-05, "loss": 0.0583, "step": 93670 }, { "epoch": 3.4043171742132423, "grad_norm": 0.6271891593933105, "learning_rate": 2.2820402796049063e-05, "loss": 0.0776, "step": 93680 }, { "epoch": 3.4046805727160403, "grad_norm": 0.6527193784713745, "learning_rate": 2.281523328426047e-05, "loss": 0.085, "step": 93690 }, { "epoch": 3.4050439712188387, "grad_norm": 0.8757163882255554, "learning_rate": 2.2810063866605706e-05, "loss": 0.0707, "step": 93700 }, { "epoch": 3.4054073697216367, "grad_norm": 0.8993749022483826, "learning_rate": 2.280489454330748e-05, "loss": 0.0713, "step": 93710 }, { "epoch": 3.4057707682244347, "grad_norm": 0.611003577709198, "learning_rate": 2.2799725314588555e-05, "loss": 0.0635, "step": 93720 }, { "epoch": 3.406134166727233, "grad_norm": 0.4539841413497925, "learning_rate": 2.2794556180671636e-05, "loss": 0.0718, "step": 93730 }, { "epoch": 3.406497565230031, "grad_norm": 0.2736055850982666, "learning_rate": 2.2789387141779445e-05, "loss": 0.1861, "step": 93740 }, { "epoch": 3.4068609637328295, "grad_norm": 0.6625291109085083, "learning_rate": 2.2784218198134695e-05, "loss": 0.107, "step": 93750 }, { "epoch": 3.4072243622356275, "grad_norm": 0.49704796075820923, "learning_rate": 2.277904934996009e-05, "loss": 0.0621, "step": 93760 }, { "epoch": 3.4075877607384255, "grad_norm": 0.471886545419693, "learning_rate": 2.2773880597478356e-05, "loss": 0.1414, "step": 93770 }, { "epoch": 3.407951159241224, "grad_norm": 0.4752335548400879, "learning_rate": 2.2768711940912185e-05, "loss": 0.0657, "step": 93780 }, { "epoch": 3.408314557744022, "grad_norm": 0.6662150025367737, "learning_rate": 2.276354338048428e-05, "loss": 0.0933, "step": 93790 }, { "epoch": 3.4086779562468204, "grad_norm": 0.9255740642547607, "learning_rate": 2.275837491641732e-05, "loss": 0.1099, "step": 93800 }, { "epoch": 3.4090413547496183, "grad_norm": 0.515019953250885, "learning_rate": 2.2753206548934024e-05, "loss": 0.0861, "step": 93810 }, { "epoch": 
3.4094047532524168, "grad_norm": 0.33219701051712036, "learning_rate": 2.2748038278257063e-05, "loss": 0.0842, "step": 93820 }, { "epoch": 3.4097681517552147, "grad_norm": 0.40631362795829773, "learning_rate": 2.2742870104609114e-05, "loss": 0.0673, "step": 93830 }, { "epoch": 3.410131550258013, "grad_norm": 0.4353393316268921, "learning_rate": 2.2737702028212868e-05, "loss": 0.0768, "step": 93840 }, { "epoch": 3.410494948760811, "grad_norm": 1.10258150100708, "learning_rate": 2.273253404929098e-05, "loss": 0.0884, "step": 93850 }, { "epoch": 3.410858347263609, "grad_norm": 3.386838912963867, "learning_rate": 2.2727366168066142e-05, "loss": 0.1114, "step": 93860 }, { "epoch": 3.4112217457664076, "grad_norm": 1.1398248672485352, "learning_rate": 2.2722198384761008e-05, "loss": 0.0541, "step": 93870 }, { "epoch": 3.4115851442692056, "grad_norm": 0.950499951839447, "learning_rate": 2.2717030699598245e-05, "loss": 0.0961, "step": 93880 }, { "epoch": 3.411948542772004, "grad_norm": 0.48193359375, "learning_rate": 2.2711863112800506e-05, "loss": 0.4101, "step": 93890 }, { "epoch": 3.412311941274802, "grad_norm": 0.827944815158844, "learning_rate": 2.270669562459043e-05, "loss": 0.0864, "step": 93900 }, { "epoch": 3.4126753397776, "grad_norm": 1.7731389999389648, "learning_rate": 2.270152823519069e-05, "loss": 0.1077, "step": 93910 }, { "epoch": 3.4130387382803984, "grad_norm": 0.750033438205719, "learning_rate": 2.2696360944823923e-05, "loss": 0.1013, "step": 93920 }, { "epoch": 3.4134021367831964, "grad_norm": 1.206369161605835, "learning_rate": 2.269119375371277e-05, "loss": 0.3487, "step": 93930 }, { "epoch": 3.413765535285995, "grad_norm": 0.38662195205688477, "learning_rate": 2.2686026662079858e-05, "loss": 0.1012, "step": 93940 }, { "epoch": 3.414128933788793, "grad_norm": 0.5544074177742004, "learning_rate": 2.2680859670147815e-05, "loss": 0.0925, "step": 93950 }, { "epoch": 3.4144923322915908, "grad_norm": 1.074537754058838, "learning_rate": 
2.267569277813929e-05, "loss": 0.0875, "step": 93960 }, { "epoch": 3.414855730794389, "grad_norm": 3.1776864528656006, "learning_rate": 2.267052598627689e-05, "loss": 3.4631, "step": 93970 }, { "epoch": 3.415219129297187, "grad_norm": 0.5976073741912842, "learning_rate": 2.266535929478324e-05, "loss": 0.0741, "step": 93980 }, { "epoch": 3.4155825277999856, "grad_norm": 0.507327675819397, "learning_rate": 2.2660192703880935e-05, "loss": 0.0834, "step": 93990 }, { "epoch": 3.4159459263027836, "grad_norm": 0.46804341673851013, "learning_rate": 2.2655026213792617e-05, "loss": 0.0912, "step": 94000 }, { "epoch": 3.4163093248055816, "grad_norm": 0.6629424095153809, "learning_rate": 2.2649859824740876e-05, "loss": 0.0829, "step": 94010 }, { "epoch": 3.41667272330838, "grad_norm": 1.2816437482833862, "learning_rate": 2.2644693536948315e-05, "loss": 0.0895, "step": 94020 }, { "epoch": 3.417036121811178, "grad_norm": 0.32198429107666016, "learning_rate": 2.2639527350637525e-05, "loss": 0.0852, "step": 94030 }, { "epoch": 3.4173995203139764, "grad_norm": 0.49124446511268616, "learning_rate": 2.263436126603109e-05, "loss": 0.5835, "step": 94040 }, { "epoch": 3.4177629188167744, "grad_norm": 0.45547664165496826, "learning_rate": 2.262919528335163e-05, "loss": 0.0716, "step": 94050 }, { "epoch": 3.4181263173195724, "grad_norm": 0.4709664285182953, "learning_rate": 2.2624029402821705e-05, "loss": 0.0837, "step": 94060 }, { "epoch": 3.418489715822371, "grad_norm": 0.8313547372817993, "learning_rate": 2.2618863624663898e-05, "loss": 0.0755, "step": 94070 }, { "epoch": 3.418853114325169, "grad_norm": 0.8527863025665283, "learning_rate": 2.2613697949100782e-05, "loss": 0.0617, "step": 94080 }, { "epoch": 3.4192165128279672, "grad_norm": 0.39365309476852417, "learning_rate": 2.2608532376354932e-05, "loss": 0.08, "step": 94090 }, { "epoch": 3.419579911330765, "grad_norm": 0.9611566662788391, "learning_rate": 2.2603366906648916e-05, "loss": 0.0964, "step": 94100 }, { "epoch": 
3.4199433098335636, "grad_norm": 0.5890967845916748, "learning_rate": 2.2598201540205294e-05, "loss": 0.0627, "step": 94110 }, { "epoch": 3.4203067083363616, "grad_norm": 0.24214434623718262, "learning_rate": 2.259303627724662e-05, "loss": 0.0614, "step": 94120 }, { "epoch": 3.42067010683916, "grad_norm": 0.6111648678779602, "learning_rate": 2.2587871117995445e-05, "loss": 0.0712, "step": 94130 }, { "epoch": 3.421033505341958, "grad_norm": 3.44565486907959, "learning_rate": 2.2582706062674325e-05, "loss": 0.6448, "step": 94140 }, { "epoch": 3.421396903844756, "grad_norm": 2.6556754112243652, "learning_rate": 2.25775411115058e-05, "loss": 0.0893, "step": 94150 }, { "epoch": 3.4217603023475545, "grad_norm": 1.2639325857162476, "learning_rate": 2.257237626471241e-05, "loss": 0.0819, "step": 94160 }, { "epoch": 3.4221237008503524, "grad_norm": 0.7145587801933289, "learning_rate": 2.2567211522516685e-05, "loss": 0.0752, "step": 94170 }, { "epoch": 3.422487099353151, "grad_norm": 1.2810157537460327, "learning_rate": 2.2562046885141167e-05, "loss": 0.0675, "step": 94180 }, { "epoch": 3.422850497855949, "grad_norm": 0.4140676259994507, "learning_rate": 2.2556882352808367e-05, "loss": 0.0819, "step": 94190 }, { "epoch": 3.423213896358747, "grad_norm": 0.9687098860740662, "learning_rate": 2.2551717925740817e-05, "loss": 0.0944, "step": 94200 }, { "epoch": 3.423213896358747, "eval_loss": 0.31988459825515747, "eval_runtime": 180.0602, "eval_samples_per_second": 41.175, "eval_steps_per_second": 5.148, "eval_wer": 0.1369107048850001, "step": 94200 }, { "epoch": 3.4235772948615453, "grad_norm": 0.8577378392219543, "learning_rate": 2.2546553604161032e-05, "loss": 0.0767, "step": 94210 }, { "epoch": 3.4239406933643433, "grad_norm": 0.5929591655731201, "learning_rate": 2.254138938829152e-05, "loss": 0.0623, "step": 94220 }, { "epoch": 3.4243040918671417, "grad_norm": 0.5392001867294312, "learning_rate": 2.2536225278354787e-05, "loss": 0.0848, "step": 94230 }, { "epoch": 
3.4246674903699397, "grad_norm": 0.5557697415351868, "learning_rate": 2.253106127457335e-05, "loss": 0.0784, "step": 94240 }, { "epoch": 3.4250308888727377, "grad_norm": 0.5030058026313782, "learning_rate": 2.2525897377169696e-05, "loss": 0.0786, "step": 94250 }, { "epoch": 3.425394287375536, "grad_norm": 1.1300536394119263, "learning_rate": 2.2520733586366323e-05, "loss": 0.0868, "step": 94260 }, { "epoch": 3.425757685878334, "grad_norm": 0.6011260747909546, "learning_rate": 2.2515569902385714e-05, "loss": 0.0811, "step": 94270 }, { "epoch": 3.4261210843811325, "grad_norm": 0.9162232279777527, "learning_rate": 2.2510406325450357e-05, "loss": 0.0749, "step": 94280 }, { "epoch": 3.4264844828839305, "grad_norm": 1.2079869508743286, "learning_rate": 2.2505242855782737e-05, "loss": 0.0844, "step": 94290 }, { "epoch": 3.4268478813867285, "grad_norm": 0.703209638595581, "learning_rate": 2.2500079493605327e-05, "loss": 0.0921, "step": 94300 }, { "epoch": 3.427211279889527, "grad_norm": 1.7158406972885132, "learning_rate": 2.249491623914059e-05, "loss": 0.0942, "step": 94310 }, { "epoch": 3.427574678392325, "grad_norm": 0.5088964700698853, "learning_rate": 2.248975309261101e-05, "loss": 0.0584, "step": 94320 }, { "epoch": 3.4279380768951233, "grad_norm": 0.3864693343639374, "learning_rate": 2.2484590054239024e-05, "loss": 0.0579, "step": 94330 }, { "epoch": 3.4283014753979213, "grad_norm": 0.4104454517364502, "learning_rate": 2.2479427124247117e-05, "loss": 0.1607, "step": 94340 }, { "epoch": 3.4286648739007197, "grad_norm": 1.0903159379959106, "learning_rate": 2.247426430285772e-05, "loss": 0.0783, "step": 94350 }, { "epoch": 3.4290282724035177, "grad_norm": 0.8514654636383057, "learning_rate": 2.2469101590293284e-05, "loss": 0.065, "step": 94360 }, { "epoch": 3.4293916709063157, "grad_norm": 0.6110685467720032, "learning_rate": 2.246393898677626e-05, "loss": 0.0635, "step": 94370 }, { "epoch": 3.429755069409114, "grad_norm": 0.38304954767227173, "learning_rate": 
2.245877649252908e-05, "loss": 0.069, "step": 94380 }, { "epoch": 3.430118467911912, "grad_norm": 1.2440117597579956, "learning_rate": 2.245361410777418e-05, "loss": 0.1056, "step": 94390 }, { "epoch": 3.4304818664147105, "grad_norm": 0.7492786645889282, "learning_rate": 2.2448451832733987e-05, "loss": 0.0809, "step": 94400 }, { "epoch": 3.4308452649175085, "grad_norm": 0.4058247208595276, "learning_rate": 2.244328966763093e-05, "loss": 0.0934, "step": 94410 }, { "epoch": 3.431208663420307, "grad_norm": 1.7672019004821777, "learning_rate": 2.243812761268742e-05, "loss": 0.0696, "step": 94420 }, { "epoch": 3.431572061923105, "grad_norm": 0.4631694555282593, "learning_rate": 2.2432965668125878e-05, "loss": 2.0515, "step": 94430 }, { "epoch": 3.431935460425903, "grad_norm": 1.0094584226608276, "learning_rate": 2.2427803834168716e-05, "loss": 0.0686, "step": 94440 }, { "epoch": 3.4322988589287013, "grad_norm": 0.7469279766082764, "learning_rate": 2.2422642111038328e-05, "loss": 0.0753, "step": 94450 }, { "epoch": 3.4326622574314993, "grad_norm": 0.5456721186637878, "learning_rate": 2.2417480498957126e-05, "loss": 0.1019, "step": 94460 }, { "epoch": 3.4330256559342978, "grad_norm": 2.7943344116210938, "learning_rate": 2.2412318998147492e-05, "loss": 0.0705, "step": 94470 }, { "epoch": 3.4333890544370957, "grad_norm": 5.648090839385986, "learning_rate": 2.2407157608831836e-05, "loss": 0.0853, "step": 94480 }, { "epoch": 3.4337524529398937, "grad_norm": 0.7561296224594116, "learning_rate": 2.2401996331232528e-05, "loss": 0.0796, "step": 94490 }, { "epoch": 3.434115851442692, "grad_norm": 1.078397274017334, "learning_rate": 2.2396835165571954e-05, "loss": 0.0706, "step": 94500 }, { "epoch": 3.43447924994549, "grad_norm": 0.31419476866722107, "learning_rate": 2.2391674112072498e-05, "loss": 0.0871, "step": 94510 }, { "epoch": 3.4348426484482886, "grad_norm": 0.4185982644557953, "learning_rate": 2.2386513170956513e-05, "loss": 0.0643, "step": 94520 }, { "epoch": 
3.4352060469510866, "grad_norm": 0.8538812398910522, "learning_rate": 2.2381352342446385e-05, "loss": 0.0627, "step": 94530 }, { "epoch": 3.4355694454538845, "grad_norm": 0.42258143424987793, "learning_rate": 2.2376191626764462e-05, "loss": 0.0774, "step": 94540 }, { "epoch": 3.435932843956683, "grad_norm": 0.7472050786018372, "learning_rate": 2.237103102413311e-05, "loss": 0.0717, "step": 94550 }, { "epoch": 3.436296242459481, "grad_norm": 1.003833293914795, "learning_rate": 2.2365870534774678e-05, "loss": 0.1762, "step": 94560 }, { "epoch": 3.4366596409622794, "grad_norm": 3.8594932556152344, "learning_rate": 2.2360710158911507e-05, "loss": 0.0672, "step": 94570 }, { "epoch": 3.4370230394650774, "grad_norm": 2.909346103668213, "learning_rate": 2.235554989676595e-05, "loss": 0.0676, "step": 94580 }, { "epoch": 3.4373864379678754, "grad_norm": 0.6960200667381287, "learning_rate": 2.235038974856033e-05, "loss": 0.0823, "step": 94590 }, { "epoch": 3.437749836470674, "grad_norm": 0.5080627202987671, "learning_rate": 2.2345229714516998e-05, "loss": 0.1036, "step": 94600 }, { "epoch": 3.4381132349734718, "grad_norm": 0.4168925881385803, "learning_rate": 2.2340069794858267e-05, "loss": 0.0925, "step": 94610 }, { "epoch": 3.43847663347627, "grad_norm": 0.32866325974464417, "learning_rate": 2.233490998980647e-05, "loss": 0.0729, "step": 94620 }, { "epoch": 3.438840031979068, "grad_norm": 0.5904275178909302, "learning_rate": 2.2329750299583913e-05, "loss": 0.2434, "step": 94630 }, { "epoch": 3.4392034304818666, "grad_norm": 0.8439253568649292, "learning_rate": 2.232459072441292e-05, "loss": 0.0738, "step": 94640 }, { "epoch": 3.4395668289846646, "grad_norm": 2.668860912322998, "learning_rate": 2.2319431264515792e-05, "loss": 0.0934, "step": 94650 }, { "epoch": 3.4399302274874626, "grad_norm": 0.3184053301811218, "learning_rate": 2.231427192011483e-05, "loss": 0.0986, "step": 94660 }, { "epoch": 3.440293625990261, "grad_norm": 1.6480865478515625, "learning_rate": 
2.2309112691432337e-05, "loss": 3.1534, "step": 94670 }, { "epoch": 3.440657024493059, "grad_norm": 1.2210397720336914, "learning_rate": 2.2303953578690602e-05, "loss": 0.0899, "step": 94680 }, { "epoch": 3.4410204229958574, "grad_norm": 0.8659685254096985, "learning_rate": 2.2298794582111922e-05, "loss": 0.0755, "step": 94690 }, { "epoch": 3.4413838214986554, "grad_norm": 1.4027395248413086, "learning_rate": 2.229363570191857e-05, "loss": 0.0773, "step": 94700 }, { "epoch": 3.441747220001454, "grad_norm": 0.47845137119293213, "learning_rate": 2.228847693833282e-05, "loss": 0.0872, "step": 94710 }, { "epoch": 3.442110618504252, "grad_norm": 1.3525196313858032, "learning_rate": 2.228331829157695e-05, "loss": 0.0653, "step": 94720 }, { "epoch": 3.44247401700705, "grad_norm": 1.1353908777236938, "learning_rate": 2.2278159761873235e-05, "loss": 0.0758, "step": 94730 }, { "epoch": 3.4428374155098482, "grad_norm": 1.6163307428359985, "learning_rate": 2.2273001349443935e-05, "loss": 0.8463, "step": 94740 }, { "epoch": 3.443200814012646, "grad_norm": 0.5957239866256714, "learning_rate": 2.22678430545113e-05, "loss": 0.0654, "step": 94750 }, { "epoch": 3.4435642125154446, "grad_norm": 1.3498693704605103, "learning_rate": 2.2262684877297586e-05, "loss": 0.0716, "step": 94760 }, { "epoch": 3.4439276110182426, "grad_norm": 4.1495208740234375, "learning_rate": 2.2257526818025036e-05, "loss": 0.0578, "step": 94770 }, { "epoch": 3.4442910095210406, "grad_norm": 0.5842284560203552, "learning_rate": 2.2252368876915903e-05, "loss": 0.0851, "step": 94780 }, { "epoch": 3.444654408023839, "grad_norm": 0.6782490015029907, "learning_rate": 2.2247211054192425e-05, "loss": 0.0831, "step": 94790 }, { "epoch": 3.445017806526637, "grad_norm": 1.1757545471191406, "learning_rate": 2.224205335007682e-05, "loss": 0.0995, "step": 94800 }, { "epoch": 3.445017806526637, "eval_loss": 0.31772205233573914, "eval_runtime": 178.9226, "eval_samples_per_second": 41.437, "eval_steps_per_second": 5.181, 
"eval_wer": 0.1385807903861165, "step": 94800 }, { "epoch": 3.4453812050294355, "grad_norm": 2.117018461227417, "learning_rate": 2.223689576479132e-05, "loss": 0.0569, "step": 94810 }, { "epoch": 3.4457446035322334, "grad_norm": 0.7748499512672424, "learning_rate": 2.2231738298558158e-05, "loss": 0.0602, "step": 94820 }, { "epoch": 3.4461080020350314, "grad_norm": 0.43736356496810913, "learning_rate": 2.2226580951599544e-05, "loss": 0.0952, "step": 94830 }, { "epoch": 3.44647140053783, "grad_norm": 0.4574269950389862, "learning_rate": 2.222142372413769e-05, "loss": 0.07, "step": 94840 }, { "epoch": 3.446834799040628, "grad_norm": 0.883139431476593, "learning_rate": 2.2216266616394793e-05, "loss": 0.089, "step": 94850 }, { "epoch": 3.4471981975434263, "grad_norm": 0.8173096179962158, "learning_rate": 2.2211109628593067e-05, "loss": 0.1002, "step": 94860 }, { "epoch": 3.4475615960462243, "grad_norm": 0.5318263173103333, "learning_rate": 2.2205952760954704e-05, "loss": 0.0586, "step": 94870 }, { "epoch": 3.4479249945490222, "grad_norm": 0.18389153480529785, "learning_rate": 2.2200796013701898e-05, "loss": 0.0589, "step": 94880 }, { "epoch": 3.4482883930518207, "grad_norm": 0.47492220997810364, "learning_rate": 2.2195639387056833e-05, "loss": 0.6377, "step": 94890 }, { "epoch": 3.4486517915546187, "grad_norm": 0.3797650635242462, "learning_rate": 2.219048288124168e-05, "loss": 0.086, "step": 94900 }, { "epoch": 3.449015190057417, "grad_norm": 1.8727638721466064, "learning_rate": 2.218532649647863e-05, "loss": 0.0876, "step": 94910 }, { "epoch": 3.449378588560215, "grad_norm": 1.5461180210113525, "learning_rate": 2.218017023298985e-05, "loss": 0.0739, "step": 94920 }, { "epoch": 3.4497419870630135, "grad_norm": 0.7635065317153931, "learning_rate": 2.2175014090997497e-05, "loss": 0.0724, "step": 94930 }, { "epoch": 3.4501053855658115, "grad_norm": 0.9960238337516785, "learning_rate": 2.216985807072374e-05, "loss": 0.1, "step": 94940 }, { "epoch": 3.4504687840686095, 
"grad_norm": 0.8764038681983948, "learning_rate": 2.2164702172390717e-05, "loss": 0.0761, "step": 94950 }, { "epoch": 3.450832182571408, "grad_norm": 0.46660447120666504, "learning_rate": 2.21595463962206e-05, "loss": 0.0609, "step": 94960 }, { "epoch": 3.451195581074206, "grad_norm": 0.7150638103485107, "learning_rate": 2.215439074243552e-05, "loss": 0.0697, "step": 94970 }, { "epoch": 3.4515589795770043, "grad_norm": 1.3236603736877441, "learning_rate": 2.2149235211257624e-05, "loss": 0.0812, "step": 94980 }, { "epoch": 3.4519223780798023, "grad_norm": 0.8594760894775391, "learning_rate": 2.214407980290903e-05, "loss": 0.0899, "step": 94990 }, { "epoch": 3.4522857765826007, "grad_norm": 0.6767681837081909, "learning_rate": 2.2138924517611874e-05, "loss": 0.0807, "step": 95000 }, { "epoch": 3.4526491750853987, "grad_norm": 0.4284761846065521, "learning_rate": 2.213376935558829e-05, "loss": 0.0719, "step": 95010 }, { "epoch": 3.4530125735881967, "grad_norm": 0.35260239243507385, "learning_rate": 2.2128614317060385e-05, "loss": 0.0685, "step": 95020 }, { "epoch": 3.453375972090995, "grad_norm": 0.7802332043647766, "learning_rate": 2.2123459402250275e-05, "loss": 0.0772, "step": 95030 }, { "epoch": 3.453739370593793, "grad_norm": 0.5842748284339905, "learning_rate": 2.211830461138005e-05, "loss": 0.0744, "step": 95040 }, { "epoch": 3.4541027690965915, "grad_norm": 0.6803625822067261, "learning_rate": 2.2113149944671842e-05, "loss": 2.0802, "step": 95050 }, { "epoch": 3.4544661675993895, "grad_norm": 3.472691774368286, "learning_rate": 2.2107995402347726e-05, "loss": 0.1059, "step": 95060 }, { "epoch": 3.4548295661021875, "grad_norm": 0.8646115660667419, "learning_rate": 2.21028409846298e-05, "loss": 0.0745, "step": 95070 }, { "epoch": 3.455192964604986, "grad_norm": 0.9967368245124817, "learning_rate": 2.2097686691740148e-05, "loss": 0.0703, "step": 95080 }, { "epoch": 3.455556363107784, "grad_norm": 3.0647103786468506, "learning_rate": 2.2092532523900842e-05, 
"loss": 0.0838, "step": 95090 }, { "epoch": 3.4559197616105823, "grad_norm": 1.3772906064987183, "learning_rate": 2.208737848133397e-05, "loss": 0.1081, "step": 95100 }, { "epoch": 3.4562831601133803, "grad_norm": 0.9632165431976318, "learning_rate": 2.20822245642616e-05, "loss": 0.0736, "step": 95110 }, { "epoch": 3.4566465586161783, "grad_norm": 0.6966424584388733, "learning_rate": 2.207707077290579e-05, "loss": 0.0656, "step": 95120 }, { "epoch": 3.4570099571189767, "grad_norm": 0.45801427960395813, "learning_rate": 2.2071917107488604e-05, "loss": 0.0616, "step": 95130 }, { "epoch": 3.4573733556217747, "grad_norm": 0.4734851121902466, "learning_rate": 2.206676356823208e-05, "loss": 0.0897, "step": 95140 }, { "epoch": 3.457736754124573, "grad_norm": 0.842993438243866, "learning_rate": 2.2061610155358287e-05, "loss": 0.0648, "step": 95150 }, { "epoch": 3.458100152627371, "grad_norm": 4.05435848236084, "learning_rate": 2.2056456869089256e-05, "loss": 0.0896, "step": 95160 }, { "epoch": 3.458463551130169, "grad_norm": 0.8322821855545044, "learning_rate": 2.2051303709647027e-05, "loss": 0.07, "step": 95170 }, { "epoch": 3.4588269496329676, "grad_norm": 0.5541922450065613, "learning_rate": 2.2046150677253618e-05, "loss": 0.0852, "step": 95180 }, { "epoch": 3.4591903481357655, "grad_norm": 0.5772917866706848, "learning_rate": 2.2040997772131077e-05, "loss": 0.0837, "step": 95190 }, { "epoch": 3.459553746638564, "grad_norm": 0.5851882100105286, "learning_rate": 2.2035844994501418e-05, "loss": 0.089, "step": 95200 }, { "epoch": 3.459917145141362, "grad_norm": 1.0712413787841797, "learning_rate": 2.2030692344586647e-05, "loss": 0.1188, "step": 95210 }, { "epoch": 3.4602805436441604, "grad_norm": 0.4828985333442688, "learning_rate": 2.202553982260878e-05, "loss": 0.0737, "step": 95220 }, { "epoch": 3.4606439421469584, "grad_norm": 0.3994056284427643, "learning_rate": 2.2020387428789807e-05, "loss": 0.0601, "step": 95230 }, { "epoch": 3.4610073406497563, "grad_norm": 
0.8096089363098145, "learning_rate": 2.201523516335175e-05, "loss": 0.081, "step": 95240 }, { "epoch": 3.4613707391525548, "grad_norm": 1.1060764789581299, "learning_rate": 2.2010083026516588e-05, "loss": 0.0827, "step": 95250 }, { "epoch": 3.4617341376553528, "grad_norm": 0.3895101547241211, "learning_rate": 2.2004931018506313e-05, "loss": 0.0759, "step": 95260 }, { "epoch": 3.462097536158151, "grad_norm": 0.4792887568473816, "learning_rate": 2.1999779139542903e-05, "loss": 0.0839, "step": 95270 }, { "epoch": 3.462460934660949, "grad_norm": 0.8363034129142761, "learning_rate": 2.1994627389848325e-05, "loss": 0.0668, "step": 95280 }, { "epoch": 3.4628243331637476, "grad_norm": 0.4887206256389618, "learning_rate": 2.198947576964457e-05, "loss": 0.0653, "step": 95290 }, { "epoch": 3.4631877316665456, "grad_norm": 4.43966817855835, "learning_rate": 2.198432427915359e-05, "loss": 0.113, "step": 95300 }, { "epoch": 3.4635511301693436, "grad_norm": 0.7550996541976929, "learning_rate": 2.197917291859735e-05, "loss": 0.1061, "step": 95310 }, { "epoch": 3.463914528672142, "grad_norm": 0.64354407787323, "learning_rate": 2.1974021688197797e-05, "loss": 0.0585, "step": 95320 }, { "epoch": 3.46427792717494, "grad_norm": 1.6076833009719849, "learning_rate": 2.1968870588176877e-05, "loss": 0.0663, "step": 95330 }, { "epoch": 3.4646413256777384, "grad_norm": 0.5022009015083313, "learning_rate": 2.1963719618756548e-05, "loss": 0.1249, "step": 95340 }, { "epoch": 3.4650047241805364, "grad_norm": 0.5584505796432495, "learning_rate": 2.1958568780158736e-05, "loss": 0.082, "step": 95350 }, { "epoch": 3.4653681226833344, "grad_norm": 1.0235668420791626, "learning_rate": 2.1953418072605375e-05, "loss": 0.079, "step": 95360 }, { "epoch": 3.465731521186133, "grad_norm": 1.6643403768539429, "learning_rate": 2.194826749631839e-05, "loss": 0.0644, "step": 95370 }, { "epoch": 3.466094919688931, "grad_norm": 0.6343129873275757, "learning_rate": 2.1943117051519688e-05, "loss": 0.0642, "step": 
95380 }, { "epoch": 3.4664583181917292, "grad_norm": 0.3787631392478943, "learning_rate": 2.193796673843121e-05, "loss": 0.0764, "step": 95390 }, { "epoch": 3.466821716694527, "grad_norm": 0.6121103763580322, "learning_rate": 2.1932816557274846e-05, "loss": 0.1018, "step": 95400 }, { "epoch": 3.466821716694527, "eval_loss": 0.32295721769332886, "eval_runtime": 179.2759, "eval_samples_per_second": 41.355, "eval_steps_per_second": 5.171, "eval_wer": 0.13823588142392942, "step": 95400 }, { "epoch": 3.467185115197325, "grad_norm": 0.6954236626625061, "learning_rate": 2.1927666508272505e-05, "loss": 0.0636, "step": 95410 }, { "epoch": 3.4675485137001236, "grad_norm": 1.4941720962524414, "learning_rate": 2.1922516591646072e-05, "loss": 0.0725, "step": 95420 }, { "epoch": 3.4679119122029216, "grad_norm": 0.487307071685791, "learning_rate": 2.1917366807617463e-05, "loss": 0.0668, "step": 95430 }, { "epoch": 3.46827531070572, "grad_norm": 0.6700599193572998, "learning_rate": 2.1912217156408547e-05, "loss": 0.0679, "step": 95440 }, { "epoch": 3.468638709208518, "grad_norm": 0.6767346858978271, "learning_rate": 2.1907067638241208e-05, "loss": 0.0813, "step": 95450 }, { "epoch": 3.469002107711316, "grad_norm": 2.5367612838745117, "learning_rate": 2.190191825333732e-05, "loss": 0.1077, "step": 95460 }, { "epoch": 3.4693655062141144, "grad_norm": 0.5402595400810242, "learning_rate": 2.1896769001918742e-05, "loss": 0.073, "step": 95470 }, { "epoch": 3.4697289047169124, "grad_norm": 0.6329225301742554, "learning_rate": 2.1891619884207354e-05, "loss": 0.085, "step": 95480 }, { "epoch": 3.470092303219711, "grad_norm": 0.5831683874130249, "learning_rate": 2.1886470900425008e-05, "loss": 0.0892, "step": 95490 }, { "epoch": 3.470455701722509, "grad_norm": 2.0767431259155273, "learning_rate": 2.188132205079355e-05, "loss": 0.0894, "step": 95500 }, { "epoch": 3.4708191002253073, "grad_norm": 0.6538608074188232, "learning_rate": 2.1876173335534835e-05, "loss": 0.0625, "step": 95510 }, { 
"epoch": 3.4711824987281052, "grad_norm": 2.3269765377044678, "learning_rate": 2.1871024754870677e-05, "loss": 0.066, "step": 95520 }, { "epoch": 3.4715458972309032, "grad_norm": 0.8417708873748779, "learning_rate": 2.1865876309022947e-05, "loss": 0.0716, "step": 95530 }, { "epoch": 3.4719092957337017, "grad_norm": 0.220072403550148, "learning_rate": 2.186072799821345e-05, "loss": 0.0811, "step": 95540 }, { "epoch": 3.4722726942364996, "grad_norm": 0.7579740285873413, "learning_rate": 2.185557982266402e-05, "loss": 0.0944, "step": 95550 }, { "epoch": 3.472636092739298, "grad_norm": 0.6758162379264832, "learning_rate": 2.1850431782596466e-05, "loss": 0.0807, "step": 95560 }, { "epoch": 3.472999491242096, "grad_norm": 1.1941717863082886, "learning_rate": 2.1845283878232585e-05, "loss": 0.0713, "step": 95570 }, { "epoch": 3.4733628897448945, "grad_norm": 1.0327314138412476, "learning_rate": 2.1840136109794213e-05, "loss": 0.094, "step": 95580 }, { "epoch": 3.4737262882476925, "grad_norm": 0.4671393036842346, "learning_rate": 2.183498847750313e-05, "loss": 0.074, "step": 95590 }, { "epoch": 3.4740896867504905, "grad_norm": 0.4938909113407135, "learning_rate": 2.1829840981581134e-05, "loss": 0.0846, "step": 95600 }, { "epoch": 3.474453085253289, "grad_norm": 1.201877474784851, "learning_rate": 2.182469362225e-05, "loss": 0.0852, "step": 95610 }, { "epoch": 3.474816483756087, "grad_norm": 0.8695741295814514, "learning_rate": 2.181954639973153e-05, "loss": 0.0635, "step": 95620 }, { "epoch": 3.4751798822588853, "grad_norm": 0.5013503432273865, "learning_rate": 2.1814399314247492e-05, "loss": 0.0752, "step": 95630 }, { "epoch": 3.4755432807616833, "grad_norm": 0.45891714096069336, "learning_rate": 2.180925236601965e-05, "loss": 0.0795, "step": 95640 }, { "epoch": 3.4759066792644813, "grad_norm": 0.9783682823181152, "learning_rate": 2.1804105555269772e-05, "loss": 0.0782, "step": 95650 }, { "epoch": 3.4762700777672797, "grad_norm": 0.3729395270347595, "learning_rate": 
2.17989588822196e-05, "loss": 0.0787, "step": 95660 }, { "epoch": 3.4766334762700777, "grad_norm": 0.9214646220207214, "learning_rate": 2.1793812347090918e-05, "loss": 0.0526, "step": 95670 }, { "epoch": 3.476996874772876, "grad_norm": 0.4130672216415405, "learning_rate": 2.178866595010545e-05, "loss": 0.0806, "step": 95680 }, { "epoch": 3.477360273275674, "grad_norm": 0.38258814811706543, "learning_rate": 2.178351969148494e-05, "loss": 0.08, "step": 95690 }, { "epoch": 3.477723671778472, "grad_norm": 0.7837095260620117, "learning_rate": 2.1778373571451124e-05, "loss": 0.187, "step": 95700 }, { "epoch": 3.4780870702812705, "grad_norm": 1.1877186298370361, "learning_rate": 2.177322759022572e-05, "loss": 0.0982, "step": 95710 }, { "epoch": 3.4784504687840685, "grad_norm": 0.8703027367591858, "learning_rate": 2.1768081748030463e-05, "loss": 0.066, "step": 95720 }, { "epoch": 3.478813867286867, "grad_norm": 0.47968795895576477, "learning_rate": 2.176293604508707e-05, "loss": 0.0758, "step": 95730 }, { "epoch": 3.479177265789665, "grad_norm": 0.5117019414901733, "learning_rate": 2.175779048161724e-05, "loss": 0.0838, "step": 95740 }, { "epoch": 3.479540664292463, "grad_norm": 0.738413393497467, "learning_rate": 2.1752645057842686e-05, "loss": 0.1082, "step": 95750 }, { "epoch": 3.4799040627952613, "grad_norm": 1.208146333694458, "learning_rate": 2.1747499773985092e-05, "loss": 0.0773, "step": 95760 }, { "epoch": 3.4802674612980593, "grad_norm": 0.4587624967098236, "learning_rate": 2.1742354630266172e-05, "loss": 0.8829, "step": 95770 }, { "epoch": 3.4806308598008577, "grad_norm": 0.5824334025382996, "learning_rate": 2.1737209626907594e-05, "loss": 0.0732, "step": 95780 }, { "epoch": 3.4809942583036557, "grad_norm": 0.649512767791748, "learning_rate": 2.173206476413105e-05, "loss": 0.0883, "step": 95790 }, { "epoch": 3.481357656806454, "grad_norm": 1.6176427602767944, "learning_rate": 2.1726920042158194e-05, "loss": 0.1347, "step": 95800 }, { "epoch": 3.481721055309252, 
"grad_norm": 0.5751357674598694, "learning_rate": 2.1721775461210718e-05, "loss": 0.0736, "step": 95810 }, { "epoch": 3.48208445381205, "grad_norm": 0.701714813709259, "learning_rate": 2.171663102151028e-05, "loss": 0.0742, "step": 95820 }, { "epoch": 3.4824478523148485, "grad_norm": 0.4845195710659027, "learning_rate": 2.1711486723278522e-05, "loss": 0.0668, "step": 95830 }, { "epoch": 3.4828112508176465, "grad_norm": 0.7643341422080994, "learning_rate": 2.1706342566737105e-05, "loss": 0.0767, "step": 95840 }, { "epoch": 3.483174649320445, "grad_norm": 0.4030478894710541, "learning_rate": 2.170119855210765e-05, "loss": 0.0836, "step": 95850 }, { "epoch": 3.483538047823243, "grad_norm": 0.9113463163375854, "learning_rate": 2.1696054679611828e-05, "loss": 0.1111, "step": 95860 }, { "epoch": 3.4839014463260414, "grad_norm": 0.46975961327552795, "learning_rate": 2.1690910949471255e-05, "loss": 0.0592, "step": 95870 }, { "epoch": 3.4842648448288394, "grad_norm": 0.4419771134853363, "learning_rate": 2.1685767361907554e-05, "loss": 0.0996, "step": 95880 }, { "epoch": 3.4846282433316373, "grad_norm": 0.6320810914039612, "learning_rate": 2.168062391714235e-05, "loss": 0.1194, "step": 95890 }, { "epoch": 3.4849916418344358, "grad_norm": 0.3942996859550476, "learning_rate": 2.1675480615397234e-05, "loss": 0.0825, "step": 95900 }, { "epoch": 3.4853550403372338, "grad_norm": 0.3566824197769165, "learning_rate": 2.167033745689384e-05, "loss": 0.0666, "step": 95910 }, { "epoch": 3.485718438840032, "grad_norm": 0.48564252257347107, "learning_rate": 2.1665194441853765e-05, "loss": 0.0687, "step": 95920 }, { "epoch": 3.48608183734283, "grad_norm": 0.3420655429363251, "learning_rate": 2.166005157049859e-05, "loss": 0.0627, "step": 95930 }, { "epoch": 3.486445235845628, "grad_norm": 0.3921029567718506, "learning_rate": 2.165490884304991e-05, "loss": 0.0621, "step": 95940 }, { "epoch": 3.4868086343484266, "grad_norm": 0.609154224395752, "learning_rate": 2.1649766259729298e-05, "loss": 
0.0788, "step": 95950 }, { "epoch": 3.4871720328512246, "grad_norm": 0.4822045564651489, "learning_rate": 2.1644623820758347e-05, "loss": 0.086, "step": 95960 }, { "epoch": 3.487535431354023, "grad_norm": 0.9078019261360168, "learning_rate": 2.1639481526358616e-05, "loss": 0.0632, "step": 95970 }, { "epoch": 3.487898829856821, "grad_norm": 0.43268144130706787, "learning_rate": 2.163433937675167e-05, "loss": 0.704, "step": 95980 }, { "epoch": 3.488262228359619, "grad_norm": 0.4717707633972168, "learning_rate": 2.1629197372159055e-05, "loss": 0.0839, "step": 95990 }, { "epoch": 3.4886256268624174, "grad_norm": 1.3348411321640015, "learning_rate": 2.162405551280234e-05, "loss": 0.0634, "step": 96000 }, { "epoch": 3.4886256268624174, "eval_loss": 0.3231545090675354, "eval_runtime": 179.599, "eval_samples_per_second": 41.281, "eval_steps_per_second": 5.161, "eval_wer": 0.13457803111441902, "step": 96000 }, { "epoch": 3.4889890253652154, "grad_norm": 0.6468439698219299, "learning_rate": 2.1618913798903064e-05, "loss": 0.0726, "step": 96010 }, { "epoch": 3.489352423868014, "grad_norm": 0.6067370772361755, "learning_rate": 2.1613772230682762e-05, "loss": 0.0662, "step": 96020 }, { "epoch": 3.489715822370812, "grad_norm": 4.149559020996094, "learning_rate": 2.1608630808362966e-05, "loss": 0.0685, "step": 96030 }, { "epoch": 3.4900792208736098, "grad_norm": 0.5562649369239807, "learning_rate": 2.1603489532165194e-05, "loss": 0.0805, "step": 96040 }, { "epoch": 3.490442619376408, "grad_norm": 1.4775289297103882, "learning_rate": 2.159834840231098e-05, "loss": 0.0917, "step": 96050 }, { "epoch": 3.490806017879206, "grad_norm": 0.4842393100261688, "learning_rate": 2.1593207419021833e-05, "loss": 0.0956, "step": 96060 }, { "epoch": 3.4911694163820046, "grad_norm": 0.5330924391746521, "learning_rate": 2.1588066582519257e-05, "loss": 1.4342, "step": 96070 }, { "epoch": 3.4915328148848026, "grad_norm": 0.2691187858581543, "learning_rate": 2.158292589302475e-05, "loss": 0.0785, 
"step": 96080 }, { "epoch": 3.491896213387601, "grad_norm": 0.42408475279808044, "learning_rate": 2.15777853507598e-05, "loss": 0.0903, "step": 96090 }, { "epoch": 3.492259611890399, "grad_norm": 2.1866910457611084, "learning_rate": 2.157264495594591e-05, "loss": 0.0759, "step": 96100 }, { "epoch": 3.492623010393197, "grad_norm": 0.2777433693408966, "learning_rate": 2.1567504708804557e-05, "loss": 0.097, "step": 96110 }, { "epoch": 3.4929864088959954, "grad_norm": 0.4376242756843567, "learning_rate": 2.156236460955721e-05, "loss": 0.0691, "step": 96120 }, { "epoch": 3.4933498073987934, "grad_norm": 0.6688746213912964, "learning_rate": 2.1557224658425347e-05, "loss": 0.1149, "step": 96130 }, { "epoch": 3.493713205901592, "grad_norm": 1.0745078325271606, "learning_rate": 2.155208485563041e-05, "loss": 0.0955, "step": 96140 }, { "epoch": 3.49407660440439, "grad_norm": 2.5494561195373535, "learning_rate": 2.154694520139388e-05, "loss": 0.1092, "step": 96150 }, { "epoch": 3.4944400029071883, "grad_norm": 0.681524932384491, "learning_rate": 2.1541805695937192e-05, "loss": 0.0966, "step": 96160 }, { "epoch": 3.4948034014099862, "grad_norm": 0.8694483637809753, "learning_rate": 2.1536666339481797e-05, "loss": 0.2408, "step": 96170 }, { "epoch": 3.4951667999127842, "grad_norm": 0.36922356486320496, "learning_rate": 2.1531527132249113e-05, "loss": 0.0785, "step": 96180 }, { "epoch": 3.4955301984155827, "grad_norm": 0.3221088945865631, "learning_rate": 2.1526388074460594e-05, "loss": 0.0954, "step": 96190 }, { "epoch": 3.4958935969183806, "grad_norm": 0.868403434753418, "learning_rate": 2.1521249166337658e-05, "loss": 0.086, "step": 96200 }, { "epoch": 3.496256995421179, "grad_norm": 0.6299740076065063, "learning_rate": 2.1516110408101714e-05, "loss": 0.0829, "step": 96210 }, { "epoch": 3.496620393923977, "grad_norm": 0.744305431842804, "learning_rate": 2.1510971799974177e-05, "loss": 0.0669, "step": 96220 }, { "epoch": 3.496983792426775, "grad_norm": 0.9115591049194336, 
"learning_rate": 2.1505833342176442e-05, "loss": 0.0635, "step": 96230 }, { "epoch": 3.4973471909295735, "grad_norm": 0.4782477617263794, "learning_rate": 2.1500695034929926e-05, "loss": 0.0879, "step": 96240 }, { "epoch": 3.4977105894323715, "grad_norm": 0.6979982852935791, "learning_rate": 2.1495556878456014e-05, "loss": 0.0725, "step": 96250 }, { "epoch": 3.49807398793517, "grad_norm": 0.4281218945980072, "learning_rate": 2.1490932666723033e-05, "loss": 3.0583, "step": 96260 }, { "epoch": 3.498437386437968, "grad_norm": 2.243551254272461, "learning_rate": 2.148579479732697e-05, "loss": 0.0783, "step": 96270 }, { "epoch": 3.498800784940766, "grad_norm": 0.8817722797393799, "learning_rate": 2.1480657079345505e-05, "loss": 0.0782, "step": 96280 }, { "epoch": 3.4991641834435643, "grad_norm": 0.6434484124183655, "learning_rate": 2.1475519513000002e-05, "loss": 0.0924, "step": 96290 }, { "epoch": 3.4995275819463623, "grad_norm": 0.7872079014778137, "learning_rate": 2.1470382098511813e-05, "loss": 0.0852, "step": 96300 }, { "epoch": 3.4998909804491607, "grad_norm": 1.4359321594238281, "learning_rate": 2.1465244836102312e-05, "loss": 0.0759, "step": 96310 }, { "epoch": 3.5002543789519587, "grad_norm": 0.307167112827301, "learning_rate": 2.1460107725992838e-05, "loss": 0.0755, "step": 96320 }, { "epoch": 3.5006177774547567, "grad_norm": 0.6518979072570801, "learning_rate": 2.1454970768404724e-05, "loss": 0.063, "step": 96330 }, { "epoch": 3.500981175957555, "grad_norm": 1.06710684299469, "learning_rate": 2.1449833963559293e-05, "loss": 0.0851, "step": 96340 }, { "epoch": 3.501344574460353, "grad_norm": 0.6126148104667664, "learning_rate": 2.14446973116779e-05, "loss": 0.0888, "step": 96350 }, { "epoch": 3.5017079729631515, "grad_norm": 0.7071236968040466, "learning_rate": 2.1439560812981848e-05, "loss": 0.1182, "step": 96360 }, { "epoch": 3.5020713714659495, "grad_norm": 2.222172737121582, "learning_rate": 2.1434424467692455e-05, "loss": 0.1948, "step": 96370 }, { 
"epoch": 3.5024347699687475, "grad_norm": 1.0301228761672974, "learning_rate": 2.142928827603102e-05, "loss": 0.0754, "step": 96380 }, { "epoch": 3.502798168471546, "grad_norm": 1.2635794878005981, "learning_rate": 2.142415223821884e-05, "loss": 0.0892, "step": 96390 }, { "epoch": 3.5031615669743443, "grad_norm": 0.6022728681564331, "learning_rate": 2.141901635447723e-05, "loss": 0.0853, "step": 96400 }, { "epoch": 3.5035249654771423, "grad_norm": 1.4563450813293457, "learning_rate": 2.1413880625027462e-05, "loss": 0.1804, "step": 96410 }, { "epoch": 3.5038883639799403, "grad_norm": 0.5189762711524963, "learning_rate": 2.140874505009082e-05, "loss": 0.068, "step": 96420 }, { "epoch": 3.5042517624827387, "grad_norm": 1.153496503829956, "learning_rate": 2.1403609629888578e-05, "loss": 0.1057, "step": 96430 }, { "epoch": 3.5046151609855367, "grad_norm": 0.8835933208465576, "learning_rate": 2.139847436464199e-05, "loss": 0.0967, "step": 96440 }, { "epoch": 3.504978559488335, "grad_norm": 0.9221778512001038, "learning_rate": 2.1393339254572334e-05, "loss": 0.0876, "step": 96450 }, { "epoch": 3.505341957991133, "grad_norm": 0.32772889733314514, "learning_rate": 2.138820429990086e-05, "loss": 0.0628, "step": 96460 }, { "epoch": 3.505705356493931, "grad_norm": 0.482295960187912, "learning_rate": 2.138306950084881e-05, "loss": 0.0768, "step": 96470 }, { "epoch": 3.5060687549967295, "grad_norm": 0.7036164999008179, "learning_rate": 2.1377934857637427e-05, "loss": 0.0804, "step": 96480 }, { "epoch": 3.5064321534995275, "grad_norm": 0.6191168427467346, "learning_rate": 2.137280037048793e-05, "loss": 0.0966, "step": 96490 }, { "epoch": 3.506795552002326, "grad_norm": 1.3983656167984009, "learning_rate": 2.1367666039621575e-05, "loss": 0.0961, "step": 96500 }, { "epoch": 3.507158950505124, "grad_norm": 0.36156126856803894, "learning_rate": 2.1362531865259564e-05, "loss": 0.1172, "step": 96510 }, { "epoch": 3.507522349007922, "grad_norm": 0.7854031324386597, "learning_rate": 
2.135739784762311e-05, "loss": 0.0675, "step": 96520 }, { "epoch": 3.5078857475107204, "grad_norm": 1.4320755004882812, "learning_rate": 2.135226398693342e-05, "loss": 0.0677, "step": 96530 }, { "epoch": 3.5082491460135183, "grad_norm": 1.0837007761001587, "learning_rate": 2.13471302834117e-05, "loss": 0.064, "step": 96540 }, { "epoch": 3.5086125445163168, "grad_norm": 2.1308250427246094, "learning_rate": 2.134199673727914e-05, "loss": 0.094, "step": 96550 }, { "epoch": 3.5089759430191148, "grad_norm": 0.3453007638454437, "learning_rate": 2.1336863348756927e-05, "loss": 0.1269, "step": 96560 }, { "epoch": 3.5093393415219127, "grad_norm": 0.2881056070327759, "learning_rate": 2.133173011806624e-05, "loss": 0.1384, "step": 96570 }, { "epoch": 3.509702740024711, "grad_norm": 1.0903687477111816, "learning_rate": 2.1326597045428236e-05, "loss": 0.0777, "step": 96580 }, { "epoch": 3.510066138527509, "grad_norm": 0.44503089785575867, "learning_rate": 2.1321464131064105e-05, "loss": 0.0847, "step": 96590 }, { "epoch": 3.5104295370303076, "grad_norm": 0.562317430973053, "learning_rate": 2.1316331375195002e-05, "loss": 0.0654, "step": 96600 }, { "epoch": 3.5104295370303076, "eval_loss": 0.3297887444496155, "eval_runtime": 180.5411, "eval_samples_per_second": 41.065, "eval_steps_per_second": 5.135, "eval_wer": 0.13554922213952475, "step": 96600 }, { "epoch": 3.5107929355331056, "grad_norm": 0.9230484962463379, "learning_rate": 2.131119877804207e-05, "loss": 0.0732, "step": 96610 }, { "epoch": 3.5111563340359035, "grad_norm": 4.310789585113525, "learning_rate": 2.1306066339826457e-05, "loss": 0.1152, "step": 96620 }, { "epoch": 3.511519732538702, "grad_norm": 0.3380495011806488, "learning_rate": 2.1300934060769296e-05, "loss": 0.2172, "step": 96630 }, { "epoch": 3.5118831310415, "grad_norm": 0.524796187877655, "learning_rate": 2.129580194109173e-05, "loss": 0.0933, "step": 96640 }, { "epoch": 3.5122465295442984, "grad_norm": 0.6239128708839417, "learning_rate": 
2.1290669981014882e-05, "loss": 0.0738, "step": 96650 }, { "epoch": 3.5126099280470964, "grad_norm": 0.976793646812439, "learning_rate": 2.128553818075987e-05, "loss": 0.0777, "step": 96660 }, { "epoch": 3.5129733265498944, "grad_norm": 1.0099037885665894, "learning_rate": 2.1280406540547794e-05, "loss": 0.0661, "step": 96670 }, { "epoch": 3.513336725052693, "grad_norm": 0.6158185601234436, "learning_rate": 2.127527506059976e-05, "loss": 0.0782, "step": 96680 }, { "epoch": 3.513700123555491, "grad_norm": 0.28984469175338745, "learning_rate": 2.1270143741136884e-05, "loss": 0.0849, "step": 96690 }, { "epoch": 3.514063522058289, "grad_norm": 1.0383470058441162, "learning_rate": 2.126501258238024e-05, "loss": 0.0993, "step": 96700 }, { "epoch": 3.514426920561087, "grad_norm": 0.32447656989097595, "learning_rate": 2.1259881584550912e-05, "loss": 0.0801, "step": 96710 }, { "epoch": 3.5147903190638856, "grad_norm": 0.6273922920227051, "learning_rate": 2.1254750747869972e-05, "loss": 0.0771, "step": 96720 }, { "epoch": 3.5151537175666836, "grad_norm": 0.7431948781013489, "learning_rate": 2.1249620072558508e-05, "loss": 0.09, "step": 96730 }, { "epoch": 3.515517116069482, "grad_norm": 0.4066786468029022, "learning_rate": 2.124448955883757e-05, "loss": 0.0772, "step": 96740 }, { "epoch": 3.51588051457228, "grad_norm": 0.31640565395355225, "learning_rate": 2.1239359206928214e-05, "loss": 0.0722, "step": 96750 }, { "epoch": 3.516243913075078, "grad_norm": 0.4770644009113312, "learning_rate": 2.1234229017051488e-05, "loss": 0.0742, "step": 96760 }, { "epoch": 3.5166073115778764, "grad_norm": 0.25661101937294006, "learning_rate": 2.122909898942843e-05, "loss": 2.1078, "step": 96770 }, { "epoch": 3.5169707100806744, "grad_norm": 0.7563055157661438, "learning_rate": 2.1223969124280086e-05, "loss": 0.0827, "step": 96780 }, { "epoch": 3.517334108583473, "grad_norm": 0.690726637840271, "learning_rate": 2.1218839421827474e-05, "loss": 0.0743, "step": 96790 }, { "epoch": 
3.517697507086271, "grad_norm": 1.4046658277511597, "learning_rate": 2.1213709882291623e-05, "loss": 0.0789, "step": 96800 }, { "epoch": 3.518060905589069, "grad_norm": 0.5457620620727539, "learning_rate": 2.1208580505893542e-05, "loss": 0.1015, "step": 96810 }, { "epoch": 3.5184243040918672, "grad_norm": 1.5679082870483398, "learning_rate": 2.1203451292854222e-05, "loss": 0.0695, "step": 96820 }, { "epoch": 3.5187877025946652, "grad_norm": 0.6426934003829956, "learning_rate": 2.119832224339469e-05, "loss": 0.0706, "step": 96830 }, { "epoch": 3.5191511010974637, "grad_norm": 2.433039665222168, "learning_rate": 2.119319335773593e-05, "loss": 0.1022, "step": 96840 }, { "epoch": 3.5195144996002616, "grad_norm": 1.6734727621078491, "learning_rate": 2.118806463609892e-05, "loss": 0.0889, "step": 96850 }, { "epoch": 3.5198778981030596, "grad_norm": 0.5065173506736755, "learning_rate": 2.1182936078704636e-05, "loss": 0.1207, "step": 96860 }, { "epoch": 3.520241296605858, "grad_norm": 0.8844788670539856, "learning_rate": 2.117780768577406e-05, "loss": 0.0761, "step": 96870 }, { "epoch": 3.520604695108656, "grad_norm": 0.48743927478790283, "learning_rate": 2.117267945752815e-05, "loss": 0.0719, "step": 96880 }, { "epoch": 3.5209680936114545, "grad_norm": 0.8191256523132324, "learning_rate": 2.116755139418787e-05, "loss": 0.0796, "step": 96890 }, { "epoch": 3.5213314921142524, "grad_norm": 0.6994876265525818, "learning_rate": 2.116242349597416e-05, "loss": 0.0961, "step": 96900 }, { "epoch": 3.5216948906170504, "grad_norm": 0.2623302638530731, "learning_rate": 2.115729576310796e-05, "loss": 0.0902, "step": 96910 }, { "epoch": 3.522058289119849, "grad_norm": 0.44012176990509033, "learning_rate": 2.1152168195810222e-05, "loss": 0.0695, "step": 96920 }, { "epoch": 3.522421687622647, "grad_norm": 0.39194273948669434, "learning_rate": 2.114704079430187e-05, "loss": 0.0626, "step": 96930 }, { "epoch": 3.5227850861254453, "grad_norm": 2.427734851837158, "learning_rate": 
2.1141913558803818e-05, "loss": 0.084, "step": 96940 }, { "epoch": 3.5231484846282433, "grad_norm": 0.9978201985359192, "learning_rate": 2.1136786489536975e-05, "loss": 0.1378, "step": 96950 }, { "epoch": 3.5235118831310412, "grad_norm": 0.9334393739700317, "learning_rate": 2.113165958672226e-05, "loss": 0.0942, "step": 96960 }, { "epoch": 3.5238752816338397, "grad_norm": 0.5911192893981934, "learning_rate": 2.1126532850580578e-05, "loss": 0.0585, "step": 96970 }, { "epoch": 3.524238680136638, "grad_norm": 0.35171112418174744, "learning_rate": 2.112140628133281e-05, "loss": 0.0747, "step": 96980 }, { "epoch": 3.524602078639436, "grad_norm": 0.4741590917110443, "learning_rate": 2.1116279879199846e-05, "loss": 0.1048, "step": 96990 }, { "epoch": 3.524965477142234, "grad_norm": 4.185072422027588, "learning_rate": 2.1111153644402558e-05, "loss": 0.095, "step": 97000 }, { "epoch": 3.5253288756450325, "grad_norm": 0.4775453209877014, "learning_rate": 2.110602757716182e-05, "loss": 0.0866, "step": 97010 }, { "epoch": 3.5256922741478305, "grad_norm": 4.56588077545166, "learning_rate": 2.110090167769851e-05, "loss": 0.0696, "step": 97020 }, { "epoch": 3.526055672650629, "grad_norm": 0.5117329359054565, "learning_rate": 2.109577594623347e-05, "loss": 0.0655, "step": 97030 }, { "epoch": 3.526419071153427, "grad_norm": 0.45523038506507874, "learning_rate": 2.109065038298755e-05, "loss": 0.0838, "step": 97040 }, { "epoch": 3.526782469656225, "grad_norm": 0.7840531468391418, "learning_rate": 2.10855249881816e-05, "loss": 0.0915, "step": 97050 }, { "epoch": 3.5271458681590233, "grad_norm": 0.42048537731170654, "learning_rate": 2.1080399762036447e-05, "loss": 0.0882, "step": 97060 }, { "epoch": 3.5275092666618213, "grad_norm": 0.36128684878349304, "learning_rate": 2.1075274704772924e-05, "loss": 0.0609, "step": 97070 }, { "epoch": 3.5278726651646197, "grad_norm": 0.4942743182182312, "learning_rate": 2.107014981661185e-05, "loss": 0.0883, "step": 97080 }, { "epoch": 
3.5282360636674177, "grad_norm": 0.442184716463089, "learning_rate": 2.1065025097774034e-05, "loss": 0.1055, "step": 97090 }, { "epoch": 3.5285994621702157, "grad_norm": 0.8446380496025085, "learning_rate": 2.105990054848029e-05, "loss": 0.069, "step": 97100 }, { "epoch": 3.528962860673014, "grad_norm": 0.5443778038024902, "learning_rate": 2.1054776168951412e-05, "loss": 0.0868, "step": 97110 }, { "epoch": 3.529326259175812, "grad_norm": 0.6392617225646973, "learning_rate": 2.1049651959408194e-05, "loss": 0.0639, "step": 97120 }, { "epoch": 3.5296896576786105, "grad_norm": 0.7572323083877563, "learning_rate": 2.104452792007141e-05, "loss": 0.0894, "step": 97130 }, { "epoch": 3.5300530561814085, "grad_norm": 1.0398154258728027, "learning_rate": 2.1039404051161852e-05, "loss": 0.0756, "step": 97140 }, { "epoch": 3.5304164546842065, "grad_norm": 1.265731930732727, "learning_rate": 2.1034280352900277e-05, "loss": 0.0822, "step": 97150 }, { "epoch": 3.530779853187005, "grad_norm": 0.5231419205665588, "learning_rate": 2.1029156825507453e-05, "loss": 0.0723, "step": 97160 }, { "epoch": 3.531143251689803, "grad_norm": 0.6905182600021362, "learning_rate": 2.1024033469204134e-05, "loss": 0.0611, "step": 97170 }, { "epoch": 3.5315066501926013, "grad_norm": 0.5370905995368958, "learning_rate": 2.1018910284211067e-05, "loss": 0.0803, "step": 97180 }, { "epoch": 3.5318700486953993, "grad_norm": 0.41777387261390686, "learning_rate": 2.1013787270748992e-05, "loss": 0.0868, "step": 97190 }, { "epoch": 3.5322334471981973, "grad_norm": 0.6612346172332764, "learning_rate": 2.1008664429038633e-05, "loss": 0.0969, "step": 97200 }, { "epoch": 3.5322334471981973, "eval_loss": 0.33734017610549927, "eval_runtime": 179.3542, "eval_samples_per_second": 41.337, "eval_steps_per_second": 5.169, "eval_wer": 0.13551291593297873, "step": 97200 }, { "epoch": 3.5325968457009957, "grad_norm": 0.5762473940849304, "learning_rate": 2.1003541759300732e-05, "loss": 0.0925, "step": 97210 }, { "epoch": 
3.5329602442037937, "grad_norm": 1.3973640203475952, "learning_rate": 2.0998419261755994e-05, "loss": 0.1158, "step": 97220 }, { "epoch": 3.533323642706592, "grad_norm": 0.452146977186203, "learning_rate": 2.099329693662513e-05, "loss": 0.0726, "step": 97230 }, { "epoch": 3.53368704120939, "grad_norm": 0.8005980253219604, "learning_rate": 2.0988174784128847e-05, "loss": 0.0796, "step": 97240 }, { "epoch": 3.534050439712188, "grad_norm": 0.8147309422492981, "learning_rate": 2.0983052804487835e-05, "loss": 0.0779, "step": 97250 }, { "epoch": 3.5344138382149866, "grad_norm": 1.6006520986557007, "learning_rate": 2.097793099792279e-05, "loss": 0.0699, "step": 97260 }, { "epoch": 3.534777236717785, "grad_norm": 0.44036349654197693, "learning_rate": 2.0972809364654384e-05, "loss": 0.0663, "step": 97270 }, { "epoch": 3.535140635220583, "grad_norm": 0.8019761443138123, "learning_rate": 2.0967687904903296e-05, "loss": 0.0705, "step": 97280 }, { "epoch": 3.535504033723381, "grad_norm": 0.49069860577583313, "learning_rate": 2.0962566618890188e-05, "loss": 0.0941, "step": 97290 }, { "epoch": 3.5358674322261794, "grad_norm": 0.7364464998245239, "learning_rate": 2.095744550683572e-05, "loss": 0.0856, "step": 97300 }, { "epoch": 3.5362308307289774, "grad_norm": 0.6838924288749695, "learning_rate": 2.0952324568960543e-05, "loss": 0.0649, "step": 97310 }, { "epoch": 3.536594229231776, "grad_norm": 0.5033947229385376, "learning_rate": 2.0947203805485293e-05, "loss": 0.0916, "step": 97320 }, { "epoch": 3.536957627734574, "grad_norm": 1.3875548839569092, "learning_rate": 2.0942083216630622e-05, "loss": 0.0615, "step": 97330 }, { "epoch": 3.5373210262373718, "grad_norm": 0.6745208501815796, "learning_rate": 2.0936962802617137e-05, "loss": 0.077, "step": 97340 }, { "epoch": 3.53768442474017, "grad_norm": 0.5911176800727844, "learning_rate": 2.093184256366547e-05, "loss": 0.0725, "step": 97350 }, { "epoch": 3.538047823242968, "grad_norm": 0.6544129848480225, "learning_rate": 
2.092672249999623e-05, "loss": 0.1027, "step": 97360 }, { "epoch": 3.5384112217457666, "grad_norm": 0.6527793407440186, "learning_rate": 2.0921602611830036e-05, "loss": 0.0601, "step": 97370 }, { "epoch": 3.5387746202485646, "grad_norm": 0.569724977016449, "learning_rate": 2.0916482899387467e-05, "loss": 0.0709, "step": 97380 }, { "epoch": 3.5391380187513626, "grad_norm": 0.6393773555755615, "learning_rate": 2.0911363362889118e-05, "loss": 0.0999, "step": 97390 }, { "epoch": 3.539501417254161, "grad_norm": 1.7416220903396606, "learning_rate": 2.090624400255558e-05, "loss": 0.0772, "step": 97400 }, { "epoch": 3.539864815756959, "grad_norm": 0.6363007426261902, "learning_rate": 2.0901124818607417e-05, "loss": 0.0923, "step": 97410 }, { "epoch": 3.5402282142597574, "grad_norm": 0.5386870503425598, "learning_rate": 2.0896005811265207e-05, "loss": 0.0625, "step": 97420 }, { "epoch": 3.5405916127625554, "grad_norm": 1.5430821180343628, "learning_rate": 2.0890886980749504e-05, "loss": 0.0777, "step": 97430 }, { "epoch": 3.5409550112653534, "grad_norm": 0.41363903880119324, "learning_rate": 2.0885768327280854e-05, "loss": 0.1062, "step": 97440 }, { "epoch": 3.541318409768152, "grad_norm": 0.7380490303039551, "learning_rate": 2.0880649851079812e-05, "loss": 0.0607, "step": 97450 }, { "epoch": 3.54168180827095, "grad_norm": 0.5600608587265015, "learning_rate": 2.0875531552366914e-05, "loss": 0.0785, "step": 97460 }, { "epoch": 3.5420452067737482, "grad_norm": 0.46273377537727356, "learning_rate": 2.0870413431362687e-05, "loss": 0.0725, "step": 97470 }, { "epoch": 3.542408605276546, "grad_norm": 1.1162970066070557, "learning_rate": 2.0865295488287655e-05, "loss": 0.0599, "step": 97480 }, { "epoch": 3.542772003779344, "grad_norm": 0.5625380277633667, "learning_rate": 2.0860177723362316e-05, "loss": 0.0767, "step": 97490 }, { "epoch": 3.5431354022821426, "grad_norm": 0.7391917109489441, "learning_rate": 2.08550601368072e-05, "loss": 0.0871, "step": 97500 }, { "epoch": 
3.5434988007849406, "grad_norm": 0.8282499313354492, "learning_rate": 2.084994272884279e-05, "loss": 0.0997, "step": 97510 }, { "epoch": 3.543862199287739, "grad_norm": 1.1234526634216309, "learning_rate": 2.0844825499689587e-05, "loss": 0.068, "step": 97520 }, { "epoch": 3.544225597790537, "grad_norm": 0.3851841986179352, "learning_rate": 2.083970844956806e-05, "loss": 0.1154, "step": 97530 }, { "epoch": 3.544588996293335, "grad_norm": 0.4419216811656952, "learning_rate": 2.0834591578698704e-05, "loss": 0.0946, "step": 97540 }, { "epoch": 3.5449523947961334, "grad_norm": 0.4050745368003845, "learning_rate": 2.082947488730197e-05, "loss": 0.0904, "step": 97550 }, { "epoch": 3.545315793298932, "grad_norm": 0.9602497220039368, "learning_rate": 2.082435837559833e-05, "loss": 0.0892, "step": 97560 }, { "epoch": 3.54567919180173, "grad_norm": 1.1554557085037231, "learning_rate": 2.081924204380823e-05, "loss": 0.0786, "step": 97570 }, { "epoch": 3.546042590304528, "grad_norm": 0.33205777406692505, "learning_rate": 2.0814125892152105e-05, "loss": 0.0993, "step": 97580 }, { "epoch": 3.5464059888073263, "grad_norm": 0.7094134092330933, "learning_rate": 2.080900992085041e-05, "loss": 0.0809, "step": 97590 }, { "epoch": 3.5467693873101243, "grad_norm": 0.7614325284957886, "learning_rate": 2.0803894130123562e-05, "loss": 0.0984, "step": 97600 }, { "epoch": 3.5471327858129227, "grad_norm": 0.18374527990818024, "learning_rate": 2.0798778520191994e-05, "loss": 0.1204, "step": 97610 }, { "epoch": 3.5474961843157207, "grad_norm": 0.5296422243118286, "learning_rate": 2.0793663091276107e-05, "loss": 0.0515, "step": 97620 }, { "epoch": 3.5478595828185187, "grad_norm": 0.4739711880683899, "learning_rate": 2.078854784359631e-05, "loss": 0.0678, "step": 97630 }, { "epoch": 3.548222981321317, "grad_norm": 0.4815096855163574, "learning_rate": 2.0783432777373e-05, "loss": 0.0897, "step": 97640 }, { "epoch": 3.548586379824115, "grad_norm": 0.6116788387298584, "learning_rate": 
2.0778317892826574e-05, "loss": 0.0893, "step": 97650 }, { "epoch": 3.5489497783269135, "grad_norm": 1.2628870010375977, "learning_rate": 2.077320319017741e-05, "loss": 0.1013, "step": 97660 }, { "epoch": 3.5493131768297115, "grad_norm": 0.7226377129554749, "learning_rate": 2.076808866964588e-05, "loss": 0.0703, "step": 97670 }, { "epoch": 3.5496765753325095, "grad_norm": 0.8184316158294678, "learning_rate": 2.0762974331452344e-05, "loss": 0.0705, "step": 97680 }, { "epoch": 3.550039973835308, "grad_norm": 0.7743292450904846, "learning_rate": 2.0757860175817176e-05, "loss": 0.0931, "step": 97690 }, { "epoch": 3.550403372338106, "grad_norm": 0.8815683722496033, "learning_rate": 2.075274620296072e-05, "loss": 0.0708, "step": 97700 }, { "epoch": 3.5507667708409043, "grad_norm": 0.478040486574173, "learning_rate": 2.074763241310332e-05, "loss": 0.0812, "step": 97710 }, { "epoch": 3.5511301693437023, "grad_norm": 0.7928186058998108, "learning_rate": 2.0742518806465306e-05, "loss": 0.0594, "step": 97720 }, { "epoch": 3.5514935678465003, "grad_norm": 0.3057189881801605, "learning_rate": 2.0737405383267002e-05, "loss": 0.0703, "step": 97730 }, { "epoch": 3.5518569663492987, "grad_norm": 0.7220034599304199, "learning_rate": 2.0732292143728744e-05, "loss": 0.0692, "step": 97740 }, { "epoch": 3.5522203648520967, "grad_norm": 0.40750908851623535, "learning_rate": 2.0727179088070833e-05, "loss": 0.0778, "step": 97750 }, { "epoch": 3.552583763354895, "grad_norm": 0.542911171913147, "learning_rate": 2.072206621651357e-05, "loss": 0.084, "step": 97760 }, { "epoch": 3.552947161857693, "grad_norm": 0.37515130639076233, "learning_rate": 2.0716953529277244e-05, "loss": 0.175, "step": 97770 }, { "epoch": 3.553310560360491, "grad_norm": 0.7778531908988953, "learning_rate": 2.071184102658216e-05, "loss": 0.0599, "step": 97780 }, { "epoch": 3.5536739588632895, "grad_norm": 0.4933672249317169, "learning_rate": 2.0706728708648593e-05, "loss": 0.0913, "step": 97790 }, { "epoch": 
3.5540373573660875, "grad_norm": 0.4018378257751465, "learning_rate": 2.0701616575696807e-05, "loss": 0.1055, "step": 97800 }, { "epoch": 3.5540373573660875, "eval_loss": 0.3385712802410126, "eval_runtime": 179.9463, "eval_samples_per_second": 41.201, "eval_steps_per_second": 5.152, "eval_wer": 0.13599397316971337, "step": 97800 }, { "epoch": 3.554400755868886, "grad_norm": 1.089429497718811, "learning_rate": 2.069650462794707e-05, "loss": 0.0872, "step": 97810 }, { "epoch": 3.554764154371684, "grad_norm": 0.2929953932762146, "learning_rate": 2.0691392865619623e-05, "loss": 0.0634, "step": 97820 }, { "epoch": 3.555127552874482, "grad_norm": 2.5961267948150635, "learning_rate": 2.0686281288934743e-05, "loss": 0.0939, "step": 97830 }, { "epoch": 3.5554909513772803, "grad_norm": 0.6162799596786499, "learning_rate": 2.0681169898112652e-05, "loss": 0.0877, "step": 97840 }, { "epoch": 3.5558543498800788, "grad_norm": 0.6476810574531555, "learning_rate": 2.0676058693373583e-05, "loss": 0.0798, "step": 97850 }, { "epoch": 3.5562177483828767, "grad_norm": 0.8159734010696411, "learning_rate": 2.067094767493776e-05, "loss": 0.0762, "step": 97860 }, { "epoch": 3.5565811468856747, "grad_norm": 4.349702835083008, "learning_rate": 2.0665836843025387e-05, "loss": 0.0897, "step": 97870 }, { "epoch": 3.556944545388473, "grad_norm": 0.616278886795044, "learning_rate": 2.0660726197856696e-05, "loss": 0.0996, "step": 97880 }, { "epoch": 3.557307943891271, "grad_norm": 0.6084387302398682, "learning_rate": 2.0655615739651874e-05, "loss": 0.0876, "step": 97890 }, { "epoch": 3.5576713423940696, "grad_norm": 0.9170116186141968, "learning_rate": 2.0650505468631114e-05, "loss": 0.0824, "step": 97900 }, { "epoch": 3.5580347408968676, "grad_norm": 1.841464638710022, "learning_rate": 2.0645395385014584e-05, "loss": 0.0863, "step": 97910 }, { "epoch": 3.5583981393996655, "grad_norm": 1.3778067827224731, "learning_rate": 2.0640285489022483e-05, "loss": 0.072, "step": 97920 }, { "epoch": 
3.558761537902464, "grad_norm": 0.49323418736457825, "learning_rate": 2.063517578087497e-05, "loss": 0.0877, "step": 97930 }, { "epoch": 3.559124936405262, "grad_norm": 1.1919158697128296, "learning_rate": 2.0630066260792206e-05, "loss": 0.0773, "step": 97940 }, { "epoch": 3.5594883349080604, "grad_norm": 0.5721442103385925, "learning_rate": 2.0624956928994335e-05, "loss": 0.0723, "step": 97950 }, { "epoch": 3.5598517334108584, "grad_norm": 0.7183822989463806, "learning_rate": 2.0619847785701494e-05, "loss": 0.0705, "step": 97960 }, { "epoch": 3.5602151319136564, "grad_norm": 0.5326651334762573, "learning_rate": 2.0614738831133836e-05, "loss": 0.0699, "step": 97970 }, { "epoch": 3.560578530416455, "grad_norm": 0.797173023223877, "learning_rate": 2.0609630065511482e-05, "loss": 0.0937, "step": 97980 }, { "epoch": 3.5609419289192528, "grad_norm": 0.49510565400123596, "learning_rate": 2.0604521489054547e-05, "loss": 0.077, "step": 97990 }, { "epoch": 3.561305327422051, "grad_norm": 0.6226716041564941, "learning_rate": 2.059941310198314e-05, "loss": 0.0858, "step": 98000 }, { "epoch": 3.561668725924849, "grad_norm": 0.6301719546318054, "learning_rate": 2.0594304904517355e-05, "loss": 0.1003, "step": 98010 }, { "epoch": 3.562032124427647, "grad_norm": 0.9622916579246521, "learning_rate": 2.0589196896877304e-05, "loss": 0.0663, "step": 98020 }, { "epoch": 3.5623955229304456, "grad_norm": 0.38021838665008545, "learning_rate": 2.058408907928307e-05, "loss": 0.0638, "step": 98030 }, { "epoch": 3.5627589214332436, "grad_norm": 0.8715338706970215, "learning_rate": 2.0578981451954723e-05, "loss": 0.1114, "step": 98040 }, { "epoch": 3.563122319936042, "grad_norm": 0.40252813696861267, "learning_rate": 2.0573874015112337e-05, "loss": 0.0942, "step": 98050 }, { "epoch": 3.56348571843884, "grad_norm": 0.9255901575088501, "learning_rate": 2.056876676897596e-05, "loss": 0.074, "step": 98060 }, { "epoch": 3.563849116941638, "grad_norm": 0.5384101867675781, "learning_rate": 
2.0563659713765664e-05, "loss": 0.0983, "step": 98070 }, { "epoch": 3.5642125154444364, "grad_norm": 0.4538724422454834, "learning_rate": 2.055855284970149e-05, "loss": 0.0769, "step": 98080 }, { "epoch": 3.5645759139472344, "grad_norm": 0.8953961133956909, "learning_rate": 2.055344617700347e-05, "loss": 0.0899, "step": 98090 }, { "epoch": 3.564939312450033, "grad_norm": 2.060249090194702, "learning_rate": 2.0548339695891625e-05, "loss": 0.081, "step": 98100 }, { "epoch": 3.565302710952831, "grad_norm": 0.4483121335506439, "learning_rate": 2.054323340658599e-05, "loss": 0.0894, "step": 98110 }, { "epoch": 3.565666109455629, "grad_norm": 0.5069551467895508, "learning_rate": 2.0538127309306572e-05, "loss": 0.0892, "step": 98120 }, { "epoch": 3.566029507958427, "grad_norm": 1.5760865211486816, "learning_rate": 2.0533021404273375e-05, "loss": 0.0611, "step": 98130 }, { "epoch": 3.5663929064612256, "grad_norm": 0.7342379093170166, "learning_rate": 2.0527915691706388e-05, "loss": 0.0638, "step": 98140 }, { "epoch": 3.5667563049640236, "grad_norm": 1.9149831533432007, "learning_rate": 2.0522810171825597e-05, "loss": 0.0889, "step": 98150 }, { "epoch": 3.5671197034668216, "grad_norm": 0.7659269571304321, "learning_rate": 2.0517704844850993e-05, "loss": 0.0989, "step": 98160 }, { "epoch": 3.56748310196962, "grad_norm": 0.696357250213623, "learning_rate": 2.051259971100254e-05, "loss": 0.0673, "step": 98170 }, { "epoch": 3.567846500472418, "grad_norm": 0.9041746258735657, "learning_rate": 2.0507494770500197e-05, "loss": 0.0644, "step": 98180 }, { "epoch": 3.5682098989752165, "grad_norm": 0.9114351868629456, "learning_rate": 2.0502390023563923e-05, "loss": 0.0838, "step": 98190 }, { "epoch": 3.5685732974780144, "grad_norm": 3.253389596939087, "learning_rate": 2.0497285470413645e-05, "loss": 0.0796, "step": 98200 }, { "epoch": 3.5689366959808124, "grad_norm": 0.4176378548145294, "learning_rate": 2.0492181111269333e-05, "loss": 0.0853, "step": 98210 }, { "epoch": 
3.569300094483611, "grad_norm": 0.5694284439086914, "learning_rate": 2.048707694635089e-05, "loss": 0.0584, "step": 98220 }, { "epoch": 3.569663492986409, "grad_norm": 1.451493740081787, "learning_rate": 2.048197297587825e-05, "loss": 0.0784, "step": 98230 }, { "epoch": 3.5700268914892073, "grad_norm": 0.7314192652702332, "learning_rate": 2.047686920007132e-05, "loss": 0.0838, "step": 98240 }, { "epoch": 3.5703902899920053, "grad_norm": 1.8838427066802979, "learning_rate": 2.0471765619149992e-05, "loss": 0.0795, "step": 98250 }, { "epoch": 3.5707536884948032, "grad_norm": 1.1557743549346924, "learning_rate": 2.0466662233334176e-05, "loss": 0.0831, "step": 98260 }, { "epoch": 3.5711170869976017, "grad_norm": 0.4788287580013275, "learning_rate": 2.0461559042843762e-05, "loss": 0.0695, "step": 98270 }, { "epoch": 3.5714804855003996, "grad_norm": 0.6254255771636963, "learning_rate": 2.0456456047898618e-05, "loss": 0.0753, "step": 98280 }, { "epoch": 3.571843884003198, "grad_norm": 0.5113767981529236, "learning_rate": 2.0451353248718608e-05, "loss": 0.2545, "step": 98290 }, { "epoch": 3.572207282505996, "grad_norm": 5.397027015686035, "learning_rate": 2.044625064552361e-05, "loss": 0.124, "step": 98300 }, { "epoch": 3.572570681008794, "grad_norm": 1.188666820526123, "learning_rate": 2.044114823853347e-05, "loss": 0.0784, "step": 98310 }, { "epoch": 3.5729340795115925, "grad_norm": 0.7919589281082153, "learning_rate": 2.0436046027968033e-05, "loss": 0.0778, "step": 98320 }, { "epoch": 3.5732974780143905, "grad_norm": 0.7725494503974915, "learning_rate": 2.0430944014047135e-05, "loss": 0.078, "step": 98330 }, { "epoch": 3.573660876517189, "grad_norm": 0.5569599866867065, "learning_rate": 2.0425842196990592e-05, "loss": 0.0849, "step": 98340 }, { "epoch": 3.574024275019987, "grad_norm": 0.9411738514900208, "learning_rate": 2.0420740577018243e-05, "loss": 0.1068, "step": 98350 }, { "epoch": 3.574387673522785, "grad_norm": 0.9715979695320129, "learning_rate": 
2.0415639154349893e-05, "loss": 0.0867, "step": 98360 }, { "epoch": 3.5747510720255833, "grad_norm": 0.4651619493961334, "learning_rate": 2.0410537929205336e-05, "loss": 0.0678, "step": 98370 }, { "epoch": 3.5751144705283813, "grad_norm": 0.8822535872459412, "learning_rate": 2.040543690180437e-05, "loss": 0.073, "step": 98380 }, { "epoch": 3.5754778690311797, "grad_norm": 2.6987640857696533, "learning_rate": 2.0400336072366772e-05, "loss": 0.0967, "step": 98390 }, { "epoch": 3.5758412675339777, "grad_norm": 3.3761913776397705, "learning_rate": 2.0395235441112336e-05, "loss": 0.0978, "step": 98400 }, { "epoch": 3.5758412675339777, "eval_loss": 0.32311710715293884, "eval_runtime": 179.7355, "eval_samples_per_second": 41.249, "eval_steps_per_second": 5.158, "eval_wer": 0.1381451159075644, "step": 98400 }, { "epoch": 3.5762046660367757, "grad_norm": 0.4446313977241516, "learning_rate": 2.0390135008260823e-05, "loss": 0.5498, "step": 98410 }, { "epoch": 3.576568064539574, "grad_norm": 2.1820120811462402, "learning_rate": 2.038503477403199e-05, "loss": 0.0726, "step": 98420 }, { "epoch": 3.5769314630423725, "grad_norm": 0.36170488595962524, "learning_rate": 2.037993473864559e-05, "loss": 0.0754, "step": 98430 }, { "epoch": 3.5772948615451705, "grad_norm": 0.7031393051147461, "learning_rate": 2.0374834902321352e-05, "loss": 0.083, "step": 98440 }, { "epoch": 3.5776582600479685, "grad_norm": 3.428339719772339, "learning_rate": 2.036973526527903e-05, "loss": 0.0938, "step": 98450 }, { "epoch": 3.578021658550767, "grad_norm": 0.41565191745758057, "learning_rate": 2.0364635827738347e-05, "loss": 0.0878, "step": 98460 }, { "epoch": 3.578385057053565, "grad_norm": 0.4806919991970062, "learning_rate": 2.0359536589919005e-05, "loss": 0.2698, "step": 98470 }, { "epoch": 3.5787484555563633, "grad_norm": 12.289422988891602, "learning_rate": 2.0354437552040717e-05, "loss": 0.2066, "step": 98480 }, { "epoch": 3.5791118540591613, "grad_norm": 0.4719155430793762, "learning_rate": 
2.0349848589081458e-05, "loss": 0.3243, "step": 98490 }, { "epoch": 3.5794752525619593, "grad_norm": 1.4685314893722534, "learning_rate": 2.034474993169644e-05, "loss": 0.0745, "step": 98500 }, { "epoch": 3.5798386510647577, "grad_norm": 0.3435138165950775, "learning_rate": 2.0339651474889595e-05, "loss": 0.0915, "step": 98510 }, { "epoch": 3.5802020495675557, "grad_norm": 1.1693735122680664, "learning_rate": 2.033455321888059e-05, "loss": 0.0781, "step": 98520 }, { "epoch": 3.580565448070354, "grad_norm": 0.24628905951976776, "learning_rate": 2.0329455163889084e-05, "loss": 0.063, "step": 98530 }, { "epoch": 3.580928846573152, "grad_norm": 1.4354606866836548, "learning_rate": 2.0324357310134738e-05, "loss": 0.1105, "step": 98540 }, { "epoch": 3.58129224507595, "grad_norm": 0.526882529258728, "learning_rate": 2.0319259657837186e-05, "loss": 0.0967, "step": 98550 }, { "epoch": 3.5816556435787485, "grad_norm": 0.3548150360584259, "learning_rate": 2.0314162207216096e-05, "loss": 0.0647, "step": 98560 }, { "epoch": 3.5820190420815465, "grad_norm": 0.8922990560531616, "learning_rate": 2.0309064958491082e-05, "loss": 0.07, "step": 98570 }, { "epoch": 3.582382440584345, "grad_norm": 0.615703284740448, "learning_rate": 2.030396791188177e-05, "loss": 0.0667, "step": 98580 }, { "epoch": 3.582745839087143, "grad_norm": 0.4474499225616455, "learning_rate": 2.0298871067607768e-05, "loss": 0.0679, "step": 98590 }, { "epoch": 3.583109237589941, "grad_norm": 0.8109280467033386, "learning_rate": 2.029377442588868e-05, "loss": 0.0848, "step": 98600 }, { "epoch": 3.5834726360927394, "grad_norm": 0.7447373270988464, "learning_rate": 2.0288677986944116e-05, "loss": 0.0649, "step": 98610 }, { "epoch": 3.5838360345955373, "grad_norm": 0.4216397702693939, "learning_rate": 2.0283581750993653e-05, "loss": 0.0683, "step": 98620 }, { "epoch": 3.5841994330983358, "grad_norm": 0.39569661021232605, "learning_rate": 2.0278485718256873e-05, "loss": 0.0643, "step": 98630 }, { "epoch": 
3.5845628316011338, "grad_norm": 0.3963533043861389, "learning_rate": 2.027338988895333e-05, "loss": 0.077, "step": 98640 }, { "epoch": 3.5849262301039317, "grad_norm": 1.1114723682403564, "learning_rate": 2.0268294263302615e-05, "loss": 0.0787, "step": 98650 }, { "epoch": 3.58528962860673, "grad_norm": 0.5580173134803772, "learning_rate": 2.0263198841524262e-05, "loss": 0.118, "step": 98660 }, { "epoch": 3.585653027109528, "grad_norm": 0.6341946721076965, "learning_rate": 2.0258103623837817e-05, "loss": 0.0648, "step": 98670 }, { "epoch": 3.5860164256123266, "grad_norm": 0.7503349184989929, "learning_rate": 2.0253008610462818e-05, "loss": 0.0822, "step": 98680 }, { "epoch": 3.5863798241151246, "grad_norm": 0.47050052881240845, "learning_rate": 2.0247913801618773e-05, "loss": 0.1365, "step": 98690 }, { "epoch": 3.5867432226179226, "grad_norm": 0.7919335961341858, "learning_rate": 2.0242819197525228e-05, "loss": 0.0775, "step": 98700 }, { "epoch": 3.587106621120721, "grad_norm": 0.4918152093887329, "learning_rate": 2.023772479840168e-05, "loss": 0.0759, "step": 98710 }, { "epoch": 3.5874700196235194, "grad_norm": 1.0589570999145508, "learning_rate": 2.0232630604467623e-05, "loss": 0.1025, "step": 98720 }, { "epoch": 3.5878334181263174, "grad_norm": 0.4748883843421936, "learning_rate": 2.022753661594255e-05, "loss": 0.0647, "step": 98730 }, { "epoch": 3.5881968166291154, "grad_norm": 0.5574440360069275, "learning_rate": 2.0222442833045936e-05, "loss": 0.0877, "step": 98740 }, { "epoch": 3.588560215131914, "grad_norm": 1.1840908527374268, "learning_rate": 2.021734925599727e-05, "loss": 0.0612, "step": 98750 }, { "epoch": 3.588923613634712, "grad_norm": 0.5361800789833069, "learning_rate": 2.021225588501601e-05, "loss": 0.0797, "step": 98760 }, { "epoch": 3.5892870121375102, "grad_norm": 1.2590301036834717, "learning_rate": 2.0207162720321604e-05, "loss": 0.0819, "step": 98770 }, { "epoch": 3.589650410640308, "grad_norm": 0.4360603392124176, "learning_rate": 
2.0202069762133506e-05, "loss": 0.0844, "step": 98780 }, { "epoch": 3.590013809143106, "grad_norm": 0.6287516951560974, "learning_rate": 2.019697701067114e-05, "loss": 0.0888, "step": 98790 }, { "epoch": 3.5903772076459046, "grad_norm": 0.5893504023551941, "learning_rate": 2.019188446615396e-05, "loss": 0.0795, "step": 98800 }, { "epoch": 3.5907406061487026, "grad_norm": 1.4572880268096924, "learning_rate": 2.0186792128801368e-05, "loss": 0.0922, "step": 98810 }, { "epoch": 3.591104004651501, "grad_norm": 0.6769400835037231, "learning_rate": 2.018169999883278e-05, "loss": 0.062, "step": 98820 }, { "epoch": 3.591467403154299, "grad_norm": 1.1272927522659302, "learning_rate": 2.017660807646758e-05, "loss": 0.0594, "step": 98830 }, { "epoch": 3.591830801657097, "grad_norm": 3.9643101692199707, "learning_rate": 2.017151636192519e-05, "loss": 0.0732, "step": 98840 }, { "epoch": 3.5921942001598954, "grad_norm": 0.613264262676239, "learning_rate": 2.016642485542498e-05, "loss": 0.0745, "step": 98850 }, { "epoch": 3.5925575986626934, "grad_norm": 0.725082516670227, "learning_rate": 2.0161333557186326e-05, "loss": 0.0739, "step": 98860 }, { "epoch": 3.592920997165492, "grad_norm": 0.7394050359725952, "learning_rate": 2.0156242467428593e-05, "loss": 0.0617, "step": 98870 }, { "epoch": 3.59328439566829, "grad_norm": 0.339306116104126, "learning_rate": 2.0151151586371125e-05, "loss": 0.0641, "step": 98880 }, { "epoch": 3.593647794171088, "grad_norm": 0.6532690525054932, "learning_rate": 2.0146060914233296e-05, "loss": 0.0702, "step": 98890 }, { "epoch": 3.5940111926738862, "grad_norm": 2.9444310665130615, "learning_rate": 2.014097045123443e-05, "loss": 0.0673, "step": 98900 }, { "epoch": 3.5943745911766842, "grad_norm": 0.4563717544078827, "learning_rate": 2.013588019759386e-05, "loss": 0.0695, "step": 98910 }, { "epoch": 3.5947379896794827, "grad_norm": 0.5301656126976013, "learning_rate": 2.0130790153530905e-05, "loss": 0.0769, "step": 98920 }, { "epoch": 3.5951013881822806, 
"grad_norm": 0.7959988713264465, "learning_rate": 2.0125700319264867e-05, "loss": 0.0784, "step": 98930 }, { "epoch": 3.5954647866850786, "grad_norm": 0.7127143740653992, "learning_rate": 2.012061069501507e-05, "loss": 0.1098, "step": 98940 }, { "epoch": 3.595828185187877, "grad_norm": 1.249788761138916, "learning_rate": 2.0115521281000797e-05, "loss": 0.0763, "step": 98950 }, { "epoch": 3.596191583690675, "grad_norm": 0.8544566631317139, "learning_rate": 2.0110432077441333e-05, "loss": 0.0813, "step": 98960 }, { "epoch": 3.5965549821934735, "grad_norm": 0.741630494594574, "learning_rate": 2.0105343084555955e-05, "loss": 0.0738, "step": 98970 }, { "epoch": 3.5969183806962715, "grad_norm": 0.9736884832382202, "learning_rate": 2.0100254302563915e-05, "loss": 0.0734, "step": 98980 }, { "epoch": 3.5972817791990694, "grad_norm": 0.22562777996063232, "learning_rate": 2.0095165731684496e-05, "loss": 0.0766, "step": 98990 }, { "epoch": 3.597645177701868, "grad_norm": 0.6679416298866272, "learning_rate": 2.0090077372136935e-05, "loss": 0.0986, "step": 99000 }, { "epoch": 3.597645177701868, "eval_loss": 0.32793328166007996, "eval_runtime": 180.4126, "eval_samples_per_second": 41.095, "eval_steps_per_second": 5.138, "eval_wer": 0.13713761867591265, "step": 99000 }, { "epoch": 3.5980085762046663, "grad_norm": 0.5008822083473206, "learning_rate": 2.0084989224140468e-05, "loss": 0.0577, "step": 99010 }, { "epoch": 3.5983719747074643, "grad_norm": 0.7887293696403503, "learning_rate": 2.0079901287914322e-05, "loss": 0.0555, "step": 99020 }, { "epoch": 3.5987353732102623, "grad_norm": 0.7174037098884583, "learning_rate": 2.007481356367773e-05, "loss": 0.0947, "step": 99030 }, { "epoch": 3.5990987717130607, "grad_norm": 0.5506502389907837, "learning_rate": 2.0069726051649897e-05, "loss": 0.0828, "step": 99040 }, { "epoch": 3.5994621702158587, "grad_norm": 0.6919708251953125, "learning_rate": 2.006463875205003e-05, "loss": 0.1141, "step": 99050 }, { "epoch": 3.599825568718657, 
"grad_norm": 0.7304664254188538, "learning_rate": 2.0059551665097314e-05, "loss": 1.5067, "step": 99060 }, { "epoch": 3.600188967221455, "grad_norm": 0.3626577854156494, "learning_rate": 2.005446479101093e-05, "loss": 0.0599, "step": 99070 }, { "epoch": 3.600552365724253, "grad_norm": 1.0457093715667725, "learning_rate": 2.0049378130010075e-05, "loss": 0.0706, "step": 99080 }, { "epoch": 3.6009157642270515, "grad_norm": 0.2686658203601837, "learning_rate": 2.0044291682313905e-05, "loss": 0.0958, "step": 99090 }, { "epoch": 3.6012791627298495, "grad_norm": 0.9484358429908752, "learning_rate": 2.0039205448141568e-05, "loss": 0.1046, "step": 99100 }, { "epoch": 3.601642561232648, "grad_norm": 0.34404778480529785, "learning_rate": 2.0034119427712218e-05, "loss": 0.0979, "step": 99110 }, { "epoch": 3.602005959735446, "grad_norm": 0.6967700719833374, "learning_rate": 2.0029033621244983e-05, "loss": 0.0728, "step": 99120 }, { "epoch": 3.602369358238244, "grad_norm": 0.640446662902832, "learning_rate": 2.0023948028959017e-05, "loss": 0.0701, "step": 99130 }, { "epoch": 3.6027327567410423, "grad_norm": 1.3062965869903564, "learning_rate": 2.001886265107342e-05, "loss": 0.0921, "step": 99140 }, { "epoch": 3.6030961552438403, "grad_norm": 0.7615834474563599, "learning_rate": 2.0013777487807313e-05, "loss": 0.076, "step": 99150 }, { "epoch": 3.6034595537466387, "grad_norm": 0.9553613662719727, "learning_rate": 2.0008692539379788e-05, "loss": 0.0751, "step": 99160 }, { "epoch": 3.6038229522494367, "grad_norm": 1.0260523557662964, "learning_rate": 2.0003607806009937e-05, "loss": 0.0673, "step": 99170 }, { "epoch": 3.6041863507522347, "grad_norm": 1.555732250213623, "learning_rate": 1.9998523287916858e-05, "loss": 0.0793, "step": 99180 }, { "epoch": 3.604549749255033, "grad_norm": 0.5698230862617493, "learning_rate": 1.9993438985319612e-05, "loss": 0.0815, "step": 99190 }, { "epoch": 3.604913147757831, "grad_norm": 0.8293182849884033, "learning_rate": 1.998835489843727e-05, 
"loss": 0.0706, "step": 99200 }, { "epoch": 3.6052765462606295, "grad_norm": 0.5732713341712952, "learning_rate": 1.998327102748887e-05, "loss": 0.0754, "step": 99210 }, { "epoch": 3.6056399447634275, "grad_norm": 1.5586737394332886, "learning_rate": 1.9978187372693486e-05, "loss": 0.3522, "step": 99220 }, { "epoch": 3.6060033432662255, "grad_norm": 5.692126750946045, "learning_rate": 1.9973103934270136e-05, "loss": 0.0782, "step": 99230 }, { "epoch": 3.606366741769024, "grad_norm": 0.30950793623924255, "learning_rate": 1.9968020712437857e-05, "loss": 0.0866, "step": 99240 }, { "epoch": 3.606730140271822, "grad_norm": 1.8191814422607422, "learning_rate": 1.996293770741566e-05, "loss": 0.0855, "step": 99250 }, { "epoch": 3.6070935387746204, "grad_norm": 0.44492724537849426, "learning_rate": 1.9957854919422543e-05, "loss": 0.0804, "step": 99260 }, { "epoch": 3.6074569372774183, "grad_norm": 0.49994924664497375, "learning_rate": 1.9952772348677528e-05, "loss": 0.069, "step": 99270 }, { "epoch": 3.6078203357802163, "grad_norm": 0.6917654275894165, "learning_rate": 1.99476899953996e-05, "loss": 0.0745, "step": 99280 }, { "epoch": 3.6081837342830148, "grad_norm": 1.5276329517364502, "learning_rate": 1.9942607859807726e-05, "loss": 0.0987, "step": 99290 }, { "epoch": 3.608547132785813, "grad_norm": 1.3370615243911743, "learning_rate": 1.9937525942120888e-05, "loss": 0.0785, "step": 99300 }, { "epoch": 3.608910531288611, "grad_norm": 0.6237464547157288, "learning_rate": 1.9932444242558035e-05, "loss": 0.0708, "step": 99310 }, { "epoch": 3.609273929791409, "grad_norm": 1.2243361473083496, "learning_rate": 1.992736276133814e-05, "loss": 0.0606, "step": 99320 }, { "epoch": 3.6096373282942076, "grad_norm": 0.5042082667350769, "learning_rate": 1.9922281498680133e-05, "loss": 0.0696, "step": 99330 }, { "epoch": 3.6100007267970056, "grad_norm": 0.6634204387664795, "learning_rate": 1.9917200454802953e-05, "loss": 0.099, "step": 99340 }, { "epoch": 3.610364125299804, "grad_norm": 
0.39905115962028503, "learning_rate": 1.991211962992552e-05, "loss": 0.0799, "step": 99350 }, { "epoch": 3.610727523802602, "grad_norm": 0.49072784185409546, "learning_rate": 1.990703902426674e-05, "loss": 0.1372, "step": 99360 }, { "epoch": 3.6110909223054, "grad_norm": 0.8174235820770264, "learning_rate": 1.990195863804553e-05, "loss": 0.0647, "step": 99370 }, { "epoch": 3.6114543208081984, "grad_norm": 0.634876549243927, "learning_rate": 1.9896878471480794e-05, "loss": 0.0704, "step": 99380 }, { "epoch": 3.6118177193109964, "grad_norm": 0.5161920189857483, "learning_rate": 1.98917985247914e-05, "loss": 0.0912, "step": 99390 }, { "epoch": 3.612181117813795, "grad_norm": 2.626404047012329, "learning_rate": 1.9886718798196226e-05, "loss": 0.0667, "step": 99400 }, { "epoch": 3.612544516316593, "grad_norm": 0.4185205101966858, "learning_rate": 1.9881639291914157e-05, "loss": 0.0703, "step": 99410 }, { "epoch": 3.6129079148193908, "grad_norm": 1.610190510749817, "learning_rate": 1.9876560006164034e-05, "loss": 0.067, "step": 99420 }, { "epoch": 3.613271313322189, "grad_norm": 0.5030075311660767, "learning_rate": 1.9871480941164718e-05, "loss": 0.0706, "step": 99430 }, { "epoch": 3.613634711824987, "grad_norm": 0.35228344798088074, "learning_rate": 1.986640209713504e-05, "loss": 0.095, "step": 99440 }, { "epoch": 3.6139981103277856, "grad_norm": 0.5087105631828308, "learning_rate": 1.9861323474293817e-05, "loss": 0.0804, "step": 99450 }, { "epoch": 3.6143615088305836, "grad_norm": 0.3755999505519867, "learning_rate": 1.985624507285989e-05, "loss": 0.0772, "step": 99460 }, { "epoch": 3.6147249073333816, "grad_norm": 0.841137707233429, "learning_rate": 1.985116689305207e-05, "loss": 0.0665, "step": 99470 }, { "epoch": 3.61508830583618, "grad_norm": 0.819675862789154, "learning_rate": 1.984608893508914e-05, "loss": 0.0654, "step": 99480 }, { "epoch": 3.615451704338978, "grad_norm": 0.50279700756073, "learning_rate": 1.9841011199189903e-05, "loss": 0.0903, "step": 99490 }, 
{ "epoch": 3.6158151028417764, "grad_norm": 1.3066468238830566, "learning_rate": 1.983593368557313e-05, "loss": 0.0659, "step": 99500 }, { "epoch": 3.6161785013445744, "grad_norm": 3.0848491191864014, "learning_rate": 1.983085639445761e-05, "loss": 0.0895, "step": 99510 }, { "epoch": 3.6165418998473724, "grad_norm": 0.4366300106048584, "learning_rate": 1.9825779326062092e-05, "loss": 0.0701, "step": 99520 }, { "epoch": 3.616905298350171, "grad_norm": 0.8729182481765747, "learning_rate": 1.982070248060534e-05, "loss": 0.0652, "step": 99530 }, { "epoch": 3.617268696852969, "grad_norm": 0.6263926029205322, "learning_rate": 1.9815625858306087e-05, "loss": 0.0866, "step": 99540 }, { "epoch": 3.6176320953557672, "grad_norm": 1.8468258380889893, "learning_rate": 1.9810549459383057e-05, "loss": 0.0881, "step": 99550 }, { "epoch": 3.6179954938585652, "grad_norm": 0.606265127658844, "learning_rate": 1.9805473284054997e-05, "loss": 0.0915, "step": 99560 }, { "epoch": 3.618358892361363, "grad_norm": 1.2118183374404907, "learning_rate": 1.9800397332540615e-05, "loss": 0.059, "step": 99570 }, { "epoch": 3.6187222908641616, "grad_norm": 0.6607059240341187, "learning_rate": 1.979532160505861e-05, "loss": 0.0682, "step": 99580 }, { "epoch": 3.61908568936696, "grad_norm": 0.6374445557594299, "learning_rate": 1.979024610182767e-05, "loss": 0.0748, "step": 99590 }, { "epoch": 3.619449087869758, "grad_norm": 0.6254763007164001, "learning_rate": 1.9785170823066492e-05, "loss": 0.0805, "step": 99600 }, { "epoch": 3.619449087869758, "eval_loss": 0.33375027775764465, "eval_runtime": 179.686, "eval_samples_per_second": 41.261, "eval_steps_per_second": 5.159, "eval_wer": 0.13485940421515058, "step": 99600 }, { "epoch": 3.619812486372556, "grad_norm": 0.8105804324150085, "learning_rate": 1.9780095768993756e-05, "loss": 0.0794, "step": 99610 }, { "epoch": 3.6201758848753545, "grad_norm": 1.2891360521316528, "learning_rate": 1.9775020939828118e-05, "loss": 0.0743, "step": 99620 }, { "epoch": 
3.6205392833781525, "grad_norm": 0.528218686580658, "learning_rate": 1.9769946335788236e-05, "loss": 0.0694, "step": 99630 }, { "epoch": 3.620902681880951, "grad_norm": 0.47785595059394836, "learning_rate": 1.976487195709275e-05, "loss": 0.1074, "step": 99640 }, { "epoch": 3.621266080383749, "grad_norm": 0.8402767181396484, "learning_rate": 1.9759797803960318e-05, "loss": 0.0795, "step": 99650 }, { "epoch": 3.621629478886547, "grad_norm": 1.007688045501709, "learning_rate": 1.9754723876609548e-05, "loss": 0.0964, "step": 99660 }, { "epoch": 3.6219928773893453, "grad_norm": 0.2740893065929413, "learning_rate": 1.9749650175259067e-05, "loss": 0.051, "step": 99670 }, { "epoch": 3.6223562758921433, "grad_norm": 0.38753753900527954, "learning_rate": 1.974457670012747e-05, "loss": 0.0649, "step": 99680 }, { "epoch": 3.6227196743949417, "grad_norm": 0.4678163230419159, "learning_rate": 1.973950345143337e-05, "loss": 0.094, "step": 99690 }, { "epoch": 3.6230830728977397, "grad_norm": 1.056203842163086, "learning_rate": 1.973443042939535e-05, "loss": 0.3395, "step": 99700 }, { "epoch": 3.6234464714005377, "grad_norm": 1.3664277791976929, "learning_rate": 1.972935763423199e-05, "loss": 0.0842, "step": 99710 }, { "epoch": 3.623809869903336, "grad_norm": 0.618270218372345, "learning_rate": 1.9724285066161858e-05, "loss": 0.0642, "step": 99720 }, { "epoch": 3.624173268406134, "grad_norm": 0.3474178910255432, "learning_rate": 1.97192127254035e-05, "loss": 0.0583, "step": 99730 }, { "epoch": 3.6245366669089325, "grad_norm": 0.9433966875076294, "learning_rate": 1.9714140612175483e-05, "loss": 0.0691, "step": 99740 }, { "epoch": 3.6249000654117305, "grad_norm": 0.7541018128395081, "learning_rate": 1.9709068726696342e-05, "loss": 0.0867, "step": 99750 }, { "epoch": 3.6252634639145285, "grad_norm": 1.1116639375686646, "learning_rate": 1.9703997069184606e-05, "loss": 0.0827, "step": 99760 }, { "epoch": 3.625626862417327, "grad_norm": 0.5351355671882629, "learning_rate": 
1.9698925639858792e-05, "loss": 0.0689, "step": 99770 }, { "epoch": 3.625990260920125, "grad_norm": 0.40793484449386597, "learning_rate": 1.9693854438937405e-05, "loss": 0.0565, "step": 99780 }, { "epoch": 3.6263536594229233, "grad_norm": 0.4486261308193207, "learning_rate": 1.9688783466638952e-05, "loss": 0.099, "step": 99790 }, { "epoch": 3.6267170579257213, "grad_norm": 0.5843884944915771, "learning_rate": 1.9683712723181926e-05, "loss": 0.0825, "step": 99800 }, { "epoch": 3.6270804564285193, "grad_norm": 0.680952250957489, "learning_rate": 1.9678642208784805e-05, "loss": 0.0683, "step": 99810 }, { "epoch": 3.6274438549313177, "grad_norm": 1.9215220212936401, "learning_rate": 1.9673571923666052e-05, "loss": 0.0576, "step": 99820 }, { "epoch": 3.6278072534341157, "grad_norm": 0.8826823830604553, "learning_rate": 1.9668501868044134e-05, "loss": 0.0615, "step": 99830 }, { "epoch": 3.628170651936914, "grad_norm": 8.166322708129883, "learning_rate": 1.9663432042137507e-05, "loss": 0.0749, "step": 99840 }, { "epoch": 3.628534050439712, "grad_norm": 0.97735595703125, "learning_rate": 1.96583624461646e-05, "loss": 0.0721, "step": 99850 }, { "epoch": 3.62889744894251, "grad_norm": 0.35416311025619507, "learning_rate": 1.9653293080343858e-05, "loss": 0.0927, "step": 99860 }, { "epoch": 3.6292608474453085, "grad_norm": 1.6705694198608398, "learning_rate": 1.964822394489368e-05, "loss": 0.074, "step": 99870 }, { "epoch": 3.629624245948107, "grad_norm": 0.5998109579086304, "learning_rate": 1.9643155040032497e-05, "loss": 0.0535, "step": 99880 }, { "epoch": 3.629987644450905, "grad_norm": 0.28862881660461426, "learning_rate": 1.9638086365978707e-05, "loss": 0.0688, "step": 99890 }, { "epoch": 3.630351042953703, "grad_norm": 1.4020724296569824, "learning_rate": 1.9633017922950697e-05, "loss": 0.0843, "step": 99900 }, { "epoch": 3.6307144414565014, "grad_norm": 0.4853419363498688, "learning_rate": 1.9627949711166843e-05, "loss": 0.0813, "step": 99910 }, { "epoch": 
3.6310778399592993, "grad_norm": 0.5583473443984985, "learning_rate": 1.9622881730845525e-05, "loss": 0.0519, "step": 99920 }, { "epoch": 3.6314412384620978, "grad_norm": 0.5846819281578064, "learning_rate": 1.9617813982205104e-05, "loss": 0.0698, "step": 99930 }, { "epoch": 3.6318046369648957, "grad_norm": 0.44811734557151794, "learning_rate": 1.9612746465463926e-05, "loss": 0.0849, "step": 99940 }, { "epoch": 3.6321680354676937, "grad_norm": 1.8960832357406616, "learning_rate": 1.960767918084034e-05, "loss": 0.0892, "step": 99950 }, { "epoch": 3.632531433970492, "grad_norm": 0.5286456942558289, "learning_rate": 1.9602612128552666e-05, "loss": 0.0574, "step": 99960 }, { "epoch": 3.63289483247329, "grad_norm": 0.5947690010070801, "learning_rate": 1.9597545308819234e-05, "loss": 0.059, "step": 99970 }, { "epoch": 3.6332582309760886, "grad_norm": 0.42291057109832764, "learning_rate": 1.959247872185835e-05, "loss": 0.0977, "step": 99980 }, { "epoch": 3.6336216294788866, "grad_norm": 0.7012550234794617, "learning_rate": 1.958741236788832e-05, "loss": 0.0865, "step": 99990 }, { "epoch": 3.6339850279816845, "grad_norm": 4.052799224853516, "learning_rate": 1.9582346247127432e-05, "loss": 0.0951, "step": 100000 }, { "epoch": 3.634348426484483, "grad_norm": 10.946352005004883, "learning_rate": 1.957728035979397e-05, "loss": 0.0716, "step": 100010 }, { "epoch": 3.634711824987281, "grad_norm": 0.9583892822265625, "learning_rate": 1.95722147061062e-05, "loss": 0.0574, "step": 100020 }, { "epoch": 3.6350752234900794, "grad_norm": 0.34174227714538574, "learning_rate": 1.956714928628239e-05, "loss": 0.0717, "step": 100030 }, { "epoch": 3.6354386219928774, "grad_norm": 6.283267021179199, "learning_rate": 1.9562084100540788e-05, "loss": 0.0757, "step": 100040 }, { "epoch": 3.6358020204956754, "grad_norm": 0.6548503041267395, "learning_rate": 1.955701914909963e-05, "loss": 0.0991, "step": 100050 }, { "epoch": 3.636165418998474, "grad_norm": 0.3819758892059326, "learning_rate": 
1.9551954432177154e-05, "loss": 0.0704, "step": 100060 }, { "epoch": 3.6365288175012718, "grad_norm": 0.41873475909233093, "learning_rate": 1.9546889949991575e-05, "loss": 0.0723, "step": 100070 }, { "epoch": 3.63689221600407, "grad_norm": 0.7295028567314148, "learning_rate": 1.9541825702761107e-05, "loss": 0.0635, "step": 100080 }, { "epoch": 3.637255614506868, "grad_norm": 0.32320845127105713, "learning_rate": 1.953676169070395e-05, "loss": 0.0882, "step": 100090 }, { "epoch": 3.637619013009666, "grad_norm": 0.6093827486038208, "learning_rate": 1.9531697914038288e-05, "loss": 0.0623, "step": 100100 }, { "epoch": 3.6379824115124646, "grad_norm": 1.0530484914779663, "learning_rate": 1.9526634372982315e-05, "loss": 0.0793, "step": 100110 }, { "epoch": 3.6383458100152626, "grad_norm": 0.6837037205696106, "learning_rate": 1.9521571067754186e-05, "loss": 0.0512, "step": 100120 }, { "epoch": 3.638709208518061, "grad_norm": 0.4363226890563965, "learning_rate": 1.951650799857207e-05, "loss": 0.0645, "step": 100130 }, { "epoch": 3.639072607020859, "grad_norm": 1.5349054336547852, "learning_rate": 1.951144516565411e-05, "loss": 0.0782, "step": 100140 }, { "epoch": 3.639436005523657, "grad_norm": 1.7200794219970703, "learning_rate": 1.9506382569218457e-05, "loss": 0.0911, "step": 100150 }, { "epoch": 3.6397994040264554, "grad_norm": 0.8230735659599304, "learning_rate": 1.9501320209483232e-05, "loss": 0.0666, "step": 100160 }, { "epoch": 3.640162802529254, "grad_norm": 0.5745123028755188, "learning_rate": 1.9496258086666548e-05, "loss": 0.0625, "step": 100170 }, { "epoch": 3.640526201032052, "grad_norm": 0.4190816283226013, "learning_rate": 1.9491196200986525e-05, "loss": 0.1627, "step": 100180 }, { "epoch": 3.64088959953485, "grad_norm": 0.6358737349510193, "learning_rate": 1.9486134552661252e-05, "loss": 0.0775, "step": 100190 }, { "epoch": 3.6412529980376482, "grad_norm": 0.5560312271118164, "learning_rate": 1.9481073141908832e-05, "loss": 0.068, "step": 100200 }, { 
"epoch": 3.6412529980376482, "eval_loss": 0.32526150345802307, "eval_runtime": 179.1108, "eval_samples_per_second": 41.393, "eval_steps_per_second": 5.176, "eval_wer": 0.1345326483562365, "step": 100200 }, { "epoch": 3.641616396540446, "grad_norm": 1.3325546979904175, "learning_rate": 1.947601196894733e-05, "loss": 0.0575, "step": 100210 }, { "epoch": 3.6419797950432446, "grad_norm": 1.013014554977417, "learning_rate": 1.9470951033994817e-05, "loss": 0.0757, "step": 100220 }, { "epoch": 3.6423431935460426, "grad_norm": 0.4979040026664734, "learning_rate": 1.9465890337269345e-05, "loss": 0.0626, "step": 100230 }, { "epoch": 3.6427065920488406, "grad_norm": 0.3621061444282532, "learning_rate": 1.9460829878988977e-05, "loss": 0.0812, "step": 100240 }, { "epoch": 3.643069990551639, "grad_norm": 1.418254017829895, "learning_rate": 1.9455769659371744e-05, "loss": 0.0961, "step": 100250 }, { "epoch": 3.643433389054437, "grad_norm": 1.291218638420105, "learning_rate": 1.945070967863566e-05, "loss": 0.0642, "step": 100260 }, { "epoch": 3.6437967875572355, "grad_norm": 0.4502682387828827, "learning_rate": 1.944564993699876e-05, "loss": 0.0581, "step": 100270 }, { "epoch": 3.6441601860600334, "grad_norm": 1.5867079496383667, "learning_rate": 1.9440590434679034e-05, "loss": 0.078, "step": 100280 }, { "epoch": 3.6445235845628314, "grad_norm": 1.330061912536621, "learning_rate": 1.9435531171894493e-05, "loss": 0.0625, "step": 100290 }, { "epoch": 3.64488698306563, "grad_norm": 2.1027393341064453, "learning_rate": 1.9430472148863113e-05, "loss": 0.0961, "step": 100300 }, { "epoch": 3.645250381568428, "grad_norm": 0.29562556743621826, "learning_rate": 1.9425413365802865e-05, "loss": 0.0594, "step": 100310 }, { "epoch": 3.6456137800712263, "grad_norm": 0.3071857988834381, "learning_rate": 1.9420354822931725e-05, "loss": 0.062, "step": 100320 }, { "epoch": 3.6459771785740243, "grad_norm": 0.3967355489730835, "learning_rate": 1.9415296520467647e-05, "loss": 0.0722, "step": 100330 }, 
{ "epoch": 3.6463405770768222, "grad_norm": 0.3051000237464905, "learning_rate": 1.941023845862857e-05, "loss": 0.0878, "step": 100340 }, { "epoch": 3.6467039755796207, "grad_norm": 0.5483397245407104, "learning_rate": 1.940518063763243e-05, "loss": 0.0832, "step": 100350 }, { "epoch": 3.6470673740824187, "grad_norm": 0.9624475836753845, "learning_rate": 1.9400123057697147e-05, "loss": 0.0844, "step": 100360 }, { "epoch": 3.647430772585217, "grad_norm": 1.777334213256836, "learning_rate": 1.9395065719040635e-05, "loss": 0.0706, "step": 100370 }, { "epoch": 3.647794171088015, "grad_norm": 0.42129790782928467, "learning_rate": 1.9390008621880806e-05, "loss": 0.0468, "step": 100380 }, { "epoch": 3.648157569590813, "grad_norm": 0.38181936740875244, "learning_rate": 1.938495176643554e-05, "loss": 0.0748, "step": 100390 }, { "epoch": 3.6485209680936115, "grad_norm": 0.6296722888946533, "learning_rate": 1.9379895152922722e-05, "loss": 0.0855, "step": 100400 }, { "epoch": 3.6488843665964095, "grad_norm": 0.39220812916755676, "learning_rate": 1.9374838781560228e-05, "loss": 0.1038, "step": 100410 }, { "epoch": 3.649247765099208, "grad_norm": 0.5395467281341553, "learning_rate": 1.936978265256592e-05, "loss": 0.0801, "step": 100420 }, { "epoch": 3.649611163602006, "grad_norm": 0.5562071800231934, "learning_rate": 1.9364726766157644e-05, "loss": 0.0648, "step": 100430 }, { "epoch": 3.649974562104804, "grad_norm": 0.5015496611595154, "learning_rate": 1.935967112255324e-05, "loss": 0.0943, "step": 100440 }, { "epoch": 3.6503379606076023, "grad_norm": 0.5984257459640503, "learning_rate": 1.935461572197054e-05, "loss": 0.0724, "step": 100450 }, { "epoch": 3.6507013591104007, "grad_norm": 0.3558928370475769, "learning_rate": 1.9349560564627354e-05, "loss": 4.4573, "step": 100460 }, { "epoch": 3.6510647576131987, "grad_norm": 0.37172460556030273, "learning_rate": 1.9344505650741506e-05, "loss": 0.0577, "step": 100470 }, { "epoch": 3.6514281561159967, "grad_norm": 
0.4635004997253418, "learning_rate": 1.933945098053079e-05, "loss": 0.0586, "step": 100480 }, { "epoch": 3.651791554618795, "grad_norm": 0.6180506944656372, "learning_rate": 1.933439655421299e-05, "loss": 0.0936, "step": 100490 }, { "epoch": 3.652154953121593, "grad_norm": 0.4719576835632324, "learning_rate": 1.932934237200588e-05, "loss": 0.0786, "step": 100500 }, { "epoch": 3.6525183516243915, "grad_norm": 0.47542062401771545, "learning_rate": 1.932428843412723e-05, "loss": 0.0847, "step": 100510 }, { "epoch": 3.6528817501271895, "grad_norm": 0.40580594539642334, "learning_rate": 1.93192347407948e-05, "loss": 0.1121, "step": 100520 }, { "epoch": 3.6532451486299875, "grad_norm": 0.6533267498016357, "learning_rate": 1.9314181292226337e-05, "loss": 0.0856, "step": 100530 }, { "epoch": 3.653608547132786, "grad_norm": 0.41226625442504883, "learning_rate": 1.930912808863957e-05, "loss": 0.0721, "step": 100540 }, { "epoch": 3.653971945635584, "grad_norm": 0.9230170249938965, "learning_rate": 1.9304075130252212e-05, "loss": 0.0788, "step": 100550 }, { "epoch": 3.6543353441383823, "grad_norm": 1.469427227973938, "learning_rate": 1.9299022417282006e-05, "loss": 0.0899, "step": 100560 }, { "epoch": 3.6546987426411803, "grad_norm": 0.38428354263305664, "learning_rate": 1.9293969949946638e-05, "loss": 0.0586, "step": 100570 }, { "epoch": 3.6550621411439783, "grad_norm": 1.4046002626419067, "learning_rate": 1.9288917728463802e-05, "loss": 0.0668, "step": 100580 }, { "epoch": 3.6554255396467767, "grad_norm": 0.8734479546546936, "learning_rate": 1.9283865753051177e-05, "loss": 0.0682, "step": 100590 }, { "epoch": 3.6557889381495747, "grad_norm": 1.0090571641921997, "learning_rate": 1.927881402392644e-05, "loss": 0.0827, "step": 100600 }, { "epoch": 3.656152336652373, "grad_norm": 0.4134766459465027, "learning_rate": 1.927376254130725e-05, "loss": 0.0897, "step": 100610 }, { "epoch": 3.656515735155171, "grad_norm": 0.5063420534133911, "learning_rate": 1.9268711305411262e-05, 
"loss": 0.0894, "step": 100620 }, { "epoch": 3.656879133657969, "grad_norm": 0.6011788845062256, "learning_rate": 1.9263660316456114e-05, "loss": 0.0595, "step": 100630 }, { "epoch": 3.6572425321607676, "grad_norm": 0.7085416316986084, "learning_rate": 1.925860957465942e-05, "loss": 0.0748, "step": 100640 }, { "epoch": 3.6576059306635655, "grad_norm": 0.7953318357467651, "learning_rate": 1.925355908023882e-05, "loss": 0.0856, "step": 100650 }, { "epoch": 3.657969329166364, "grad_norm": 1.6228654384613037, "learning_rate": 1.924901384694669e-05, "loss": 3.1994, "step": 100660 }, { "epoch": 3.658332727669162, "grad_norm": 0.5507588982582092, "learning_rate": 1.9243963823140153e-05, "loss": 0.0693, "step": 100670 }, { "epoch": 3.65869612617196, "grad_norm": 0.5793731212615967, "learning_rate": 1.9238914047340737e-05, "loss": 0.0565, "step": 100680 }, { "epoch": 3.6590595246747584, "grad_norm": 3.112278699874878, "learning_rate": 1.9233864519766014e-05, "loss": 0.0867, "step": 100690 }, { "epoch": 3.6594229231775564, "grad_norm": 1.09683096408844, "learning_rate": 1.922881524063356e-05, "loss": 0.0739, "step": 100700 }, { "epoch": 3.659786321680355, "grad_norm": 0.35297101736068726, "learning_rate": 1.9223766210160906e-05, "loss": 0.0809, "step": 100710 }, { "epoch": 3.6601497201831528, "grad_norm": 0.3563065230846405, "learning_rate": 1.9218717428565626e-05, "loss": 0.0656, "step": 100720 }, { "epoch": 3.6605131186859508, "grad_norm": 0.7128033638000488, "learning_rate": 1.9213668896065246e-05, "loss": 0.0731, "step": 100730 }, { "epoch": 3.660876517188749, "grad_norm": 0.4847666919231415, "learning_rate": 1.920862061287728e-05, "loss": 0.0789, "step": 100740 }, { "epoch": 3.6612399156915476, "grad_norm": 0.5890012383460999, "learning_rate": 1.9203572579219246e-05, "loss": 0.0941, "step": 100750 }, { "epoch": 3.6616033141943456, "grad_norm": 0.40128976106643677, "learning_rate": 1.919852479530864e-05, "loss": 0.091, "step": 100760 }, { "epoch": 3.6619667126971436, 
"grad_norm": 0.9814205169677734, "learning_rate": 1.919347726136297e-05, "loss": 0.0666, "step": 100770 }, { "epoch": 3.662330111199942, "grad_norm": 0.8055763244628906, "learning_rate": 1.9188429977599705e-05, "loss": 0.7617, "step": 100780 }, { "epoch": 3.66269350970274, "grad_norm": 0.596468985080719, "learning_rate": 1.918338294423631e-05, "loss": 1.6764, "step": 100790 }, { "epoch": 3.6630569082055384, "grad_norm": 0.9450867772102356, "learning_rate": 1.9178336161490244e-05, "loss": 0.082, "step": 100800 }, { "epoch": 3.6630569082055384, "eval_loss": 0.2825222611427307, "eval_runtime": 179.407, "eval_samples_per_second": 41.325, "eval_steps_per_second": 5.167, "eval_wer": 0.13438742353005245, "step": 100800 }, { "epoch": 3.6634203067083364, "grad_norm": 2.9087324142456055, "learning_rate": 1.917328962957896e-05, "loss": 0.0853, "step": 100810 }, { "epoch": 3.6637837052111344, "grad_norm": 0.3305929899215698, "learning_rate": 1.9168243348719898e-05, "loss": 0.0967, "step": 100820 }, { "epoch": 3.664147103713933, "grad_norm": 1.6349554061889648, "learning_rate": 1.9163197319130486e-05, "loss": 0.0557, "step": 100830 }, { "epoch": 3.664510502216731, "grad_norm": 1.9938950538635254, "learning_rate": 1.915815154102813e-05, "loss": 0.193, "step": 100840 }, { "epoch": 3.6648739007195292, "grad_norm": 0.6496366262435913, "learning_rate": 1.915310601463023e-05, "loss": 0.0769, "step": 100850 }, { "epoch": 3.665237299222327, "grad_norm": 0.5455463528633118, "learning_rate": 1.91480607401542e-05, "loss": 0.0835, "step": 100860 }, { "epoch": 3.665600697725125, "grad_norm": 0.6552872657775879, "learning_rate": 1.9143015717817408e-05, "loss": 0.0774, "step": 100870 }, { "epoch": 3.6659640962279236, "grad_norm": 0.6207099556922913, "learning_rate": 1.913797094783723e-05, "loss": 0.0613, "step": 100880 }, { "epoch": 3.6663274947307216, "grad_norm": 1.0876959562301636, "learning_rate": 1.913292643043103e-05, "loss": 0.0825, "step": 100890 }, { "epoch": 3.66669089323352, 
"grad_norm": 0.5958231687545776, "learning_rate": 1.912788216581614e-05, "loss": 0.0801, "step": 100900 }, { "epoch": 3.667054291736318, "grad_norm": 0.7565116882324219, "learning_rate": 1.912283815420993e-05, "loss": 0.0776, "step": 100910 }, { "epoch": 3.667417690239116, "grad_norm": 0.23336388170719147, "learning_rate": 1.9117794395829706e-05, "loss": 0.0467, "step": 100920 }, { "epoch": 3.6677810887419144, "grad_norm": 0.23995588719844818, "learning_rate": 1.91127508908928e-05, "loss": 0.0567, "step": 100930 }, { "epoch": 3.6681444872447124, "grad_norm": 5.810134410858154, "learning_rate": 1.9107707639616495e-05, "loss": 0.0745, "step": 100940 }, { "epoch": 3.668507885747511, "grad_norm": 0.9874204993247986, "learning_rate": 1.9102664642218118e-05, "loss": 0.0621, "step": 100950 }, { "epoch": 3.668871284250309, "grad_norm": 0.8151770830154419, "learning_rate": 1.9097621898914937e-05, "loss": 0.0711, "step": 100960 }, { "epoch": 3.669234682753107, "grad_norm": 13.424245834350586, "learning_rate": 1.9092579409924227e-05, "loss": 0.6562, "step": 100970 }, { "epoch": 3.6695980812559053, "grad_norm": 0.8328826427459717, "learning_rate": 1.9087537175463252e-05, "loss": 0.0722, "step": 100980 }, { "epoch": 3.6699614797587032, "grad_norm": 0.9819945096969604, "learning_rate": 1.9082495195749252e-05, "loss": 0.0812, "step": 100990 }, { "epoch": 3.6703248782615017, "grad_norm": 1.7970198392868042, "learning_rate": 1.907745347099949e-05, "loss": 0.1063, "step": 101000 }, { "epoch": 3.6706882767642997, "grad_norm": 2.1204395294189453, "learning_rate": 1.9072412001431188e-05, "loss": 0.0787, "step": 101010 }, { "epoch": 3.6710516752670976, "grad_norm": 1.9938024282455444, "learning_rate": 1.906737078726156e-05, "loss": 0.0694, "step": 101020 }, { "epoch": 3.671415073769896, "grad_norm": 0.5936083793640137, "learning_rate": 1.9062329828707818e-05, "loss": 0.0937, "step": 101030 }, { "epoch": 3.6717784722726945, "grad_norm": 0.3774571716785431, "learning_rate": 
1.9057289125987143e-05, "loss": 0.0657, "step": 101040 }, { "epoch": 3.6721418707754925, "grad_norm": 0.6404164433479309, "learning_rate": 1.905224867931675e-05, "loss": 0.0831, "step": 101050 }, { "epoch": 3.6725052692782905, "grad_norm": 2.3458011150360107, "learning_rate": 1.90472084889138e-05, "loss": 0.0729, "step": 101060 }, { "epoch": 3.672868667781089, "grad_norm": 0.458993136882782, "learning_rate": 1.9042168554995453e-05, "loss": 0.065, "step": 101070 }, { "epoch": 3.673232066283887, "grad_norm": 0.4776252210140228, "learning_rate": 1.9037128877778865e-05, "loss": 0.0655, "step": 101080 }, { "epoch": 3.6735954647866853, "grad_norm": 3.986689567565918, "learning_rate": 1.903208945748117e-05, "loss": 0.1049, "step": 101090 }, { "epoch": 3.6739588632894833, "grad_norm": 0.9909849762916565, "learning_rate": 1.9027050294319513e-05, "loss": 0.0937, "step": 101100 }, { "epoch": 3.6743222617922813, "grad_norm": 0.7600964903831482, "learning_rate": 1.902201138851101e-05, "loss": 0.0981, "step": 101110 }, { "epoch": 3.6746856602950797, "grad_norm": 0.587383508682251, "learning_rate": 1.9016972740272763e-05, "loss": 0.0635, "step": 101120 }, { "epoch": 3.6750490587978777, "grad_norm": 0.8672456741333008, "learning_rate": 1.901193434982187e-05, "loss": 0.073, "step": 101130 }, { "epoch": 3.675412457300676, "grad_norm": 1.177628517150879, "learning_rate": 1.9006896217375426e-05, "loss": 0.0805, "step": 101140 }, { "epoch": 3.675775855803474, "grad_norm": 0.7139699459075928, "learning_rate": 1.9001858343150496e-05, "loss": 0.0717, "step": 101150 }, { "epoch": 3.676139254306272, "grad_norm": 3.405705213546753, "learning_rate": 1.8996820727364155e-05, "loss": 0.1095, "step": 101160 }, { "epoch": 3.6765026528090705, "grad_norm": 22.62385368347168, "learning_rate": 1.899178337023345e-05, "loss": 0.1336, "step": 101170 }, { "epoch": 3.6768660513118685, "grad_norm": 0.628099799156189, "learning_rate": 1.8986746271975406e-05, "loss": 0.0731, "step": 101180 }, { "epoch": 
3.677229449814667, "grad_norm": 0.5328086018562317, "learning_rate": 1.8981709432807086e-05, "loss": 0.1353, "step": 101190 }, { "epoch": 3.677592848317465, "grad_norm": 0.4889640212059021, "learning_rate": 1.897667285294549e-05, "loss": 0.0941, "step": 101200 }, { "epoch": 3.677956246820263, "grad_norm": 0.6223835945129395, "learning_rate": 1.8971636532607627e-05, "loss": 0.0895, "step": 101210 }, { "epoch": 3.6783196453230613, "grad_norm": 0.3453806936740875, "learning_rate": 1.8966600472010505e-05, "loss": 0.0618, "step": 101220 }, { "epoch": 3.6786830438258593, "grad_norm": 0.47201159596443176, "learning_rate": 1.8961564671371084e-05, "loss": 0.0565, "step": 101230 }, { "epoch": 3.6790464423286577, "grad_norm": 0.44613829255104065, "learning_rate": 1.895652913090637e-05, "loss": 0.0892, "step": 101240 }, { "epoch": 3.6794098408314557, "grad_norm": 1.7807142734527588, "learning_rate": 1.8951493850833314e-05, "loss": 0.0829, "step": 101250 }, { "epoch": 3.6797732393342537, "grad_norm": 0.4978749752044678, "learning_rate": 1.8946458831368866e-05, "loss": 0.0808, "step": 101260 }, { "epoch": 3.680136637837052, "grad_norm": 0.5074789524078369, "learning_rate": 1.894142407272997e-05, "loss": 0.0831, "step": 101270 }, { "epoch": 3.68050003633985, "grad_norm": 0.3504372537136078, "learning_rate": 1.893638957513354e-05, "loss": 0.2878, "step": 101280 }, { "epoch": 3.6808634348426486, "grad_norm": 0.5575164556503296, "learning_rate": 1.8931355338796523e-05, "loss": 0.0873, "step": 101290 }, { "epoch": 3.6812268333454465, "grad_norm": 1.6178854703903198, "learning_rate": 1.892632136393581e-05, "loss": 0.0886, "step": 101300 }, { "epoch": 3.6815902318482445, "grad_norm": 1.254417896270752, "learning_rate": 1.89212876507683e-05, "loss": 0.0666, "step": 101310 }, { "epoch": 3.681953630351043, "grad_norm": 0.5396857857704163, "learning_rate": 1.8916254199510867e-05, "loss": 0.0623, "step": 101320 }, { "epoch": 3.6823170288538414, "grad_norm": 0.29756027460098267, 
"learning_rate": 1.8911221010380403e-05, "loss": 0.0715, "step": 101330 }, { "epoch": 3.6826804273566394, "grad_norm": 0.4690750539302826, "learning_rate": 1.8906188083593762e-05, "loss": 1.6117, "step": 101340 }, { "epoch": 3.6830438258594373, "grad_norm": 3.301415205001831, "learning_rate": 1.8901155419367796e-05, "loss": 0.1075, "step": 101350 }, { "epoch": 3.6834072243622358, "grad_norm": 0.5322970151901245, "learning_rate": 1.8896123017919344e-05, "loss": 0.0663, "step": 101360 }, { "epoch": 3.6837706228650338, "grad_norm": 0.7713887691497803, "learning_rate": 1.889109087946522e-05, "loss": 0.0798, "step": 101370 }, { "epoch": 3.684134021367832, "grad_norm": 0.5864616632461548, "learning_rate": 1.8886059004222266e-05, "loss": 0.0631, "step": 101380 }, { "epoch": 3.68449741987063, "grad_norm": 0.5340792536735535, "learning_rate": 1.888102739240728e-05, "loss": 0.0708, "step": 101390 }, { "epoch": 3.684860818373428, "grad_norm": 0.6674026250839233, "learning_rate": 1.8875996044237047e-05, "loss": 0.0886, "step": 101400 }, { "epoch": 3.684860818373428, "eval_loss": 0.32165661454200745, "eval_runtime": 180.5952, "eval_samples_per_second": 41.053, "eval_steps_per_second": 5.133, "eval_wer": 0.13550383938134225, "step": 101400 }, { "epoch": 3.6852242168762266, "grad_norm": 2.9790070056915283, "learning_rate": 1.887096495992836e-05, "loss": 0.1033, "step": 101410 }, { "epoch": 3.6855876153790246, "grad_norm": 177.49661254882812, "learning_rate": 1.886593413969797e-05, "loss": 2.0711, "step": 101420 }, { "epoch": 3.685951013881823, "grad_norm": 1.7257159948349, "learning_rate": 1.8860903583762665e-05, "loss": 0.0661, "step": 101430 }, { "epoch": 3.686314412384621, "grad_norm": 0.6517038941383362, "learning_rate": 1.885587329233918e-05, "loss": 0.1329, "step": 101440 }, { "epoch": 3.686677810887419, "grad_norm": 1.2107021808624268, "learning_rate": 1.885084326564426e-05, "loss": 0.0769, "step": 101450 }, { "epoch": 3.6870412093902174, "grad_norm": 0.4862198829650879, 
"learning_rate": 1.8845813503894622e-05, "loss": 0.0795, "step": 101460 }, { "epoch": 3.6874046078930154, "grad_norm": 0.766268253326416, "learning_rate": 1.884078400730697e-05, "loss": 0.0593, "step": 101470 }, { "epoch": 3.687768006395814, "grad_norm": 0.39750218391418457, "learning_rate": 1.8835754776098035e-05, "loss": 0.0729, "step": 101480 }, { "epoch": 3.688131404898612, "grad_norm": 0.4829258322715759, "learning_rate": 1.8830725810484493e-05, "loss": 0.0722, "step": 101490 }, { "epoch": 3.68849480340141, "grad_norm": 0.6579453349113464, "learning_rate": 1.8825697110683025e-05, "loss": 0.0968, "step": 101500 }, { "epoch": 3.688858201904208, "grad_norm": 0.4035875201225281, "learning_rate": 1.882066867691029e-05, "loss": 0.0727, "step": 101510 }, { "epoch": 3.689221600407006, "grad_norm": 0.5288979411125183, "learning_rate": 1.8815640509382964e-05, "loss": 0.0677, "step": 101520 }, { "epoch": 3.6895849989098046, "grad_norm": 1.3554776906967163, "learning_rate": 1.881061260831769e-05, "loss": 0.069, "step": 101530 }, { "epoch": 3.6899483974126026, "grad_norm": 1.4026339054107666, "learning_rate": 1.880558497393109e-05, "loss": 0.0837, "step": 101540 }, { "epoch": 3.6903117959154006, "grad_norm": 0.4800674319267273, "learning_rate": 1.8800557606439798e-05, "loss": 0.067, "step": 101550 }, { "epoch": 3.690675194418199, "grad_norm": 0.9763014912605286, "learning_rate": 1.879553050606041e-05, "loss": 0.0759, "step": 101560 }, { "epoch": 3.691038592920997, "grad_norm": 0.470887690782547, "learning_rate": 1.8790503673009548e-05, "loss": 1.3916, "step": 101570 }, { "epoch": 3.6914019914237954, "grad_norm": 1.4094375371932983, "learning_rate": 1.8785477107503784e-05, "loss": 0.1743, "step": 101580 }, { "epoch": 3.6917653899265934, "grad_norm": 0.4633733928203583, "learning_rate": 1.8780450809759707e-05, "loss": 0.0662, "step": 101590 }, { "epoch": 3.6921287884293914, "grad_norm": 0.913625180721283, "learning_rate": 1.8775424779993873e-05, "loss": 0.0774, "step": 
101600 }, { "epoch": 3.69249218693219, "grad_norm": 0.5545070171356201, "learning_rate": 1.8770399018422824e-05, "loss": 0.0655, "step": 101610 }, { "epoch": 3.6928555854349883, "grad_norm": 0.33811673521995544, "learning_rate": 1.876537352526313e-05, "loss": 0.0594, "step": 101620 }, { "epoch": 3.6932189839377862, "grad_norm": 0.8966468572616577, "learning_rate": 1.8760348300731308e-05, "loss": 0.0891, "step": 101630 }, { "epoch": 3.6935823824405842, "grad_norm": 0.5035248398780823, "learning_rate": 1.8755323345043878e-05, "loss": 0.0728, "step": 101640 }, { "epoch": 3.6939457809433827, "grad_norm": 0.8029829263687134, "learning_rate": 1.8750298658417345e-05, "loss": 0.0732, "step": 101650 }, { "epoch": 3.6943091794461806, "grad_norm": 0.46018436551094055, "learning_rate": 1.8745274241068196e-05, "loss": 0.068, "step": 101660 }, { "epoch": 3.694672577948979, "grad_norm": 0.6424596905708313, "learning_rate": 1.8740250093212934e-05, "loss": 0.0622, "step": 101670 }, { "epoch": 3.695035976451777, "grad_norm": 0.5092839002609253, "learning_rate": 1.8735226215068026e-05, "loss": 0.0755, "step": 101680 }, { "epoch": 3.695399374954575, "grad_norm": 0.8991031050682068, "learning_rate": 1.8730202606849933e-05, "loss": 0.089, "step": 101690 }, { "epoch": 3.6957627734573735, "grad_norm": 1.3124310970306396, "learning_rate": 1.8725179268775088e-05, "loss": 0.0809, "step": 101700 }, { "epoch": 3.6961261719601715, "grad_norm": 0.49164462089538574, "learning_rate": 1.872015620105995e-05, "loss": 0.0835, "step": 101710 }, { "epoch": 3.69648957046297, "grad_norm": 0.4127480089664459, "learning_rate": 1.8715133403920942e-05, "loss": 0.064, "step": 101720 }, { "epoch": 3.696852968965768, "grad_norm": 0.4940035939216614, "learning_rate": 1.871011087757447e-05, "loss": 0.067, "step": 101730 }, { "epoch": 3.697216367468566, "grad_norm": 0.678022563457489, "learning_rate": 1.8705088622236944e-05, "loss": 0.084, "step": 101740 }, { "epoch": 3.6975797659713643, "grad_norm": 
0.5053865909576416, "learning_rate": 1.870006663812474e-05, "loss": 0.0637, "step": 101750 }, { "epoch": 3.6979431644741623, "grad_norm": 0.49947378039360046, "learning_rate": 1.869504492545426e-05, "loss": 0.0501, "step": 101760 }, { "epoch": 3.6983065629769607, "grad_norm": 0.5758054256439209, "learning_rate": 1.869002348444186e-05, "loss": 0.0615, "step": 101770 }, { "epoch": 3.6986699614797587, "grad_norm": 0.6957302689552307, "learning_rate": 1.8685002315303902e-05, "loss": 0.0997, "step": 101780 }, { "epoch": 3.6990333599825567, "grad_norm": 0.32749390602111816, "learning_rate": 1.867998141825672e-05, "loss": 0.0809, "step": 101790 }, { "epoch": 3.699396758485355, "grad_norm": 0.7871354818344116, "learning_rate": 1.8674960793516644e-05, "loss": 0.077, "step": 101800 }, { "epoch": 3.699760156988153, "grad_norm": 0.7597861289978027, "learning_rate": 1.8669940441300013e-05, "loss": 0.0736, "step": 101810 }, { "epoch": 3.7001235554909515, "grad_norm": 0.5100244879722595, "learning_rate": 1.8664920361823123e-05, "loss": 0.0686, "step": 101820 }, { "epoch": 3.7004869539937495, "grad_norm": 0.7316411733627319, "learning_rate": 1.865990055530228e-05, "loss": 0.0686, "step": 101830 }, { "epoch": 3.7008503524965475, "grad_norm": 0.4531911313533783, "learning_rate": 1.865488102195376e-05, "loss": 0.0706, "step": 101840 }, { "epoch": 3.701213750999346, "grad_norm": 0.6008373498916626, "learning_rate": 1.864986176199383e-05, "loss": 0.0756, "step": 101850 }, { "epoch": 3.701577149502144, "grad_norm": 1.114786982536316, "learning_rate": 1.8644842775638776e-05, "loss": 0.1311, "step": 101860 }, { "epoch": 3.7019405480049423, "grad_norm": 0.8214989304542542, "learning_rate": 1.8639824063104832e-05, "loss": 0.0723, "step": 101870 }, { "epoch": 3.7023039465077403, "grad_norm": 2.9424381256103516, "learning_rate": 1.863480562460824e-05, "loss": 0.0718, "step": 101880 }, { "epoch": 3.7026673450105383, "grad_norm": 1.3206162452697754, "learning_rate": 1.862978746036523e-05, 
"loss": 0.07, "step": 101890 }, { "epoch": 3.7030307435133367, "grad_norm": 1.1394771337509155, "learning_rate": 1.8624769570592e-05, "loss": 0.1062, "step": 101900 }, { "epoch": 3.703394142016135, "grad_norm": 0.38469168543815613, "learning_rate": 1.8619751955504776e-05, "loss": 0.0607, "step": 101910 }, { "epoch": 3.703757540518933, "grad_norm": 0.4007944166660309, "learning_rate": 1.861473461531974e-05, "loss": 0.0679, "step": 101920 }, { "epoch": 3.704120939021731, "grad_norm": 0.5937279462814331, "learning_rate": 1.860971755025307e-05, "loss": 0.0654, "step": 101930 }, { "epoch": 3.7044843375245295, "grad_norm": 1.9605783224105835, "learning_rate": 1.860470076052092e-05, "loss": 0.078, "step": 101940 }, { "epoch": 3.7048477360273275, "grad_norm": 0.9154660701751709, "learning_rate": 1.859968424633948e-05, "loss": 0.0746, "step": 101950 }, { "epoch": 3.705211134530126, "grad_norm": 1.3195884227752686, "learning_rate": 1.8594668007924863e-05, "loss": 0.0618, "step": 101960 }, { "epoch": 3.705574533032924, "grad_norm": 0.41499805450439453, "learning_rate": 1.8589652045493216e-05, "loss": 1.6398, "step": 101970 }, { "epoch": 3.705937931535722, "grad_norm": 0.4965570569038391, "learning_rate": 1.8584636359260656e-05, "loss": 0.0857, "step": 101980 }, { "epoch": 3.7063013300385204, "grad_norm": 0.46944138407707214, "learning_rate": 1.8579620949443275e-05, "loss": 0.089, "step": 101990 }, { "epoch": 3.7066647285413183, "grad_norm": 0.49834463000297546, "learning_rate": 1.8574605816257195e-05, "loss": 0.075, "step": 102000 }, { "epoch": 3.7066647285413183, "eval_loss": 0.3085034489631653, "eval_runtime": 179.7736, "eval_samples_per_second": 41.241, "eval_steps_per_second": 5.156, "eval_wer": 0.1340878973260479, "step": 102000 }, { "epoch": 3.7070281270441168, "grad_norm": 2.115856647491455, "learning_rate": 1.856959095991849e-05, "loss": 0.0773, "step": 102010 }, { "epoch": 3.7073915255469148, "grad_norm": 0.5380850434303284, "learning_rate": 1.856457638064323e-05, 
"loss": 0.0582, "step": 102020 }, { "epoch": 3.7077549240497127, "grad_norm": 0.6383968591690063, "learning_rate": 1.8559562078647477e-05, "loss": 0.1424, "step": 102030 }, { "epoch": 3.708118322552511, "grad_norm": 0.5777453184127808, "learning_rate": 1.855454805414727e-05, "loss": 0.0883, "step": 102040 }, { "epoch": 3.708481721055309, "grad_norm": 0.47622185945510864, "learning_rate": 1.8549534307358663e-05, "loss": 0.0826, "step": 102050 }, { "epoch": 3.7088451195581076, "grad_norm": 0.34270182251930237, "learning_rate": 1.854452083849767e-05, "loss": 0.0669, "step": 102060 }, { "epoch": 3.7092085180609056, "grad_norm": 0.6671618223190308, "learning_rate": 1.853950764778031e-05, "loss": 0.0541, "step": 102070 }, { "epoch": 3.7095719165637036, "grad_norm": 8.398327827453613, "learning_rate": 1.8534494735422574e-05, "loss": 0.0621, "step": 102080 }, { "epoch": 3.709935315066502, "grad_norm": 0.7380484342575073, "learning_rate": 1.852948210164045e-05, "loss": 0.0945, "step": 102090 }, { "epoch": 3.7102987135693, "grad_norm": 0.9097635746002197, "learning_rate": 1.8524469746649925e-05, "loss": 0.0949, "step": 102100 }, { "epoch": 3.7106621120720984, "grad_norm": 0.8075299263000488, "learning_rate": 1.8519457670666962e-05, "loss": 0.0888, "step": 102110 }, { "epoch": 3.7110255105748964, "grad_norm": 0.42995816469192505, "learning_rate": 1.851444587390751e-05, "loss": 0.0541, "step": 102120 }, { "epoch": 3.7113889090776944, "grad_norm": 0.4821558892726898, "learning_rate": 1.85094343565875e-05, "loss": 0.0657, "step": 102130 }, { "epoch": 3.711752307580493, "grad_norm": 0.7254829406738281, "learning_rate": 1.850442311892288e-05, "loss": 0.1247, "step": 102140 }, { "epoch": 3.712115706083291, "grad_norm": 0.9396657943725586, "learning_rate": 1.8499412161129554e-05, "loss": 0.0759, "step": 102150 }, { "epoch": 3.712479104586089, "grad_norm": 0.4930213391780853, "learning_rate": 1.849440148342343e-05, "loss": 0.0564, "step": 102160 }, { "epoch": 3.712842503088887, 
"grad_norm": 1.0215650796890259, "learning_rate": 1.8489391086020402e-05, "loss": 0.0667, "step": 102170 }, { "epoch": 3.713205901591685, "grad_norm": 0.5984035730361938, "learning_rate": 1.8484380969136332e-05, "loss": 0.0703, "step": 102180 }, { "epoch": 3.7135693000944836, "grad_norm": 0.41362717747688293, "learning_rate": 1.8479371132987116e-05, "loss": 0.0955, "step": 102190 }, { "epoch": 3.713932698597282, "grad_norm": 0.6265028119087219, "learning_rate": 1.84743615777886e-05, "loss": 0.0789, "step": 102200 }, { "epoch": 3.71429609710008, "grad_norm": 0.8152180910110474, "learning_rate": 1.8469352303756625e-05, "loss": 0.0844, "step": 102210 }, { "epoch": 3.714659495602878, "grad_norm": 5.7836480140686035, "learning_rate": 1.846434331110702e-05, "loss": 0.0633, "step": 102220 }, { "epoch": 3.7150228941056764, "grad_norm": 0.7578589916229248, "learning_rate": 1.84593346000556e-05, "loss": 0.0684, "step": 102230 }, { "epoch": 3.7153862926084744, "grad_norm": 0.5012345314025879, "learning_rate": 1.845432617081819e-05, "loss": 2.2437, "step": 102240 }, { "epoch": 3.715749691111273, "grad_norm": 4.744391918182373, "learning_rate": 1.8449318023610575e-05, "loss": 0.0819, "step": 102250 }, { "epoch": 3.716113089614071, "grad_norm": 0.4452058672904968, "learning_rate": 1.8444310158648535e-05, "loss": 0.0791, "step": 102260 }, { "epoch": 3.716476488116869, "grad_norm": 6.5998005867004395, "learning_rate": 1.843930257614785e-05, "loss": 0.1203, "step": 102270 }, { "epoch": 3.7168398866196672, "grad_norm": 0.35066086053848267, "learning_rate": 1.8434295276324265e-05, "loss": 0.0677, "step": 102280 }, { "epoch": 3.7172032851224652, "grad_norm": 0.43617117404937744, "learning_rate": 1.8429288259393544e-05, "loss": 0.0723, "step": 102290 }, { "epoch": 3.7175666836252637, "grad_norm": 0.6377655267715454, "learning_rate": 1.842428152557141e-05, "loss": 0.0611, "step": 102300 }, { "epoch": 3.7179300821280616, "grad_norm": 0.7913844585418701, "learning_rate": 
1.8419275075073594e-05, "loss": 0.0635, "step": 102310 }, { "epoch": 3.7182934806308596, "grad_norm": 0.437308132648468, "learning_rate": 1.8414268908115786e-05, "loss": 0.0659, "step": 102320 }, { "epoch": 3.718656879133658, "grad_norm": 1.292069673538208, "learning_rate": 1.840926302491371e-05, "loss": 0.0669, "step": 102330 }, { "epoch": 3.719020277636456, "grad_norm": 0.46069836616516113, "learning_rate": 1.840425742568304e-05, "loss": 0.0696, "step": 102340 }, { "epoch": 3.7193836761392545, "grad_norm": 0.9697392582893372, "learning_rate": 1.8399252110639454e-05, "loss": 0.0721, "step": 102350 }, { "epoch": 3.7197470746420525, "grad_norm": 1.3439652919769287, "learning_rate": 1.8394247079998605e-05, "loss": 0.0859, "step": 102360 }, { "epoch": 3.7201104731448504, "grad_norm": 0.5652872920036316, "learning_rate": 1.8389242333976138e-05, "loss": 0.05, "step": 102370 }, { "epoch": 3.720473871647649, "grad_norm": 0.6099680662155151, "learning_rate": 1.8384237872787706e-05, "loss": 0.069, "step": 102380 }, { "epoch": 3.720837270150447, "grad_norm": 0.27559173107147217, "learning_rate": 1.8379233696648928e-05, "loss": 0.0795, "step": 102390 }, { "epoch": 3.7212006686532453, "grad_norm": 0.7850374579429626, "learning_rate": 1.8374229805775413e-05, "loss": 0.1171, "step": 102400 }, { "epoch": 3.7215640671560433, "grad_norm": 0.4163167476654053, "learning_rate": 1.8369226200382755e-05, "loss": 0.087, "step": 102410 }, { "epoch": 3.7219274656588412, "grad_norm": 1.2209895849227905, "learning_rate": 1.8364222880686545e-05, "loss": 0.0764, "step": 102420 }, { "epoch": 3.7222908641616397, "grad_norm": 0.7018761038780212, "learning_rate": 1.8359219846902366e-05, "loss": 0.065, "step": 102430 }, { "epoch": 3.7226542626644377, "grad_norm": 0.4590131342411041, "learning_rate": 1.8354217099245777e-05, "loss": 0.0735, "step": 102440 }, { "epoch": 3.723017661167236, "grad_norm": 1.2598764896392822, "learning_rate": 1.8349214637932326e-05, "loss": 0.0809, "step": 102450 }, { 
"epoch": 3.723381059670034, "grad_norm": 0.47641921043395996, "learning_rate": 1.834421246317755e-05, "loss": 0.0901, "step": 102460 }, { "epoch": 3.723744458172832, "grad_norm": 1.0995975732803345, "learning_rate": 1.833921057519698e-05, "loss": 0.0662, "step": 102470 }, { "epoch": 3.7241078566756305, "grad_norm": 0.47749069333076477, "learning_rate": 1.833420897420613e-05, "loss": 0.0719, "step": 102480 }, { "epoch": 3.724471255178429, "grad_norm": 0.9003120064735413, "learning_rate": 1.8329207660420496e-05, "loss": 0.0911, "step": 102490 }, { "epoch": 3.724834653681227, "grad_norm": 0.9296249151229858, "learning_rate": 1.832420663405557e-05, "loss": 0.0935, "step": 102500 }, { "epoch": 3.725198052184025, "grad_norm": 0.7710517644882202, "learning_rate": 1.8319205895326818e-05, "loss": 0.0562, "step": 102510 }, { "epoch": 3.7255614506868233, "grad_norm": 0.6956592798233032, "learning_rate": 1.8314205444449726e-05, "loss": 0.0859, "step": 102520 }, { "epoch": 3.7259248491896213, "grad_norm": 0.7365669012069702, "learning_rate": 1.830920528163973e-05, "loss": 0.0645, "step": 102530 }, { "epoch": 3.7262882476924197, "grad_norm": 0.9889929294586182, "learning_rate": 1.8304205407112275e-05, "loss": 0.0862, "step": 102540 }, { "epoch": 3.7266516461952177, "grad_norm": 1.0486135482788086, "learning_rate": 1.8299205821082778e-05, "loss": 0.0692, "step": 102550 }, { "epoch": 3.7270150446980157, "grad_norm": 0.7183421850204468, "learning_rate": 1.829420652376666e-05, "loss": 0.0811, "step": 102560 }, { "epoch": 3.727378443200814, "grad_norm": 1.0006262063980103, "learning_rate": 1.828920751537933e-05, "loss": 0.0704, "step": 102570 }, { "epoch": 3.727741841703612, "grad_norm": 0.4710160195827484, "learning_rate": 1.8284208796136173e-05, "loss": 0.0728, "step": 102580 }, { "epoch": 3.7281052402064105, "grad_norm": 0.4841473698616028, "learning_rate": 1.8279210366252564e-05, "loss": 0.1235, "step": 102590 }, { "epoch": 3.7284686387092085, "grad_norm": 0.28182855248451233, 
"learning_rate": 1.8274212225943858e-05, "loss": 0.0631, "step": 102600 }, { "epoch": 3.7284686387092085, "eval_loss": 0.32021304965019226, "eval_runtime": 179.5125, "eval_samples_per_second": 41.301, "eval_steps_per_second": 5.164, "eval_wer": 0.1356490642075263, "step": 102600 }, { "epoch": 3.7288320372120065, "grad_norm": 0.28042423725128174, "learning_rate": 1.8269214375425422e-05, "loss": 0.0669, "step": 102610 }, { "epoch": 3.729195435714805, "grad_norm": 0.36679309606552124, "learning_rate": 1.8264216814912595e-05, "loss": 0.0702, "step": 102620 }, { "epoch": 3.729558834217603, "grad_norm": 0.6412705779075623, "learning_rate": 1.82592195446207e-05, "loss": 0.0774, "step": 102630 }, { "epoch": 3.7299222327204014, "grad_norm": 0.603082537651062, "learning_rate": 1.8254222564765044e-05, "loss": 0.1122, "step": 102640 }, { "epoch": 3.7302856312231993, "grad_norm": 0.7889009714126587, "learning_rate": 1.824922587556094e-05, "loss": 0.0914, "step": 102650 }, { "epoch": 3.7306490297259973, "grad_norm": 0.38122794032096863, "learning_rate": 1.8244229477223668e-05, "loss": 0.082, "step": 102660 }, { "epoch": 3.7310124282287958, "grad_norm": 1.1918739080429077, "learning_rate": 1.8239732967589197e-05, "loss": 2.188, "step": 102670 }, { "epoch": 3.7313758267315937, "grad_norm": 0.9827843308448792, "learning_rate": 1.8234737122492e-05, "loss": 0.0793, "step": 102680 }, { "epoch": 3.731739225234392, "grad_norm": 9.220224380493164, "learning_rate": 1.822974156888591e-05, "loss": 0.0989, "step": 102690 }, { "epoch": 3.73210262373719, "grad_norm": 0.5240766406059265, "learning_rate": 1.822474630698617e-05, "loss": 0.0771, "step": 102700 }, { "epoch": 3.732466022239988, "grad_norm": 0.627837061882019, "learning_rate": 1.8219751337008003e-05, "loss": 0.0755, "step": 102710 }, { "epoch": 3.7328294207427866, "grad_norm": 0.41630762815475464, "learning_rate": 1.8214756659166617e-05, "loss": 0.074, "step": 102720 }, { "epoch": 3.7331928192455845, "grad_norm": 0.7459368109703064, 
"learning_rate": 1.8209762273677232e-05, "loss": 0.06, "step": 102730 }, { "epoch": 3.733556217748383, "grad_norm": 0.3518989384174347, "learning_rate": 1.8204768180755037e-05, "loss": 0.0606, "step": 102740 }, { "epoch": 3.733919616251181, "grad_norm": 1.498246669769287, "learning_rate": 1.8199774380615197e-05, "loss": 0.0735, "step": 102750 }, { "epoch": 3.734283014753979, "grad_norm": 0.4231000244617462, "learning_rate": 1.8194780873472883e-05, "loss": 0.0825, "step": 102760 }, { "epoch": 3.7346464132567774, "grad_norm": 0.4876510798931122, "learning_rate": 1.8189787659543246e-05, "loss": 0.061, "step": 102770 }, { "epoch": 3.735009811759576, "grad_norm": 9.027270317077637, "learning_rate": 1.8184794739041433e-05, "loss": 0.0627, "step": 102780 }, { "epoch": 3.735373210262374, "grad_norm": 0.3482457995414734, "learning_rate": 1.817980211218257e-05, "loss": 0.0852, "step": 102790 }, { "epoch": 3.7357366087651718, "grad_norm": 0.5797934532165527, "learning_rate": 1.817480977918176e-05, "loss": 0.075, "step": 102800 }, { "epoch": 3.73610000726797, "grad_norm": 0.5175044536590576, "learning_rate": 1.8169817740254114e-05, "loss": 0.088, "step": 102810 }, { "epoch": 3.736463405770768, "grad_norm": 0.40952855348587036, "learning_rate": 1.8164825995614714e-05, "loss": 0.0663, "step": 102820 }, { "epoch": 3.7368268042735666, "grad_norm": 0.47273626923561096, "learning_rate": 1.8159834545478655e-05, "loss": 0.0657, "step": 102830 }, { "epoch": 3.7371902027763646, "grad_norm": 0.6765505075454712, "learning_rate": 1.815484339006098e-05, "loss": 0.088, "step": 102840 }, { "epoch": 3.7375536012791626, "grad_norm": 0.7039837837219238, "learning_rate": 1.814985252957675e-05, "loss": 0.0837, "step": 102850 }, { "epoch": 3.737916999781961, "grad_norm": 0.7344921231269836, "learning_rate": 1.8144861964240995e-05, "loss": 0.0831, "step": 102860 }, { "epoch": 3.738280398284759, "grad_norm": 0.5294970870018005, "learning_rate": 1.8139871694268756e-05, "loss": 0.0656, "step": 102870 
}, { "epoch": 3.7386437967875574, "grad_norm": 0.4704716205596924, "learning_rate": 1.813488171987504e-05, "loss": 0.0612, "step": 102880 }, { "epoch": 3.7390071952903554, "grad_norm": 0.43470290303230286, "learning_rate": 1.812989204127484e-05, "loss": 0.0932, "step": 102890 }, { "epoch": 3.7393705937931534, "grad_norm": 0.9852955341339111, "learning_rate": 1.8124902658683146e-05, "loss": 0.0947, "step": 102900 }, { "epoch": 3.739733992295952, "grad_norm": 0.6351022720336914, "learning_rate": 1.8119913572314932e-05, "loss": 0.0969, "step": 102910 }, { "epoch": 3.74009739079875, "grad_norm": 0.5832617282867432, "learning_rate": 1.8114924782385167e-05, "loss": 0.0584, "step": 102920 }, { "epoch": 3.7404607893015482, "grad_norm": 0.47710007429122925, "learning_rate": 1.81099362891088e-05, "loss": 0.0694, "step": 102930 }, { "epoch": 3.7408241878043462, "grad_norm": 0.515385091304779, "learning_rate": 1.8104948092700758e-05, "loss": 0.1376, "step": 102940 }, { "epoch": 3.741187586307144, "grad_norm": 2.539031505584717, "learning_rate": 1.809996019337597e-05, "loss": 0.0742, "step": 102950 }, { "epoch": 3.7415509848099426, "grad_norm": 0.30518245697021484, "learning_rate": 1.8094972591349346e-05, "loss": 0.0718, "step": 102960 }, { "epoch": 3.7419143833127406, "grad_norm": 1.3002036809921265, "learning_rate": 1.808998528683579e-05, "loss": 1.3903, "step": 102970 }, { "epoch": 3.742277781815539, "grad_norm": 0.6815840005874634, "learning_rate": 1.8084998280050182e-05, "loss": 0.0831, "step": 102980 }, { "epoch": 3.742641180318337, "grad_norm": 0.44951331615448, "learning_rate": 1.8080011571207388e-05, "loss": 0.073, "step": 102990 }, { "epoch": 3.743004578821135, "grad_norm": 1.890199899673462, "learning_rate": 1.807502516052228e-05, "loss": 0.0831, "step": 103000 }, { "epoch": 3.7433679773239334, "grad_norm": 0.29908934235572815, "learning_rate": 1.807003904820969e-05, "loss": 0.0641, "step": 103010 }, { "epoch": 3.7437313758267314, "grad_norm": 0.28492602705955505, 
"learning_rate": 1.8065053234484472e-05, "loss": 0.097, "step": 103020 }, { "epoch": 3.74409477432953, "grad_norm": 0.9672560095787048, "learning_rate": 1.8060067719561434e-05, "loss": 0.0678, "step": 103030 }, { "epoch": 3.744458172832328, "grad_norm": 0.7627213597297668, "learning_rate": 1.8055082503655376e-05, "loss": 0.08, "step": 103040 }, { "epoch": 3.744821571335126, "grad_norm": 2.1501963138580322, "learning_rate": 1.8050097586981107e-05, "loss": 0.0728, "step": 103050 }, { "epoch": 3.7451849698379243, "grad_norm": 0.8454054594039917, "learning_rate": 1.804511296975341e-05, "loss": 0.0683, "step": 103060 }, { "epoch": 3.7455483683407227, "grad_norm": 0.34756171703338623, "learning_rate": 1.8040128652187048e-05, "loss": 0.0547, "step": 103070 }, { "epoch": 3.7459117668435207, "grad_norm": 0.21398131549358368, "learning_rate": 1.8035144634496775e-05, "loss": 0.0477, "step": 103080 }, { "epoch": 3.7462751653463187, "grad_norm": 0.5252348780632019, "learning_rate": 1.8030160916897342e-05, "loss": 0.0801, "step": 103090 }, { "epoch": 3.746638563849117, "grad_norm": 1.0118563175201416, "learning_rate": 1.8025177499603473e-05, "loss": 0.0721, "step": 103100 }, { "epoch": 3.747001962351915, "grad_norm": 0.5007623434066772, "learning_rate": 1.8020194382829894e-05, "loss": 0.0779, "step": 103110 }, { "epoch": 3.7473653608547135, "grad_norm": 0.41904526948928833, "learning_rate": 1.8015211566791304e-05, "loss": 0.0623, "step": 103120 }, { "epoch": 3.7477287593575115, "grad_norm": 0.6131216883659363, "learning_rate": 1.801022905170239e-05, "loss": 0.0882, "step": 103130 }, { "epoch": 3.7480921578603095, "grad_norm": 0.7774443626403809, "learning_rate": 1.8005246837777846e-05, "loss": 0.0896, "step": 103140 }, { "epoch": 3.748455556363108, "grad_norm": 0.6011605858802795, "learning_rate": 1.800026492523232e-05, "loss": 0.08, "step": 103150 }, { "epoch": 3.748818954865906, "grad_norm": 0.2599460184574127, "learning_rate": 1.7995283314280476e-05, "loss": 0.0764, "step": 
103160 }, { "epoch": 3.7491823533687043, "grad_norm": 0.8700913786888123, "learning_rate": 1.7990302005136948e-05, "loss": 0.0803, "step": 103170 }, { "epoch": 3.7495457518715023, "grad_norm": 0.8427462577819824, "learning_rate": 1.798532099801637e-05, "loss": 0.0783, "step": 103180 }, { "epoch": 3.7499091503743003, "grad_norm": 0.859573483467102, "learning_rate": 1.7980340293133353e-05, "loss": 0.0608, "step": 103190 }, { "epoch": 3.7502725488770987, "grad_norm": 0.4894687533378601, "learning_rate": 1.7975359890702492e-05, "loss": 0.0746, "step": 103200 }, { "epoch": 3.7502725488770987, "eval_loss": 0.30459001660346985, "eval_runtime": 180.7543, "eval_samples_per_second": 41.017, "eval_steps_per_second": 5.129, "eval_wer": 0.1349229400766061, "step": 103200 }, { "epoch": 3.7506359473798967, "grad_norm": 1.1439019441604614, "learning_rate": 1.7970379790938386e-05, "loss": 0.1182, "step": 103210 }, { "epoch": 3.750999345882695, "grad_norm": 1.0490964651107788, "learning_rate": 1.79653999940556e-05, "loss": 0.0761, "step": 103220 }, { "epoch": 3.751362744385493, "grad_norm": 0.7302677631378174, "learning_rate": 1.79604205002687e-05, "loss": 0.0691, "step": 103230 }, { "epoch": 3.751726142888291, "grad_norm": 0.3179365396499634, "learning_rate": 1.7955441309792227e-05, "loss": 0.074, "step": 103240 }, { "epoch": 3.7520895413910895, "grad_norm": 0.6117727160453796, "learning_rate": 1.795046242284073e-05, "loss": 0.0933, "step": 103250 }, { "epoch": 3.7524529398938875, "grad_norm": 0.4880678355693817, "learning_rate": 1.794548383962872e-05, "loss": 0.3885, "step": 103260 }, { "epoch": 3.752816338396686, "grad_norm": 2.7460379600524902, "learning_rate": 1.794050556037072e-05, "loss": 3.1444, "step": 103270 }, { "epoch": 3.753179736899484, "grad_norm": 0.6626706719398499, "learning_rate": 1.7935527585281215e-05, "loss": 0.0568, "step": 103280 }, { "epoch": 3.753543135402282, "grad_norm": 0.7829678654670715, "learning_rate": 1.7930549914574685e-05, "loss": 0.0743, "step": 
103290 }, { "epoch": 3.7539065339050803, "grad_norm": 0.5516233444213867, "learning_rate": 1.792557254846561e-05, "loss": 0.0805, "step": 103300 }, { "epoch": 3.7542699324078783, "grad_norm": 3.4377355575561523, "learning_rate": 1.792059548716844e-05, "loss": 0.0762, "step": 103310 }, { "epoch": 3.7546333309106767, "grad_norm": 0.40759000182151794, "learning_rate": 1.7915618730897626e-05, "loss": 0.0551, "step": 103320 }, { "epoch": 3.7549967294134747, "grad_norm": 0.7524077892303467, "learning_rate": 1.7910642279867596e-05, "loss": 0.067, "step": 103330 }, { "epoch": 3.7553601279162727, "grad_norm": 0.8002333641052246, "learning_rate": 1.7905666134292758e-05, "loss": 0.0857, "step": 103340 }, { "epoch": 3.755723526419071, "grad_norm": 0.4075934588909149, "learning_rate": 1.7900690294387533e-05, "loss": 0.0805, "step": 103350 }, { "epoch": 3.7560869249218696, "grad_norm": 0.3919731080532074, "learning_rate": 1.78957147603663e-05, "loss": 0.099, "step": 103360 }, { "epoch": 3.7564503234246676, "grad_norm": 0.8194761872291565, "learning_rate": 1.789073953244344e-05, "loss": 0.4237, "step": 103370 }, { "epoch": 3.7568137219274655, "grad_norm": 4.275974750518799, "learning_rate": 1.7885764610833323e-05, "loss": 0.0658, "step": 103380 }, { "epoch": 3.757177120430264, "grad_norm": 0.3424472510814667, "learning_rate": 1.7880789995750293e-05, "loss": 0.0618, "step": 103390 }, { "epoch": 3.757540518933062, "grad_norm": 0.4911600947380066, "learning_rate": 1.7875815687408687e-05, "loss": 0.0692, "step": 103400 }, { "epoch": 3.7579039174358604, "grad_norm": 0.9533910155296326, "learning_rate": 1.7870841686022844e-05, "loss": 0.0818, "step": 103410 }, { "epoch": 3.7582673159386584, "grad_norm": 0.4925590455532074, "learning_rate": 1.7865867991807064e-05, "loss": 0.0706, "step": 103420 }, { "epoch": 3.7586307144414564, "grad_norm": 0.3848567008972168, "learning_rate": 1.7860894604975648e-05, "loss": 0.0679, "step": 103430 }, { "epoch": 3.758994112944255, "grad_norm": 
1.7480194568634033, "learning_rate": 1.785592152574288e-05, "loss": 0.0985, "step": 103440 }, { "epoch": 3.7593575114470528, "grad_norm": 1.4022555351257324, "learning_rate": 1.7850948754323036e-05, "loss": 0.0773, "step": 103450 }, { "epoch": 3.759720909949851, "grad_norm": 0.5998416543006897, "learning_rate": 1.784597629093038e-05, "loss": 0.2167, "step": 103460 }, { "epoch": 3.760084308452649, "grad_norm": 0.5429103374481201, "learning_rate": 1.784100413577915e-05, "loss": 0.0695, "step": 103470 }, { "epoch": 3.760447706955447, "grad_norm": 0.43059512972831726, "learning_rate": 1.783603228908357e-05, "loss": 0.0541, "step": 103480 }, { "epoch": 3.7608111054582456, "grad_norm": 0.7966387271881104, "learning_rate": 1.7831060751057877e-05, "loss": 0.0789, "step": 103490 }, { "epoch": 3.7611745039610436, "grad_norm": 0.5265412330627441, "learning_rate": 1.7826089521916266e-05, "loss": 0.061, "step": 103500 }, { "epoch": 3.761537902463842, "grad_norm": 0.580176055431366, "learning_rate": 1.782111860187294e-05, "loss": 0.066, "step": 103510 }, { "epoch": 3.76190130096664, "grad_norm": 0.7160188555717468, "learning_rate": 1.7816147991142067e-05, "loss": 3.6468, "step": 103520 }, { "epoch": 3.762264699469438, "grad_norm": 0.5515186786651611, "learning_rate": 1.7811177689937813e-05, "loss": 0.0793, "step": 103530 }, { "epoch": 3.7626280979722364, "grad_norm": 0.7371792197227478, "learning_rate": 1.7806207698474334e-05, "loss": 0.074, "step": 103540 }, { "epoch": 3.7629914964750344, "grad_norm": 0.7654315233230591, "learning_rate": 1.7801238016965774e-05, "loss": 0.0752, "step": 103550 }, { "epoch": 3.763354894977833, "grad_norm": 0.42323166131973267, "learning_rate": 1.7796268645626256e-05, "loss": 0.1029, "step": 103560 }, { "epoch": 3.763718293480631, "grad_norm": 1.1199790239334106, "learning_rate": 1.779129958466989e-05, "loss": 0.196, "step": 103570 }, { "epoch": 3.764081691983429, "grad_norm": 0.9370298981666565, "learning_rate": 1.7786330834310765e-05, "loss": 
0.0776, "step": 103580 }, { "epoch": 3.764445090486227, "grad_norm": 0.6853455901145935, "learning_rate": 1.778136239476299e-05, "loss": 0.0691, "step": 103590 }, { "epoch": 3.764808488989025, "grad_norm": 1.1451141834259033, "learning_rate": 1.7776394266240624e-05, "loss": 0.079, "step": 103600 }, { "epoch": 3.7651718874918236, "grad_norm": 0.9805148243904114, "learning_rate": 1.777142644895773e-05, "loss": 0.0742, "step": 103610 }, { "epoch": 3.7655352859946216, "grad_norm": 0.812449038028717, "learning_rate": 1.7766458943128346e-05, "loss": 0.0709, "step": 103620 }, { "epoch": 3.7658986844974196, "grad_norm": 1.055368185043335, "learning_rate": 1.7761491748966506e-05, "loss": 0.0553, "step": 103630 }, { "epoch": 3.766262083000218, "grad_norm": 0.5963478684425354, "learning_rate": 1.775652486668624e-05, "loss": 0.6731, "step": 103640 }, { "epoch": 3.7666254815030165, "grad_norm": 0.4923159182071686, "learning_rate": 1.775155829650154e-05, "loss": 0.0761, "step": 103650 }, { "epoch": 3.7669888800058144, "grad_norm": 0.3637178838253021, "learning_rate": 1.774659203862641e-05, "loss": 0.0936, "step": 103660 }, { "epoch": 3.7673522785086124, "grad_norm": 0.23796427249908447, "learning_rate": 1.7741626093274808e-05, "loss": 1.0844, "step": 103670 }, { "epoch": 3.767715677011411, "grad_norm": 0.570115327835083, "learning_rate": 1.773666046066072e-05, "loss": 0.0716, "step": 103680 }, { "epoch": 3.768079075514209, "grad_norm": 0.40989992022514343, "learning_rate": 1.7731695140998095e-05, "loss": 0.1496, "step": 103690 }, { "epoch": 3.7684424740170073, "grad_norm": 0.913547933101654, "learning_rate": 1.7726730134500863e-05, "loss": 0.0984, "step": 103700 }, { "epoch": 3.7688058725198053, "grad_norm": 0.6644873023033142, "learning_rate": 1.7721765441382948e-05, "loss": 0.0709, "step": 103710 }, { "epoch": 3.7691692710226032, "grad_norm": 16.15460777282715, "learning_rate": 1.7716801061858256e-05, "loss": 0.0726, "step": 103720 }, { "epoch": 3.7695326695254017, 
"grad_norm": 1.16048002243042, "learning_rate": 1.7711836996140704e-05, "loss": 0.0689, "step": 103730 }, { "epoch": 3.7698960680281997, "grad_norm": 0.36787500977516174, "learning_rate": 1.7706873244444165e-05, "loss": 0.0709, "step": 103740 }, { "epoch": 3.770259466530998, "grad_norm": 0.9788756966590881, "learning_rate": 1.7701909806982507e-05, "loss": 0.075, "step": 103750 }, { "epoch": 3.770622865033796, "grad_norm": 0.5091611742973328, "learning_rate": 1.769694668396959e-05, "loss": 0.0679, "step": 103760 }, { "epoch": 3.770986263536594, "grad_norm": 0.774710476398468, "learning_rate": 1.7691983875619245e-05, "loss": 0.0626, "step": 103770 }, { "epoch": 3.7713496620393925, "grad_norm": 1.0591776371002197, "learning_rate": 1.768702138214532e-05, "loss": 0.0847, "step": 103780 }, { "epoch": 3.7717130605421905, "grad_norm": 0.4821354150772095, "learning_rate": 1.7682059203761632e-05, "loss": 0.0793, "step": 103790 }, { "epoch": 3.772076459044989, "grad_norm": 1.042035460472107, "learning_rate": 1.767709734068197e-05, "loss": 0.0684, "step": 103800 }, { "epoch": 3.772076459044989, "eval_loss": 0.31211939454078674, "eval_runtime": 179.5485, "eval_samples_per_second": 41.292, "eval_steps_per_second": 5.163, "eval_wer": 0.13473233249223954, "step": 103800 }, { "epoch": 3.772439857547787, "grad_norm": 0.6240308880805969, "learning_rate": 1.767213579312012e-05, "loss": 0.0686, "step": 103810 }, { "epoch": 3.772803256050585, "grad_norm": 0.6886245012283325, "learning_rate": 1.7667174561289874e-05, "loss": 0.172, "step": 103820 }, { "epoch": 3.7731666545533833, "grad_norm": 0.6289244294166565, "learning_rate": 1.7662213645404985e-05, "loss": 0.3868, "step": 103830 }, { "epoch": 3.7735300530561813, "grad_norm": 0.2528345584869385, "learning_rate": 1.7657253045679205e-05, "loss": 0.0879, "step": 103840 }, { "epoch": 3.7738934515589797, "grad_norm": 5.790124893188477, "learning_rate": 1.7652292762326266e-05, "loss": 0.0851, "step": 103850 }, { "epoch": 3.7742568500617777, 
"grad_norm": 0.524517834186554, "learning_rate": 1.764733279555988e-05, "loss": 0.0537, "step": 103860 }, { "epoch": 3.7746202485645757, "grad_norm": 0.41325438022613525, "learning_rate": 1.7642373145593764e-05, "loss": 0.0695, "step": 103870 }, { "epoch": 3.774983647067374, "grad_norm": 0.5350490212440491, "learning_rate": 1.763741381264162e-05, "loss": 0.0674, "step": 103880 }, { "epoch": 3.775347045570172, "grad_norm": 0.4079577326774597, "learning_rate": 1.7632454796917117e-05, "loss": 0.0725, "step": 103890 }, { "epoch": 3.7757104440729705, "grad_norm": 1.083164095878601, "learning_rate": 1.7627496098633923e-05, "loss": 0.0943, "step": 103900 }, { "epoch": 3.7760738425757685, "grad_norm": 0.525883138179779, "learning_rate": 1.7622537718005676e-05, "loss": 0.0774, "step": 103910 }, { "epoch": 3.7764372410785665, "grad_norm": 0.420608788728714, "learning_rate": 1.7617579655246048e-05, "loss": 0.0661, "step": 103920 }, { "epoch": 3.776800639581365, "grad_norm": 0.5199810862541199, "learning_rate": 1.761262191056864e-05, "loss": 0.0948, "step": 103930 }, { "epoch": 3.7771640380841633, "grad_norm": 0.519557535648346, "learning_rate": 1.760766448418707e-05, "loss": 0.0978, "step": 103940 }, { "epoch": 3.7775274365869613, "grad_norm": 0.6157034635543823, "learning_rate": 1.7602707376314935e-05, "loss": 0.0882, "step": 103950 }, { "epoch": 3.7778908350897593, "grad_norm": 0.5788136720657349, "learning_rate": 1.7597750587165813e-05, "loss": 0.0687, "step": 103960 }, { "epoch": 3.7782542335925577, "grad_norm": 3.0995774269104004, "learning_rate": 1.7592794116953287e-05, "loss": 0.6063, "step": 103970 }, { "epoch": 3.7786176320953557, "grad_norm": 0.4705333411693573, "learning_rate": 1.7587837965890907e-05, "loss": 0.0759, "step": 103980 }, { "epoch": 3.778981030598154, "grad_norm": 0.5289357304573059, "learning_rate": 1.758288213419222e-05, "loss": 0.07, "step": 103990 }, { "epoch": 3.779344429100952, "grad_norm": 0.28470751643180847, "learning_rate": 
1.7577926622070752e-05, "loss": 0.0824, "step": 104000 }, { "epoch": 3.77970782760375, "grad_norm": 0.4704053997993469, "learning_rate": 1.7572971429740004e-05, "loss": 0.0679, "step": 104010 }, { "epoch": 3.7800712261065486, "grad_norm": 0.5059223771095276, "learning_rate": 1.7568016557413503e-05, "loss": 0.0598, "step": 104020 }, { "epoch": 3.7804346246093465, "grad_norm": 0.2848690450191498, "learning_rate": 1.7563062005304724e-05, "loss": 0.0811, "step": 104030 }, { "epoch": 3.780798023112145, "grad_norm": 0.5668039917945862, "learning_rate": 1.7558107773627147e-05, "loss": 0.106, "step": 104040 }, { "epoch": 3.781161421614943, "grad_norm": 0.8382745981216431, "learning_rate": 1.7553153862594214e-05, "loss": 0.0748, "step": 104050 }, { "epoch": 3.781524820117741, "grad_norm": 0.817533552646637, "learning_rate": 1.75482002724194e-05, "loss": 0.0579, "step": 104060 }, { "epoch": 3.7818882186205394, "grad_norm": 0.26132848858833313, "learning_rate": 1.7543247003316117e-05, "loss": 0.0842, "step": 104070 }, { "epoch": 3.7822516171233374, "grad_norm": 0.4925542175769806, "learning_rate": 1.7538294055497793e-05, "loss": 1.3918, "step": 104080 }, { "epoch": 3.782615015626136, "grad_norm": 0.36757928133010864, "learning_rate": 1.753334142917783e-05, "loss": 0.0696, "step": 104090 }, { "epoch": 3.7829784141289338, "grad_norm": 1.2747372388839722, "learning_rate": 1.7528389124569605e-05, "loss": 0.0939, "step": 104100 }, { "epoch": 3.7833418126317317, "grad_norm": 0.5699681639671326, "learning_rate": 1.7523437141886516e-05, "loss": 0.0728, "step": 104110 }, { "epoch": 3.78370521113453, "grad_norm": 0.7893559336662292, "learning_rate": 1.7518485481341926e-05, "loss": 0.0609, "step": 104120 }, { "epoch": 3.784068609637328, "grad_norm": 1.0749348402023315, "learning_rate": 1.7513534143149175e-05, "loss": 0.0692, "step": 104130 }, { "epoch": 3.7844320081401266, "grad_norm": 0.6943231225013733, "learning_rate": 1.75085831275216e-05, "loss": 0.0766, "step": 104140 }, { 
"epoch": 3.7847954066429246, "grad_norm": 0.32822078466415405, "learning_rate": 1.750363243467251e-05, "loss": 0.1134, "step": 104150 }, { "epoch": 3.7851588051457226, "grad_norm": 1.1365320682525635, "learning_rate": 1.7498682064815242e-05, "loss": 0.0622, "step": 104160 }, { "epoch": 3.785522203648521, "grad_norm": 1.4524558782577515, "learning_rate": 1.749373201816307e-05, "loss": 0.0624, "step": 104170 }, { "epoch": 3.785885602151319, "grad_norm": 0.39508798718452454, "learning_rate": 1.7488782294929278e-05, "loss": 0.0728, "step": 104180 }, { "epoch": 3.7862490006541174, "grad_norm": 0.8721691966056824, "learning_rate": 1.7483832895327135e-05, "loss": 0.2174, "step": 104190 }, { "epoch": 3.7866123991569154, "grad_norm": 2.2220089435577393, "learning_rate": 1.747888381956988e-05, "loss": 0.0782, "step": 104200 }, { "epoch": 3.7869757976597134, "grad_norm": 0.5237187147140503, "learning_rate": 1.7473935067870766e-05, "loss": 0.0985, "step": 104210 }, { "epoch": 3.787339196162512, "grad_norm": 0.4212065041065216, "learning_rate": 1.7468986640443017e-05, "loss": 0.0634, "step": 104220 }, { "epoch": 3.7877025946653102, "grad_norm": 0.6862966418266296, "learning_rate": 1.746403853749984e-05, "loss": 0.0768, "step": 104230 }, { "epoch": 3.788065993168108, "grad_norm": 0.4138953685760498, "learning_rate": 1.7459090759254414e-05, "loss": 0.0554, "step": 104240 }, { "epoch": 3.788429391670906, "grad_norm": 1.6833606958389282, "learning_rate": 1.745414330591995e-05, "loss": 0.0804, "step": 104250 }, { "epoch": 3.7887927901737046, "grad_norm": 0.5321451425552368, "learning_rate": 1.7449196177709597e-05, "loss": 0.099, "step": 104260 }, { "epoch": 3.7891561886765026, "grad_norm": 0.4269891381263733, "learning_rate": 1.744424937483652e-05, "loss": 0.0539, "step": 104270 }, { "epoch": 3.789519587179301, "grad_norm": 0.49251261353492737, "learning_rate": 1.7439302897513854e-05, "loss": 0.0691, "step": 104280 }, { "epoch": 3.789882985682099, "grad_norm": 0.3544449508190155, 
"learning_rate": 1.7434356745954717e-05, "loss": 0.2799, "step": 104290 }, { "epoch": 3.790246384184897, "grad_norm": 0.3938431143760681, "learning_rate": 1.7429410920372235e-05, "loss": 0.0788, "step": 104300 }, { "epoch": 3.7906097826876954, "grad_norm": 1.1323349475860596, "learning_rate": 1.74244654209795e-05, "loss": 0.0604, "step": 104310 }, { "epoch": 3.7909731811904934, "grad_norm": 0.40305644273757935, "learning_rate": 1.74195202479896e-05, "loss": 0.05, "step": 104320 }, { "epoch": 3.791336579693292, "grad_norm": 2.940786600112915, "learning_rate": 1.74145754016156e-05, "loss": 0.0701, "step": 104330 }, { "epoch": 3.79169997819609, "grad_norm": 0.7965475916862488, "learning_rate": 1.7409630882070542e-05, "loss": 0.0764, "step": 104340 }, { "epoch": 3.792063376698888, "grad_norm": 0.395450234413147, "learning_rate": 1.7404686689567498e-05, "loss": 0.0675, "step": 104350 }, { "epoch": 3.7924267752016863, "grad_norm": 0.5075439214706421, "learning_rate": 1.7399742824319478e-05, "loss": 0.1068, "step": 104360 }, { "epoch": 3.7927901737044842, "grad_norm": 0.41297322511672974, "learning_rate": 1.7394799286539498e-05, "loss": 0.0562, "step": 104370 }, { "epoch": 3.7931535722072827, "grad_norm": 0.254867821931839, "learning_rate": 1.7389856076440557e-05, "loss": 0.0703, "step": 104380 }, { "epoch": 3.7935169707100806, "grad_norm": 0.4929332435131073, "learning_rate": 1.7384913194235635e-05, "loss": 0.0724, "step": 104390 }, { "epoch": 3.7938803692128786, "grad_norm": 0.49922120571136475, "learning_rate": 1.7379970640137717e-05, "loss": 0.0715, "step": 104400 }, { "epoch": 3.7938803692128786, "eval_loss": 0.3130161762237549, "eval_runtime": 178.8829, "eval_samples_per_second": 41.446, "eval_steps_per_second": 5.182, "eval_wer": 0.13230889320529343, "step": 104400 }, { "epoch": 3.794243767715677, "grad_norm": 0.5942332744598389, "learning_rate": 1.737502841435975e-05, "loss": 0.0644, "step": 104410 }, { "epoch": 3.794607166218475, "grad_norm": 0.33601608872413635, 
"learning_rate": 1.7370086517114678e-05, "loss": 0.061, "step": 104420 }, { "epoch": 3.7949705647212735, "grad_norm": 0.44274547696113586, "learning_rate": 1.736514494861542e-05, "loss": 0.0816, "step": 104430 }, { "epoch": 3.7953339632240715, "grad_norm": 1.315964698791504, "learning_rate": 1.7360203709074914e-05, "loss": 0.0883, "step": 104440 }, { "epoch": 3.7956973617268694, "grad_norm": 0.5077027678489685, "learning_rate": 1.735526279870605e-05, "loss": 0.0776, "step": 104450 }, { "epoch": 3.796060760229668, "grad_norm": 0.806336522102356, "learning_rate": 1.735032221772171e-05, "loss": 0.0767, "step": 104460 }, { "epoch": 3.796424158732466, "grad_norm": 2.48112416267395, "learning_rate": 1.7345381966334766e-05, "loss": 0.0613, "step": 104470 }, { "epoch": 3.7967875572352643, "grad_norm": 1.1071819067001343, "learning_rate": 1.7340442044758067e-05, "loss": 0.0574, "step": 104480 }, { "epoch": 3.7971509557380623, "grad_norm": 0.4046606123447418, "learning_rate": 1.733550245320448e-05, "loss": 0.0775, "step": 104490 }, { "epoch": 3.7975143542408603, "grad_norm": 0.5835363864898682, "learning_rate": 1.7330563191886822e-05, "loss": 0.0818, "step": 104500 }, { "epoch": 3.7978777527436587, "grad_norm": 1.6785948276519775, "learning_rate": 1.732562426101791e-05, "loss": 0.091, "step": 104510 }, { "epoch": 3.798241151246457, "grad_norm": 0.5676856637001038, "learning_rate": 1.732068566081054e-05, "loss": 0.0638, "step": 104520 }, { "epoch": 3.798604549749255, "grad_norm": 0.5159793496131897, "learning_rate": 1.731574739147749e-05, "loss": 0.069, "step": 104530 }, { "epoch": 3.798967948252053, "grad_norm": 0.44450074434280396, "learning_rate": 1.7310809453231557e-05, "loss": 0.0721, "step": 104540 }, { "epoch": 3.7993313467548515, "grad_norm": 1.7773900032043457, "learning_rate": 1.730587184628549e-05, "loss": 0.0927, "step": 104550 }, { "epoch": 3.7996947452576495, "grad_norm": 0.33800819516181946, "learning_rate": 1.7300934570852022e-05, "loss": 0.0798, "step": 
104560 }, { "epoch": 3.800058143760448, "grad_norm": 0.5945661067962646, "learning_rate": 1.7295997627143892e-05, "loss": 0.063, "step": 104570 }, { "epoch": 3.800421542263246, "grad_norm": 1.0598748922348022, "learning_rate": 1.7291061015373805e-05, "loss": 0.0708, "step": 104580 }, { "epoch": 3.800784940766044, "grad_norm": 0.390257328748703, "learning_rate": 1.728612473575448e-05, "loss": 0.0649, "step": 104590 }, { "epoch": 3.8011483392688423, "grad_norm": 0.650588870048523, "learning_rate": 1.7281188788498593e-05, "loss": 0.0786, "step": 104600 }, { "epoch": 3.8015117377716403, "grad_norm": 0.6513156294822693, "learning_rate": 1.727625317381882e-05, "loss": 0.2863, "step": 104610 }, { "epoch": 3.8018751362744387, "grad_norm": 0.31333038210868835, "learning_rate": 1.7271317891927806e-05, "loss": 0.0651, "step": 104620 }, { "epoch": 3.8022385347772367, "grad_norm": 0.7151166200637817, "learning_rate": 1.726638294303821e-05, "loss": 0.072, "step": 104630 }, { "epoch": 3.8026019332800347, "grad_norm": 2.873300552368164, "learning_rate": 1.7261448327362664e-05, "loss": 0.0924, "step": 104640 }, { "epoch": 3.802965331782833, "grad_norm": 0.9299377202987671, "learning_rate": 1.7256514045113776e-05, "loss": 0.1467, "step": 104650 }, { "epoch": 3.803328730285631, "grad_norm": 0.6155283451080322, "learning_rate": 1.7251580096504142e-05, "loss": 0.0617, "step": 104660 }, { "epoch": 3.8036921287884295, "grad_norm": 0.7554726600646973, "learning_rate": 1.724664648174634e-05, "loss": 0.056, "step": 104670 }, { "epoch": 3.8040555272912275, "grad_norm": 0.45988065004348755, "learning_rate": 1.7241713201052974e-05, "loss": 0.0613, "step": 104680 }, { "epoch": 3.8044189257940255, "grad_norm": 0.6679671406745911, "learning_rate": 1.7236780254636575e-05, "loss": 0.0661, "step": 104690 }, { "epoch": 3.804782324296824, "grad_norm": 0.6426312327384949, "learning_rate": 1.7231847642709693e-05, "loss": 0.0669, "step": 104700 }, { "epoch": 3.805145722799622, "grad_norm": 
4.563010215759277, "learning_rate": 1.7226915365484858e-05, "loss": 0.0891, "step": 104710 }, { "epoch": 3.8055091213024204, "grad_norm": 5.822086334228516, "learning_rate": 1.7221983423174576e-05, "loss": 0.0657, "step": 104720 }, { "epoch": 3.8058725198052183, "grad_norm": 0.4104626476764679, "learning_rate": 1.721705181599136e-05, "loss": 0.0737, "step": 104730 }, { "epoch": 3.8062359183080163, "grad_norm": 0.41885101795196533, "learning_rate": 1.7212120544147693e-05, "loss": 0.0615, "step": 104740 }, { "epoch": 3.8065993168108148, "grad_norm": 0.6531028151512146, "learning_rate": 1.720718960785604e-05, "loss": 0.094, "step": 104750 }, { "epoch": 3.8069627153136127, "grad_norm": 0.4141846001148224, "learning_rate": 1.7202259007328862e-05, "loss": 0.0677, "step": 104760 }, { "epoch": 3.807326113816411, "grad_norm": 0.2788487672805786, "learning_rate": 1.7197328742778586e-05, "loss": 0.0592, "step": 104770 }, { "epoch": 3.807689512319209, "grad_norm": 0.95374596118927, "learning_rate": 1.7192398814417665e-05, "loss": 0.0557, "step": 104780 }, { "epoch": 3.808052910822007, "grad_norm": 0.4431002140045166, "learning_rate": 1.7187469222458495e-05, "loss": 0.1018, "step": 104790 }, { "epoch": 3.8084163093248056, "grad_norm": 0.7210586071014404, "learning_rate": 1.7182539967113488e-05, "loss": 0.0753, "step": 104800 }, { "epoch": 3.808779707827604, "grad_norm": 1.0989420413970947, "learning_rate": 1.7177611048594998e-05, "loss": 0.0632, "step": 104810 }, { "epoch": 3.809143106330402, "grad_norm": 0.3087826073169708, "learning_rate": 1.7172682467115434e-05, "loss": 0.0489, "step": 104820 }, { "epoch": 3.8095065048332, "grad_norm": 0.5821923017501831, "learning_rate": 1.716775422288713e-05, "loss": 0.0744, "step": 104830 }, { "epoch": 3.8098699033359984, "grad_norm": 0.464653879404068, "learning_rate": 1.7162826316122432e-05, "loss": 0.0724, "step": 104840 }, { "epoch": 3.8102333018387964, "grad_norm": 1.4593892097473145, "learning_rate": 1.715789874703366e-05, "loss": 
0.0721, "step": 104850 }, { "epoch": 3.810596700341595, "grad_norm": 0.3369029462337494, "learning_rate": 1.7152971515833118e-05, "loss": 0.0905, "step": 104860 }, { "epoch": 3.810960098844393, "grad_norm": 0.6023349761962891, "learning_rate": 1.7148044622733127e-05, "loss": 0.0635, "step": 104870 }, { "epoch": 3.811323497347191, "grad_norm": 0.5245524048805237, "learning_rate": 1.7143118067945955e-05, "loss": 0.0542, "step": 104880 }, { "epoch": 3.811686895849989, "grad_norm": 0.5203279256820679, "learning_rate": 1.713819185168387e-05, "loss": 0.0914, "step": 104890 }, { "epoch": 3.812050294352787, "grad_norm": 1.1707444190979004, "learning_rate": 1.7133265974159123e-05, "loss": 0.0817, "step": 104900 }, { "epoch": 3.8124136928555856, "grad_norm": 0.41992756724357605, "learning_rate": 1.7128340435583948e-05, "loss": 0.075, "step": 104910 }, { "epoch": 3.8127770913583836, "grad_norm": 0.31755873560905457, "learning_rate": 1.7123415236170587e-05, "loss": 0.057, "step": 104920 }, { "epoch": 3.8131404898611816, "grad_norm": 0.6827322244644165, "learning_rate": 1.7118490376131236e-05, "loss": 0.0622, "step": 104930 }, { "epoch": 3.81350388836398, "grad_norm": 0.27975374460220337, "learning_rate": 1.7113565855678093e-05, "loss": 0.0671, "step": 104940 }, { "epoch": 3.813867286866778, "grad_norm": 0.5157541632652283, "learning_rate": 1.710864167502334e-05, "loss": 0.0853, "step": 104950 }, { "epoch": 3.8142306853695764, "grad_norm": 0.5502781271934509, "learning_rate": 1.7103717834379126e-05, "loss": 0.0899, "step": 104960 }, { "epoch": 3.8145940838723744, "grad_norm": 0.4045618176460266, "learning_rate": 1.7098794333957627e-05, "loss": 0.0714, "step": 104970 }, { "epoch": 3.8149574823751724, "grad_norm": 1.508198857307434, "learning_rate": 1.709387117397097e-05, "loss": 0.0804, "step": 104980 }, { "epoch": 3.815320880877971, "grad_norm": 0.34709009528160095, "learning_rate": 1.7088948354631268e-05, "loss": 0.0769, "step": 104990 }, { "epoch": 3.815684279380769, 
"grad_norm": 1.4935734272003174, "learning_rate": 1.708402587615063e-05, "loss": 0.1259, "step": 105000 }, { "epoch": 3.815684279380769, "eval_loss": 0.31589025259017944, "eval_runtime": 181.3291, "eval_samples_per_second": 40.887, "eval_steps_per_second": 5.112, "eval_wer": 0.1329533283714851, "step": 105000 }, { "epoch": 3.8160476778835672, "grad_norm": 0.5566298365592957, "learning_rate": 1.707910373874116e-05, "loss": 0.0703, "step": 105010 }, { "epoch": 3.8164110763863652, "grad_norm": 0.5731711983680725, "learning_rate": 1.7074181942614925e-05, "loss": 0.0697, "step": 105020 }, { "epoch": 3.816774474889163, "grad_norm": 0.6845974922180176, "learning_rate": 1.706926048798399e-05, "loss": 0.0658, "step": 105030 }, { "epoch": 3.8171378733919616, "grad_norm": 0.4797564744949341, "learning_rate": 1.7064339375060407e-05, "loss": 0.0656, "step": 105040 }, { "epoch": 3.8175012718947596, "grad_norm": 0.47920140624046326, "learning_rate": 1.705941860405619e-05, "loss": 0.0717, "step": 105050 }, { "epoch": 3.817864670397558, "grad_norm": 1.682005524635315, "learning_rate": 1.7054498175183387e-05, "loss": 0.0719, "step": 105060 }, { "epoch": 3.818228068900356, "grad_norm": null, "learning_rate": 1.7050070081895424e-05, "loss": 3.2264, "step": 105070 }, { "epoch": 3.818591467403154, "grad_norm": 0.9053909182548523, "learning_rate": 1.704515030365634e-05, "loss": 0.0653, "step": 105080 }, { "epoch": 3.8189548659059525, "grad_norm": 0.34650978446006775, "learning_rate": 1.704023086816342e-05, "loss": 0.0794, "step": 105090 }, { "epoch": 3.819318264408751, "grad_norm": 0.4872235357761383, "learning_rate": 1.7035311775628635e-05, "loss": 0.0793, "step": 105100 }, { "epoch": 3.819681662911549, "grad_norm": 0.5745834708213806, "learning_rate": 1.7030393026263923e-05, "loss": 0.0899, "step": 105110 }, { "epoch": 3.820045061414347, "grad_norm": 0.28083306550979614, "learning_rate": 1.7025474620281215e-05, "loss": 0.4912, "step": 105120 }, { "epoch": 3.8204084599171453,
"grad_norm": 0.37169963121414185, "learning_rate": 1.702055655789244e-05, "loss": 0.0659, "step": 105130 }, { "epoch": 3.8207718584199433, "grad_norm": 0.39560073614120483, "learning_rate": 1.7015638839309488e-05, "loss": 0.992, "step": 105140 }, { "epoch": 3.8211352569227417, "grad_norm": 1.3541598320007324, "learning_rate": 1.7010721464744254e-05, "loss": 0.0931, "step": 105150 }, { "epoch": 3.8214986554255397, "grad_norm": 0.5384494662284851, "learning_rate": 1.7005804434408596e-05, "loss": 0.068, "step": 105160 }, { "epoch": 3.8218620539283377, "grad_norm": 0.4317835569381714, "learning_rate": 1.7000887748514392e-05, "loss": 0.0526, "step": 105170 }, { "epoch": 3.822225452431136, "grad_norm": 2.2331621646881104, "learning_rate": 1.6995971407273474e-05, "loss": 0.0795, "step": 105180 }, { "epoch": 3.822588850933934, "grad_norm": 0.9505332112312317, "learning_rate": 1.6991055410897666e-05, "loss": 0.0659, "step": 105190 }, { "epoch": 3.8229522494367325, "grad_norm": 0.5199359655380249, "learning_rate": 1.698613975959879e-05, "loss": 0.0803, "step": 105200 }, { "epoch": 3.8233156479395305, "grad_norm": 0.3324550688266754, "learning_rate": 1.6981224453588625e-05, "loss": 0.0634, "step": 105210 }, { "epoch": 3.8236790464423285, "grad_norm": 1.3727059364318848, "learning_rate": 1.697630949307898e-05, "loss": 0.0668, "step": 105220 }, { "epoch": 3.824042444945127, "grad_norm": 21.728702545166016, "learning_rate": 1.6971394878281604e-05, "loss": 0.2376, "step": 105230 }, { "epoch": 3.824405843447925, "grad_norm": 0.6887884140014648, "learning_rate": 1.6966480609408265e-05, "loss": 0.0708, "step": 105240 }, { "epoch": 3.8247692419507233, "grad_norm": 0.7199526429176331, "learning_rate": 1.696156668667069e-05, "loss": 0.0734, "step": 105250 }, { "epoch": 3.8251326404535213, "grad_norm": 0.6712095141410828, "learning_rate": 1.695665311028059e-05, "loss": 0.0976, "step": 105260 }, { "epoch": 3.8254960389563193, "grad_norm": 0.45200514793395996, "learning_rate": 
1.6951739880449707e-05, "loss": 0.0668, "step": 105270 }, { "epoch": 3.8258594374591177, "grad_norm": 0.4408622980117798, "learning_rate": 1.6946826997389714e-05, "loss": 0.0662, "step": 105280 }, { "epoch": 3.8262228359619157, "grad_norm": 0.3657882511615753, "learning_rate": 1.6941914461312293e-05, "loss": 0.1123, "step": 105290 }, { "epoch": 3.826586234464714, "grad_norm": 0.49692997336387634, "learning_rate": 1.6937002272429102e-05, "loss": 0.078, "step": 105300 }, { "epoch": 3.826949632967512, "grad_norm": 1.2416878938674927, "learning_rate": 1.6932090430951793e-05, "loss": 0.0857, "step": 105310 }, { "epoch": 3.82731303147031, "grad_norm": 0.4481440782546997, "learning_rate": 1.692717893709201e-05, "loss": 0.0482, "step": 105320 }, { "epoch": 3.8276764299731085, "grad_norm": 4.277026176452637, "learning_rate": 1.6922267791061358e-05, "loss": 0.0658, "step": 105330 }, { "epoch": 3.8280398284759065, "grad_norm": 0.5307076573371887, "learning_rate": 1.6917356993071447e-05, "loss": 0.09, "step": 105340 }, { "epoch": 3.828403226978705, "grad_norm": 0.5222869515419006, "learning_rate": 1.6912446543333858e-05, "loss": 0.0694, "step": 105350 }, { "epoch": 3.828766625481503, "grad_norm": 0.5144734382629395, "learning_rate": 1.6907536442060185e-05, "loss": 0.0826, "step": 105360 }, { "epoch": 3.829130023984301, "grad_norm": 1.0012280941009521, "learning_rate": 1.690262668946197e-05, "loss": 0.0629, "step": 105370 }, { "epoch": 3.8294934224870993, "grad_norm": 0.7172144651412964, "learning_rate": 1.6897717285750758e-05, "loss": 0.0787, "step": 105380 }, { "epoch": 3.8298568209898978, "grad_norm": 0.3552602529525757, "learning_rate": 1.6892808231138087e-05, "loss": 0.0678, "step": 105390 }, { "epoch": 3.8302202194926958, "grad_norm": 0.5939834713935852, "learning_rate": 1.6887899525835447e-05, "loss": 0.0892, "step": 105400 }, { "epoch": 3.8305836179954937, "grad_norm": 0.5008695721626282, "learning_rate": 1.6882991170054362e-05, "loss": 0.0528, "step": 105410 }, { 
"epoch": 3.830947016498292, "grad_norm": 0.30899590253829956, "learning_rate": 1.6878083164006314e-05, "loss": 0.0618, "step": 105420 }, { "epoch": 3.83131041500109, "grad_norm": 0.811284601688385, "learning_rate": 1.6873175507902762e-05, "loss": 0.0621, "step": 105430 }, { "epoch": 3.8316738135038886, "grad_norm": 0.5650566220283508, "learning_rate": 1.6868268201955164e-05, "loss": 0.073, "step": 105440 }, { "epoch": 3.8320372120066866, "grad_norm": 3.2447309494018555, "learning_rate": 1.6863361246374944e-05, "loss": 0.0765, "step": 105450 }, { "epoch": 3.8324006105094846, "grad_norm": 0.9772164821624756, "learning_rate": 1.6858454641373543e-05, "loss": 0.0875, "step": 105460 }, { "epoch": 3.832764009012283, "grad_norm": 2.7476863861083984, "learning_rate": 1.685354838716237e-05, "loss": 0.8768, "step": 105470 }, { "epoch": 3.833127407515081, "grad_norm": 2.6818318367004395, "learning_rate": 1.6848642483952808e-05, "loss": 0.07, "step": 105480 }, { "epoch": 3.8334908060178794, "grad_norm": 2.635261058807373, "learning_rate": 1.6843736931956238e-05, "loss": 0.0747, "step": 105490 }, { "epoch": 3.8338542045206774, "grad_norm": 0.7043896913528442, "learning_rate": 1.6838831731384022e-05, "loss": 0.0884, "step": 105500 }, { "epoch": 3.8342176030234754, "grad_norm": 0.5598475337028503, "learning_rate": 1.6833926882447516e-05, "loss": 0.0857, "step": 105510 }, { "epoch": 3.834581001526274, "grad_norm": 0.49555504322052, "learning_rate": 1.682902238535804e-05, "loss": 0.0676, "step": 105520 }, { "epoch": 3.8349444000290718, "grad_norm": 0.46926450729370117, "learning_rate": 1.682411824032692e-05, "loss": 0.0518, "step": 105530 }, { "epoch": 3.83530779853187, "grad_norm": 2.069505214691162, "learning_rate": 1.6819214447565445e-05, "loss": 0.0886, "step": 105540 }, { "epoch": 3.835671197034668, "grad_norm": 0.7095978856086731, "learning_rate": 1.6814311007284923e-05, "loss": 0.066, "step": 105550 }, { "epoch": 3.836034595537466, "grad_norm": 0.3672688603401184, 
"learning_rate": 1.6809407919696615e-05, "loss": 0.101, "step": 105560 }, { "epoch": 3.8363979940402646, "grad_norm": 0.9248769879341125, "learning_rate": 1.6804505185011777e-05, "loss": 0.0519, "step": 105570 }, { "epoch": 3.8367613925430626, "grad_norm": 0.9654824733734131, "learning_rate": 1.679960280344165e-05, "loss": 0.0796, "step": 105580 }, { "epoch": 3.837124791045861, "grad_norm": 0.5376684069633484, "learning_rate": 1.6794700775197452e-05, "loss": 0.0733, "step": 105590 }, { "epoch": 3.837488189548659, "grad_norm": 0.3716341555118561, "learning_rate": 1.6789799100490414e-05, "loss": 0.0693, "step": 105600 }, { "epoch": 3.837488189548659, "eval_loss": 0.31987106800079346, "eval_runtime": 181.8851, "eval_samples_per_second": 40.762, "eval_steps_per_second": 5.097, "eval_wer": 0.13323470147221667, "step": 105600 }, { "epoch": 3.837851588051457, "grad_norm": 0.5739371180534363, "learning_rate": 1.678489777953172e-05, "loss": 0.0888, "step": 105610 }, { "epoch": 3.8382149865542554, "grad_norm": 0.628527045249939, "learning_rate": 1.677999681253255e-05, "loss": 0.6623, "step": 105620 }, { "epoch": 3.8385783850570534, "grad_norm": 0.6783026456832886, "learning_rate": 1.6775096199704067e-05, "loss": 0.0822, "step": 105630 }, { "epoch": 3.838941783559852, "grad_norm": 0.5286380052566528, "learning_rate": 1.6770195941257425e-05, "loss": 0.0997, "step": 105640 }, { "epoch": 3.83930518206265, "grad_norm": 0.7992109656333923, "learning_rate": 1.676529603740376e-05, "loss": 0.0738, "step": 105650 }, { "epoch": 3.839668580565448, "grad_norm": 0.4643116593360901, "learning_rate": 1.6760396488354195e-05, "loss": 0.056, "step": 105660 }, { "epoch": 3.8400319790682462, "grad_norm": 0.5640047192573547, "learning_rate": 1.6755497294319823e-05, "loss": 0.0651, "step": 105670 }, { "epoch": 3.8403953775710447, "grad_norm": 0.4988393485546112, "learning_rate": 1.6750598455511737e-05, "loss": 0.0622, "step": 105680 }, { "epoch": 3.8407587760738426, "grad_norm": 
0.24571850895881653, "learning_rate": 1.674569997214101e-05, "loss": 3.0376, "step": 105690 }, { "epoch": 3.8411221745766406, "grad_norm": 0.3480076193809509, "learning_rate": 1.674080184441871e-05, "loss": 0.07, "step": 105700 }, { "epoch": 3.841485573079439, "grad_norm": 0.33364272117614746, "learning_rate": 1.6735904072555868e-05, "loss": 0.0919, "step": 105710 }, { "epoch": 3.841848971582237, "grad_norm": 0.29854241013526917, "learning_rate": 1.6731006656763515e-05, "loss": 0.0564, "step": 105720 }, { "epoch": 3.8422123700850355, "grad_norm": 0.4374145269393921, "learning_rate": 1.6726109597252662e-05, "loss": 0.0678, "step": 105730 }, { "epoch": 3.8425757685878335, "grad_norm": 0.3668364882469177, "learning_rate": 1.6721212894234314e-05, "loss": 0.0832, "step": 105740 }, { "epoch": 3.8429391670906314, "grad_norm": 0.8300511837005615, "learning_rate": 1.671631654791945e-05, "loss": 0.0963, "step": 105750 }, { "epoch": 3.84330256559343, "grad_norm": 0.600064754486084, "learning_rate": 1.6711420558519026e-05, "loss": 0.0829, "step": 105760 }, { "epoch": 3.843665964096228, "grad_norm": 0.42504894733428955, "learning_rate": 1.6706524926243995e-05, "loss": 0.0541, "step": 105770 }, { "epoch": 3.8440293625990263, "grad_norm": 1.8978909254074097, "learning_rate": 1.6701629651305296e-05, "loss": 0.071, "step": 105780 }, { "epoch": 3.8443927611018243, "grad_norm": 0.44248759746551514, "learning_rate": 1.6696734733913857e-05, "loss": 0.1478, "step": 105790 }, { "epoch": 3.8447561596046222, "grad_norm": 0.6377494931221008, "learning_rate": 1.6691840174280577e-05, "loss": 0.0822, "step": 105800 }, { "epoch": 3.8451195581074207, "grad_norm": 0.37662044167518616, "learning_rate": 1.6686945972616336e-05, "loss": 0.1136, "step": 105810 }, { "epoch": 3.8454829566102187, "grad_norm": 0.3601333796977997, "learning_rate": 1.668205212913202e-05, "loss": 0.063, "step": 105820 }, { "epoch": 3.845846355113017, "grad_norm": 0.36584463715553284, "learning_rate": 1.6677158644038478e-05, 
"loss": 0.0632, "step": 105830 }, { "epoch": 3.846209753615815, "grad_norm": 0.5580528378486633, "learning_rate": 1.667226551754656e-05, "loss": 0.0957, "step": 105840 }, { "epoch": 3.846573152118613, "grad_norm": 1.119971752166748, "learning_rate": 1.6667372749867093e-05, "loss": 0.0798, "step": 105850 }, { "epoch": 3.8469365506214115, "grad_norm": 0.424441397190094, "learning_rate": 1.6662480341210882e-05, "loss": 0.072, "step": 105860 }, { "epoch": 3.8472999491242095, "grad_norm": 0.39438381791114807, "learning_rate": 1.6657588291788734e-05, "loss": 0.1601, "step": 105870 }, { "epoch": 3.847663347627008, "grad_norm": 0.9732398390769958, "learning_rate": 1.6652696601811417e-05, "loss": 0.0615, "step": 105880 }, { "epoch": 3.848026746129806, "grad_norm": 0.6014724969863892, "learning_rate": 1.664780527148971e-05, "loss": 0.067, "step": 105890 }, { "epoch": 3.848390144632604, "grad_norm": 0.38137149810791016, "learning_rate": 1.6642914301034355e-05, "loss": 0.0779, "step": 105900 }, { "epoch": 3.8487535431354023, "grad_norm": 0.6532992720603943, "learning_rate": 1.663802369065608e-05, "loss": 0.0636, "step": 105910 }, { "epoch": 3.8491169416382003, "grad_norm": 0.22969412803649902, "learning_rate": 1.663313344056562e-05, "loss": 0.0629, "step": 105920 }, { "epoch": 3.8494803401409987, "grad_norm": 0.3547755777835846, "learning_rate": 1.662824355097367e-05, "loss": 0.0715, "step": 105930 }, { "epoch": 3.8498437386437967, "grad_norm": 1.1977945566177368, "learning_rate": 1.662335402209092e-05, "loss": 0.1126, "step": 105940 }, { "epoch": 3.8502071371465947, "grad_norm": 1.9513736963272095, "learning_rate": 1.6618464854128036e-05, "loss": 0.1062, "step": 105950 }, { "epoch": 3.850570535649393, "grad_norm": 0.7033438682556152, "learning_rate": 1.6613576047295688e-05, "loss": 0.0653, "step": 105960 }, { "epoch": 3.8509339341521915, "grad_norm": 0.6035280227661133, "learning_rate": 1.66086876018045e-05, "loss": 0.0624, "step": 105970 }, { "epoch": 3.8512973326549895, 
"grad_norm": 0.5035017132759094, "learning_rate": 1.660379951786511e-05, "loss": 0.0756, "step": 105980 }, { "epoch": 3.8516607311577875, "grad_norm": 0.5310548543930054, "learning_rate": 1.6598911795688132e-05, "loss": 0.0717, "step": 105990 }, { "epoch": 3.852024129660586, "grad_norm": 0.5817539691925049, "learning_rate": 1.6594024435484144e-05, "loss": 0.0751, "step": 106000 }, { "epoch": 3.852387528163384, "grad_norm": 0.823477566242218, "learning_rate": 1.658913743746374e-05, "loss": 0.097, "step": 106010 }, { "epoch": 3.8527509266661824, "grad_norm": 0.6416204571723938, "learning_rate": 1.658425080183747e-05, "loss": 0.0612, "step": 106020 }, { "epoch": 3.8531143251689803, "grad_norm": 7.8174920082092285, "learning_rate": 1.65793645288159e-05, "loss": 0.0709, "step": 106030 }, { "epoch": 3.8534777236717783, "grad_norm": 0.37136542797088623, "learning_rate": 1.6574478618609546e-05, "loss": 0.075, "step": 106040 }, { "epoch": 3.8538411221745767, "grad_norm": 0.7363283634185791, "learning_rate": 1.6569593071428932e-05, "loss": 0.0781, "step": 106050 }, { "epoch": 3.8542045206773747, "grad_norm": 0.7914659976959229, "learning_rate": 1.656470788748456e-05, "loss": 0.0683, "step": 106060 }, { "epoch": 3.854567919180173, "grad_norm": 0.710452675819397, "learning_rate": 1.6559823066986906e-05, "loss": 0.059, "step": 106070 }, { "epoch": 3.854931317682971, "grad_norm": 0.8524606227874756, "learning_rate": 1.655493861014645e-05, "loss": 0.0528, "step": 106080 }, { "epoch": 3.855294716185769, "grad_norm": 1.270829677581787, "learning_rate": 1.6550054517173635e-05, "loss": 0.0747, "step": 106090 }, { "epoch": 3.8556581146885676, "grad_norm": 0.5435279011726379, "learning_rate": 1.6545170788278913e-05, "loss": 0.0787, "step": 106100 }, { "epoch": 3.8560215131913655, "grad_norm": 0.6725544333457947, "learning_rate": 1.65402874236727e-05, "loss": 0.074, "step": 106110 }, { "epoch": 3.856384911694164, "grad_norm": 0.43053969740867615, "learning_rate": 1.6535404423565397e-05, 
"loss": 0.0678, "step": 106120 }, { "epoch": 3.856748310196962, "grad_norm": 0.36162063479423523, "learning_rate": 1.65305217881674e-05, "loss": 0.062, "step": 106130 }, { "epoch": 3.85711170869976, "grad_norm": 2.1975929737091064, "learning_rate": 1.6525639517689088e-05, "loss": 0.0719, "step": 106140 }, { "epoch": 3.8574751072025584, "grad_norm": 0.450427770614624, "learning_rate": 1.652075761234082e-05, "loss": 0.0682, "step": 106150 }, { "epoch": 3.8578385057053564, "grad_norm": 1.4081052541732788, "learning_rate": 1.6515876072332934e-05, "loss": 0.06, "step": 106160 }, { "epoch": 3.858201904208155, "grad_norm": 0.28008484840393066, "learning_rate": 1.6510994897875763e-05, "loss": 0.0616, "step": 106170 }, { "epoch": 3.8585653027109528, "grad_norm": 0.6576961874961853, "learning_rate": 1.6506114089179612e-05, "loss": 0.0617, "step": 106180 }, { "epoch": 3.8589287012137508, "grad_norm": 0.5949859023094177, "learning_rate": 1.650123364645479e-05, "loss": 0.0767, "step": 106190 }, { "epoch": 3.859292099716549, "grad_norm": 1.566115379333496, "learning_rate": 1.6496353569911575e-05, "loss": 0.1152, "step": 106200 }, { "epoch": 3.859292099716549, "eval_loss": 0.3167436122894287, "eval_runtime": 180.8794, "eval_samples_per_second": 40.989, "eval_steps_per_second": 5.125, "eval_wer": 0.1313286256285512, "step": 106200 }, { "epoch": 3.8596554982193476, "grad_norm": 0.697790801525116, "learning_rate": 1.649147385976022e-05, "loss": 0.0746, "step": 106210 }, { "epoch": 3.8600188967221456, "grad_norm": 0.2491450309753418, "learning_rate": 1.648659451621098e-05, "loss": 0.0578, "step": 106220 }, { "epoch": 3.8603822952249436, "grad_norm": 0.42509350180625916, "learning_rate": 1.64817155394741e-05, "loss": 0.0646, "step": 106230 }, { "epoch": 3.8607456937277416, "grad_norm": 1.1668282747268677, "learning_rate": 1.647683692975979e-05, "loss": 0.0992, "step": 106240 }, { "epoch": 3.86110909223054, "grad_norm": 0.8720428347587585, "learning_rate": 1.647195868727825e-05, 
"loss": 0.0714, "step": 106250 }, { "epoch": 3.8614724907333384, "grad_norm": 0.5526779294013977, "learning_rate": 1.6467080812239662e-05, "loss": 0.0523, "step": 106260 }, { "epoch": 3.8618358892361364, "grad_norm": 0.6438971161842346, "learning_rate": 1.6462203304854203e-05, "loss": 0.0666, "step": 106270 }, { "epoch": 3.8621992877389344, "grad_norm": 0.5303975343704224, "learning_rate": 1.645732616533203e-05, "loss": 0.0634, "step": 106280 }, { "epoch": 3.862562686241733, "grad_norm": 0.9776999354362488, "learning_rate": 1.6452449393883276e-05, "loss": 0.0791, "step": 106290 }, { "epoch": 3.862926084744531, "grad_norm": 0.5289604663848877, "learning_rate": 1.6447572990718068e-05, "loss": 0.1386, "step": 106300 }, { "epoch": 3.8632894832473292, "grad_norm": 0.6309967637062073, "learning_rate": 1.6442696956046504e-05, "loss": 0.1688, "step": 106310 }, { "epoch": 3.863652881750127, "grad_norm": 0.8625107407569885, "learning_rate": 1.6437821290078682e-05, "loss": 0.0569, "step": 106320 }, { "epoch": 3.864016280252925, "grad_norm": 0.4633709788322449, "learning_rate": 1.643294599302468e-05, "loss": 0.0567, "step": 106330 }, { "epoch": 3.8643796787557236, "grad_norm": 0.47958311438560486, "learning_rate": 1.6428071065094553e-05, "loss": 0.2378, "step": 106340 }, { "epoch": 3.8647430772585216, "grad_norm": 0.53351891040802, "learning_rate": 1.6423196506498338e-05, "loss": 0.1388, "step": 106350 }, { "epoch": 3.86510647576132, "grad_norm": 0.5696703791618347, "learning_rate": 1.6418322317446073e-05, "loss": 0.0815, "step": 106360 }, { "epoch": 3.865469874264118, "grad_norm": 0.8141573667526245, "learning_rate": 1.641344849814777e-05, "loss": 0.1241, "step": 106370 }, { "epoch": 3.865833272766916, "grad_norm": 2.059314012527466, "learning_rate": 1.6408575048813424e-05, "loss": 0.0726, "step": 106380 }, { "epoch": 3.8661966712697144, "grad_norm": 0.37247422337532043, "learning_rate": 1.6403701969653004e-05, "loss": 0.9905, "step": 106390 }, { "epoch": 3.8665600697725124, 
"grad_norm": 2.2825708389282227, "learning_rate": 1.639882926087648e-05, "loss": 0.0743, "step": 106400 }, { "epoch": 3.866923468275311, "grad_norm": 0.778655469417572, "learning_rate": 1.6393956922693798e-05, "loss": 0.0703, "step": 106410 }, { "epoch": 3.867286866778109, "grad_norm": 1.8675559759140015, "learning_rate": 1.6389084955314896e-05, "loss": 0.0579, "step": 106420 }, { "epoch": 3.867650265280907, "grad_norm": 0.796132504940033, "learning_rate": 1.6384213358949683e-05, "loss": 0.0465, "step": 106430 }, { "epoch": 3.8680136637837053, "grad_norm": 1.6710090637207031, "learning_rate": 1.6379342133808067e-05, "loss": 0.0879, "step": 106440 }, { "epoch": 3.8683770622865032, "grad_norm": 0.64317786693573, "learning_rate": 1.6374471280099912e-05, "loss": 0.0945, "step": 106450 }, { "epoch": 3.8687404607893017, "grad_norm": 1.1464306116104126, "learning_rate": 1.6369600798035113e-05, "loss": 0.1468, "step": 106460 }, { "epoch": 3.8691038592920997, "grad_norm": 4.935044765472412, "learning_rate": 1.636473068782351e-05, "loss": 0.0602, "step": 106470 }, { "epoch": 3.8694672577948976, "grad_norm": 0.9155591726303101, "learning_rate": 1.6359860949674932e-05, "loss": 0.0722, "step": 106480 }, { "epoch": 3.869830656297696, "grad_norm": 0.3697691261768341, "learning_rate": 1.6354991583799208e-05, "loss": 0.0762, "step": 106490 }, { "epoch": 3.8701940548004945, "grad_norm": 0.9484128952026367, "learning_rate": 1.6350122590406126e-05, "loss": 0.0743, "step": 106500 }, { "epoch": 3.8705574533032925, "grad_norm": 0.4434047043323517, "learning_rate": 1.6345253969705492e-05, "loss": 0.1289, "step": 106510 }, { "epoch": 3.8709208518060905, "grad_norm": 0.3249851167201996, "learning_rate": 1.6340385721907075e-05, "loss": 0.0621, "step": 106520 }, { "epoch": 3.8712842503088885, "grad_norm": 1.056702733039856, "learning_rate": 1.6335517847220626e-05, "loss": 0.0587, "step": 106530 }, { "epoch": 3.871647648811687, "grad_norm": 0.3898318111896515, "learning_rate": 
1.6330650345855874e-05, "loss": 0.0668, "step": 106540 }, { "epoch": 3.8720110473144853, "grad_norm": 0.44665372371673584, "learning_rate": 1.6325783218022563e-05, "loss": 0.0572, "step": 106550 }, { "epoch": 3.8723744458172833, "grad_norm": 0.4088903069496155, "learning_rate": 1.632091646393039e-05, "loss": 0.0907, "step": 106560 }, { "epoch": 3.8727378443200813, "grad_norm": 0.8101043105125427, "learning_rate": 1.6316050083789046e-05, "loss": 0.0548, "step": 106570 }, { "epoch": 3.8731012428228797, "grad_norm": 0.627242386341095, "learning_rate": 1.6311184077808206e-05, "loss": 0.2322, "step": 106580 }, { "epoch": 3.8734646413256777, "grad_norm": 0.3941401243209839, "learning_rate": 1.6306318446197518e-05, "loss": 0.074, "step": 106590 }, { "epoch": 3.873828039828476, "grad_norm": 4.295115947723389, "learning_rate": 1.630145318916665e-05, "loss": 0.0713, "step": 106600 }, { "epoch": 3.874191438331274, "grad_norm": 0.5540897846221924, "learning_rate": 1.629658830692521e-05, "loss": 0.0737, "step": 106610 }, { "epoch": 3.874554836834072, "grad_norm": 0.6114850044250488, "learning_rate": 1.6291723799682818e-05, "loss": 0.1015, "step": 106620 }, { "epoch": 3.8749182353368705, "grad_norm": 0.686089038848877, "learning_rate": 1.6286859667649058e-05, "loss": 0.0661, "step": 106630 }, { "epoch": 3.8752816338396685, "grad_norm": 0.49293363094329834, "learning_rate": 1.6281995911033507e-05, "loss": 0.076, "step": 106640 }, { "epoch": 3.875645032342467, "grad_norm": 0.6190850734710693, "learning_rate": 1.6277132530045742e-05, "loss": 0.0892, "step": 106650 }, { "epoch": 3.876008430845265, "grad_norm": 0.4621674418449402, "learning_rate": 1.62722695248953e-05, "loss": 0.0678, "step": 106660 }, { "epoch": 3.876371829348063, "grad_norm": 1.242116093635559, "learning_rate": 1.626740689579171e-05, "loss": 0.0694, "step": 106670 }, { "epoch": 3.8767352278508613, "grad_norm": 0.4658359885215759, "learning_rate": 1.6262544642944488e-05, "loss": 0.0659, "step": 106680 }, { "epoch": 
3.8770986263536593, "grad_norm": 0.6650833487510681, "learning_rate": 1.625768276656312e-05, "loss": 0.0776, "step": 106690 }, { "epoch": 3.8774620248564577, "grad_norm": 0.6864154934883118, "learning_rate": 1.6252821266857105e-05, "loss": 0.0891, "step": 106700 }, { "epoch": 3.8778254233592557, "grad_norm": 0.6746088862419128, "learning_rate": 1.62479601440359e-05, "loss": 0.1, "step": 106710 }, { "epoch": 3.8781888218620537, "grad_norm": 0.5241110920906067, "learning_rate": 1.6243099398308954e-05, "loss": 0.0599, "step": 106720 }, { "epoch": 3.878552220364852, "grad_norm": 0.4247439205646515, "learning_rate": 1.6238239029885684e-05, "loss": 0.0843, "step": 106730 }, { "epoch": 3.87891561886765, "grad_norm": 0.42593395709991455, "learning_rate": 1.6233379038975532e-05, "loss": 0.0776, "step": 106740 }, { "epoch": 3.8792790173704486, "grad_norm": 0.4933978021144867, "learning_rate": 1.6228519425787885e-05, "loss": 0.0887, "step": 106750 }, { "epoch": 3.8796424158732465, "grad_norm": 39.70121383666992, "learning_rate": 1.6223660190532126e-05, "loss": 0.776, "step": 106760 }, { "epoch": 3.8800058143760445, "grad_norm": 1.380768895149231, "learning_rate": 1.6218801333417624e-05, "loss": 0.0576, "step": 106770 }, { "epoch": 3.880369212878843, "grad_norm": 0.2609730064868927, "learning_rate": 1.621394285465372e-05, "loss": 0.0631, "step": 106780 }, { "epoch": 3.8807326113816414, "grad_norm": 0.5191308856010437, "learning_rate": 1.6209084754449766e-05, "loss": 0.0902, "step": 106790 }, { "epoch": 3.8810960098844394, "grad_norm": 0.930590033531189, "learning_rate": 1.620422703301507e-05, "loss": 0.0548, "step": 106800 }, { "epoch": 3.8810960098844394, "eval_loss": 0.30408763885498047, "eval_runtime": 179.2538, "eval_samples_per_second": 41.36, "eval_steps_per_second": 5.171, "eval_wer": 0.13078403253036106, "step": 106800 }, { "epoch": 3.8814594083872374, "grad_norm": 0.5123357176780701, "learning_rate": 1.619936969055894e-05, "loss": 0.0778, "step": 106810 }, { "epoch": 
3.8818228068900353, "grad_norm": 0.5172662138938904, "learning_rate": 1.6194512727290656e-05, "loss": 0.0528, "step": 106820 }, { "epoch": 3.8821862053928338, "grad_norm": 1.0481631755828857, "learning_rate": 1.6189656143419474e-05, "loss": 0.0652, "step": 106830 }, { "epoch": 3.882549603895632, "grad_norm": 0.29324159026145935, "learning_rate": 1.6184799939154677e-05, "loss": 0.083, "step": 106840 }, { "epoch": 3.88291300239843, "grad_norm": 0.7054247260093689, "learning_rate": 1.6179944114705484e-05, "loss": 0.0834, "step": 106850 }, { "epoch": 3.883276400901228, "grad_norm": 0.34651482105255127, "learning_rate": 1.617508867028112e-05, "loss": 0.0605, "step": 106860 }, { "epoch": 3.8836397994040266, "grad_norm": 2.1791582107543945, "learning_rate": 1.6170233606090783e-05, "loss": 0.0598, "step": 106870 }, { "epoch": 3.8840031979068246, "grad_norm": 0.725145161151886, "learning_rate": 1.6165378922343652e-05, "loss": 0.086, "step": 106880 }, { "epoch": 3.884366596409623, "grad_norm": 0.8774747848510742, "learning_rate": 1.616052461924892e-05, "loss": 0.0777, "step": 106890 }, { "epoch": 3.884729994912421, "grad_norm": 0.5606204867362976, "learning_rate": 1.6155670697015735e-05, "loss": 0.0838, "step": 106900 }, { "epoch": 3.885093393415219, "grad_norm": 0.44490596652030945, "learning_rate": 1.615081715585323e-05, "loss": 0.0733, "step": 106910 }, { "epoch": 3.8854567919180174, "grad_norm": 0.3232552707195282, "learning_rate": 1.6145963995970514e-05, "loss": 0.0528, "step": 106920 }, { "epoch": 3.8858201904208154, "grad_norm": 0.4871584475040436, "learning_rate": 1.614111121757672e-05, "loss": 0.1162, "step": 106930 }, { "epoch": 3.886183588923614, "grad_norm": 2.606072425842285, "learning_rate": 1.6136258820880925e-05, "loss": 0.0982, "step": 106940 }, { "epoch": 3.886546987426412, "grad_norm": 0.6759468913078308, "learning_rate": 1.61314068060922e-05, "loss": 0.0551, "step": 106950 }, { "epoch": 3.88691038592921, "grad_norm": 0.5082545876502991, "learning_rate": 
1.61265551734196e-05, "loss": 0.1194, "step": 106960 }, { "epoch": 3.887273784432008, "grad_norm": 0.4217374920845032, "learning_rate": 1.6121703923072155e-05, "loss": 0.0739, "step": 106970 }, { "epoch": 3.887637182934806, "grad_norm": 0.2575652003288269, "learning_rate": 1.611685305525891e-05, "loss": 0.0517, "step": 106980 }, { "epoch": 3.8880005814376046, "grad_norm": 0.5304332375526428, "learning_rate": 1.611200257018886e-05, "loss": 0.0997, "step": 106990 }, { "epoch": 3.8883639799404026, "grad_norm": 1.2850308418273926, "learning_rate": 1.6107152468070995e-05, "loss": 0.0848, "step": 107000 }, { "epoch": 3.8887273784432006, "grad_norm": 0.8286998867988586, "learning_rate": 1.610230274911429e-05, "loss": 0.06, "step": 107010 }, { "epoch": 3.889090776945999, "grad_norm": 0.36516815423965454, "learning_rate": 1.609745341352769e-05, "loss": 0.0546, "step": 107020 }, { "epoch": 3.889454175448797, "grad_norm": 0.608978271484375, "learning_rate": 1.6092604461520162e-05, "loss": 0.0591, "step": 107030 }, { "epoch": 3.8898175739515954, "grad_norm": 0.40146422386169434, "learning_rate": 1.608775589330061e-05, "loss": 0.0586, "step": 107040 }, { "epoch": 3.8901809724543934, "grad_norm": 0.7387737035751343, "learning_rate": 1.608290770907795e-05, "loss": 0.0826, "step": 107050 }, { "epoch": 3.8905443709571914, "grad_norm": 1.0020533800125122, "learning_rate": 1.6078059909061067e-05, "loss": 0.0709, "step": 107060 }, { "epoch": 3.89090776945999, "grad_norm": 0.44272226095199585, "learning_rate": 1.6073212493458827e-05, "loss": 0.0711, "step": 107070 }, { "epoch": 3.8912711679627883, "grad_norm": 0.39536282420158386, "learning_rate": 1.6068365462480112e-05, "loss": 0.059, "step": 107080 }, { "epoch": 3.8916345664655863, "grad_norm": 1.6497310400009155, "learning_rate": 1.606351881633375e-05, "loss": 0.0642, "step": 107090 }, { "epoch": 3.8919979649683842, "grad_norm": 0.7479108572006226, "learning_rate": 1.6058672555228565e-05, "loss": 0.0902, "step": 107100 }, { "epoch": 
3.8923613634711822, "grad_norm": 0.6676745414733887, "learning_rate": 1.6053826679373356e-05, "loss": 0.0975, "step": 107110 }, { "epoch": 3.8927247619739807, "grad_norm": 0.6554991602897644, "learning_rate": 1.6048981188976936e-05, "loss": 0.0753, "step": 107120 }, { "epoch": 3.893088160476779, "grad_norm": 0.561316728591919, "learning_rate": 1.6044136084248068e-05, "loss": 0.0874, "step": 107130 }, { "epoch": 3.893451558979577, "grad_norm": 0.5027711987495422, "learning_rate": 1.6039291365395515e-05, "loss": 0.0706, "step": 107140 }, { "epoch": 3.893814957482375, "grad_norm": 0.5242792963981628, "learning_rate": 1.603444703262801e-05, "loss": 0.0705, "step": 107150 }, { "epoch": 3.8941783559851735, "grad_norm": 0.5882894992828369, "learning_rate": 1.6029603086154274e-05, "loss": 0.0854, "step": 107160 }, { "epoch": 3.8945417544879715, "grad_norm": 0.5304105281829834, "learning_rate": 1.6024759526183037e-05, "loss": 0.0564, "step": 107170 }, { "epoch": 3.89490515299077, "grad_norm": 0.9514649510383606, "learning_rate": 1.6019916352922972e-05, "loss": 0.0565, "step": 107180 }, { "epoch": 3.895268551493568, "grad_norm": 0.5723317861557007, "learning_rate": 1.6015073566582762e-05, "loss": 0.0756, "step": 107190 }, { "epoch": 3.895631949996366, "grad_norm": 1.066159963607788, "learning_rate": 1.6010231167371067e-05, "loss": 0.0668, "step": 107200 }, { "epoch": 3.8959953484991643, "grad_norm": 0.6977314949035645, "learning_rate": 1.6005389155496514e-05, "loss": 0.063, "step": 107210 }, { "epoch": 3.8963587470019623, "grad_norm": 0.9089870452880859, "learning_rate": 1.6000547531167747e-05, "loss": 0.0545, "step": 107220 }, { "epoch": 3.8967221455047607, "grad_norm": 0.4446127116680145, "learning_rate": 1.599570629459337e-05, "loss": 0.0672, "step": 107230 }, { "epoch": 3.8970855440075587, "grad_norm": 0.4407578408718109, "learning_rate": 1.599086544598197e-05, "loss": 0.0931, "step": 107240 }, { "epoch": 3.8974489425103567, "grad_norm": 0.5468510389328003, 
"learning_rate": 1.5986024985542124e-05, "loss": 0.0712, "step": 107250 }, { "epoch": 3.897812341013155, "grad_norm": 0.7128602266311646, "learning_rate": 1.598118491348238e-05, "loss": 0.0582, "step": 107260 }, { "epoch": 3.898175739515953, "grad_norm": 0.6812794804573059, "learning_rate": 1.5976345230011297e-05, "loss": 0.0638, "step": 107270 }, { "epoch": 3.8985391380187515, "grad_norm": 0.9153865575790405, "learning_rate": 1.597150593533739e-05, "loss": 0.0765, "step": 107280 }, { "epoch": 3.8989025365215495, "grad_norm": 0.4191397428512573, "learning_rate": 1.5966667029669173e-05, "loss": 0.1032, "step": 107290 }, { "epoch": 3.8992659350243475, "grad_norm": 0.9470223784446716, "learning_rate": 1.5961828513215132e-05, "loss": 0.0772, "step": 107300 }, { "epoch": 3.899629333527146, "grad_norm": 0.8003381490707397, "learning_rate": 1.595699038618373e-05, "loss": 0.0638, "step": 107310 }, { "epoch": 3.899992732029944, "grad_norm": 0.5758501887321472, "learning_rate": 1.5952152648783448e-05, "loss": 0.0702, "step": 107320 }, { "epoch": 3.9003561305327423, "grad_norm": 0.4874951243400574, "learning_rate": 1.5947315301222717e-05, "loss": 0.0698, "step": 107330 }, { "epoch": 3.9007195290355403, "grad_norm": 0.9432708024978638, "learning_rate": 1.594247834370996e-05, "loss": 0.0931, "step": 107340 }, { "epoch": 3.9010829275383383, "grad_norm": 6.735447883605957, "learning_rate": 1.5937641776453578e-05, "loss": 0.0929, "step": 107350 }, { "epoch": 3.9014463260411367, "grad_norm": 0.32913991808891296, "learning_rate": 1.593280559966197e-05, "loss": 0.0863, "step": 107360 }, { "epoch": 3.901809724543935, "grad_norm": 0.65277099609375, "learning_rate": 1.5927969813543513e-05, "loss": 0.0631, "step": 107370 }, { "epoch": 3.902173123046733, "grad_norm": 0.7276453375816345, "learning_rate": 1.5923134418306557e-05, "loss": 1.8576, "step": 107380 }, { "epoch": 3.902536521549531, "grad_norm": 0.7184520959854126, "learning_rate": 1.5918299414159443e-05, "loss": 0.0671, "step": 
107390 }, { "epoch": 3.902899920052329, "grad_norm": 0.7107725143432617, "learning_rate": 1.5913464801310486e-05, "loss": 0.0847, "step": 107400 }, { "epoch": 3.902899920052329, "eval_loss": 0.30393534898757935, "eval_runtime": 181.5459, "eval_samples_per_second": 40.838, "eval_steps_per_second": 5.106, "eval_wer": 0.13077495597872457, "step": 107400 }, { "epoch": 3.9032633185551275, "grad_norm": 0.760789692401886, "learning_rate": 1.590863057996801e-05, "loss": 0.0683, "step": 107410 }, { "epoch": 3.903626717057926, "grad_norm": 0.39104652404785156, "learning_rate": 1.5903796750340295e-05, "loss": 0.0493, "step": 107420 }, { "epoch": 3.903990115560724, "grad_norm": 1.022646188735962, "learning_rate": 1.5898963312635612e-05, "loss": 0.0675, "step": 107430 }, { "epoch": 3.904353514063522, "grad_norm": 0.4232107400894165, "learning_rate": 1.5894130267062217e-05, "loss": 0.0684, "step": 107440 }, { "epoch": 3.9047169125663204, "grad_norm": 1.4065017700195312, "learning_rate": 1.5889297613828347e-05, "loss": 1.4241, "step": 107450 }, { "epoch": 3.9050803110691183, "grad_norm": 1.4978172779083252, "learning_rate": 1.588446535314223e-05, "loss": 0.079, "step": 107460 }, { "epoch": 3.9054437095719168, "grad_norm": 0.40406903624534607, "learning_rate": 1.587963348521207e-05, "loss": 0.0671, "step": 107470 }, { "epoch": 3.9058071080747148, "grad_norm": 0.4887799918651581, "learning_rate": 1.5874802010246054e-05, "loss": 0.0739, "step": 107480 }, { "epoch": 3.9061705065775127, "grad_norm": 1.1040798425674438, "learning_rate": 1.586997092845235e-05, "loss": 0.0894, "step": 107490 }, { "epoch": 3.906533905080311, "grad_norm": 1.5164659023284912, "learning_rate": 1.5865140240039106e-05, "loss": 0.0769, "step": 107500 }, { "epoch": 3.906897303583109, "grad_norm": 0.4174286425113678, "learning_rate": 1.5860309945214473e-05, "loss": 0.0785, "step": 107510 }, { "epoch": 3.9072607020859076, "grad_norm": 0.3692898750305176, "learning_rate": 1.585548004418657e-05, "loss": 0.0511, 
"step": 107520 }, { "epoch": 3.9076241005887056, "grad_norm": 0.49770957231521606, "learning_rate": 1.5850650537163494e-05, "loss": 0.0643, "step": 107530 }, { "epoch": 3.9079874990915036, "grad_norm": 0.4804151654243469, "learning_rate": 1.5845821424353323e-05, "loss": 0.0757, "step": 107540 }, { "epoch": 3.908350897594302, "grad_norm": 0.7496044635772705, "learning_rate": 1.5840992705964148e-05, "loss": 0.0656, "step": 107550 }, { "epoch": 3.9087142960971, "grad_norm": 0.6202625632286072, "learning_rate": 1.5836164382204004e-05, "loss": 0.062, "step": 107560 }, { "epoch": 3.9090776945998984, "grad_norm": 0.4673279821872711, "learning_rate": 1.5831336453280937e-05, "loss": 0.0669, "step": 107570 }, { "epoch": 3.9094410931026964, "grad_norm": 0.4209801256656647, "learning_rate": 1.5826508919402958e-05, "loss": 0.0724, "step": 107580 }, { "epoch": 3.9098044916054944, "grad_norm": 0.6488276124000549, "learning_rate": 1.5821681780778057e-05, "loss": 0.0854, "step": 107590 }, { "epoch": 3.910167890108293, "grad_norm": 1.0254498720169067, "learning_rate": 1.5816855037614248e-05, "loss": 0.08, "step": 107600 }, { "epoch": 3.910531288611091, "grad_norm": 0.4866830110549927, "learning_rate": 1.581202869011948e-05, "loss": 0.0676, "step": 107610 }, { "epoch": 3.910894687113889, "grad_norm": 0.3314000368118286, "learning_rate": 1.5807202738501703e-05, "loss": 0.0454, "step": 107620 }, { "epoch": 3.911258085616687, "grad_norm": 0.7425368428230286, "learning_rate": 1.580237718296885e-05, "loss": 0.0688, "step": 107630 }, { "epoch": 3.911621484119485, "grad_norm": 1.5754629373550415, "learning_rate": 1.5797552023728833e-05, "loss": 0.0629, "step": 107640 }, { "epoch": 3.9119848826222836, "grad_norm": 1.2293038368225098, "learning_rate": 1.5792727260989566e-05, "loss": 0.1056, "step": 107650 }, { "epoch": 3.912348281125082, "grad_norm": 0.7234967947006226, "learning_rate": 1.5787902894958916e-05, "loss": 0.09, "step": 107660 }, { "epoch": 3.91271167962788, "grad_norm": 
1.6464751958847046, "learning_rate": 1.578307892584476e-05, "loss": 0.0716, "step": 107670 }, { "epoch": 3.913075078130678, "grad_norm": 0.325550377368927, "learning_rate": 1.5778255353854937e-05, "loss": 0.0843, "step": 107680 }, { "epoch": 3.913438476633476, "grad_norm": 0.40090569853782654, "learning_rate": 1.5773432179197272e-05, "loss": 0.0599, "step": 107690 }, { "epoch": 3.9138018751362744, "grad_norm": 0.4380953907966614, "learning_rate": 1.5768609402079592e-05, "loss": 0.0877, "step": 107700 }, { "epoch": 3.914165273639073, "grad_norm": 0.5776761174201965, "learning_rate": 1.5763787022709693e-05, "loss": 0.0736, "step": 107710 }, { "epoch": 3.914528672141871, "grad_norm": 0.5864748954772949, "learning_rate": 1.5758965041295343e-05, "loss": 0.0563, "step": 107720 }, { "epoch": 3.914892070644669, "grad_norm": 0.4065738320350647, "learning_rate": 1.57541434580443e-05, "loss": 0.0739, "step": 107730 }, { "epoch": 3.9152554691474672, "grad_norm": 0.3938973546028137, "learning_rate": 1.5749322273164336e-05, "loss": 0.0883, "step": 107740 }, { "epoch": 3.9156188676502652, "grad_norm": 0.42485731840133667, "learning_rate": 1.574450148686315e-05, "loss": 0.0824, "step": 107750 }, { "epoch": 3.9159822661530637, "grad_norm": 1.9941436052322388, "learning_rate": 1.5739681099348473e-05, "loss": 0.0731, "step": 107760 }, { "epoch": 3.9163456646558616, "grad_norm": 0.6175404787063599, "learning_rate": 1.5734861110827987e-05, "loss": 0.0537, "step": 107770 }, { "epoch": 3.9167090631586596, "grad_norm": 0.7640911340713501, "learning_rate": 1.573004152150936e-05, "loss": 0.0635, "step": 107780 }, { "epoch": 3.917072461661458, "grad_norm": 0.23279604315757751, "learning_rate": 1.572522233160027e-05, "loss": 0.0634, "step": 107790 }, { "epoch": 3.917435860164256, "grad_norm": 0.3372284770011902, "learning_rate": 1.572040354130835e-05, "loss": 0.0966, "step": 107800 }, { "epoch": 3.9177992586670545, "grad_norm": 0.8758069276809692, "learning_rate": 1.571558515084122e-05, 
"loss": 0.06, "step": 107810 }, { "epoch": 3.9181626571698525, "grad_norm": 0.29386308789253235, "learning_rate": 1.57107671604065e-05, "loss": 0.0593, "step": 107820 }, { "epoch": 3.9185260556726504, "grad_norm": 0.23792269825935364, "learning_rate": 1.5705949570211757e-05, "loss": 0.0801, "step": 107830 }, { "epoch": 3.918889454175449, "grad_norm": 0.33399441838264465, "learning_rate": 1.570113238046459e-05, "loss": 0.068, "step": 107840 }, { "epoch": 3.919252852678247, "grad_norm": 0.7931497097015381, "learning_rate": 1.5696315591372544e-05, "loss": 0.0883, "step": 107850 }, { "epoch": 3.9196162511810453, "grad_norm": 0.4268588721752167, "learning_rate": 1.5691499203143157e-05, "loss": 0.0622, "step": 107860 }, { "epoch": 3.9199796496838433, "grad_norm": 0.3438781797885895, "learning_rate": 1.568668321598395e-05, "loss": 0.0553, "step": 107870 }, { "epoch": 3.9203430481866413, "grad_norm": 0.6346169114112854, "learning_rate": 1.568186763010241e-05, "loss": 0.0677, "step": 107880 }, { "epoch": 3.9207064466894397, "grad_norm": 0.4053609371185303, "learning_rate": 1.5677052445706058e-05, "loss": 0.0733, "step": 107890 }, { "epoch": 3.9210698451922377, "grad_norm": 0.5387663841247559, "learning_rate": 1.5672237663002344e-05, "loss": 0.0905, "step": 107900 }, { "epoch": 3.921433243695036, "grad_norm": 0.35756614804267883, "learning_rate": 1.5667423282198714e-05, "loss": 0.0616, "step": 107910 }, { "epoch": 3.921796642197834, "grad_norm": 0.5838266611099243, "learning_rate": 1.5662609303502607e-05, "loss": 0.068, "step": 107920 }, { "epoch": 3.922160040700632, "grad_norm": 0.5504740476608276, "learning_rate": 1.5657795727121448e-05, "loss": 0.0596, "step": 107930 }, { "epoch": 3.9225234392034305, "grad_norm": 0.3853808343410492, "learning_rate": 1.565298255326263e-05, "loss": 0.1693, "step": 107940 }, { "epoch": 3.922886837706229, "grad_norm": 0.6952928900718689, "learning_rate": 1.5648169782133543e-05, "loss": 0.0862, "step": 107950 }, { "epoch": 3.923250236209027, 
"grad_norm": 0.39551839232444763, "learning_rate": 1.564335741394154e-05, "loss": 0.0867, "step": 107960 }, { "epoch": 3.923613634711825, "grad_norm": 0.5088280439376831, "learning_rate": 1.563854544889397e-05, "loss": 0.0541, "step": 107970 }, { "epoch": 3.9239770332146233, "grad_norm": 0.33864468336105347, "learning_rate": 1.5633733887198175e-05, "loss": 0.0592, "step": 107980 }, { "epoch": 3.9243404317174213, "grad_norm": 1.1237084865570068, "learning_rate": 1.5628922729061463e-05, "loss": 0.0847, "step": 107990 }, { "epoch": 3.9247038302202197, "grad_norm": 0.4131975769996643, "learning_rate": 1.562411197469113e-05, "loss": 0.0857, "step": 108000 }, { "epoch": 3.9247038302202197, "eval_loss": 0.31695127487182617, "eval_runtime": 179.0981, "eval_samples_per_second": 41.396, "eval_steps_per_second": 5.176, "eval_wer": 0.13087479804672608, "step": 108000 }, { "epoch": 3.9250672287230177, "grad_norm": 0.4081736207008362, "learning_rate": 1.561930162429445e-05, "loss": 0.0775, "step": 108010 }, { "epoch": 3.9254306272258157, "grad_norm": 0.9158249497413635, "learning_rate": 1.5614491678078673e-05, "loss": 0.0609, "step": 108020 }, { "epoch": 3.925794025728614, "grad_norm": 0.4705490469932556, "learning_rate": 1.5609682136251072e-05, "loss": 0.0512, "step": 108030 }, { "epoch": 3.926157424231412, "grad_norm": 0.3536396622657776, "learning_rate": 1.5604872999018854e-05, "loss": 0.0765, "step": 108040 }, { "epoch": 3.9265208227342105, "grad_norm": 0.530402660369873, "learning_rate": 1.5600064266589232e-05, "loss": 0.0821, "step": 108050 }, { "epoch": 3.9268842212370085, "grad_norm": 0.5621787309646606, "learning_rate": 1.559525593916939e-05, "loss": 0.0988, "step": 108060 }, { "epoch": 3.9272476197398065, "grad_norm": 0.42473259568214417, "learning_rate": 1.5590448016966507e-05, "loss": 0.047, "step": 108070 }, { "epoch": 3.927611018242605, "grad_norm": 0.5450408458709717, "learning_rate": 1.5585640500187746e-05, "loss": 0.0766, "step": 108080 }, { "epoch": 
3.927974416745403, "grad_norm": 0.3374296724796295, "learning_rate": 1.558083338904024e-05, "loss": 0.0931, "step": 108090 }, { "epoch": 3.9283378152482014, "grad_norm": 0.3764584958553314, "learning_rate": 1.5576026683731103e-05, "loss": 0.0805, "step": 108100 }, { "epoch": 3.9287012137509993, "grad_norm": 0.6628488898277283, "learning_rate": 1.5571220384467444e-05, "loss": 0.0795, "step": 108110 }, { "epoch": 3.9290646122537973, "grad_norm": 0.5128569602966309, "learning_rate": 1.556641449145636e-05, "loss": 0.5942, "step": 108120 }, { "epoch": 3.9294280107565958, "grad_norm": 0.4064798057079315, "learning_rate": 1.5561609004904905e-05, "loss": 0.0817, "step": 108130 }, { "epoch": 3.9297914092593937, "grad_norm": 0.38870757818222046, "learning_rate": 1.5556803925020143e-05, "loss": 0.0707, "step": 108140 }, { "epoch": 3.930154807762192, "grad_norm": 0.9279939532279968, "learning_rate": 1.5551999252009093e-05, "loss": 0.0927, "step": 108150 }, { "epoch": 3.93051820626499, "grad_norm": 1.5513790845870972, "learning_rate": 1.5547194986078772e-05, "loss": 0.0582, "step": 108160 }, { "epoch": 3.930881604767788, "grad_norm": 0.7676124572753906, "learning_rate": 1.5542391127436203e-05, "loss": 0.0502, "step": 108170 }, { "epoch": 3.9312450032705866, "grad_norm": 0.5067169070243835, "learning_rate": 1.553758767628834e-05, "loss": 0.0547, "step": 108180 }, { "epoch": 3.9316084017733846, "grad_norm": 1.1675875186920166, "learning_rate": 1.553278463284216e-05, "loss": 0.0684, "step": 108190 }, { "epoch": 3.931971800276183, "grad_norm": 0.763733446598053, "learning_rate": 1.5527981997304604e-05, "loss": 0.0811, "step": 108200 }, { "epoch": 3.932335198778981, "grad_norm": 1.6377514600753784, "learning_rate": 1.55231797698826e-05, "loss": 0.0599, "step": 108210 }, { "epoch": 3.932698597281779, "grad_norm": 0.814492404460907, "learning_rate": 1.5518377950783063e-05, "loss": 0.0676, "step": 108220 }, { "epoch": 3.9330619957845774, "grad_norm": 0.41483911871910095, 
"learning_rate": 1.551357654021289e-05, "loss": 0.0713, "step": 108230 }, { "epoch": 3.933425394287376, "grad_norm": 0.4929620027542114, "learning_rate": 1.550877553837895e-05, "loss": 0.0705, "step": 108240 }, { "epoch": 3.933788792790174, "grad_norm": 0.7828431725502014, "learning_rate": 1.550397494548809e-05, "loss": 0.14, "step": 108250 }, { "epoch": 3.934152191292972, "grad_norm": 0.5891411304473877, "learning_rate": 1.549917476174717e-05, "loss": 0.0672, "step": 108260 }, { "epoch": 3.93451558979577, "grad_norm": 0.6698787212371826, "learning_rate": 1.549437498736301e-05, "loss": 0.065, "step": 108270 }, { "epoch": 3.934878988298568, "grad_norm": 0.500284731388092, "learning_rate": 1.548957562254241e-05, "loss": 0.0762, "step": 108280 }, { "epoch": 3.9352423868013666, "grad_norm": 0.3653343617916107, "learning_rate": 1.5484776667492153e-05, "loss": 0.0865, "step": 108290 }, { "epoch": 3.9356057853041646, "grad_norm": 0.552470862865448, "learning_rate": 1.5479978122419013e-05, "loss": 0.0883, "step": 108300 }, { "epoch": 3.9359691838069626, "grad_norm": 3.6672260761260986, "learning_rate": 1.5475659782554515e-05, "loss": 0.4845, "step": 108310 }, { "epoch": 3.936332582309761, "grad_norm": 0.20884235203266144, "learning_rate": 1.5470862017007484e-05, "loss": 0.0482, "step": 108320 }, { "epoch": 3.936695980812559, "grad_norm": 0.35152187943458557, "learning_rate": 1.5466064662037115e-05, "loss": 0.0674, "step": 108330 }, { "epoch": 3.9370593793153574, "grad_norm": 0.6102438569068909, "learning_rate": 1.5461267717850096e-05, "loss": 0.0861, "step": 108340 }, { "epoch": 3.9374227778181554, "grad_norm": 0.943453311920166, "learning_rate": 1.545647118465311e-05, "loss": 0.0787, "step": 108350 }, { "epoch": 3.9377861763209534, "grad_norm": 0.4639219641685486, "learning_rate": 1.545167506265282e-05, "loss": 0.0676, "step": 108360 }, { "epoch": 3.938149574823752, "grad_norm": 0.5150544047355652, "learning_rate": 1.544687935205588e-05, "loss": 0.0651, "step": 108370 }, 
{ "epoch": 3.93851297332655, "grad_norm": 0.5360589027404785, "learning_rate": 1.5442084053068927e-05, "loss": 0.0552, "step": 108380 }, { "epoch": 3.9388763718293482, "grad_norm": 0.6416401267051697, "learning_rate": 1.543728916589856e-05, "loss": 0.0997, "step": 108390 }, { "epoch": 3.9392397703321462, "grad_norm": 0.6596519351005554, "learning_rate": 1.5432494690751383e-05, "loss": 0.0774, "step": 108400 }, { "epoch": 3.939603168834944, "grad_norm": 0.40156471729278564, "learning_rate": 1.5427700627833958e-05, "loss": 0.0641, "step": 108410 }, { "epoch": 3.9399665673377426, "grad_norm": 1.4535303115844727, "learning_rate": 1.5422906977352857e-05, "loss": 0.063, "step": 108420 }, { "epoch": 3.9403299658405406, "grad_norm": 0.359152615070343, "learning_rate": 1.5418113739514623e-05, "loss": 0.0632, "step": 108430 }, { "epoch": 3.940693364343339, "grad_norm": 0.9958677291870117, "learning_rate": 1.5413320914525778e-05, "loss": 0.084, "step": 108440 }, { "epoch": 3.941056762846137, "grad_norm": 0.8245310187339783, "learning_rate": 1.5408528502592823e-05, "loss": 0.0915, "step": 108450 }, { "epoch": 3.941420161348935, "grad_norm": 3.544517755508423, "learning_rate": 1.540373650392224e-05, "loss": 0.0664, "step": 108460 }, { "epoch": 3.9417835598517335, "grad_norm": 1.7801954746246338, "learning_rate": 1.5398944918720516e-05, "loss": 0.0644, "step": 108470 }, { "epoch": 3.9421469583545314, "grad_norm": 0.5512742400169373, "learning_rate": 1.5394153747194096e-05, "loss": 0.0683, "step": 108480 }, { "epoch": 3.94251035685733, "grad_norm": 0.4868443012237549, "learning_rate": 1.5389362989549413e-05, "loss": 1.0657, "step": 108490 }, { "epoch": 3.942873755360128, "grad_norm": 0.46926984190940857, "learning_rate": 1.5384572645992877e-05, "loss": 0.1335, "step": 108500 }, { "epoch": 3.943237153862926, "grad_norm": 1.9577665328979492, "learning_rate": 1.5379782716730896e-05, "loss": 0.0659, "step": 108510 }, { "epoch": 3.9436005523657243, "grad_norm": 2.820413827896118, 
"learning_rate": 1.5374993201969855e-05, "loss": 0.0566, "step": 108520 }, { "epoch": 3.9439639508685227, "grad_norm": 0.4880913197994232, "learning_rate": 1.5370204101916107e-05, "loss": 0.0815, "step": 108530 }, { "epoch": 3.9443273493713207, "grad_norm": 0.7637322545051575, "learning_rate": 1.5365415416776007e-05, "loss": 0.0615, "step": 108540 }, { "epoch": 3.9446907478741187, "grad_norm": 0.6059696078300476, "learning_rate": 1.536062714675587e-05, "loss": 0.0647, "step": 108550 }, { "epoch": 3.945054146376917, "grad_norm": 0.6734591126441956, "learning_rate": 1.5355839292062008e-05, "loss": 0.0598, "step": 108560 }, { "epoch": 3.945417544879715, "grad_norm": 1.2149147987365723, "learning_rate": 1.5351051852900726e-05, "loss": 2.337, "step": 108570 }, { "epoch": 3.9457809433825135, "grad_norm": 0.5000627636909485, "learning_rate": 1.534626482947829e-05, "loss": 0.0783, "step": 108580 }, { "epoch": 3.9461443418853115, "grad_norm": 0.5199702978134155, "learning_rate": 1.5341478222000944e-05, "loss": 0.0833, "step": 108590 }, { "epoch": 3.9465077403881095, "grad_norm": 0.526923656463623, "learning_rate": 1.5336692030674942e-05, "loss": 0.1112, "step": 108600 }, { "epoch": 3.9465077403881095, "eval_loss": 0.30439403653144836, "eval_runtime": 179.538, "eval_samples_per_second": 41.295, "eval_steps_per_second": 5.163, "eval_wer": 0.1313104725252782, "step": 108600 }, { "epoch": 3.946871138890908, "grad_norm": 0.7700692415237427, "learning_rate": 1.5331906255706495e-05, "loss": 0.0835, "step": 108610 }, { "epoch": 3.947234537393706, "grad_norm": 1.3175575733184814, "learning_rate": 1.5327120897301817e-05, "loss": 0.0635, "step": 108620 }, { "epoch": 3.9475979358965043, "grad_norm": 0.5062894225120544, "learning_rate": 1.5322335955667077e-05, "loss": 0.0665, "step": 108630 }, { "epoch": 3.9479613343993023, "grad_norm": 2.1600241661071777, "learning_rate": 1.531755143100845e-05, "loss": 0.079, "step": 108640 }, { "epoch": 3.9483247329021003, "grad_norm": 
1.095664143562317, "learning_rate": 1.5312767323532074e-05, "loss": 0.0874, "step": 108650 }, { "epoch": 3.9486881314048987, "grad_norm": 2.105536699295044, "learning_rate": 1.5307983633444096e-05, "loss": 0.0709, "step": 108660 }, { "epoch": 3.9490515299076967, "grad_norm": 0.4600623548030853, "learning_rate": 1.5303200360950618e-05, "loss": 0.0607, "step": 108670 }, { "epoch": 3.949414928410495, "grad_norm": 4.388635158538818, "learning_rate": 1.5298417506257727e-05, "loss": 0.0691, "step": 108680 }, { "epoch": 3.949778326913293, "grad_norm": 0.7141379714012146, "learning_rate": 1.5293635069571516e-05, "loss": 0.0948, "step": 108690 }, { "epoch": 3.950141725416091, "grad_norm": 0.6126868724822998, "learning_rate": 1.5288853051098028e-05, "loss": 0.0655, "step": 108700 }, { "epoch": 3.9505051239188895, "grad_norm": 0.4774114787578583, "learning_rate": 1.5284071451043315e-05, "loss": 0.1103, "step": 108710 }, { "epoch": 3.9508685224216875, "grad_norm": 0.7860293388366699, "learning_rate": 1.5279290269613393e-05, "loss": 0.0787, "step": 108720 }, { "epoch": 3.951231920924486, "grad_norm": 0.3550005853176117, "learning_rate": 1.5274509507014263e-05, "loss": 0.09, "step": 108730 }, { "epoch": 3.951595319427284, "grad_norm": 0.42200934886932373, "learning_rate": 1.5269729163451924e-05, "loss": 0.0617, "step": 108740 }, { "epoch": 3.951958717930082, "grad_norm": 0.6047300696372986, "learning_rate": 1.5264949239132327e-05, "loss": 0.0645, "step": 108750 }, { "epoch": 3.9523221164328803, "grad_norm": 0.3704775869846344, "learning_rate": 1.5260169734261432e-05, "loss": 0.0705, "step": 108760 }, { "epoch": 3.9526855149356783, "grad_norm": 0.5324059724807739, "learning_rate": 1.5255390649045165e-05, "loss": 0.129, "step": 108770 }, { "epoch": 3.9530489134384768, "grad_norm": 0.5403017401695251, "learning_rate": 1.5250611983689448e-05, "loss": 0.0975, "step": 108780 }, { "epoch": 3.9534123119412747, "grad_norm": 0.30867788195610046, "learning_rate": 1.5245833738400173e-05, 
"loss": 0.0778, "step": 108790 }, { "epoch": 3.9537757104440727, "grad_norm": 0.4358409643173218, "learning_rate": 1.5241055913383212e-05, "loss": 0.0818, "step": 108800 }, { "epoch": 3.954139108946871, "grad_norm": 0.5556782484054565, "learning_rate": 1.5236278508844431e-05, "loss": 0.0898, "step": 108810 }, { "epoch": 3.9545025074496696, "grad_norm": 0.4871123135089874, "learning_rate": 1.523150152498967e-05, "loss": 0.0775, "step": 108820 }, { "epoch": 3.9548659059524676, "grad_norm": 0.6219027042388916, "learning_rate": 1.5226724962024755e-05, "loss": 0.216, "step": 108830 }, { "epoch": 3.9552293044552655, "grad_norm": 0.5182541012763977, "learning_rate": 1.5221948820155483e-05, "loss": 0.068, "step": 108840 }, { "epoch": 3.955592702958064, "grad_norm": 0.5644829869270325, "learning_rate": 1.521717309958765e-05, "loss": 0.0755, "step": 108850 }, { "epoch": 3.955956101460862, "grad_norm": 0.5542482733726501, "learning_rate": 1.521239780052702e-05, "loss": 0.1001, "step": 108860 }, { "epoch": 3.9563194999636604, "grad_norm": 0.5790029764175415, "learning_rate": 1.520762292317934e-05, "loss": 0.0521, "step": 108870 }, { "epoch": 3.9566828984664584, "grad_norm": 0.33317145705223083, "learning_rate": 1.5202848467750353e-05, "loss": 0.056, "step": 108880 }, { "epoch": 3.9570462969692564, "grad_norm": 0.40172079205513, "learning_rate": 1.5198074434445762e-05, "loss": 0.0723, "step": 108890 }, { "epoch": 3.957409695472055, "grad_norm": 0.7119815349578857, "learning_rate": 1.5193300823471273e-05, "loss": 0.0804, "step": 108900 }, { "epoch": 3.9577730939748528, "grad_norm": 0.5087375044822693, "learning_rate": 1.5188527635032555e-05, "loss": 0.0448, "step": 108910 }, { "epoch": 3.958136492477651, "grad_norm": 0.548531174659729, "learning_rate": 1.5183754869335277e-05, "loss": 0.0689, "step": 108920 }, { "epoch": 3.958499890980449, "grad_norm": 0.27257245779037476, "learning_rate": 1.5178982526585073e-05, "loss": 0.0649, "step": 108930 }, { "epoch": 3.958863289483247, 
"grad_norm": 0.3826998770236969, "learning_rate": 1.5174210606987563e-05, "loss": 0.0729, "step": 108940 }, { "epoch": 3.9592266879860456, "grad_norm": 0.6397566795349121, "learning_rate": 1.5169439110748364e-05, "loss": 0.0691, "step": 108950 }, { "epoch": 3.9595900864888436, "grad_norm": 0.2969396114349365, "learning_rate": 1.5164668038073055e-05, "loss": 0.0767, "step": 108960 }, { "epoch": 3.959953484991642, "grad_norm": 0.6821795105934143, "learning_rate": 1.5159897389167204e-05, "loss": 0.0525, "step": 108970 }, { "epoch": 3.96031688349444, "grad_norm": 0.7459032535552979, "learning_rate": 1.5155127164236369e-05, "loss": 0.057, "step": 108980 }, { "epoch": 3.960680281997238, "grad_norm": 0.3866112232208252, "learning_rate": 1.5150357363486067e-05, "loss": 0.0642, "step": 108990 }, { "epoch": 3.9610436805000364, "grad_norm": 0.5428589582443237, "learning_rate": 1.5145587987121826e-05, "loss": 0.0698, "step": 109000 }, { "epoch": 3.9614070790028344, "grad_norm": 0.5992247462272644, "learning_rate": 1.514081903534914e-05, "loss": 0.0881, "step": 109010 }, { "epoch": 3.961770477505633, "grad_norm": 0.5933393836021423, "learning_rate": 1.5136050508373482e-05, "loss": 0.0744, "step": 109020 }, { "epoch": 3.962133876008431, "grad_norm": 1.02914297580719, "learning_rate": 1.5131282406400304e-05, "loss": 0.0783, "step": 109030 }, { "epoch": 3.962497274511229, "grad_norm": 0.27180367708206177, "learning_rate": 1.5126514729635063e-05, "loss": 0.078, "step": 109040 }, { "epoch": 3.9628606730140272, "grad_norm": 1.8903266191482544, "learning_rate": 1.5121747478283166e-05, "loss": 0.0744, "step": 109050 }, { "epoch": 3.963224071516825, "grad_norm": 0.5451824069023132, "learning_rate": 1.5116980652550028e-05, "loss": 0.0744, "step": 109060 }, { "epoch": 3.9635874700196236, "grad_norm": 0.6950256824493408, "learning_rate": 1.511221425264103e-05, "loss": 0.4433, "step": 109070 }, { "epoch": 3.9639508685224216, "grad_norm": 1.0612424612045288, "learning_rate": 
1.5107448278761533e-05, "loss": 0.0543, "step": 109080 }, { "epoch": 3.9643142670252196, "grad_norm": 0.3324570953845978, "learning_rate": 1.5102682731116893e-05, "loss": 0.0761, "step": 109090 }, { "epoch": 3.964677665528018, "grad_norm": 0.48673510551452637, "learning_rate": 1.5097917609912443e-05, "loss": 0.0582, "step": 109100 }, { "epoch": 3.9650410640308165, "grad_norm": 2.974033832550049, "learning_rate": 1.5093152915353492e-05, "loss": 0.1036, "step": 109110 }, { "epoch": 3.9654044625336144, "grad_norm": 0.7069442868232727, "learning_rate": 1.5088388647645335e-05, "loss": 0.0624, "step": 109120 }, { "epoch": 3.9657678610364124, "grad_norm": 0.3872590661048889, "learning_rate": 1.508362480699324e-05, "loss": 0.0643, "step": 109130 }, { "epoch": 3.966131259539211, "grad_norm": 4.4336676597595215, "learning_rate": 1.5078861393602467e-05, "loss": 0.0811, "step": 109140 }, { "epoch": 3.966494658042009, "grad_norm": 0.5968475341796875, "learning_rate": 1.5074098407678267e-05, "loss": 0.0612, "step": 109150 }, { "epoch": 3.9668580565448073, "grad_norm": 0.8365516662597656, "learning_rate": 1.5069335849425845e-05, "loss": 0.0629, "step": 109160 }, { "epoch": 3.9672214550476053, "grad_norm": 1.566721796989441, "learning_rate": 1.506457371905041e-05, "loss": 3.8251, "step": 109170 }, { "epoch": 3.9675848535504032, "grad_norm": 0.6778194308280945, "learning_rate": 1.5059812016757138e-05, "loss": 0.062, "step": 109180 }, { "epoch": 3.9679482520532017, "grad_norm": 3.5872347354888916, "learning_rate": 1.5055050742751198e-05, "loss": 0.0696, "step": 109190 }, { "epoch": 3.9683116505559997, "grad_norm": 1.0359376668930054, "learning_rate": 1.5050289897237742e-05, "loss": 0.0769, "step": 109200 }, { "epoch": 3.9683116505559997, "eval_loss": 0.302498459815979, "eval_runtime": 180.0974, "eval_samples_per_second": 41.167, "eval_steps_per_second": 5.147, "eval_wer": 0.1293045546136112, "step": 109200 }, { "epoch": 3.968675049058798, "grad_norm": 0.8150886297225952, 
"learning_rate": 1.5045529480421893e-05, "loss": 0.0748, "step": 109210 }, { "epoch": 3.969038447561596, "grad_norm": 0.4916574954986572, "learning_rate": 1.5040769492508761e-05, "loss": 0.0614, "step": 109220 }, { "epoch": 3.969401846064394, "grad_norm": 0.4304117262363434, "learning_rate": 1.5036009933703433e-05, "loss": 0.0681, "step": 109230 }, { "epoch": 3.9697652445671925, "grad_norm": 1.1975165605545044, "learning_rate": 1.5031250804210986e-05, "loss": 0.0735, "step": 109240 }, { "epoch": 3.9701286430699905, "grad_norm": 0.8331923484802246, "learning_rate": 1.5026492104236478e-05, "loss": 0.0701, "step": 109250 }, { "epoch": 3.970492041572789, "grad_norm": 0.38081446290016174, "learning_rate": 1.5021733833984936e-05, "loss": 0.0736, "step": 109260 }, { "epoch": 3.970855440075587, "grad_norm": 1.5401438474655151, "learning_rate": 1.5016975993661374e-05, "loss": 0.0549, "step": 109270 }, { "epoch": 3.971218838578385, "grad_norm": 0.4230031371116638, "learning_rate": 1.5012218583470803e-05, "loss": 0.0806, "step": 109280 }, { "epoch": 3.9715822370811833, "grad_norm": 0.6398650407791138, "learning_rate": 1.5007461603618197e-05, "loss": 0.0674, "step": 109290 }, { "epoch": 3.9719456355839813, "grad_norm": 0.9541281461715698, "learning_rate": 1.5002705054308518e-05, "loss": 0.0754, "step": 109300 }, { "epoch": 3.9723090340867797, "grad_norm": 0.939696192741394, "learning_rate": 1.4997948935746708e-05, "loss": 0.0699, "step": 109310 }, { "epoch": 3.9726724325895777, "grad_norm": 0.3460374176502228, "learning_rate": 1.4993193248137682e-05, "loss": 1.1332, "step": 109320 }, { "epoch": 3.9730358310923757, "grad_norm": 0.606484055519104, "learning_rate": 1.498843799168636e-05, "loss": 0.0736, "step": 109330 }, { "epoch": 3.973399229595174, "grad_norm": 0.8378924131393433, "learning_rate": 1.4983683166597629e-05, "loss": 0.7118, "step": 109340 }, { "epoch": 3.973762628097972, "grad_norm": 1.4549202919006348, "learning_rate": 1.497892877307635e-05, "loss": 0.0853, 
"step": 109350 }, { "epoch": 3.9741260266007705, "grad_norm": 0.3216412365436554, "learning_rate": 1.4974174811327373e-05, "loss": 0.0841, "step": 109360 }, { "epoch": 3.9744894251035685, "grad_norm": 2.007521867752075, "learning_rate": 1.4969421281555525e-05, "loss": 0.0604, "step": 109370 }, { "epoch": 3.9748528236063665, "grad_norm": 0.438717782497406, "learning_rate": 1.4964668183965636e-05, "loss": 0.0626, "step": 109380 }, { "epoch": 3.975216222109165, "grad_norm": 0.6095426082611084, "learning_rate": 1.4959915518762486e-05, "loss": 0.0828, "step": 109390 }, { "epoch": 3.9755796206119633, "grad_norm": 0.5519289374351501, "learning_rate": 1.4955163286150853e-05, "loss": 0.0595, "step": 109400 }, { "epoch": 3.9759430191147613, "grad_norm": 0.5407907366752625, "learning_rate": 1.4950411486335497e-05, "loss": 0.294, "step": 109410 }, { "epoch": 3.9763064176175593, "grad_norm": 0.3119775354862213, "learning_rate": 1.4945660119521144e-05, "loss": 0.0772, "step": 109420 }, { "epoch": 3.9766698161203577, "grad_norm": 1.3484299182891846, "learning_rate": 1.4940909185912527e-05, "loss": 0.1017, "step": 109430 }, { "epoch": 3.9770332146231557, "grad_norm": 0.6218773126602173, "learning_rate": 1.493615868571435e-05, "loss": 0.0633, "step": 109440 }, { "epoch": 3.977396613125954, "grad_norm": 0.6401359438896179, "learning_rate": 1.4931408619131285e-05, "loss": 0.0718, "step": 109450 }, { "epoch": 3.977760011628752, "grad_norm": 32.70407485961914, "learning_rate": 1.4926658986367986e-05, "loss": 0.439, "step": 109460 }, { "epoch": 3.97812341013155, "grad_norm": 0.718180775642395, "learning_rate": 1.4921909787629124e-05, "loss": 0.0747, "step": 109470 }, { "epoch": 3.9784868086343486, "grad_norm": 0.38643378019332886, "learning_rate": 1.491716102311931e-05, "loss": 0.064, "step": 109480 }, { "epoch": 3.9788502071371465, "grad_norm": 0.7724172472953796, "learning_rate": 1.4912412693043155e-05, "loss": 0.074, "step": 109490 }, { "epoch": 3.979213605639945, "grad_norm": 
0.7316296100616455, "learning_rate": 1.4907664797605242e-05, "loss": 0.0763, "step": 109500 }, { "epoch": 3.979577004142743, "grad_norm": 0.456412136554718, "learning_rate": 1.4902917337010133e-05, "loss": 0.064, "step": 109510 }, { "epoch": 3.979940402645541, "grad_norm": 0.3653579354286194, "learning_rate": 1.4898170311462404e-05, "loss": 0.1424, "step": 109520 }, { "epoch": 3.9803038011483394, "grad_norm": 0.3944752514362335, "learning_rate": 1.4893423721166572e-05, "loss": 0.0611, "step": 109530 }, { "epoch": 3.9806671996511374, "grad_norm": 2.8756213188171387, "learning_rate": 1.4888677566327153e-05, "loss": 0.0884, "step": 109540 }, { "epoch": 3.981030598153936, "grad_norm": 1.020673394203186, "learning_rate": 1.4883931847148642e-05, "loss": 0.0708, "step": 109550 }, { "epoch": 3.9813939966567338, "grad_norm": 0.3214241862297058, "learning_rate": 1.4879186563835504e-05, "loss": 0.0694, "step": 109560 }, { "epoch": 3.9817573951595318, "grad_norm": 0.41782814264297485, "learning_rate": 1.4874441716592216e-05, "loss": 0.0709, "step": 109570 }, { "epoch": 3.98212079366233, "grad_norm": 0.6422412395477295, "learning_rate": 1.4869697305623209e-05, "loss": 0.0669, "step": 109580 }, { "epoch": 3.982484192165128, "grad_norm": 0.4334978461265564, "learning_rate": 1.4864953331132903e-05, "loss": 0.0708, "step": 109590 }, { "epoch": 3.9828475906679266, "grad_norm": 0.5081255435943604, "learning_rate": 1.4860209793325693e-05, "loss": 0.0863, "step": 109600 }, { "epoch": 3.9832109891707246, "grad_norm": 0.4459257125854492, "learning_rate": 1.4855466692405959e-05, "loss": 0.0794, "step": 109610 }, { "epoch": 3.9835743876735226, "grad_norm": 15.502050399780273, "learning_rate": 1.4850724028578077e-05, "loss": 0.1751, "step": 109620 }, { "epoch": 3.983937786176321, "grad_norm": 0.6289138793945312, "learning_rate": 1.4845981802046388e-05, "loss": 0.0545, "step": 109630 }, { "epoch": 3.984301184679119, "grad_norm": 0.4336656630039215, "learning_rate": 1.4841240013015217e-05, 
"loss": 0.0908, "step": 109640 }, { "epoch": 3.9846645831819174, "grad_norm": 0.7223489880561829, "learning_rate": 1.4836498661688857e-05, "loss": 0.0717, "step": 109650 }, { "epoch": 3.9850279816847154, "grad_norm": 0.6017643809318542, "learning_rate": 1.483175774827162e-05, "loss": 0.0628, "step": 109660 }, { "epoch": 3.9853913801875134, "grad_norm": 0.5750892758369446, "learning_rate": 1.4827017272967758e-05, "loss": 0.0671, "step": 109670 }, { "epoch": 3.985754778690312, "grad_norm": 0.5592082738876343, "learning_rate": 1.482227723598153e-05, "loss": 0.0633, "step": 109680 }, { "epoch": 3.9861181771931102, "grad_norm": 0.4206995964050293, "learning_rate": 1.4817537637517162e-05, "loss": 0.0846, "step": 109690 }, { "epoch": 3.986481575695908, "grad_norm": 1.356323003768921, "learning_rate": 1.4812798477778859e-05, "loss": 0.083, "step": 109700 }, { "epoch": 3.986844974198706, "grad_norm": 0.34790241718292236, "learning_rate": 1.4808059756970832e-05, "loss": 0.0781, "step": 109710 }, { "epoch": 3.9872083727015046, "grad_norm": 0.349802702665329, "learning_rate": 1.4803321475297246e-05, "loss": 0.087, "step": 109720 }, { "epoch": 3.9875717712043026, "grad_norm": 0.7321351766586304, "learning_rate": 1.4798583632962259e-05, "loss": 0.0776, "step": 109730 }, { "epoch": 3.987935169707101, "grad_norm": 0.3417205512523651, "learning_rate": 1.4793846230170009e-05, "loss": 0.0593, "step": 109740 }, { "epoch": 3.988298568209899, "grad_norm": 0.5319487452507019, "learning_rate": 1.4789109267124598e-05, "loss": 0.0981, "step": 109750 }, { "epoch": 3.988661966712697, "grad_norm": 0.4192574918270111, "learning_rate": 1.478437274403015e-05, "loss": 0.0628, "step": 109760 }, { "epoch": 3.9890253652154954, "grad_norm": 0.5519381165504456, "learning_rate": 1.4779636661090731e-05, "loss": 0.059, "step": 109770 }, { "epoch": 3.9893887637182934, "grad_norm": 2.7939960956573486, "learning_rate": 1.4774901018510407e-05, "loss": 0.063, "step": 109780 }, { "epoch": 3.989752162221092, 
"grad_norm": 0.9663445353507996, "learning_rate": 1.4770165816493214e-05, "loss": 2.4534, "step": 109790 }, { "epoch": 3.99011556072389, "grad_norm": 2.655893325805664, "learning_rate": 1.4765431055243173e-05, "loss": 0.0592, "step": 109800 }, { "epoch": 3.99011556072389, "eval_loss": 0.3074624836444855, "eval_runtime": 179.5996, "eval_samples_per_second": 41.281, "eval_steps_per_second": 5.161, "eval_wer": 0.13042097046490098, "step": 109800 }, { "epoch": 3.990478959226688, "grad_norm": 146.27210998535156, "learning_rate": 1.4760696734964296e-05, "loss": 2.3423, "step": 109810 }, { "epoch": 3.9908423577294863, "grad_norm": 1.0552388429641724, "learning_rate": 1.4755962855860572e-05, "loss": 0.0616, "step": 109820 }, { "epoch": 3.9912057562322842, "grad_norm": 0.38608866930007935, "learning_rate": 1.4751229418135956e-05, "loss": 0.0719, "step": 109830 }, { "epoch": 3.9915691547350827, "grad_norm": 0.4337970018386841, "learning_rate": 1.474649642199439e-05, "loss": 0.0761, "step": 109840 }, { "epoch": 3.9919325532378807, "grad_norm": 0.6015897989273071, "learning_rate": 1.4741763867639821e-05, "loss": 0.0948, "step": 109850 }, { "epoch": 3.9922959517406786, "grad_norm": 0.8983295559883118, "learning_rate": 1.4737031755276148e-05, "loss": 0.0706, "step": 109860 }, { "epoch": 3.992659350243477, "grad_norm": 2.648515224456787, "learning_rate": 1.4732300085107265e-05, "loss": 0.065, "step": 109870 }, { "epoch": 3.993022748746275, "grad_norm": 0.7430446147918701, "learning_rate": 1.4727568857337032e-05, "loss": 0.0688, "step": 109880 }, { "epoch": 3.9933861472490735, "grad_norm": 0.7264726161956787, "learning_rate": 1.47228380721693e-05, "loss": 0.0792, "step": 109890 }, { "epoch": 3.9937495457518715, "grad_norm": 0.5347972512245178, "learning_rate": 1.4718107729807922e-05, "loss": 0.0627, "step": 109900 }, { "epoch": 3.9941129442546695, "grad_norm": 0.5735613107681274, "learning_rate": 1.4713377830456696e-05, "loss": 0.1417, "step": 109910 }, { "epoch": 
3.994476342757468, "grad_norm": 1.1950944662094116, "learning_rate": 1.4708648374319419e-05, "loss": 0.0629, "step": 109920 }, { "epoch": 3.994839741260266, "grad_norm": 0.30401110649108887, "learning_rate": 1.4703919361599868e-05, "loss": 0.0715, "step": 109930 }, { "epoch": 3.9952031397630643, "grad_norm": 1.0371030569076538, "learning_rate": 1.4699190792501789e-05, "loss": 0.0693, "step": 109940 }, { "epoch": 3.9955665382658623, "grad_norm": 1.0880746841430664, "learning_rate": 1.4694462667228936e-05, "loss": 0.1114, "step": 109950 }, { "epoch": 3.9959299367686603, "grad_norm": 0.6585062146186829, "learning_rate": 1.468973498598502e-05, "loss": 0.0882, "step": 109960 }, { "epoch": 3.9962933352714587, "grad_norm": 0.39981094002723694, "learning_rate": 1.4685007748973742e-05, "loss": 0.0591, "step": 109970 }, { "epoch": 3.996656733774257, "grad_norm": 1.2546730041503906, "learning_rate": 1.4680280956398778e-05, "loss": 0.0657, "step": 109980 }, { "epoch": 3.997020132277055, "grad_norm": 0.3919306695461273, "learning_rate": 1.4675554608463776e-05, "loss": 0.2914, "step": 109990 }, { "epoch": 3.997383530779853, "grad_norm": 3.1263980865478516, "learning_rate": 1.4670828705372408e-05, "loss": 0.0869, "step": 110000 }, { "epoch": 3.9977469292826515, "grad_norm": 0.8971359729766846, "learning_rate": 1.4666103247328276e-05, "loss": 0.1294, "step": 110010 }, { "epoch": 3.9981103277854495, "grad_norm": 0.5809153914451599, "learning_rate": 1.4661378234534986e-05, "loss": 0.0759, "step": 110020 }, { "epoch": 3.998473726288248, "grad_norm": 1.7673100233078003, "learning_rate": 1.4656653667196112e-05, "loss": 0.0733, "step": 110030 }, { "epoch": 3.998837124791046, "grad_norm": 0.23588208854198456, "learning_rate": 1.4651929545515248e-05, "loss": 0.0677, "step": 110040 }, { "epoch": 3.999200523293844, "grad_norm": 0.581369161605835, "learning_rate": 1.4647205869695913e-05, "loss": 0.0658, "step": 110050 }, { "epoch": 3.9995639217966423, "grad_norm": 0.7178440093994141, 
"learning_rate": 1.4642482639941643e-05, "loss": 0.0552, "step": 110060 }, { "epoch": 3.9999273202994403, "grad_norm": 0.5883386135101318, "learning_rate": 1.4637759856455947e-05, "loss": 0.0751, "step": 110070 }, { "epoch": 4.000290718802239, "grad_norm": 0.4838172197341919, "learning_rate": 1.4633037519442297e-05, "loss": 0.0578, "step": 110080 }, { "epoch": 4.000654117305037, "grad_norm": 0.20663967728614807, "learning_rate": 1.4628315629104183e-05, "loss": 0.0619, "step": 110090 }, { "epoch": 4.001017515807835, "grad_norm": 0.6073209047317505, "learning_rate": 1.4623594185645052e-05, "loss": 0.0701, "step": 110100 }, { "epoch": 4.001380914310633, "grad_norm": 0.21287468075752258, "learning_rate": 1.4618873189268322e-05, "loss": 0.0609, "step": 110110 }, { "epoch": 4.001744312813432, "grad_norm": 0.5142436623573303, "learning_rate": 1.4614152640177414e-05, "loss": 0.0583, "step": 110120 }, { "epoch": 4.0021077113162296, "grad_norm": 0.4657193124294281, "learning_rate": 1.4609432538575705e-05, "loss": 0.4652, "step": 110130 }, { "epoch": 4.0024711098190275, "grad_norm": 0.3312014043331146, "learning_rate": 1.4604712884666588e-05, "loss": 0.0592, "step": 110140 }, { "epoch": 4.0028345083218255, "grad_norm": 0.24331605434417725, "learning_rate": 1.4599993678653404e-05, "loss": 0.5292, "step": 110150 }, { "epoch": 4.0031979068246235, "grad_norm": 0.34533190727233887, "learning_rate": 1.4595274920739487e-05, "loss": 0.0605, "step": 110160 }, { "epoch": 4.003561305327422, "grad_norm": 0.6242371201515198, "learning_rate": 1.4590556611128161e-05, "loss": 0.0617, "step": 110170 }, { "epoch": 4.00392470383022, "grad_norm": 0.3273410201072693, "learning_rate": 1.4585838750022707e-05, "loss": 0.0463, "step": 110180 }, { "epoch": 4.004288102333018, "grad_norm": 0.7081814408302307, "learning_rate": 1.4581121337626402e-05, "loss": 0.0493, "step": 110190 }, { "epoch": 4.004651500835816, "grad_norm": 59.34846496582031, "learning_rate": 1.4576404374142514e-05, "loss": 0.6547, 
"step": 110200 }, { "epoch": 4.005014899338614, "grad_norm": 0.32631370425224304, "learning_rate": 1.457168785977428e-05, "loss": 0.0666, "step": 110210 }, { "epoch": 4.005378297841413, "grad_norm": 1.6881418228149414, "learning_rate": 1.4566971794724904e-05, "loss": 0.0651, "step": 110220 }, { "epoch": 4.005741696344211, "grad_norm": 3.0024871826171875, "learning_rate": 1.4562256179197595e-05, "loss": 0.0681, "step": 110230 }, { "epoch": 4.006105094847009, "grad_norm": 0.28225383162498474, "learning_rate": 1.4557541013395526e-05, "loss": 0.0707, "step": 110240 }, { "epoch": 4.006468493349807, "grad_norm": 0.6246106624603271, "learning_rate": 1.4552826297521871e-05, "loss": 0.0594, "step": 110250 }, { "epoch": 4.006831891852605, "grad_norm": 1.095879077911377, "learning_rate": 1.4548112031779751e-05, "loss": 0.0722, "step": 110260 }, { "epoch": 4.007195290355404, "grad_norm": 0.6250698566436768, "learning_rate": 1.4543398216372295e-05, "loss": 0.0604, "step": 110270 }, { "epoch": 4.007558688858202, "grad_norm": 0.6139402389526367, "learning_rate": 1.4538684851502615e-05, "loss": 0.1047, "step": 110280 }, { "epoch": 4.007922087361, "grad_norm": 0.4828980267047882, "learning_rate": 1.4533971937373776e-05, "loss": 0.0574, "step": 110290 }, { "epoch": 4.008285485863798, "grad_norm": 0.7050805687904358, "learning_rate": 1.4529259474188844e-05, "loss": 0.0706, "step": 110300 }, { "epoch": 4.008648884366597, "grad_norm": 0.49131813645362854, "learning_rate": 1.4524547462150876e-05, "loss": 0.0823, "step": 110310 }, { "epoch": 4.009012282869395, "grad_norm": 2.5979621410369873, "learning_rate": 1.4519835901462878e-05, "loss": 0.06, "step": 110320 }, { "epoch": 4.009375681372193, "grad_norm": 1.8901911973953247, "learning_rate": 1.4515124792327861e-05, "loss": 0.0635, "step": 110330 }, { "epoch": 4.009739079874991, "grad_norm": 2.455570697784424, "learning_rate": 1.4510414134948814e-05, "loss": 0.0487, "step": 110340 }, { "epoch": 4.010102478377789, "grad_norm": 
0.5497618913650513, "learning_rate": 1.4505703929528707e-05, "loss": 0.0578, "step": 110350 }, { "epoch": 4.010465876880588, "grad_norm": 0.5679813027381897, "learning_rate": 1.4500994176270471e-05, "loss": 0.0698, "step": 110360 }, { "epoch": 4.010829275383386, "grad_norm": 0.7943199276924133, "learning_rate": 1.4496284875377036e-05, "loss": 0.0686, "step": 110370 }, { "epoch": 4.011192673886184, "grad_norm": 0.9593531489372253, "learning_rate": 1.4492046891512567e-05, "loss": 3.0162, "step": 110380 }, { "epoch": 4.011556072388982, "grad_norm": 2.073781967163086, "learning_rate": 1.4487338450671259e-05, "loss": 0.0471, "step": 110390 }, { "epoch": 4.01191947089178, "grad_norm": 0.6789143681526184, "learning_rate": 1.4482630462783132e-05, "loss": 0.0593, "step": 110400 }, { "epoch": 4.01191947089178, "eval_loss": 0.3222469091415405, "eval_runtime": 179.3672, "eval_samples_per_second": 41.334, "eval_steps_per_second": 5.168, "eval_wer": 0.13037558770671848, "step": 110400 }, { "epoch": 4.0122828693945785, "grad_norm": 0.3693692088127136, "learning_rate": 1.4477922928051047e-05, "loss": 0.0701, "step": 110410 }, { "epoch": 4.012646267897376, "grad_norm": 0.8600411415100098, "learning_rate": 1.4473215846677818e-05, "loss": 0.0631, "step": 110420 }, { "epoch": 4.013009666400174, "grad_norm": 1.3148378133773804, "learning_rate": 1.4468509218866261e-05, "loss": 0.0821, "step": 110430 }, { "epoch": 4.013373064902972, "grad_norm": 0.508591890335083, "learning_rate": 1.446380304481918e-05, "loss": 0.069, "step": 110440 }, { "epoch": 4.01373646340577, "grad_norm": 2.257439136505127, "learning_rate": 1.4459097324739329e-05, "loss": 0.0722, "step": 110450 }, { "epoch": 4.014099861908569, "grad_norm": 0.6121902465820312, "learning_rate": 1.4454392058829472e-05, "loss": 0.1137, "step": 110460 }, { "epoch": 4.014463260411367, "grad_norm": 0.5297804474830627, "learning_rate": 1.4449687247292349e-05, "loss": 0.0496, "step": 110470 }, { "epoch": 4.014826658914165, "grad_norm": 
1.0392407178878784, "learning_rate": 1.4444982890330653e-05, "loss": 0.05, "step": 110480 }, { "epoch": 4.015190057416963, "grad_norm": 0.29816481471061707, "learning_rate": 1.4440278988147087e-05, "loss": 0.0524, "step": 110490 }, { "epoch": 4.015553455919761, "grad_norm": 0.9273152947425842, "learning_rate": 1.4435575540944332e-05, "loss": 0.2128, "step": 110500 }, { "epoch": 4.01591685442256, "grad_norm": 0.6994959712028503, "learning_rate": 1.4430872548925046e-05, "loss": 0.0778, "step": 110510 }, { "epoch": 4.016280252925358, "grad_norm": 0.4037676155567169, "learning_rate": 1.4426170012291848e-05, "loss": 0.0602, "step": 110520 }, { "epoch": 4.016643651428156, "grad_norm": 0.2352452278137207, "learning_rate": 1.4421467931247362e-05, "loss": 0.0559, "step": 110530 }, { "epoch": 4.017007049930954, "grad_norm": 0.3989976942539215, "learning_rate": 1.4416766305994184e-05, "loss": 0.0505, "step": 110540 }, { "epoch": 4.017370448433752, "grad_norm": 0.42041394114494324, "learning_rate": 1.4412065136734904e-05, "loss": 0.0964, "step": 110550 }, { "epoch": 4.017733846936551, "grad_norm": 0.9922043085098267, "learning_rate": 1.4407364423672048e-05, "loss": 0.0782, "step": 110560 }, { "epoch": 4.018097245439349, "grad_norm": 0.4274202585220337, "learning_rate": 1.4402664167008178e-05, "loss": 0.0664, "step": 110570 }, { "epoch": 4.018460643942147, "grad_norm": 0.520118236541748, "learning_rate": 1.439796436694581e-05, "loss": 0.0601, "step": 110580 }, { "epoch": 4.018824042444945, "grad_norm": 0.9387579560279846, "learning_rate": 1.4393265023687425e-05, "loss": 0.0511, "step": 110590 }, { "epoch": 4.019187440947744, "grad_norm": 0.9909424781799316, "learning_rate": 1.438856613743551e-05, "loss": 0.057, "step": 110600 }, { "epoch": 4.019550839450542, "grad_norm": 0.53632652759552, "learning_rate": 1.4383867708392537e-05, "loss": 0.0729, "step": 110610 }, { "epoch": 4.01991423795334, "grad_norm": 0.6779784560203552, "learning_rate": 1.4379169736760923e-05, "loss": 
0.0808, "step": 110620 }, { "epoch": 4.020277636456138, "grad_norm": 0.3937224745750427, "learning_rate": 1.4374472222743093e-05, "loss": 0.0551, "step": 110630 }, { "epoch": 4.020641034958936, "grad_norm": 4.022054672241211, "learning_rate": 1.4369775166541449e-05, "loss": 3.4486, "step": 110640 }, { "epoch": 4.0210044334617345, "grad_norm": 0.4625096619129181, "learning_rate": 1.4365078568358383e-05, "loss": 0.0543, "step": 110650 }, { "epoch": 4.0213678319645325, "grad_norm": 0.6315404772758484, "learning_rate": 1.4360382428396232e-05, "loss": 0.0658, "step": 110660 }, { "epoch": 4.0217312304673305, "grad_norm": 5.309476375579834, "learning_rate": 1.4355686746857344e-05, "loss": 0.0509, "step": 110670 }, { "epoch": 4.0220946289701285, "grad_norm": 1.7463594675064087, "learning_rate": 1.4350991523944046e-05, "loss": 0.056, "step": 110680 }, { "epoch": 4.0224580274729265, "grad_norm": 0.3528885245323181, "learning_rate": 1.434629675985864e-05, "loss": 0.1695, "step": 110690 }, { "epoch": 4.022821425975725, "grad_norm": 0.8287866115570068, "learning_rate": 1.4341602454803393e-05, "loss": 0.3779, "step": 110700 }, { "epoch": 4.023184824478523, "grad_norm": 0.5021520256996155, "learning_rate": 1.4336908608980582e-05, "loss": 0.0684, "step": 110710 }, { "epoch": 4.023548222981321, "grad_norm": 0.5340952277183533, "learning_rate": 1.4332215222592418e-05, "loss": 0.0702, "step": 110720 }, { "epoch": 4.023911621484119, "grad_norm": 0.44070643186569214, "learning_rate": 1.4327522295841168e-05, "loss": 0.0556, "step": 110730 }, { "epoch": 4.024275019986917, "grad_norm": 0.3349458873271942, "learning_rate": 1.4322829828928996e-05, "loss": 0.0545, "step": 110740 }, { "epoch": 4.024638418489716, "grad_norm": 0.5574124455451965, "learning_rate": 1.4318137822058109e-05, "loss": 0.0501, "step": 110750 }, { "epoch": 4.025001816992514, "grad_norm": 1.3972676992416382, "learning_rate": 1.4313446275430647e-05, "loss": 0.0691, "step": 110760 }, { "epoch": 4.025365215495312, 
"grad_norm": 0.916902482509613, "learning_rate": 1.4308755189248763e-05, "loss": 0.1109, "step": 110770 }, { "epoch": 4.02572861399811, "grad_norm": 0.410158634185791, "learning_rate": 1.4304064563714576e-05, "loss": 0.0549, "step": 110780 }, { "epoch": 4.026092012500908, "grad_norm": 1.5064035654067993, "learning_rate": 1.4299374399030202e-05, "loss": 1.0608, "step": 110790 }, { "epoch": 4.026455411003707, "grad_norm": 0.44640934467315674, "learning_rate": 1.42946846953977e-05, "loss": 0.0566, "step": 110800 }, { "epoch": 4.026818809506505, "grad_norm": 0.6432878971099854, "learning_rate": 1.4289995453019145e-05, "loss": 0.0722, "step": 110810 }, { "epoch": 4.027182208009303, "grad_norm": 0.7535707950592041, "learning_rate": 1.4285306672096583e-05, "loss": 0.0758, "step": 110820 }, { "epoch": 4.027545606512101, "grad_norm": 0.8665387630462646, "learning_rate": 1.4280618352832043e-05, "loss": 0.0837, "step": 110830 }, { "epoch": 4.027909005014899, "grad_norm": 0.7218203544616699, "learning_rate": 1.4275930495427506e-05, "loss": 0.0612, "step": 110840 }, { "epoch": 4.028272403517698, "grad_norm": 1.0231331586837769, "learning_rate": 1.427124310008498e-05, "loss": 0.0635, "step": 110850 }, { "epoch": 4.028635802020496, "grad_norm": 0.5189678072929382, "learning_rate": 1.4266556167006396e-05, "loss": 0.074, "step": 110860 }, { "epoch": 4.028999200523294, "grad_norm": 0.621478796005249, "learning_rate": 1.4261869696393735e-05, "loss": 0.061, "step": 110870 }, { "epoch": 4.029362599026092, "grad_norm": 2.097764730453491, "learning_rate": 1.425718368844889e-05, "loss": 0.0539, "step": 110880 }, { "epoch": 4.029725997528891, "grad_norm": 0.3547973036766052, "learning_rate": 1.4252498143373793e-05, "loss": 0.0468, "step": 110890 }, { "epoch": 4.030089396031689, "grad_norm": 0.2843954265117645, "learning_rate": 1.4247813061370297e-05, "loss": 0.0521, "step": 110900 }, { "epoch": 4.030452794534487, "grad_norm": 0.36639404296875, "learning_rate": 1.424312844264028e-05, 
"loss": 0.0638, "step": 110910 }, { "epoch": 4.030816193037285, "grad_norm": 0.4634372889995575, "learning_rate": 1.4238444287385588e-05, "loss": 0.079, "step": 110920 }, { "epoch": 4.0311795915400825, "grad_norm": 0.5150337815284729, "learning_rate": 1.4233760595808049e-05, "loss": 0.1644, "step": 110930 }, { "epoch": 4.031542990042881, "grad_norm": 1.6643534898757935, "learning_rate": 1.4229077368109451e-05, "loss": 0.0646, "step": 110940 }, { "epoch": 4.031906388545679, "grad_norm": 0.9327892065048218, "learning_rate": 1.4224394604491586e-05, "loss": 0.0581, "step": 110950 }, { "epoch": 4.032269787048477, "grad_norm": 1.278937578201294, "learning_rate": 1.4219712305156218e-05, "loss": 0.0637, "step": 110960 }, { "epoch": 4.032633185551275, "grad_norm": 0.9296409487724304, "learning_rate": 1.4215030470305102e-05, "loss": 0.0624, "step": 110970 }, { "epoch": 4.032996584054073, "grad_norm": 0.5513620972633362, "learning_rate": 1.4210349100139936e-05, "loss": 1.9567, "step": 110980 }, { "epoch": 4.033359982556872, "grad_norm": 0.42453351616859436, "learning_rate": 1.4205668194862448e-05, "loss": 0.0577, "step": 110990 }, { "epoch": 4.03372338105967, "grad_norm": 1.1154534816741943, "learning_rate": 1.4200987754674294e-05, "loss": 0.0633, "step": 111000 }, { "epoch": 4.03372338105967, "eval_loss": 0.2871040403842926, "eval_runtime": 179.1702, "eval_samples_per_second": 41.38, "eval_steps_per_second": 5.174, "eval_wer": 0.13044820011981048, "step": 111000 }, { "epoch": 4.034086779562468, "grad_norm": 0.23643670976161957, "learning_rate": 1.4196307779777173e-05, "loss": 0.0728, "step": 111010 }, { "epoch": 4.034450178065266, "grad_norm": 0.7400628924369812, "learning_rate": 1.4191628270372703e-05, "loss": 0.0715, "step": 111020 }, { "epoch": 4.034813576568064, "grad_norm": 0.6206227540969849, "learning_rate": 1.4186949226662522e-05, "loss": 0.0586, "step": 111030 }, { "epoch": 4.035176975070863, "grad_norm": 6.141451358795166, "learning_rate": 1.4182270648848215e-05, 
"loss": 0.1303, "step": 111040 }, { "epoch": 4.035540373573661, "grad_norm": 0.33030861616134644, "learning_rate": 1.4177592537131376e-05, "loss": 1.078, "step": 111050 }, { "epoch": 4.035903772076459, "grad_norm": 0.5480292439460754, "learning_rate": 1.4172914891713569e-05, "loss": 0.0622, "step": 111060 }, { "epoch": 4.036267170579257, "grad_norm": 1.9571572542190552, "learning_rate": 1.4168237712796347e-05, "loss": 0.0547, "step": 111070 }, { "epoch": 4.036630569082055, "grad_norm": 0.8759858012199402, "learning_rate": 1.4163561000581213e-05, "loss": 0.0631, "step": 111080 }, { "epoch": 4.036993967584854, "grad_norm": 0.46415144205093384, "learning_rate": 1.415888475526969e-05, "loss": 0.054, "step": 111090 }, { "epoch": 4.037357366087652, "grad_norm": 1.5888949632644653, "learning_rate": 1.4154208977063227e-05, "loss": 0.0835, "step": 111100 }, { "epoch": 4.03772076459045, "grad_norm": 0.47662070393562317, "learning_rate": 1.4149533666163331e-05, "loss": 0.0614, "step": 111110 }, { "epoch": 4.038084163093248, "grad_norm": 0.4895434081554413, "learning_rate": 1.4144858822771412e-05, "loss": 0.0696, "step": 111120 }, { "epoch": 4.038447561596046, "grad_norm": 0.5362039804458618, "learning_rate": 1.4140184447088916e-05, "loss": 0.2058, "step": 111130 }, { "epoch": 4.038810960098845, "grad_norm": 0.28153735399246216, "learning_rate": 1.4135510539317212e-05, "loss": 0.0608, "step": 111140 }, { "epoch": 4.039174358601643, "grad_norm": 0.323169469833374, "learning_rate": 1.4130837099657724e-05, "loss": 0.0578, "step": 111150 }, { "epoch": 4.039537757104441, "grad_norm": 0.43453449010849, "learning_rate": 1.412616412831178e-05, "loss": 0.0689, "step": 111160 }, { "epoch": 4.039901155607239, "grad_norm": 0.42590922117233276, "learning_rate": 1.4121491625480749e-05, "loss": 0.0676, "step": 111170 }, { "epoch": 4.0402645541100375, "grad_norm": 0.39531514048576355, "learning_rate": 1.4116819591365924e-05, "loss": 0.0565, "step": 111180 }, { "epoch": 4.0406279526128355, 
"grad_norm": 0.2730831801891327, "learning_rate": 1.411214802616862e-05, "loss": 0.0615, "step": 111190 }, { "epoch": 4.0409913511156335, "grad_norm": 0.47754859924316406, "learning_rate": 1.410747693009012e-05, "loss": 0.0462, "step": 111200 }, { "epoch": 4.041354749618431, "grad_norm": 4.3222270011901855, "learning_rate": 1.4102806303331695e-05, "loss": 0.0711, "step": 111210 }, { "epoch": 4.041718148121229, "grad_norm": 0.7736272811889648, "learning_rate": 1.4098136146094559e-05, "loss": 0.0492, "step": 111220 }, { "epoch": 4.042081546624028, "grad_norm": 1.073490023612976, "learning_rate": 1.4093466458579962e-05, "loss": 0.0753, "step": 111230 }, { "epoch": 4.042444945126826, "grad_norm": 0.35597535967826843, "learning_rate": 1.4088797240989071e-05, "loss": 0.055, "step": 111240 }, { "epoch": 4.042808343629624, "grad_norm": 0.5514324307441711, "learning_rate": 1.4084128493523102e-05, "loss": 0.0664, "step": 111250 }, { "epoch": 4.043171742132422, "grad_norm": 0.2898502051830292, "learning_rate": 1.4079460216383186e-05, "loss": 0.0663, "step": 111260 }, { "epoch": 4.04353514063522, "grad_norm": 16.011980056762695, "learning_rate": 1.4074792409770487e-05, "loss": 0.0632, "step": 111270 }, { "epoch": 4.043898539138019, "grad_norm": 0.34230297803878784, "learning_rate": 1.4070125073886097e-05, "loss": 0.0545, "step": 111280 }, { "epoch": 4.044261937640817, "grad_norm": 0.6182803511619568, "learning_rate": 1.4065458208931132e-05, "loss": 0.0614, "step": 111290 }, { "epoch": 4.044625336143615, "grad_norm": 3.1813621520996094, "learning_rate": 1.4060791815106666e-05, "loss": 0.0556, "step": 111300 }, { "epoch": 4.044988734646413, "grad_norm": 3.2446606159210205, "learning_rate": 1.4056125892613773e-05, "loss": 0.0733, "step": 111310 }, { "epoch": 4.045352133149211, "grad_norm": 0.8564647436141968, "learning_rate": 1.4051460441653463e-05, "loss": 0.0706, "step": 111320 }, { "epoch": 4.04571553165201, "grad_norm": 59.863319396972656, "learning_rate": 
1.4046795462426767e-05, "loss": 0.8816, "step": 111330 }, { "epoch": 4.046078930154808, "grad_norm": 0.32583507895469666, "learning_rate": 1.4042130955134686e-05, "loss": 0.0482, "step": 111340 }, { "epoch": 4.046442328657606, "grad_norm": 0.5416057705879211, "learning_rate": 1.4037466919978201e-05, "loss": 0.0531, "step": 111350 }, { "epoch": 4.046805727160404, "grad_norm": 0.40496620535850525, "learning_rate": 1.4032803357158253e-05, "loss": 0.0605, "step": 111360 }, { "epoch": 4.047169125663202, "grad_norm": 1.0938149690628052, "learning_rate": 1.4028140266875797e-05, "loss": 0.0873, "step": 111370 }, { "epoch": 4.047532524166001, "grad_norm": 0.465610533952713, "learning_rate": 1.4023477649331718e-05, "loss": 0.0675, "step": 111380 }, { "epoch": 4.047895922668799, "grad_norm": 0.5324172973632812, "learning_rate": 1.4018815504726953e-05, "loss": 0.1398, "step": 111390 }, { "epoch": 4.048259321171597, "grad_norm": 0.6042605638504028, "learning_rate": 1.4014153833262347e-05, "loss": 0.0731, "step": 111400 }, { "epoch": 4.048622719674395, "grad_norm": 1.338255524635315, "learning_rate": 1.4009492635138777e-05, "loss": 0.0899, "step": 111410 }, { "epoch": 4.048986118177193, "grad_norm": 0.43646422028541565, "learning_rate": 1.400483191055705e-05, "loss": 0.0484, "step": 111420 }, { "epoch": 4.0493495166799915, "grad_norm": 28.60755729675293, "learning_rate": 1.4000171659717999e-05, "loss": 0.5856, "step": 111430 }, { "epoch": 4.0497129151827895, "grad_norm": 0.8646038174629211, "learning_rate": 1.399551188282241e-05, "loss": 0.0675, "step": 111440 }, { "epoch": 4.0500763136855875, "grad_norm": 0.30594268441200256, "learning_rate": 1.3990852580071073e-05, "loss": 0.1136, "step": 111450 }, { "epoch": 4.0504397121883855, "grad_norm": 0.5570999979972839, "learning_rate": 1.3986193751664717e-05, "loss": 0.0656, "step": 111460 }, { "epoch": 4.050803110691184, "grad_norm": 2.459162950515747, "learning_rate": 1.3981535397804093e-05, "loss": 0.0414, "step": 111470 }, { 
"epoch": 4.051166509193982, "grad_norm": 0.7406504154205322, "learning_rate": 1.3976877518689887e-05, "loss": 0.055, "step": 111480 }, { "epoch": 4.05152990769678, "grad_norm": 0.36871564388275146, "learning_rate": 1.3972220114522827e-05, "loss": 0.0609, "step": 111490 }, { "epoch": 4.051893306199578, "grad_norm": 0.8053199648857117, "learning_rate": 1.3967563185503557e-05, "loss": 0.0804, "step": 111500 }, { "epoch": 4.052256704702376, "grad_norm": 0.557873547077179, "learning_rate": 1.3962906731832746e-05, "loss": 0.0676, "step": 111510 }, { "epoch": 4.052620103205175, "grad_norm": 1.9856547117233276, "learning_rate": 1.3958250753711002e-05, "loss": 0.0771, "step": 111520 }, { "epoch": 4.052983501707973, "grad_norm": 0.48433226346969604, "learning_rate": 1.3953595251338947e-05, "loss": 0.0558, "step": 111530 }, { "epoch": 4.053346900210771, "grad_norm": 0.42576339840888977, "learning_rate": 1.3948940224917167e-05, "loss": 0.0643, "step": 111540 }, { "epoch": 4.053710298713569, "grad_norm": 0.7690130472183228, "learning_rate": 1.3944285674646245e-05, "loss": 0.0637, "step": 111550 }, { "epoch": 4.054073697216367, "grad_norm": 0.28431037068367004, "learning_rate": 1.393963160072671e-05, "loss": 0.0623, "step": 111560 }, { "epoch": 4.054437095719166, "grad_norm": 1.6892107725143433, "learning_rate": 1.3934978003359095e-05, "loss": 0.0553, "step": 111570 }, { "epoch": 4.054800494221964, "grad_norm": 0.3961574137210846, "learning_rate": 1.3930324882743906e-05, "loss": 0.0575, "step": 111580 }, { "epoch": 4.055163892724762, "grad_norm": 0.4090615212917328, "learning_rate": 1.3925672239081644e-05, "loss": 0.0495, "step": 111590 }, { "epoch": 4.05552729122756, "grad_norm": 0.4748428165912628, "learning_rate": 1.3921020072572749e-05, "loss": 0.0797, "step": 111600 }, { "epoch": 4.05552729122756, "eval_loss": 0.3094218969345093, "eval_runtime": 178.8555, "eval_samples_per_second": 41.452, "eval_steps_per_second": 5.183, "eval_wer": 0.1288416504801496, "step": 111600 }, { 
"epoch": 4.055890689730358, "grad_norm": 0.6011778712272644, "learning_rate": 1.3916368383417694e-05, "loss": 0.0691, "step": 111610 }, { "epoch": 4.056254088233157, "grad_norm": 0.7090577483177185, "learning_rate": 1.3911717171816868e-05, "loss": 0.0546, "step": 111620 }, { "epoch": 4.056617486735955, "grad_norm": 1.0578325986862183, "learning_rate": 1.3907066437970718e-05, "loss": 0.0682, "step": 111630 }, { "epoch": 4.056980885238753, "grad_norm": 0.3923257887363434, "learning_rate": 1.3902416182079591e-05, "loss": 0.0846, "step": 111640 }, { "epoch": 4.057344283741551, "grad_norm": 2.852869749069214, "learning_rate": 1.389776640434388e-05, "loss": 0.0787, "step": 111650 }, { "epoch": 4.057707682244349, "grad_norm": 0.35996344685554504, "learning_rate": 1.3893117104963903e-05, "loss": 0.0758, "step": 111660 }, { "epoch": 4.058071080747148, "grad_norm": 0.7732596397399902, "learning_rate": 1.3888468284139994e-05, "loss": 0.0551, "step": 111670 }, { "epoch": 4.058434479249946, "grad_norm": 1.0408018827438354, "learning_rate": 1.3883819942072446e-05, "loss": 0.0633, "step": 111680 }, { "epoch": 4.058797877752744, "grad_norm": 0.3703053891658783, "learning_rate": 1.3879172078961561e-05, "loss": 0.0597, "step": 111690 }, { "epoch": 4.059161276255542, "grad_norm": 0.7436791658401489, "learning_rate": 1.3874524695007568e-05, "loss": 0.0552, "step": 111700 }, { "epoch": 4.05952467475834, "grad_norm": 0.3398180305957794, "learning_rate": 1.3869877790410734e-05, "loss": 0.0528, "step": 111710 }, { "epoch": 4.059888073261138, "grad_norm": 1.0248258113861084, "learning_rate": 1.3865231365371245e-05, "loss": 0.0779, "step": 111720 }, { "epoch": 4.060251471763936, "grad_norm": 0.3211299180984497, "learning_rate": 1.3860585420089336e-05, "loss": 0.0874, "step": 111730 }, { "epoch": 4.060614870266734, "grad_norm": 0.44448596239089966, "learning_rate": 1.385593995476516e-05, "loss": 0.0578, "step": 111740 }, { "epoch": 4.060978268769532, "grad_norm": 0.36641961336135864, 
"learning_rate": 1.3851294969598888e-05, "loss": 0.077, "step": 111750 }, { "epoch": 4.061341667272331, "grad_norm": 0.8280020952224731, "learning_rate": 1.3846650464790633e-05, "loss": 0.0618, "step": 111760 }, { "epoch": 4.061705065775129, "grad_norm": 1.596620798110962, "learning_rate": 1.3842006440540542e-05, "loss": 0.0688, "step": 111770 }, { "epoch": 4.062068464277927, "grad_norm": 0.3118537366390228, "learning_rate": 1.3837362897048684e-05, "loss": 0.0477, "step": 111780 }, { "epoch": 4.062431862780725, "grad_norm": 0.9146553874015808, "learning_rate": 1.3832719834515151e-05, "loss": 0.0631, "step": 111790 }, { "epoch": 4.062795261283523, "grad_norm": 1.1774924993515015, "learning_rate": 1.3828077253139978e-05, "loss": 0.054, "step": 111800 }, { "epoch": 4.063158659786322, "grad_norm": 0.6028741598129272, "learning_rate": 1.3823435153123209e-05, "loss": 0.0627, "step": 111810 }, { "epoch": 4.06352205828912, "grad_norm": 5.221044540405273, "learning_rate": 1.3818793534664848e-05, "loss": 0.0515, "step": 111820 }, { "epoch": 4.063885456791918, "grad_norm": 0.31061217188835144, "learning_rate": 1.3814152397964906e-05, "loss": 0.0912, "step": 111830 }, { "epoch": 4.064248855294716, "grad_norm": 0.45412084460258484, "learning_rate": 1.3809511743223324e-05, "loss": 0.0507, "step": 111840 }, { "epoch": 4.064612253797514, "grad_norm": 0.2962772846221924, "learning_rate": 1.3804871570640077e-05, "loss": 0.0543, "step": 111850 }, { "epoch": 4.064975652300313, "grad_norm": 0.30459967255592346, "learning_rate": 1.380023188041506e-05, "loss": 0.0648, "step": 111860 }, { "epoch": 4.065339050803111, "grad_norm": 0.43784353137016296, "learning_rate": 1.3795592672748223e-05, "loss": 0.0585, "step": 111870 }, { "epoch": 4.065702449305909, "grad_norm": 0.5347334146499634, "learning_rate": 1.3790953947839421e-05, "loss": 0.0576, "step": 111880 }, { "epoch": 4.066065847808707, "grad_norm": 0.750178337097168, "learning_rate": 1.3786315705888542e-05, "loss": 0.0503, "step": 
111890 }, { "epoch": 4.066429246311505, "grad_norm": 0.21846427023410797, "learning_rate": 1.3781677947095412e-05, "loss": 0.069, "step": 111900 }, { "epoch": 4.066792644814304, "grad_norm": 0.3489988148212433, "learning_rate": 1.3777040671659866e-05, "loss": 0.0562, "step": 111910 }, { "epoch": 4.067156043317102, "grad_norm": 0.5933734774589539, "learning_rate": 1.3772403879781703e-05, "loss": 0.0663, "step": 111920 }, { "epoch": 4.0675194418199, "grad_norm": 0.31282684206962585, "learning_rate": 1.3767767571660722e-05, "loss": 0.0552, "step": 111930 }, { "epoch": 4.067882840322698, "grad_norm": 0.5128657817840576, "learning_rate": 1.3763131747496657e-05, "loss": 0.0528, "step": 111940 }, { "epoch": 4.068246238825496, "grad_norm": 0.5472941994667053, "learning_rate": 1.3758496407489268e-05, "loss": 0.0844, "step": 111950 }, { "epoch": 4.0686096373282945, "grad_norm": 0.41867172718048096, "learning_rate": 1.3753861551838271e-05, "loss": 0.1142, "step": 111960 }, { "epoch": 4.0689730358310925, "grad_norm": 2.5127737522125244, "learning_rate": 1.3749227180743374e-05, "loss": 0.0838, "step": 111970 }, { "epoch": 4.0693364343338905, "grad_norm": 1.4139436483383179, "learning_rate": 1.374459329440424e-05, "loss": 0.0921, "step": 111980 }, { "epoch": 4.0696998328366885, "grad_norm": 0.5689426064491272, "learning_rate": 1.3739959893020543e-05, "loss": 0.5989, "step": 111990 }, { "epoch": 4.070063231339487, "grad_norm": 0.745959997177124, "learning_rate": 1.373532697679189e-05, "loss": 0.0635, "step": 112000 }, { "epoch": 4.070426629842285, "grad_norm": 0.531570315361023, "learning_rate": 1.3730694545917938e-05, "loss": 0.0692, "step": 112010 }, { "epoch": 4.070790028345083, "grad_norm": 1.154670000076294, "learning_rate": 1.3726062600598252e-05, "loss": 0.0781, "step": 112020 }, { "epoch": 4.071153426847881, "grad_norm": 0.7137158513069153, "learning_rate": 1.3721431141032426e-05, "loss": 0.0566, "step": 112030 }, { "epoch": 4.071516825350679, "grad_norm": 
0.5212516188621521, "learning_rate": 1.3716800167419991e-05, "loss": 0.0522, "step": 112040 }, { "epoch": 4.071880223853478, "grad_norm": 0.4456008970737457, "learning_rate": 1.3712169679960495e-05, "loss": 0.0596, "step": 112050 }, { "epoch": 4.072243622356276, "grad_norm": 1.1105504035949707, "learning_rate": 1.3707539678853443e-05, "loss": 0.0598, "step": 112060 }, { "epoch": 4.072607020859074, "grad_norm": 0.3087688684463501, "learning_rate": 1.3702910164298338e-05, "loss": 0.0592, "step": 112070 }, { "epoch": 4.072970419361872, "grad_norm": 0.8320184946060181, "learning_rate": 1.3698281136494628e-05, "loss": 0.0709, "step": 112080 }, { "epoch": 4.07333381786467, "grad_norm": 3.412813901901245, "learning_rate": 1.3693652595641782e-05, "loss": 0.0526, "step": 112090 }, { "epoch": 4.073697216367469, "grad_norm": 0.5016017556190491, "learning_rate": 1.3689024541939196e-05, "loss": 0.0607, "step": 112100 }, { "epoch": 4.074060614870267, "grad_norm": 0.3929903507232666, "learning_rate": 1.3684396975586322e-05, "loss": 0.0621, "step": 112110 }, { "epoch": 4.074424013373065, "grad_norm": 0.8663429617881775, "learning_rate": 1.3679769896782507e-05, "loss": 0.0638, "step": 112120 }, { "epoch": 4.074787411875863, "grad_norm": 0.8499599099159241, "learning_rate": 1.3675143305727145e-05, "loss": 0.064, "step": 112130 }, { "epoch": 4.075150810378661, "grad_norm": 0.5362977981567383, "learning_rate": 1.3670517202619538e-05, "loss": 0.0531, "step": 112140 }, { "epoch": 4.07551420888146, "grad_norm": 0.3657929599285126, "learning_rate": 1.3665891587659058e-05, "loss": 0.0506, "step": 112150 }, { "epoch": 4.075877607384258, "grad_norm": 0.4857289493083954, "learning_rate": 1.3661266461044973e-05, "loss": 0.073, "step": 112160 }, { "epoch": 4.076241005887056, "grad_norm": 0.36839261651039124, "learning_rate": 1.3656641822976579e-05, "loss": 0.0501, "step": 112170 }, { "epoch": 4.076604404389854, "grad_norm": 0.6398412585258484, "learning_rate": 1.3652017673653122e-05, "loss": 
0.0451, "step": 112180 }, { "epoch": 4.076967802892652, "grad_norm": 0.5313104391098022, "learning_rate": 1.3647394013273848e-05, "loss": 0.0478, "step": 112190 }, { "epoch": 4.077331201395451, "grad_norm": 0.8435815572738647, "learning_rate": 1.3642770842037972e-05, "loss": 0.0698, "step": 112200 }, { "epoch": 4.077331201395451, "eval_loss": 0.3243897259235382, "eval_runtime": 179.2488, "eval_samples_per_second": 41.361, "eval_steps_per_second": 5.172, "eval_wer": 0.12874180841214805, "step": 112200 }, { "epoch": 4.077694599898249, "grad_norm": 0.5462674498558044, "learning_rate": 1.3638148160144701e-05, "loss": 0.0626, "step": 112210 }, { "epoch": 4.0780579984010465, "grad_norm": 0.5389562249183655, "learning_rate": 1.3633525967793192e-05, "loss": 0.0675, "step": 112220 }, { "epoch": 4.0784213969038445, "grad_norm": 0.4394398629665375, "learning_rate": 1.3628904265182612e-05, "loss": 0.0567, "step": 112230 }, { "epoch": 4.0787847954066425, "grad_norm": 0.36386388540267944, "learning_rate": 1.3624283052512075e-05, "loss": 0.0518, "step": 112240 }, { "epoch": 4.079148193909441, "grad_norm": 0.3440745174884796, "learning_rate": 1.3619662329980723e-05, "loss": 0.0608, "step": 112250 }, { "epoch": 4.079511592412239, "grad_norm": 0.540234386920929, "learning_rate": 1.361504209778762e-05, "loss": 0.0715, "step": 112260 }, { "epoch": 4.079874990915037, "grad_norm": 0.803322434425354, "learning_rate": 1.3610422356131858e-05, "loss": 0.0674, "step": 112270 }, { "epoch": 4.080238389417835, "grad_norm": 0.3784193992614746, "learning_rate": 1.3605803105212459e-05, "loss": 0.1557, "step": 112280 }, { "epoch": 4.080601787920633, "grad_norm": 0.4170146584510803, "learning_rate": 1.3601184345228463e-05, "loss": 0.0572, "step": 112290 }, { "epoch": 4.080965186423432, "grad_norm": 0.46115851402282715, "learning_rate": 1.359656607637888e-05, "loss": 0.0536, "step": 112300 }, { "epoch": 4.08132858492623, "grad_norm": 0.39042162895202637, "learning_rate": 1.3591948298862698e-05, 
"loss": 0.061, "step": 112310 }, { "epoch": 4.081691983429028, "grad_norm": 0.5178929567337036, "learning_rate": 1.3587331012878864e-05, "loss": 0.0664, "step": 112320 }, { "epoch": 4.082055381931826, "grad_norm": 0.5064478516578674, "learning_rate": 1.358271421862633e-05, "loss": 0.0662, "step": 112330 }, { "epoch": 4.082418780434625, "grad_norm": 0.2574649751186371, "learning_rate": 1.3578097916304023e-05, "loss": 0.0439, "step": 112340 }, { "epoch": 4.082782178937423, "grad_norm": 0.3785637617111206, "learning_rate": 1.357348210611084e-05, "loss": 0.0535, "step": 112350 }, { "epoch": 4.083145577440221, "grad_norm": 0.31486454606056213, "learning_rate": 1.3568866788245652e-05, "loss": 0.0949, "step": 112360 }, { "epoch": 4.083508975943019, "grad_norm": 0.732257604598999, "learning_rate": 1.3564251962907331e-05, "loss": 0.0806, "step": 112370 }, { "epoch": 4.083872374445817, "grad_norm": 0.43957632780075073, "learning_rate": 1.3559637630294683e-05, "loss": 0.0681, "step": 112380 }, { "epoch": 4.084235772948616, "grad_norm": 0.997170627117157, "learning_rate": 1.3555023790606566e-05, "loss": 0.0501, "step": 112390 }, { "epoch": 4.084599171451414, "grad_norm": 161.74917602539062, "learning_rate": 1.3550410444041741e-05, "loss": 2.0045, "step": 112400 }, { "epoch": 4.084962569954212, "grad_norm": 0.7197566628456116, "learning_rate": 1.3545797590799003e-05, "loss": 0.0535, "step": 112410 }, { "epoch": 4.08532596845701, "grad_norm": 0.7341930270195007, "learning_rate": 1.3541185231077085e-05, "loss": 0.0845, "step": 112420 }, { "epoch": 4.085689366959808, "grad_norm": 0.393226683139801, "learning_rate": 1.3536573365074724e-05, "loss": 0.0971, "step": 112430 }, { "epoch": 4.086052765462607, "grad_norm": 0.3241816461086273, "learning_rate": 1.3531961992990627e-05, "loss": 0.0446, "step": 112440 }, { "epoch": 4.086416163965405, "grad_norm": 0.755688488483429, "learning_rate": 1.3527351115023496e-05, "loss": 0.0686, "step": 112450 }, { "epoch": 4.086779562468203, 
"grad_norm": 0.28549787402153015, "learning_rate": 1.3522740731371975e-05, "loss": 0.1545, "step": 112460 }, { "epoch": 4.087142960971001, "grad_norm": 2.1800248622894287, "learning_rate": 1.3518130842234721e-05, "loss": 0.0571, "step": 112470 }, { "epoch": 4.087506359473799, "grad_norm": 0.7874051332473755, "learning_rate": 1.3513521447810354e-05, "loss": 0.0672, "step": 112480 }, { "epoch": 4.0878697579765975, "grad_norm": 1.9011385440826416, "learning_rate": 1.3508912548297491e-05, "loss": 0.0732, "step": 112490 }, { "epoch": 4.0882331564793954, "grad_norm": 0.41868069767951965, "learning_rate": 1.3504304143894692e-05, "loss": 0.0611, "step": 112500 }, { "epoch": 4.088596554982193, "grad_norm": 0.8152614831924438, "learning_rate": 1.349969623480053e-05, "loss": 0.0696, "step": 112510 }, { "epoch": 4.088959953484991, "grad_norm": 0.7754275798797607, "learning_rate": 1.3495088821213526e-05, "loss": 0.0734, "step": 112520 }, { "epoch": 4.089323351987789, "grad_norm": 0.3078191876411438, "learning_rate": 1.3490481903332226e-05, "loss": 0.0669, "step": 112530 }, { "epoch": 4.089686750490588, "grad_norm": 1.8453993797302246, "learning_rate": 1.3485875481355098e-05, "loss": 0.053, "step": 112540 }, { "epoch": 4.090050148993386, "grad_norm": 0.5882243514060974, "learning_rate": 1.3481269555480642e-05, "loss": 0.0562, "step": 112550 }, { "epoch": 4.090413547496184, "grad_norm": 0.2845616042613983, "learning_rate": 1.3476664125907284e-05, "loss": 0.0624, "step": 112560 }, { "epoch": 4.090776945998982, "grad_norm": 1.1381127834320068, "learning_rate": 1.3472059192833475e-05, "loss": 0.0648, "step": 112570 }, { "epoch": 4.091140344501781, "grad_norm": 0.5941457748413086, "learning_rate": 1.3467454756457612e-05, "loss": 0.0596, "step": 112580 }, { "epoch": 4.091503743004579, "grad_norm": 0.6055946350097656, "learning_rate": 1.3462850816978103e-05, "loss": 0.5211, "step": 112590 }, { "epoch": 4.091867141507377, "grad_norm": 0.3982195258140564, "learning_rate": 
1.3458247374593292e-05, "loss": 0.0668, "step": 112600 }, { "epoch": 4.092230540010175, "grad_norm": 0.5881565809249878, "learning_rate": 1.3453644429501539e-05, "loss": 0.0974, "step": 112610 }, { "epoch": 4.092593938512973, "grad_norm": 0.5674206614494324, "learning_rate": 1.3449041981901162e-05, "loss": 0.0665, "step": 112620 }, { "epoch": 4.092957337015772, "grad_norm": 0.8177425265312195, "learning_rate": 1.344444003199048e-05, "loss": 0.065, "step": 112630 }, { "epoch": 4.09332073551857, "grad_norm": 0.5913267135620117, "learning_rate": 1.343983857996775e-05, "loss": 0.0532, "step": 112640 }, { "epoch": 4.093684134021368, "grad_norm": 0.33033841848373413, "learning_rate": 1.3435237626031256e-05, "loss": 0.0533, "step": 112650 }, { "epoch": 4.094047532524166, "grad_norm": 0.5540090203285217, "learning_rate": 1.3430637170379215e-05, "loss": 0.0609, "step": 112660 }, { "epoch": 4.094410931026964, "grad_norm": 0.4232136011123657, "learning_rate": 1.3426037213209852e-05, "loss": 0.0564, "step": 112670 }, { "epoch": 4.094774329529763, "grad_norm": 0.8112702965736389, "learning_rate": 1.342143775472137e-05, "loss": 0.0512, "step": 112680 }, { "epoch": 4.095137728032561, "grad_norm": 0.43123000860214233, "learning_rate": 1.3416838795111944e-05, "loss": 0.0432, "step": 112690 }, { "epoch": 4.095501126535359, "grad_norm": 0.1756378710269928, "learning_rate": 1.3412240334579713e-05, "loss": 0.2854, "step": 112700 }, { "epoch": 4.095864525038157, "grad_norm": 0.3354843258857727, "learning_rate": 1.3407642373322816e-05, "loss": 0.0786, "step": 112710 }, { "epoch": 4.096227923540955, "grad_norm": 0.34515076875686646, "learning_rate": 1.3403044911539364e-05, "loss": 0.0691, "step": 112720 }, { "epoch": 4.0965913220437535, "grad_norm": 0.33889952301979065, "learning_rate": 1.3398447949427456e-05, "loss": 0.0643, "step": 112730 }, { "epoch": 4.0969547205465515, "grad_norm": 0.36444467306137085, "learning_rate": 1.3393851487185135e-05, "loss": 0.0531, "step": 112740 }, { 
"epoch": 4.0973181190493495, "grad_norm": 0.37586820125579834, "learning_rate": 1.3389255525010461e-05, "loss": 0.0596, "step": 112750 }, { "epoch": 4.0976815175521475, "grad_norm": 0.2949109971523285, "learning_rate": 1.3384660063101454e-05, "loss": 0.0646, "step": 112760 }, { "epoch": 4.0980449160549455, "grad_norm": 1.0989277362823486, "learning_rate": 1.3380065101656126e-05, "loss": 0.0734, "step": 112770 }, { "epoch": 4.098408314557744, "grad_norm": 1.996840238571167, "learning_rate": 1.337547064087244e-05, "loss": 0.0589, "step": 112780 }, { "epoch": 4.098771713060542, "grad_norm": 0.34807854890823364, "learning_rate": 1.3370876680948365e-05, "loss": 0.057, "step": 112790 }, { "epoch": 4.09913511156334, "grad_norm": 1.1680188179016113, "learning_rate": 1.3366283222081847e-05, "loss": 0.0604, "step": 112800 }, { "epoch": 4.09913511156334, "eval_loss": 0.3146750032901764, "eval_runtime": 179.6068, "eval_samples_per_second": 41.279, "eval_steps_per_second": 5.161, "eval_wer": 0.1273621725633997, "step": 112800 }, { "epoch": 4.099498510066138, "grad_norm": 0.31680727005004883, "learning_rate": 1.3361690264470783e-05, "loss": 0.0601, "step": 112810 }, { "epoch": 4.099861908568936, "grad_norm": 0.5075859427452087, "learning_rate": 1.3357097808313074e-05, "loss": 0.05, "step": 112820 }, { "epoch": 4.100225307071735, "grad_norm": 0.3714093267917633, "learning_rate": 1.3352505853806604e-05, "loss": 0.0483, "step": 112830 }, { "epoch": 4.100588705574533, "grad_norm": 0.3648132383823395, "learning_rate": 1.3347914401149208e-05, "loss": 0.046, "step": 112840 }, { "epoch": 4.100952104077331, "grad_norm": 0.5334128737449646, "learning_rate": 1.334332345053872e-05, "loss": 0.0688, "step": 112850 }, { "epoch": 4.101315502580129, "grad_norm": 0.4606197476387024, "learning_rate": 1.3338733002172948e-05, "loss": 0.071, "step": 112860 }, { "epoch": 4.101678901082927, "grad_norm": 0.47062140703201294, "learning_rate": 1.3334143056249692e-05, "loss": 0.0665, "step": 112870 }, { 
"epoch": 4.102042299585726, "grad_norm": 1.2191188335418701, "learning_rate": 1.3329553612966697e-05, "loss": 0.0644, "step": 112880 }, { "epoch": 4.102405698088524, "grad_norm": 0.48544201254844666, "learning_rate": 1.3324964672521712e-05, "loss": 0.0657, "step": 112890 }, { "epoch": 4.102769096591322, "grad_norm": 0.748146653175354, "learning_rate": 1.332037623511247e-05, "loss": 0.0774, "step": 112900 }, { "epoch": 4.10313249509412, "grad_norm": 0.43806397914886475, "learning_rate": 1.3315788300936646e-05, "loss": 0.066, "step": 112910 }, { "epoch": 4.103495893596919, "grad_norm": 0.6744257211685181, "learning_rate": 1.3311200870191937e-05, "loss": 0.0598, "step": 112920 }, { "epoch": 4.103859292099717, "grad_norm": 0.44944775104522705, "learning_rate": 1.3306613943075988e-05, "loss": 0.062, "step": 112930 }, { "epoch": 4.104222690602515, "grad_norm": 0.46824315190315247, "learning_rate": 1.3302027519786453e-05, "loss": 0.0571, "step": 112940 }, { "epoch": 4.104586089105313, "grad_norm": 1.0917015075683594, "learning_rate": 1.3297441600520918e-05, "loss": 0.0711, "step": 112950 }, { "epoch": 4.104949487608111, "grad_norm": 0.3450702428817749, "learning_rate": 1.3292856185476987e-05, "loss": 0.0726, "step": 112960 }, { "epoch": 4.10531288611091, "grad_norm": 0.49435433745384216, "learning_rate": 1.3288271274852232e-05, "loss": 0.073, "step": 112970 }, { "epoch": 4.105676284613708, "grad_norm": 0.7066315412521362, "learning_rate": 1.3283686868844203e-05, "loss": 0.0654, "step": 112980 }, { "epoch": 4.106039683116506, "grad_norm": 0.4903556704521179, "learning_rate": 1.3279102967650414e-05, "loss": 0.0519, "step": 112990 }, { "epoch": 4.106403081619304, "grad_norm": 3.5668628215789795, "learning_rate": 1.3274519571468372e-05, "loss": 0.0664, "step": 113000 }, { "epoch": 4.1067664801221015, "grad_norm": 0.6797897219657898, "learning_rate": 1.3269936680495573e-05, "loss": 0.0626, "step": 113010 }, { "epoch": 4.1071298786249, "grad_norm": 0.6676300168037415, 
"learning_rate": 1.3265354294929455e-05, "loss": 0.0471, "step": 113020 }, { "epoch": 4.107493277127698, "grad_norm": 0.3579924404621124, "learning_rate": 1.326077241496747e-05, "loss": 0.0484, "step": 113030 }, { "epoch": 4.107856675630496, "grad_norm": 1.363911509513855, "learning_rate": 1.3256191040807048e-05, "loss": 0.0527, "step": 113040 }, { "epoch": 4.108220074133294, "grad_norm": 0.37151971459388733, "learning_rate": 1.3251610172645553e-05, "loss": 0.0579, "step": 113050 }, { "epoch": 4.108583472636092, "grad_norm": 0.368559330701828, "learning_rate": 1.3247029810680378e-05, "loss": 0.0671, "step": 113060 }, { "epoch": 4.108946871138891, "grad_norm": 3.4040791988372803, "learning_rate": 1.324244995510887e-05, "loss": 0.0607, "step": 113070 }, { "epoch": 4.109310269641689, "grad_norm": 0.24719972908496857, "learning_rate": 1.323787060612837e-05, "loss": 0.0534, "step": 113080 }, { "epoch": 4.109673668144487, "grad_norm": 0.29014429450035095, "learning_rate": 1.3233291763936167e-05, "loss": 0.0538, "step": 113090 }, { "epoch": 4.110037066647285, "grad_norm": 0.49623697996139526, "learning_rate": 1.3228713428729553e-05, "loss": 0.0659, "step": 113100 }, { "epoch": 4.110400465150083, "grad_norm": 0.7023900747299194, "learning_rate": 1.3224135600705798e-05, "loss": 0.0714, "step": 113110 }, { "epoch": 4.110763863652882, "grad_norm": 1.4295860528945923, "learning_rate": 1.321955828006215e-05, "loss": 0.0685, "step": 113120 }, { "epoch": 4.11112726215568, "grad_norm": 0.9260228276252747, "learning_rate": 1.321498146699581e-05, "loss": 0.0601, "step": 113130 }, { "epoch": 4.111490660658478, "grad_norm": 0.5110155344009399, "learning_rate": 1.3210405161703987e-05, "loss": 0.0489, "step": 113140 }, { "epoch": 4.111854059161276, "grad_norm": 0.49911022186279297, "learning_rate": 1.3205829364383871e-05, "loss": 0.0497, "step": 113150 }, { "epoch": 4.112217457664075, "grad_norm": 0.44169220328330994, "learning_rate": 1.3201254075232592e-05, "loss": 0.0716, "step": 
113160 }, { "epoch": 4.112580856166873, "grad_norm": 0.34353122115135193, "learning_rate": 1.3196679294447295e-05, "loss": 0.0888, "step": 113170 }, { "epoch": 4.112944254669671, "grad_norm": 0.28636273741722107, "learning_rate": 1.3192105022225098e-05, "loss": 0.0494, "step": 113180 }, { "epoch": 4.113307653172469, "grad_norm": 1.3827937841415405, "learning_rate": 1.3187531258763078e-05, "loss": 0.054, "step": 113190 }, { "epoch": 4.113671051675267, "grad_norm": 0.4005539119243622, "learning_rate": 1.3182958004258306e-05, "loss": 0.0566, "step": 113200 }, { "epoch": 4.114034450178066, "grad_norm": 0.529242217540741, "learning_rate": 1.3178385258907827e-05, "loss": 0.0661, "step": 113210 }, { "epoch": 4.114397848680864, "grad_norm": 0.6762093901634216, "learning_rate": 1.3173813022908677e-05, "loss": 0.083, "step": 113220 }, { "epoch": 4.114761247183662, "grad_norm": 0.6283437013626099, "learning_rate": 1.3169241296457835e-05, "loss": 0.1065, "step": 113230 }, { "epoch": 4.11512464568646, "grad_norm": 1.0765012502670288, "learning_rate": 1.316467007975229e-05, "loss": 0.0463, "step": 113240 }, { "epoch": 4.115488044189258, "grad_norm": 0.43171441555023193, "learning_rate": 1.3160099372989004e-05, "loss": 0.0582, "step": 113250 }, { "epoch": 4.1158514426920565, "grad_norm": 0.26878660917282104, "learning_rate": 1.3155529176364917e-05, "loss": 0.0657, "step": 113260 }, { "epoch": 4.1162148411948545, "grad_norm": 0.6298261880874634, "learning_rate": 1.3150959490076929e-05, "loss": 0.0584, "step": 113270 }, { "epoch": 4.1165782396976525, "grad_norm": 0.36110997200012207, "learning_rate": 1.3146390314321944e-05, "loss": 0.05, "step": 113280 }, { "epoch": 4.1169416382004504, "grad_norm": 0.2957223653793335, "learning_rate": 1.3141821649296803e-05, "loss": 0.0494, "step": 113290 }, { "epoch": 4.117305036703248, "grad_norm": 0.17959628999233246, "learning_rate": 1.31372534951984e-05, "loss": 0.0549, "step": 113300 }, { "epoch": 4.117668435206047, "grad_norm": 
0.5282506942749023, "learning_rate": 1.3132685852223526e-05, "loss": 0.9037, "step": 113310 }, { "epoch": 4.118031833708845, "grad_norm": 0.44463062286376953, "learning_rate": 1.3128118720569002e-05, "loss": 0.0727, "step": 113320 }, { "epoch": 4.118395232211643, "grad_norm": 0.9258912801742554, "learning_rate": 1.3123552100431593e-05, "loss": 0.0559, "step": 113330 }, { "epoch": 4.118758630714441, "grad_norm": 0.6866888999938965, "learning_rate": 1.311898599200807e-05, "loss": 0.047, "step": 113340 }, { "epoch": 4.119122029217239, "grad_norm": 0.4890584647655487, "learning_rate": 1.3114420395495164e-05, "loss": 0.0588, "step": 113350 }, { "epoch": 4.119485427720038, "grad_norm": 0.5454927682876587, "learning_rate": 1.3109855311089606e-05, "loss": 0.0704, "step": 113360 }, { "epoch": 4.119848826222836, "grad_norm": 0.9774706959724426, "learning_rate": 1.3105290738988068e-05, "loss": 0.0824, "step": 113370 }, { "epoch": 4.120212224725634, "grad_norm": 0.5349249243736267, "learning_rate": 1.3100726679387228e-05, "loss": 0.0626, "step": 113380 }, { "epoch": 4.120575623228432, "grad_norm": 0.38488900661468506, "learning_rate": 1.3096163132483741e-05, "loss": 1.1301, "step": 113390 }, { "epoch": 4.12093902173123, "grad_norm": 0.48048198223114014, "learning_rate": 1.3091600098474238e-05, "loss": 0.0758, "step": 113400 }, { "epoch": 4.12093902173123, "eval_loss": 0.2987889349460602, "eval_runtime": 178.5175, "eval_samples_per_second": 41.531, "eval_steps_per_second": 5.193, "eval_wer": 0.12876903806705756, "step": 113400 }, { "epoch": 4.121302420234029, "grad_norm": 0.39969778060913086, "learning_rate": 1.3087037577555309e-05, "loss": 0.0697, "step": 113410 }, { "epoch": 4.121665818736827, "grad_norm": 1.1826426982879639, "learning_rate": 1.3082475569923553e-05, "loss": 0.0586, "step": 113420 }, { "epoch": 4.122029217239625, "grad_norm": 0.36098846793174744, "learning_rate": 1.3077914075775499e-05, "loss": 0.0632, "step": 113430 }, { "epoch": 4.122392615742423, 
"grad_norm": 0.37273460626602173, "learning_rate": 1.3073353095307733e-05, "loss": 0.0491, "step": 113440 }, { "epoch": 4.122756014245221, "grad_norm": 0.4271377921104431, "learning_rate": 1.3068792628716736e-05, "loss": 0.067, "step": 113450 }, { "epoch": 4.12311941274802, "grad_norm": 0.5918843746185303, "learning_rate": 1.3064232676199023e-05, "loss": 0.0751, "step": 113460 }, { "epoch": 4.123482811250818, "grad_norm": 1.6499254703521729, "learning_rate": 1.3059673237951044e-05, "loss": 0.0618, "step": 113470 }, { "epoch": 4.123846209753616, "grad_norm": 0.35528233647346497, "learning_rate": 1.3055114314169265e-05, "loss": 0.0704, "step": 113480 }, { "epoch": 4.124209608256414, "grad_norm": 0.2478209286928177, "learning_rate": 1.3050555905050107e-05, "loss": 0.0519, "step": 113490 }, { "epoch": 4.124573006759213, "grad_norm": 0.3479679524898529, "learning_rate": 1.304599801078999e-05, "loss": 0.0691, "step": 113500 }, { "epoch": 4.1249364052620106, "grad_norm": 0.5923532843589783, "learning_rate": 1.3041440631585278e-05, "loss": 0.0537, "step": 113510 }, { "epoch": 4.1252998037648085, "grad_norm": 0.7202960848808289, "learning_rate": 1.3036883767632339e-05, "loss": 0.0693, "step": 113520 }, { "epoch": 4.1256632022676065, "grad_norm": 0.3048873841762543, "learning_rate": 1.3032327419127513e-05, "loss": 0.0655, "step": 113530 }, { "epoch": 4.1260266007704045, "grad_norm": 0.3401569724082947, "learning_rate": 1.3027771586267129e-05, "loss": 0.0435, "step": 113540 }, { "epoch": 4.126389999273203, "grad_norm": 1.8426971435546875, "learning_rate": 1.3023216269247457e-05, "loss": 0.0668, "step": 113550 }, { "epoch": 4.126753397776001, "grad_norm": 0.6817682385444641, "learning_rate": 1.3018661468264795e-05, "loss": 0.0567, "step": 113560 }, { "epoch": 4.127116796278799, "grad_norm": 0.8271031379699707, "learning_rate": 1.3014107183515362e-05, "loss": 0.1007, "step": 113570 }, { "epoch": 4.127480194781597, "grad_norm": 0.24805913865566254, "learning_rate": 
1.300955341519542e-05, "loss": 0.0572, "step": 113580 }, { "epoch": 4.127843593284395, "grad_norm": 15.380496978759766, "learning_rate": 1.3005000163501152e-05, "loss": 0.0565, "step": 113590 }, { "epoch": 4.128206991787194, "grad_norm": 2.278918981552124, "learning_rate": 1.300044742862876e-05, "loss": 0.0818, "step": 113600 }, { "epoch": 4.128570390289992, "grad_norm": 0.5283384919166565, "learning_rate": 1.2995895210774381e-05, "loss": 0.049, "step": 113610 }, { "epoch": 4.12893378879279, "grad_norm": 2.944115161895752, "learning_rate": 1.299134351013417e-05, "loss": 0.0599, "step": 113620 }, { "epoch": 4.129297187295588, "grad_norm": 1.6555734872817993, "learning_rate": 1.2986792326904235e-05, "loss": 0.0717, "step": 113630 }, { "epoch": 4.129660585798386, "grad_norm": 0.5407847762107849, "learning_rate": 1.2982241661280688e-05, "loss": 0.053, "step": 113640 }, { "epoch": 4.130023984301185, "grad_norm": 0.7451368570327759, "learning_rate": 1.2977691513459578e-05, "loss": 0.0681, "step": 113650 }, { "epoch": 4.130387382803983, "grad_norm": 5.28464937210083, "learning_rate": 1.2973141883636978e-05, "loss": 0.0695, "step": 113660 }, { "epoch": 4.130750781306781, "grad_norm": 0.9070919156074524, "learning_rate": 1.296859277200888e-05, "loss": 0.0868, "step": 113670 }, { "epoch": 4.131114179809579, "grad_norm": 0.5017779469490051, "learning_rate": 1.2964044178771333e-05, "loss": 0.0691, "step": 113680 }, { "epoch": 4.131477578312377, "grad_norm": 0.41018345952033997, "learning_rate": 1.295949610412029e-05, "loss": 0.0495, "step": 113690 }, { "epoch": 4.131840976815176, "grad_norm": 1.58717942237854, "learning_rate": 1.2954948548251724e-05, "loss": 0.0719, "step": 113700 }, { "epoch": 4.132204375317974, "grad_norm": 0.3683645725250244, "learning_rate": 1.2950401511361554e-05, "loss": 0.0739, "step": 113710 }, { "epoch": 4.132567773820772, "grad_norm": 0.510909914970398, "learning_rate": 1.2945854993645726e-05, "loss": 0.08, "step": 113720 }, { "epoch": 
4.13293117232357, "grad_norm": 1.1862256526947021, "learning_rate": 1.2941308995300111e-05, "loss": 0.057, "step": 113730 }, { "epoch": 4.133294570826369, "grad_norm": 0.42124128341674805, "learning_rate": 1.2936763516520595e-05, "loss": 0.0452, "step": 113740 }, { "epoch": 4.133657969329167, "grad_norm": 0.6174753308296204, "learning_rate": 1.2932218557503007e-05, "loss": 0.0952, "step": 113750 }, { "epoch": 4.134021367831965, "grad_norm": 0.9886456727981567, "learning_rate": 1.2927674118443184e-05, "loss": 0.0912, "step": 113760 }, { "epoch": 4.134384766334763, "grad_norm": 0.4314543306827545, "learning_rate": 1.292313019953693e-05, "loss": 0.0655, "step": 113770 }, { "epoch": 4.134748164837561, "grad_norm": 0.4748517870903015, "learning_rate": 1.2918586800980037e-05, "loss": 0.0575, "step": 113780 }, { "epoch": 4.1351115633403595, "grad_norm": 0.5830983519554138, "learning_rate": 1.2914043922968244e-05, "loss": 0.0471, "step": 113790 }, { "epoch": 4.135474961843157, "grad_norm": 0.43536341190338135, "learning_rate": 1.2909501565697305e-05, "loss": 0.0533, "step": 113800 }, { "epoch": 4.135838360345955, "grad_norm": 0.5548887252807617, "learning_rate": 1.2904959729362904e-05, "loss": 0.0665, "step": 113810 }, { "epoch": 4.136201758848753, "grad_norm": 0.49552205204963684, "learning_rate": 1.2900418414160775e-05, "loss": 0.0772, "step": 113820 }, { "epoch": 4.136565157351551, "grad_norm": 0.6507740616798401, "learning_rate": 1.2895877620286556e-05, "loss": 0.0564, "step": 113830 }, { "epoch": 4.13692855585435, "grad_norm": 2.410308361053467, "learning_rate": 1.2891337347935916e-05, "loss": 0.8506, "step": 113840 }, { "epoch": 4.137291954357148, "grad_norm": 1.289736270904541, "learning_rate": 1.2886797597304456e-05, "loss": 0.0775, "step": 113850 }, { "epoch": 4.137655352859946, "grad_norm": 0.49139320850372314, "learning_rate": 1.2882258368587785e-05, "loss": 0.1908, "step": 113860 }, { "epoch": 4.138018751362744, "grad_norm": 13.986310958862305, "learning_rate": 
1.287771966198149e-05, "loss": 0.0872, "step": 113870 }, { "epoch": 4.138382149865542, "grad_norm": 0.320834755897522, "learning_rate": 1.2873181477681134e-05, "loss": 0.0608, "step": 113880 }, { "epoch": 4.138745548368341, "grad_norm": 0.6267417073249817, "learning_rate": 1.2868643815882228e-05, "loss": 0.0474, "step": 113890 }, { "epoch": 4.139108946871139, "grad_norm": 0.6176286935806274, "learning_rate": 1.2864106676780308e-05, "loss": 0.06, "step": 113900 }, { "epoch": 4.139472345373937, "grad_norm": 1.188921570777893, "learning_rate": 1.285957006057083e-05, "loss": 0.0597, "step": 113910 }, { "epoch": 4.139835743876735, "grad_norm": 0.4680976867675781, "learning_rate": 1.2855033967449304e-05, "loss": 0.0616, "step": 113920 }, { "epoch": 4.140199142379533, "grad_norm": 1.202904462814331, "learning_rate": 1.2850498397611144e-05, "loss": 0.0528, "step": 113930 }, { "epoch": 4.140562540882332, "grad_norm": 0.6726404428482056, "learning_rate": 1.2845963351251786e-05, "loss": 0.0571, "step": 113940 }, { "epoch": 4.14092593938513, "grad_norm": 0.5581681132316589, "learning_rate": 1.2841428828566604e-05, "loss": 0.069, "step": 113950 }, { "epoch": 4.141289337887928, "grad_norm": 1.7517188787460327, "learning_rate": 1.2836894829751015e-05, "loss": 0.0647, "step": 113960 }, { "epoch": 4.141652736390726, "grad_norm": 0.6693496108055115, "learning_rate": 1.2832361355000339e-05, "loss": 0.0517, "step": 113970 }, { "epoch": 4.142016134893524, "grad_norm": 0.4584248661994934, "learning_rate": 1.2827828404509935e-05, "loss": 0.0677, "step": 113980 }, { "epoch": 4.142379533396323, "grad_norm": 0.39411190152168274, "learning_rate": 1.282329597847508e-05, "loss": 0.05, "step": 113990 }, { "epoch": 4.142742931899121, "grad_norm": 0.3312693238258362, "learning_rate": 1.2818764077091077e-05, "loss": 0.0519, "step": 114000 }, { "epoch": 4.142742931899121, "eval_loss": 0.3173038363456726, "eval_runtime": 178.9243, "eval_samples_per_second": 41.437, "eval_steps_per_second": 5.181, 
"eval_wer": 0.1280792201426834, "step": 114000 }, { "epoch": 4.143106330401919, "grad_norm": 0.6391065716743469, "learning_rate": 1.2814232700553191e-05, "loss": 0.0669, "step": 114010 }, { "epoch": 4.143469728904717, "grad_norm": 0.6498408317565918, "learning_rate": 1.2809701849056671e-05, "loss": 0.0594, "step": 114020 }, { "epoch": 4.143833127407515, "grad_norm": 0.3123835623264313, "learning_rate": 1.2805171522796715e-05, "loss": 0.0644, "step": 114030 }, { "epoch": 4.1441965259103135, "grad_norm": 1.046025276184082, "learning_rate": 1.2800641721968537e-05, "loss": 0.0546, "step": 114040 }, { "epoch": 4.1445599244131115, "grad_norm": 0.5524206757545471, "learning_rate": 1.2796112446767286e-05, "loss": 0.0877, "step": 114050 }, { "epoch": 4.1449233229159095, "grad_norm": 0.873522162437439, "learning_rate": 1.2791583697388143e-05, "loss": 0.0731, "step": 114060 }, { "epoch": 4.1452867214187075, "grad_norm": 0.4891306161880493, "learning_rate": 1.2787055474026216e-05, "loss": 0.0705, "step": 114070 }, { "epoch": 4.145650119921506, "grad_norm": 0.36137646436691284, "learning_rate": 1.278252777687662e-05, "loss": 0.0818, "step": 114080 }, { "epoch": 4.146013518424304, "grad_norm": 0.3782752454280853, "learning_rate": 1.2778000606134428e-05, "loss": 0.0723, "step": 114090 }, { "epoch": 4.146376916927102, "grad_norm": 0.6145089268684387, "learning_rate": 1.2773473961994697e-05, "loss": 0.0587, "step": 114100 }, { "epoch": 4.1467403154299, "grad_norm": 4.576334476470947, "learning_rate": 1.2768947844652474e-05, "loss": 0.0809, "step": 114110 }, { "epoch": 4.147103713932698, "grad_norm": 0.6847585439682007, "learning_rate": 1.276442225430278e-05, "loss": 0.0586, "step": 114120 }, { "epoch": 4.147467112435497, "grad_norm": 0.6855227947235107, "learning_rate": 1.2759897191140586e-05, "loss": 0.0534, "step": 114130 }, { "epoch": 4.147830510938295, "grad_norm": 0.34615084528923035, "learning_rate": 1.2755372655360875e-05, "loss": 0.0522, "step": 114140 }, { "epoch": 
4.148193909441093, "grad_norm": 1.5634496212005615, "learning_rate": 1.2750848647158586e-05, "loss": 0.0662, "step": 114150 }, { "epoch": 4.148557307943891, "grad_norm": 0.3985532224178314, "learning_rate": 1.2746325166728656e-05, "loss": 0.0716, "step": 114160 }, { "epoch": 4.148920706446689, "grad_norm": 1.0279290676116943, "learning_rate": 1.2741802214265969e-05, "loss": 0.0597, "step": 114170 }, { "epoch": 4.149284104949488, "grad_norm": 0.3082030713558197, "learning_rate": 1.2737279789965417e-05, "loss": 0.0802, "step": 114180 }, { "epoch": 4.149647503452286, "grad_norm": 0.3072323203086853, "learning_rate": 1.2732757894021829e-05, "loss": 0.0595, "step": 114190 }, { "epoch": 4.150010901955084, "grad_norm": 0.3936696946620941, "learning_rate": 1.2728236526630077e-05, "loss": 0.0528, "step": 114200 }, { "epoch": 4.150374300457882, "grad_norm": 0.5724795460700989, "learning_rate": 1.2723715687984938e-05, "loss": 0.1364, "step": 114210 }, { "epoch": 4.15073769896068, "grad_norm": 0.5028474926948547, "learning_rate": 1.2719195378281223e-05, "loss": 0.0613, "step": 114220 }, { "epoch": 4.151101097463479, "grad_norm": 0.5082797408103943, "learning_rate": 1.2714675597713672e-05, "loss": 0.0633, "step": 114230 }, { "epoch": 4.151464495966277, "grad_norm": 0.3836214244365692, "learning_rate": 1.271015634647704e-05, "loss": 0.0624, "step": 114240 }, { "epoch": 4.151827894469075, "grad_norm": 0.6407490968704224, "learning_rate": 1.2705637624766042e-05, "loss": 0.0571, "step": 114250 }, { "epoch": 4.152191292971873, "grad_norm": 0.4085807204246521, "learning_rate": 1.2701119432775389e-05, "loss": 0.0735, "step": 114260 }, { "epoch": 4.152554691474671, "grad_norm": 3.630969762802124, "learning_rate": 1.2696601770699723e-05, "loss": 0.081, "step": 114270 }, { "epoch": 4.15291808997747, "grad_norm": 2.0950167179107666, "learning_rate": 1.2692084638733725e-05, "loss": 0.0455, "step": 114280 }, { "epoch": 4.153281488480268, "grad_norm": 0.29622146487236023, "learning_rate": 
1.2687568037071989e-05, "loss": 0.0506, "step": 114290 }, { "epoch": 4.1536448869830656, "grad_norm": 0.46987539529800415, "learning_rate": 1.268305196590916e-05, "loss": 0.0421, "step": 114300 }, { "epoch": 4.1540082854858635, "grad_norm": 0.39149653911590576, "learning_rate": 1.2678536425439785e-05, "loss": 0.0598, "step": 114310 }, { "epoch": 4.154371683988662, "grad_norm": 1.2286535501480103, "learning_rate": 1.2674021415858445e-05, "loss": 0.0584, "step": 114320 }, { "epoch": 4.15473508249146, "grad_norm": 0.3485181927680969, "learning_rate": 1.2669506937359649e-05, "loss": 0.0596, "step": 114330 }, { "epoch": 4.155098480994258, "grad_norm": 0.9163201451301575, "learning_rate": 1.2664992990137947e-05, "loss": 0.0466, "step": 114340 }, { "epoch": 4.155461879497056, "grad_norm": 0.2799241244792938, "learning_rate": 1.2660479574387796e-05, "loss": 0.0516, "step": 114350 }, { "epoch": 4.155825277999854, "grad_norm": 0.4756034314632416, "learning_rate": 1.2655966690303689e-05, "loss": 0.0797, "step": 114360 }, { "epoch": 4.156188676502653, "grad_norm": 0.5071646571159363, "learning_rate": 1.2651454338080043e-05, "loss": 0.0625, "step": 114370 }, { "epoch": 4.156552075005451, "grad_norm": 0.2303503006696701, "learning_rate": 1.2646942517911298e-05, "loss": 0.1184, "step": 114380 }, { "epoch": 4.156915473508249, "grad_norm": 0.3640551269054413, "learning_rate": 1.2642431229991847e-05, "loss": 0.0476, "step": 114390 }, { "epoch": 4.157278872011047, "grad_norm": 0.9771270155906677, "learning_rate": 1.2637920474516074e-05, "loss": 0.0596, "step": 114400 }, { "epoch": 4.157642270513845, "grad_norm": 0.4869442880153656, "learning_rate": 1.2633410251678313e-05, "loss": 0.0765, "step": 114410 }, { "epoch": 4.158005669016644, "grad_norm": 5.728058338165283, "learning_rate": 1.2628900561672913e-05, "loss": 0.0707, "step": 114420 }, { "epoch": 4.158369067519442, "grad_norm": 0.33092445135116577, "learning_rate": 1.2624391404694156e-05, "loss": 0.0514, "step": 114430 }, { 
"epoch": 4.15873246602224, "grad_norm": 0.5583271384239197, "learning_rate": 1.2619882780936358e-05, "loss": 0.0618, "step": 114440 }, { "epoch": 4.159095864525038, "grad_norm": 0.8753048777580261, "learning_rate": 1.2615374690593751e-05, "loss": 0.0672, "step": 114450 }, { "epoch": 4.159459263027836, "grad_norm": 0.7249387502670288, "learning_rate": 1.2610867133860594e-05, "loss": 0.0619, "step": 114460 }, { "epoch": 4.159822661530635, "grad_norm": 1.9549199342727661, "learning_rate": 1.2606360110931081e-05, "loss": 0.0645, "step": 114470 }, { "epoch": 4.160186060033433, "grad_norm": 1.0763561725616455, "learning_rate": 1.2601853621999419e-05, "loss": 0.0519, "step": 114480 }, { "epoch": 4.160549458536231, "grad_norm": 0.9392730593681335, "learning_rate": 1.2597347667259768e-05, "loss": 0.047, "step": 114490 }, { "epoch": 4.160912857039029, "grad_norm": 7.717432022094727, "learning_rate": 1.2592842246906286e-05, "loss": 0.0801, "step": 114500 }, { "epoch": 4.161276255541827, "grad_norm": 0.4307349920272827, "learning_rate": 1.2588337361133079e-05, "loss": 0.0749, "step": 114510 }, { "epoch": 4.161639654044626, "grad_norm": 0.6074416041374207, "learning_rate": 1.2583833010134255e-05, "loss": 0.0548, "step": 114520 }, { "epoch": 4.162003052547424, "grad_norm": 0.41101697087287903, "learning_rate": 1.257932919410389e-05, "loss": 0.0763, "step": 114530 }, { "epoch": 4.162366451050222, "grad_norm": 0.47247016429901123, "learning_rate": 1.2574825913236043e-05, "loss": 0.0527, "step": 114540 }, { "epoch": 4.16272984955302, "grad_norm": 0.8120209574699402, "learning_rate": 1.257032316772473e-05, "loss": 0.053, "step": 114550 }, { "epoch": 4.163093248055818, "grad_norm": 0.4224017560482025, "learning_rate": 1.256582095776398e-05, "loss": 0.0652, "step": 114560 }, { "epoch": 4.1634566465586165, "grad_norm": 0.7514461278915405, "learning_rate": 1.256131928354774e-05, "loss": 0.0596, "step": 114570 }, { "epoch": 4.1638200450614145, "grad_norm": 0.5608872175216675, 
"learning_rate": 1.2556818145270017e-05, "loss": 0.0599, "step": 114580 }, { "epoch": 4.164183443564212, "grad_norm": 0.22857458889484406, "learning_rate": 1.2552317543124717e-05, "loss": 0.0619, "step": 114590 }, { "epoch": 4.16454684206701, "grad_norm": 0.5940126180648804, "learning_rate": 1.2547817477305773e-05, "loss": 0.0539, "step": 114600 }, { "epoch": 4.16454684206701, "eval_loss": 0.32606130838394165, "eval_runtime": 179.2442, "eval_samples_per_second": 41.363, "eval_steps_per_second": 5.172, "eval_wer": 0.12764354566413127, "step": 114600 }, { "epoch": 4.164910240569808, "grad_norm": 0.45421409606933594, "learning_rate": 1.2543317948007063e-05, "loss": 0.0765, "step": 114610 }, { "epoch": 4.165273639072607, "grad_norm": 1.46690833568573, "learning_rate": 1.253881895542246e-05, "loss": 0.0599, "step": 114620 }, { "epoch": 4.165637037575405, "grad_norm": 0.26001593470573425, "learning_rate": 1.2534320499745811e-05, "loss": 0.0466, "step": 114630 }, { "epoch": 4.166000436078203, "grad_norm": 0.26628535985946655, "learning_rate": 1.2529822581170947e-05, "loss": 0.0494, "step": 114640 }, { "epoch": 4.166363834581001, "grad_norm": 0.5085469484329224, "learning_rate": 1.2525325199891653e-05, "loss": 0.0526, "step": 114650 }, { "epoch": 4.1667272330838, "grad_norm": 31.338340759277344, "learning_rate": 1.2520828356101716e-05, "loss": 0.0675, "step": 114660 }, { "epoch": 4.167090631586598, "grad_norm": 1.0442249774932861, "learning_rate": 1.2516332049994866e-05, "loss": 0.0646, "step": 114670 }, { "epoch": 4.167454030089396, "grad_norm": 0.6965683102607727, "learning_rate": 1.251183628176487e-05, "loss": 0.0552, "step": 114680 }, { "epoch": 4.167817428592194, "grad_norm": 0.42971551418304443, "learning_rate": 1.250734105160541e-05, "loss": 0.0583, "step": 114690 }, { "epoch": 4.168180827094992, "grad_norm": 0.34043270349502563, "learning_rate": 1.250284635971018e-05, "loss": 0.0619, "step": 114700 }, { "epoch": 4.168544225597791, "grad_norm": 0.6225563287734985, 
"learning_rate": 1.249835220627282e-05, "loss": 0.0668, "step": 114710 }, { "epoch": 4.168907624100589, "grad_norm": 0.7544811367988586, "learning_rate": 1.2493858591486998e-05, "loss": 0.0522, "step": 114720 }, { "epoch": 4.169271022603387, "grad_norm": 0.51103675365448, "learning_rate": 1.2489365515546306e-05, "loss": 0.051, "step": 114730 }, { "epoch": 4.169634421106185, "grad_norm": 0.8677123188972473, "learning_rate": 1.2484872978644349e-05, "loss": 0.0456, "step": 114740 }, { "epoch": 4.169997819608983, "grad_norm": 0.3584132790565491, "learning_rate": 1.2480380980974676e-05, "loss": 0.0679, "step": 114750 }, { "epoch": 4.170361218111782, "grad_norm": 0.36030539870262146, "learning_rate": 1.247588952273084e-05, "loss": 0.0744, "step": 114760 }, { "epoch": 4.17072461661458, "grad_norm": 3.283201217651367, "learning_rate": 1.2471398604106368e-05, "loss": 0.0603, "step": 114770 }, { "epoch": 4.171088015117378, "grad_norm": 0.437751829624176, "learning_rate": 1.246690822529476e-05, "loss": 0.0508, "step": 114780 }, { "epoch": 4.171451413620176, "grad_norm": 0.37101438641548157, "learning_rate": 1.2462418386489474e-05, "loss": 0.0639, "step": 114790 }, { "epoch": 4.171814812122974, "grad_norm": 0.5961673259735107, "learning_rate": 1.2457929087883982e-05, "loss": 0.0548, "step": 114800 }, { "epoch": 4.1721782106257725, "grad_norm": 0.9467266798019409, "learning_rate": 1.2453440329671682e-05, "loss": 0.0457, "step": 114810 }, { "epoch": 4.1725416091285705, "grad_norm": 1.0820558071136475, "learning_rate": 1.2448952112046014e-05, "loss": 0.0864, "step": 114820 }, { "epoch": 4.1729050076313685, "grad_norm": 1.219152569770813, "learning_rate": 1.2444464435200335e-05, "loss": 0.0458, "step": 114830 }, { "epoch": 4.1732684061341665, "grad_norm": 0.5412958264350891, "learning_rate": 1.2439977299328021e-05, "loss": 0.0587, "step": 114840 }, { "epoch": 4.1736318046369645, "grad_norm": 1.5153650045394897, "learning_rate": 1.2435490704622384e-05, "loss": 0.0722, "step": 
114850 }, { "epoch": 4.173995203139763, "grad_norm": 0.6175846457481384, "learning_rate": 1.2431004651276751e-05, "loss": 0.0602, "step": 114860 }, { "epoch": 4.174358601642561, "grad_norm": 0.7764977812767029, "learning_rate": 1.2426519139484404e-05, "loss": 0.0556, "step": 114870 }, { "epoch": 4.174722000145359, "grad_norm": 1.1157594919204712, "learning_rate": 1.2422034169438623e-05, "loss": 0.059, "step": 114880 }, { "epoch": 4.175085398648157, "grad_norm": 0.2795602083206177, "learning_rate": 1.2417549741332626e-05, "loss": 0.0476, "step": 114890 }, { "epoch": 4.175448797150956, "grad_norm": 1.7806609869003296, "learning_rate": 1.2413065855359643e-05, "loss": 0.0533, "step": 114900 }, { "epoch": 4.175812195653754, "grad_norm": 0.41895151138305664, "learning_rate": 1.2408582511712865e-05, "loss": 0.0586, "step": 114910 }, { "epoch": 4.176175594156552, "grad_norm": 4.639927387237549, "learning_rate": 1.240409971058548e-05, "loss": 0.0753, "step": 114920 }, { "epoch": 4.17653899265935, "grad_norm": 0.5736679434776306, "learning_rate": 1.239961745217061e-05, "loss": 0.0558, "step": 114930 }, { "epoch": 4.176902391162148, "grad_norm": 0.38681358098983765, "learning_rate": 1.23951357366614e-05, "loss": 0.0807, "step": 114940 }, { "epoch": 4.177265789664947, "grad_norm": 3.046159505844116, "learning_rate": 1.2390654564250926e-05, "loss": 0.0538, "step": 114950 }, { "epoch": 4.177629188167745, "grad_norm": 0.7652380466461182, "learning_rate": 1.2386173935132303e-05, "loss": 0.0783, "step": 114960 }, { "epoch": 4.177992586670543, "grad_norm": 0.5098959803581238, "learning_rate": 1.2381693849498551e-05, "loss": 0.0581, "step": 114970 }, { "epoch": 4.178355985173341, "grad_norm": 0.18062551319599152, "learning_rate": 1.2377214307542729e-05, "loss": 0.0618, "step": 114980 }, { "epoch": 4.178719383676139, "grad_norm": 0.37934646010398865, "learning_rate": 1.2372735309457819e-05, "loss": 0.0653, "step": 114990 }, { "epoch": 4.179082782178938, "grad_norm": 
0.48118042945861816, "learning_rate": 1.2368256855436816e-05, "loss": 0.0598, "step": 115000 }, { "epoch": 4.179446180681736, "grad_norm": 1.4239192008972168, "learning_rate": 1.2363778945672683e-05, "loss": 0.1146, "step": 115010 }, { "epoch": 4.179809579184534, "grad_norm": 0.6761791706085205, "learning_rate": 1.2359301580358362e-05, "loss": 0.0623, "step": 115020 }, { "epoch": 4.180172977687332, "grad_norm": 0.5229442119598389, "learning_rate": 1.2354824759686754e-05, "loss": 0.0637, "step": 115030 }, { "epoch": 4.18053637619013, "grad_norm": 0.4049164354801178, "learning_rate": 1.2350348483850755e-05, "loss": 0.0489, "step": 115040 }, { "epoch": 4.180899774692929, "grad_norm": 0.41811639070510864, "learning_rate": 1.234587275304323e-05, "loss": 0.0697, "step": 115050 }, { "epoch": 4.181263173195727, "grad_norm": 0.35178762674331665, "learning_rate": 1.2341397567457036e-05, "loss": 0.0782, "step": 115060 }, { "epoch": 4.181626571698525, "grad_norm": 0.7727785110473633, "learning_rate": 1.233692292728497e-05, "loss": 0.0621, "step": 115070 }, { "epoch": 4.181989970201323, "grad_norm": 0.7113915681838989, "learning_rate": 1.2332448832719851e-05, "loss": 0.0681, "step": 115080 }, { "epoch": 4.1823533687041206, "grad_norm": 0.6821020841598511, "learning_rate": 1.2327975283954429e-05, "loss": 0.0485, "step": 115090 }, { "epoch": 4.182716767206919, "grad_norm": 0.8478249311447144, "learning_rate": 1.2323502281181464e-05, "loss": 0.0532, "step": 115100 }, { "epoch": 4.183080165709717, "grad_norm": 0.8352124691009521, "learning_rate": 1.2319029824593687e-05, "loss": 0.0708, "step": 115110 }, { "epoch": 4.183443564212515, "grad_norm": 0.6750608086585999, "learning_rate": 1.2314557914383804e-05, "loss": 0.0765, "step": 115120 }, { "epoch": 4.183806962715313, "grad_norm": 0.33730462193489075, "learning_rate": 1.2310086550744474e-05, "loss": 0.0808, "step": 115130 }, { "epoch": 4.184170361218111, "grad_norm": 0.637880265712738, "learning_rate": 1.2305615733868364e-05, 
"loss": 0.057, "step": 115140 }, { "epoch": 4.18453375972091, "grad_norm": 1.4142619371414185, "learning_rate": 1.2301145463948105e-05, "loss": 0.0628, "step": 115150 }, { "epoch": 4.184897158223708, "grad_norm": 0.47432175278663635, "learning_rate": 1.2296675741176316e-05, "loss": 0.0775, "step": 115160 }, { "epoch": 4.185260556726506, "grad_norm": 0.3818480670452118, "learning_rate": 1.2292206565745562e-05, "loss": 0.0738, "step": 115170 }, { "epoch": 4.185623955229304, "grad_norm": 0.458845853805542, "learning_rate": 1.2287737937848412e-05, "loss": 0.0476, "step": 115180 }, { "epoch": 4.185987353732102, "grad_norm": 0.4036356806755066, "learning_rate": 1.2283269857677402e-05, "loss": 0.0485, "step": 115190 }, { "epoch": 4.186350752234901, "grad_norm": 0.7177650332450867, "learning_rate": 1.227880232542506e-05, "loss": 0.0614, "step": 115200 }, { "epoch": 4.186350752234901, "eval_loss": 0.33033162355422974, "eval_runtime": 179.5501, "eval_samples_per_second": 41.292, "eval_steps_per_second": 5.163, "eval_wer": 0.12759816290594878, "step": 115200 }, { "epoch": 4.186714150737699, "grad_norm": 0.61422199010849, "learning_rate": 1.2274335341283851e-05, "loss": 0.0672, "step": 115210 }, { "epoch": 4.187077549240497, "grad_norm": 4.265668869018555, "learning_rate": 1.2269868905446265e-05, "loss": 0.0565, "step": 115220 }, { "epoch": 4.187440947743295, "grad_norm": 1.605099081993103, "learning_rate": 1.2265403018104726e-05, "loss": 0.058, "step": 115230 }, { "epoch": 4.187804346246094, "grad_norm": 0.9331768751144409, "learning_rate": 1.2260937679451659e-05, "loss": 0.6245, "step": 115240 }, { "epoch": 4.188167744748892, "grad_norm": 0.6140132546424866, "learning_rate": 1.2256472889679462e-05, "loss": 0.0672, "step": 115250 }, { "epoch": 4.18853114325169, "grad_norm": 0.43287473917007446, "learning_rate": 1.2252008648980518e-05, "loss": 0.0775, "step": 115260 }, { "epoch": 4.188894541754488, "grad_norm": 18.50632667541504, "learning_rate": 1.2247544957547153e-05, 
"loss": 0.0584, "step": 115270 }, { "epoch": 4.189257940257286, "grad_norm": 0.5341188907623291, "learning_rate": 1.22430818155717e-05, "loss": 0.061, "step": 115280 }, { "epoch": 4.189621338760085, "grad_norm": 2.5828707218170166, "learning_rate": 1.2238619223246464e-05, "loss": 0.06, "step": 115290 }, { "epoch": 4.189984737262883, "grad_norm": 0.912663996219635, "learning_rate": 1.223415718076373e-05, "loss": 0.0714, "step": 115300 }, { "epoch": 4.190348135765681, "grad_norm": 0.7936631441116333, "learning_rate": 1.2229695688315735e-05, "loss": 0.0603, "step": 115310 }, { "epoch": 4.190711534268479, "grad_norm": 1.1647089719772339, "learning_rate": 1.2225234746094713e-05, "loss": 0.0536, "step": 115320 }, { "epoch": 4.191074932771277, "grad_norm": 0.5609497427940369, "learning_rate": 1.2220774354292874e-05, "loss": 0.0611, "step": 115330 }, { "epoch": 4.1914383312740755, "grad_norm": 0.671658456325531, "learning_rate": 1.2216314513102409e-05, "loss": 0.0584, "step": 115340 }, { "epoch": 4.1918017297768735, "grad_norm": 0.7967808246612549, "learning_rate": 1.2211855222715458e-05, "loss": 0.0605, "step": 115350 }, { "epoch": 4.1921651282796715, "grad_norm": 0.4767843782901764, "learning_rate": 1.2207396483324166e-05, "loss": 0.0573, "step": 115360 }, { "epoch": 4.1925285267824695, "grad_norm": 0.601372480392456, "learning_rate": 1.220293829512065e-05, "loss": 0.0879, "step": 115370 }, { "epoch": 4.192891925285267, "grad_norm": 0.32629552483558655, "learning_rate": 1.219892639716179e-05, "loss": 4.6326, "step": 115380 }, { "epoch": 4.193255323788066, "grad_norm": 0.8194393515586853, "learning_rate": 1.2194469256744206e-05, "loss": 0.0553, "step": 115390 }, { "epoch": 4.193618722290864, "grad_norm": 0.4936831593513489, "learning_rate": 1.2190012668071382e-05, "loss": 1.3225, "step": 115400 }, { "epoch": 4.193982120793662, "grad_norm": 0.5675899386405945, "learning_rate": 1.2185556631335335e-05, "loss": 0.0516, "step": 115410 }, { "epoch": 4.19434551929646, 
"grad_norm": 0.462312787771225, "learning_rate": 1.2181101146728069e-05, "loss": 0.0628, "step": 115420 }, { "epoch": 4.194708917799258, "grad_norm": 1.7180153131484985, "learning_rate": 1.2176646214441534e-05, "loss": 0.0525, "step": 115430 }, { "epoch": 4.195072316302057, "grad_norm": 0.39138999581336975, "learning_rate": 1.2172191834667688e-05, "loss": 0.058, "step": 115440 }, { "epoch": 4.195435714804855, "grad_norm": 6.312756538391113, "learning_rate": 1.2167738007598452e-05, "loss": 0.6926, "step": 115450 }, { "epoch": 4.195799113307653, "grad_norm": 0.26518774032592773, "learning_rate": 1.2163284733425743e-05, "loss": 0.0688, "step": 115460 }, { "epoch": 4.196162511810451, "grad_norm": 1.2338483333587646, "learning_rate": 1.215883201234141e-05, "loss": 0.0958, "step": 115470 }, { "epoch": 4.19652591031325, "grad_norm": 0.4234517514705658, "learning_rate": 1.2154379844537315e-05, "loss": 0.0533, "step": 115480 }, { "epoch": 4.196889308816048, "grad_norm": 0.7015941143035889, "learning_rate": 1.2149928230205288e-05, "loss": 0.0646, "step": 115490 }, { "epoch": 4.197252707318846, "grad_norm": 0.7061643004417419, "learning_rate": 1.2145477169537142e-05, "loss": 0.0719, "step": 115500 }, { "epoch": 4.197616105821644, "grad_norm": 0.7136194109916687, "learning_rate": 1.2141026662724638e-05, "loss": 0.1861, "step": 115510 }, { "epoch": 4.197979504324442, "grad_norm": 1.6546835899353027, "learning_rate": 1.2136576709959546e-05, "loss": 0.0674, "step": 115520 }, { "epoch": 4.198342902827241, "grad_norm": 1.6519831418991089, "learning_rate": 1.2132127311433602e-05, "loss": 0.0637, "step": 115530 }, { "epoch": 4.198706301330039, "grad_norm": 0.9831448197364807, "learning_rate": 1.21276784673385e-05, "loss": 0.0655, "step": 115540 }, { "epoch": 4.199069699832837, "grad_norm": 0.7948331832885742, "learning_rate": 1.2123230177865933e-05, "loss": 0.0488, "step": 115550 }, { "epoch": 4.199433098335635, "grad_norm": 0.45860570669174194, "learning_rate": 
1.2118782443207568e-05, "loss": 0.0785, "step": 115560 }, { "epoch": 4.199796496838433, "grad_norm": 1.3545856475830078, "learning_rate": 1.2114335263555033e-05, "loss": 0.086, "step": 115570 }, { "epoch": 4.200159895341232, "grad_norm": 0.6654021739959717, "learning_rate": 1.210988863909994e-05, "loss": 0.0565, "step": 115580 }, { "epoch": 4.20052329384403, "grad_norm": 0.3669990301132202, "learning_rate": 1.210544257003388e-05, "loss": 0.0695, "step": 115590 }, { "epoch": 4.2008866923468275, "grad_norm": 0.6029540300369263, "learning_rate": 1.2100997056548436e-05, "loss": 0.0567, "step": 115600 }, { "epoch": 4.2012500908496255, "grad_norm": 0.5935440063476562, "learning_rate": 1.209655209883512e-05, "loss": 0.0573, "step": 115610 }, { "epoch": 4.2016134893524235, "grad_norm": 1.650621771812439, "learning_rate": 1.2092107697085467e-05, "loss": 0.09, "step": 115620 }, { "epoch": 4.201976887855222, "grad_norm": 0.2956644296646118, "learning_rate": 1.2087663851490963e-05, "loss": 0.0537, "step": 115630 }, { "epoch": 4.20234028635802, "grad_norm": 0.602063775062561, "learning_rate": 1.2083220562243094e-05, "loss": 0.0626, "step": 115640 }, { "epoch": 4.202703684860818, "grad_norm": 0.6536275744438171, "learning_rate": 1.2078777829533283e-05, "loss": 0.0556, "step": 115650 }, { "epoch": 4.203067083363616, "grad_norm": 0.5045903325080872, "learning_rate": 1.207433565355296e-05, "loss": 0.4054, "step": 115660 }, { "epoch": 4.203430481866414, "grad_norm": 0.513131856918335, "learning_rate": 1.2069894034493534e-05, "loss": 0.0532, "step": 115670 }, { "epoch": 4.203793880369213, "grad_norm": 0.37641045451164246, "learning_rate": 1.2065452972546359e-05, "loss": 1.5798, "step": 115680 }, { "epoch": 4.204157278872011, "grad_norm": 0.26772618293762207, "learning_rate": 1.2061012467902797e-05, "loss": 0.0484, "step": 115690 }, { "epoch": 4.204520677374809, "grad_norm": 2.2048678398132324, "learning_rate": 1.2056572520754175e-05, "loss": 0.0441, "step": 115700 }, { "epoch": 
4.204884075877607, "grad_norm": 0.37708210945129395, "learning_rate": 1.2052133131291785e-05, "loss": 0.0616, "step": 115710 }, { "epoch": 4.205247474380405, "grad_norm": 0.9134958982467651, "learning_rate": 1.2047694299706908e-05, "loss": 0.0575, "step": 115720 }, { "epoch": 4.205610872883204, "grad_norm": 2.1388869285583496, "learning_rate": 1.2043256026190799e-05, "loss": 0.72, "step": 115730 }, { "epoch": 4.205974271386002, "grad_norm": 0.324491947889328, "learning_rate": 1.2038818310934697e-05, "loss": 0.0527, "step": 115740 }, { "epoch": 4.2063376698888, "grad_norm": 0.5549638271331787, "learning_rate": 1.203438115412979e-05, "loss": 0.0604, "step": 115750 }, { "epoch": 4.206701068391598, "grad_norm": 0.425426721572876, "learning_rate": 1.2029944555967265e-05, "loss": 0.0594, "step": 115760 }, { "epoch": 4.207064466894396, "grad_norm": 0.5203282833099365, "learning_rate": 1.2025508516638292e-05, "loss": 0.0615, "step": 115770 }, { "epoch": 4.207427865397195, "grad_norm": 0.37468624114990234, "learning_rate": 1.2021073036333985e-05, "loss": 0.0637, "step": 115780 }, { "epoch": 4.207791263899993, "grad_norm": 0.9887217879295349, "learning_rate": 1.201663811524546e-05, "loss": 0.0473, "step": 115790 }, { "epoch": 4.208154662402791, "grad_norm": 2.432356357574463, "learning_rate": 1.2012203753563805e-05, "loss": 0.0661, "step": 115800 }, { "epoch": 4.208154662402791, "eval_loss": 0.3070759177207947, "eval_runtime": 179.1817, "eval_samples_per_second": 41.377, "eval_steps_per_second": 5.174, "eval_wer": 0.12718064153066966, "step": 115800 }, { "epoch": 4.208518060905589, "grad_norm": 0.36186277866363525, "learning_rate": 1.2007769951480088e-05, "loss": 0.0914, "step": 115810 }, { "epoch": 4.208881459408388, "grad_norm": 0.5292090773582458, "learning_rate": 1.2003336709185329e-05, "loss": 0.0758, "step": 115820 }, { "epoch": 4.209244857911186, "grad_norm": 0.5596882104873657, "learning_rate": 1.199890402687055e-05, "loss": 0.0745, "step": 115830 }, { "epoch": 
4.209608256413984, "grad_norm": 0.5243220329284668, "learning_rate": 1.1994471904726737e-05, "loss": 0.0557, "step": 115840 }, { "epoch": 4.209971654916782, "grad_norm": 0.4961388111114502, "learning_rate": 1.1990040342944863e-05, "loss": 0.0517, "step": 115850 }, { "epoch": 4.21033505341958, "grad_norm": 0.29883086681365967, "learning_rate": 1.1985609341715853e-05, "loss": 0.0703, "step": 115860 }, { "epoch": 4.2106984519223785, "grad_norm": 0.7670660614967346, "learning_rate": 1.1981178901230633e-05, "loss": 0.0695, "step": 115870 }, { "epoch": 4.2110618504251764, "grad_norm": 0.2789497375488281, "learning_rate": 1.19767490216801e-05, "loss": 0.0428, "step": 115880 }, { "epoch": 4.211425248927974, "grad_norm": 0.6327366828918457, "learning_rate": 1.1972319703255107e-05, "loss": 0.0506, "step": 115890 }, { "epoch": 4.211788647430772, "grad_norm": 0.4748649597167969, "learning_rate": 1.1967890946146507e-05, "loss": 0.065, "step": 115900 }, { "epoch": 4.21215204593357, "grad_norm": 0.7210052013397217, "learning_rate": 1.1963462750545123e-05, "loss": 0.0714, "step": 115910 }, { "epoch": 4.212515444436369, "grad_norm": 0.351001113653183, "learning_rate": 1.195903511664174e-05, "loss": 0.0614, "step": 115920 }, { "epoch": 4.212878842939167, "grad_norm": 0.3562362790107727, "learning_rate": 1.195460804462713e-05, "loss": 0.0528, "step": 115930 }, { "epoch": 4.213242241441965, "grad_norm": 0.5658897161483765, "learning_rate": 1.1950181534692046e-05, "loss": 0.0463, "step": 115940 }, { "epoch": 4.213605639944763, "grad_norm": 0.5244725942611694, "learning_rate": 1.1945755587027216e-05, "loss": 0.051, "step": 115950 }, { "epoch": 4.213969038447561, "grad_norm": 0.6189519166946411, "learning_rate": 1.1941330201823322e-05, "loss": 0.0716, "step": 115960 }, { "epoch": 4.21433243695036, "grad_norm": 0.7561732530593872, "learning_rate": 1.1936905379271046e-05, "loss": 0.0704, "step": 115970 }, { "epoch": 4.214695835453158, "grad_norm": 0.5961002111434937, "learning_rate": 
1.193248111956104e-05, "loss": 0.0562, "step": 115980 }, { "epoch": 4.215059233955956, "grad_norm": 0.6460363864898682, "learning_rate": 1.1928057422883937e-05, "loss": 0.0485, "step": 115990 }, { "epoch": 4.215422632458754, "grad_norm": 0.9474358558654785, "learning_rate": 1.1923634289430321e-05, "loss": 0.1026, "step": 116000 }, { "epoch": 4.215786030961552, "grad_norm": 0.29789021611213684, "learning_rate": 1.1919211719390785e-05, "loss": 0.0677, "step": 116010 }, { "epoch": 4.216149429464351, "grad_norm": 0.38266611099243164, "learning_rate": 1.1914789712955868e-05, "loss": 0.0585, "step": 116020 }, { "epoch": 4.216512827967149, "grad_norm": 0.5584983825683594, "learning_rate": 1.1910368270316102e-05, "loss": 0.052, "step": 116030 }, { "epoch": 4.216876226469947, "grad_norm": 0.2954959571361542, "learning_rate": 1.1905947391661995e-05, "loss": 0.0757, "step": 116040 }, { "epoch": 4.217239624972745, "grad_norm": 0.7285795211791992, "learning_rate": 1.1901527077184036e-05, "loss": 0.0704, "step": 116050 }, { "epoch": 4.217603023475544, "grad_norm": 0.4272112548351288, "learning_rate": 1.189710732707266e-05, "loss": 0.0622, "step": 116060 }, { "epoch": 4.217966421978342, "grad_norm": 0.4311266839504242, "learning_rate": 1.189268814151831e-05, "loss": 0.0578, "step": 116070 }, { "epoch": 4.21832982048114, "grad_norm": 0.818369448184967, "learning_rate": 1.1888269520711393e-05, "loss": 0.0706, "step": 116080 }, { "epoch": 4.218693218983938, "grad_norm": 2.2008750438690186, "learning_rate": 1.1883851464842299e-05, "loss": 0.0473, "step": 116090 }, { "epoch": 4.219056617486736, "grad_norm": 0.7152899503707886, "learning_rate": 1.187943397410137e-05, "loss": 0.0477, "step": 116100 }, { "epoch": 4.2194200159895345, "grad_norm": 1.1275982856750488, "learning_rate": 1.1875017048678947e-05, "loss": 0.0679, "step": 116110 }, { "epoch": 4.2197834144923325, "grad_norm": 0.5166335701942444, "learning_rate": 1.1870600688765337e-05, "loss": 0.057, "step": 116120 }, { "epoch": 
4.2201468129951305, "grad_norm": 0.6710319519042969, "learning_rate": 1.1866184894550845e-05, "loss": 0.0873, "step": 116130 }, { "epoch": 4.2205102114979285, "grad_norm": 0.6926589012145996, "learning_rate": 1.18617696662257e-05, "loss": 0.0508, "step": 116140 }, { "epoch": 4.2208736100007265, "grad_norm": 0.46043068170547485, "learning_rate": 1.1857355003980167e-05, "loss": 0.0598, "step": 116150 }, { "epoch": 4.221237008503525, "grad_norm": 0.6792109608650208, "learning_rate": 1.1852940908004426e-05, "loss": 0.0506, "step": 116160 }, { "epoch": 4.221600407006323, "grad_norm": 0.7868732213973999, "learning_rate": 1.1848527378488703e-05, "loss": 0.0671, "step": 116170 }, { "epoch": 4.221963805509121, "grad_norm": 0.4833846688270569, "learning_rate": 1.1844114415623132e-05, "loss": 0.0574, "step": 116180 }, { "epoch": 4.222327204011919, "grad_norm": 0.7338122129440308, "learning_rate": 1.183970201959787e-05, "loss": 0.0572, "step": 116190 }, { "epoch": 4.222690602514717, "grad_norm": 0.42131030559539795, "learning_rate": 1.1835290190603016e-05, "loss": 0.0536, "step": 116200 }, { "epoch": 4.223054001017516, "grad_norm": 0.3166612982749939, "learning_rate": 1.1830878928828668e-05, "loss": 0.0471, "step": 116210 }, { "epoch": 4.223417399520314, "grad_norm": 0.880670964717865, "learning_rate": 1.1826468234464888e-05, "loss": 0.0783, "step": 116220 }, { "epoch": 4.223780798023112, "grad_norm": 0.5158451795578003, "learning_rate": 1.1822058107701733e-05, "loss": 0.0568, "step": 116230 }, { "epoch": 4.22414419652591, "grad_norm": 0.3280569612979889, "learning_rate": 1.1817648548729197e-05, "loss": 0.0477, "step": 116240 }, { "epoch": 4.224507595028708, "grad_norm": 0.4095805585384369, "learning_rate": 1.181323955773728e-05, "loss": 0.0758, "step": 116250 }, { "epoch": 4.224870993531507, "grad_norm": 0.3900350332260132, "learning_rate": 1.1808831134915951e-05, "loss": 0.0553, "step": 116260 }, { "epoch": 4.225234392034305, "grad_norm": 0.6116195321083069, "learning_rate": 
1.1804423280455168e-05, "loss": 0.0631, "step": 116270 }, { "epoch": 4.225597790537103, "grad_norm": 0.317804217338562, "learning_rate": 1.1800015994544822e-05, "loss": 0.0583, "step": 116280 }, { "epoch": 4.225961189039901, "grad_norm": 0.6528775095939636, "learning_rate": 1.1795609277374834e-05, "loss": 0.0456, "step": 116290 }, { "epoch": 4.226324587542699, "grad_norm": 1.3475067615509033, "learning_rate": 1.179120312913504e-05, "loss": 0.0786, "step": 116300 }, { "epoch": 4.226687986045498, "grad_norm": 0.27601638436317444, "learning_rate": 1.1786797550015324e-05, "loss": 0.0654, "step": 116310 }, { "epoch": 4.227051384548296, "grad_norm": 6.547038555145264, "learning_rate": 1.1782392540205481e-05, "loss": 0.0543, "step": 116320 }, { "epoch": 4.227414783051094, "grad_norm": 0.2848477363586426, "learning_rate": 1.1777988099895326e-05, "loss": 0.061, "step": 116330 }, { "epoch": 4.227778181553892, "grad_norm": 2.2146904468536377, "learning_rate": 1.1773584229274609e-05, "loss": 0.0793, "step": 116340 }, { "epoch": 4.22814158005669, "grad_norm": 0.5318537354469299, "learning_rate": 1.1769180928533086e-05, "loss": 0.0623, "step": 116350 }, { "epoch": 4.228504978559489, "grad_norm": 0.993850588798523, "learning_rate": 1.1764778197860482e-05, "loss": 0.0606, "step": 116360 }, { "epoch": 4.228868377062287, "grad_norm": 0.7397371530532837, "learning_rate": 1.1760376037446504e-05, "loss": 0.0746, "step": 116370 }, { "epoch": 4.229231775565085, "grad_norm": 0.6844049096107483, "learning_rate": 1.1755974447480809e-05, "loss": 0.068, "step": 116380 }, { "epoch": 4.2295951740678825, "grad_norm": 0.6430009007453918, "learning_rate": 1.175157342815306e-05, "loss": 0.0447, "step": 116390 }, { "epoch": 4.229958572570681, "grad_norm": 2.6813087463378906, "learning_rate": 1.1747172979652853e-05, "loss": 0.0525, "step": 116400 }, { "epoch": 4.229958572570681, "eval_loss": 0.3230968713760376, "eval_runtime": 180.4532, "eval_samples_per_second": 41.085, "eval_steps_per_second": 
5.137, "eval_wer": 0.12699003394630312, "step": 116400 }, { "epoch": 4.230321971073479, "grad_norm": 8.114737510681152, "learning_rate": 1.1742773102169832e-05, "loss": 0.1612, "step": 116410 }, { "epoch": 4.230685369576277, "grad_norm": 0.7535303235054016, "learning_rate": 1.1738373795893537e-05, "loss": 0.077, "step": 116420 }, { "epoch": 4.231048768079075, "grad_norm": 0.2678695321083069, "learning_rate": 1.1733975061013538e-05, "loss": 0.0473, "step": 116430 }, { "epoch": 4.231412166581873, "grad_norm": 0.7494844794273376, "learning_rate": 1.1729576897719336e-05, "loss": 0.0452, "step": 116440 }, { "epoch": 4.231775565084672, "grad_norm": 1.5150567293167114, "learning_rate": 1.1725179306200467e-05, "loss": 0.0575, "step": 116450 }, { "epoch": 4.23213896358747, "grad_norm": 0.5527431964874268, "learning_rate": 1.1720782286646382e-05, "loss": 0.0643, "step": 116460 }, { "epoch": 4.232502362090268, "grad_norm": 0.6264133453369141, "learning_rate": 1.1716385839246549e-05, "loss": 0.0611, "step": 116470 }, { "epoch": 4.232865760593066, "grad_norm": 0.8068933486938477, "learning_rate": 1.1711989964190376e-05, "loss": 0.0589, "step": 116480 }, { "epoch": 4.233229159095864, "grad_norm": 0.30341091752052307, "learning_rate": 1.170759466166728e-05, "loss": 0.06, "step": 116490 }, { "epoch": 4.233592557598663, "grad_norm": 0.5522620677947998, "learning_rate": 1.1703199931866631e-05, "loss": 0.0596, "step": 116500 }, { "epoch": 4.233955956101461, "grad_norm": 0.29794126749038696, "learning_rate": 1.16988057749778e-05, "loss": 0.0546, "step": 116510 }, { "epoch": 4.234319354604259, "grad_norm": 0.9130751490592957, "learning_rate": 1.169441219119009e-05, "loss": 0.0749, "step": 116520 }, { "epoch": 4.234682753107057, "grad_norm": 0.8570445775985718, "learning_rate": 1.169001918069283e-05, "loss": 0.0569, "step": 116530 }, { "epoch": 4.235046151609855, "grad_norm": 0.38336512446403503, "learning_rate": 1.1685626743675265e-05, "loss": 0.0468, "step": 116540 }, { "epoch": 
4.235409550112654, "grad_norm": 0.8326718807220459, "learning_rate": 1.168123488032669e-05, "loss": 0.0576, "step": 116550 }, { "epoch": 4.235772948615452, "grad_norm": 0.4676150977611542, "learning_rate": 1.1676843590836308e-05, "loss": 0.079, "step": 116560 }, { "epoch": 4.23613634711825, "grad_norm": 0.4062000811100006, "learning_rate": 1.1672452875393339e-05, "loss": 0.0629, "step": 116570 }, { "epoch": 4.236499745621048, "grad_norm": 0.4659651815891266, "learning_rate": 1.166806273418695e-05, "loss": 0.0597, "step": 116580 }, { "epoch": 4.236863144123846, "grad_norm": 0.49495962262153625, "learning_rate": 1.16636731674063e-05, "loss": 0.0542, "step": 116590 }, { "epoch": 4.237226542626645, "grad_norm": 2.226994514465332, "learning_rate": 1.165928417524052e-05, "loss": 0.0659, "step": 116600 }, { "epoch": 4.237589941129443, "grad_norm": 0.429401695728302, "learning_rate": 1.165489575787873e-05, "loss": 0.0694, "step": 116610 }, { "epoch": 4.237953339632241, "grad_norm": 0.5462735295295715, "learning_rate": 1.1650507915509991e-05, "loss": 0.0714, "step": 116620 }, { "epoch": 4.238316738135039, "grad_norm": 0.8976079821586609, "learning_rate": 1.164612064832337e-05, "loss": 0.0481, "step": 116630 }, { "epoch": 4.2386801366378375, "grad_norm": 0.4468517005443573, "learning_rate": 1.1641733956507894e-05, "loss": 0.0467, "step": 116640 }, { "epoch": 4.2390435351406355, "grad_norm": 0.6745365858078003, "learning_rate": 1.1637347840252585e-05, "loss": 0.0673, "step": 116650 }, { "epoch": 4.2394069336434335, "grad_norm": 0.3953607380390167, "learning_rate": 1.1632962299746403e-05, "loss": 0.0659, "step": 116660 }, { "epoch": 4.2397703321462314, "grad_norm": 0.834370493888855, "learning_rate": 1.1628577335178323e-05, "loss": 0.0639, "step": 116670 }, { "epoch": 4.240133730649029, "grad_norm": 3.582240104675293, "learning_rate": 1.1624192946737252e-05, "loss": 0.1856, "step": 116680 }, { "epoch": 4.240497129151828, "grad_norm": 0.36230847239494324, "learning_rate": 
1.1619809134612134e-05, "loss": 0.0512, "step": 116690 }, { "epoch": 4.240860527654626, "grad_norm": 0.6795392632484436, "learning_rate": 1.1615425898991824e-05, "loss": 0.0562, "step": 116700 }, { "epoch": 4.241223926157424, "grad_norm": 0.7256725430488586, "learning_rate": 1.16110432400652e-05, "loss": 0.0618, "step": 116710 }, { "epoch": 4.241587324660222, "grad_norm": 0.428634911775589, "learning_rate": 1.1606661158021076e-05, "loss": 0.0771, "step": 116720 }, { "epoch": 4.24195072316302, "grad_norm": 0.9015412926673889, "learning_rate": 1.1602279653048267e-05, "loss": 0.0741, "step": 116730 }, { "epoch": 4.242314121665819, "grad_norm": 0.2342122346162796, "learning_rate": 1.159789872533556e-05, "loss": 0.043, "step": 116740 }, { "epoch": 4.242677520168617, "grad_norm": 1.0161068439483643, "learning_rate": 1.1593518375071721e-05, "loss": 0.0574, "step": 116750 }, { "epoch": 4.243040918671415, "grad_norm": 0.4920096695423126, "learning_rate": 1.1589138602445465e-05, "loss": 0.0665, "step": 116760 }, { "epoch": 4.243404317174213, "grad_norm": 1.862987995147705, "learning_rate": 1.1584759407645522e-05, "loss": 0.0661, "step": 116770 }, { "epoch": 4.243767715677011, "grad_norm": 0.40719863772392273, "learning_rate": 1.1580380790860545e-05, "loss": 0.0548, "step": 116780 }, { "epoch": 4.24413111417981, "grad_norm": 0.3578474223613739, "learning_rate": 1.157600275227923e-05, "loss": 0.0539, "step": 116790 }, { "epoch": 4.244494512682608, "grad_norm": 0.5226281881332397, "learning_rate": 1.1571625292090185e-05, "loss": 0.0828, "step": 116800 }, { "epoch": 4.244857911185406, "grad_norm": 0.45012742280960083, "learning_rate": 1.1567248410482037e-05, "loss": 0.0675, "step": 116810 }, { "epoch": 4.245221309688204, "grad_norm": 0.42035412788391113, "learning_rate": 1.1562872107643341e-05, "loss": 0.0718, "step": 116820 }, { "epoch": 4.245584708191002, "grad_norm": 0.39669880270957947, "learning_rate": 1.1558496383762699e-05, "loss": 0.0595, "step": 116830 }, { "epoch": 
4.245948106693801, "grad_norm": 0.44172903895378113, "learning_rate": 1.1554121239028609e-05, "loss": 0.0624, "step": 116840 }, { "epoch": 4.246311505196599, "grad_norm": 0.5983627438545227, "learning_rate": 1.1549746673629607e-05, "loss": 0.0551, "step": 116850 }, { "epoch": 4.246674903699397, "grad_norm": 0.221538707613945, "learning_rate": 1.1545372687754153e-05, "loss": 0.069, "step": 116860 }, { "epoch": 4.247038302202195, "grad_norm": 0.4085201025009155, "learning_rate": 1.154099928159072e-05, "loss": 0.0656, "step": 116870 }, { "epoch": 4.247401700704993, "grad_norm": 0.39535167813301086, "learning_rate": 1.153662645532774e-05, "loss": 0.0591, "step": 116880 }, { "epoch": 4.2477650992077916, "grad_norm": 0.38008686900138855, "learning_rate": 1.1532254209153631e-05, "loss": 0.0496, "step": 116890 }, { "epoch": 4.2481284977105895, "grad_norm": 0.6974036693572998, "learning_rate": 1.1527882543256762e-05, "loss": 0.0665, "step": 116900 }, { "epoch": 4.2484918962133875, "grad_norm": 0.5487545132637024, "learning_rate": 1.1523511457825508e-05, "loss": 0.058, "step": 116910 }, { "epoch": 4.2488552947161855, "grad_norm": 1.3574179410934448, "learning_rate": 1.151914095304818e-05, "loss": 0.0948, "step": 116920 }, { "epoch": 4.2492186932189835, "grad_norm": 0.7095970511436462, "learning_rate": 1.1514771029113123e-05, "loss": 0.0727, "step": 116930 }, { "epoch": 4.249582091721782, "grad_norm": 0.5452703237533569, "learning_rate": 1.1510401686208589e-05, "loss": 0.9569, "step": 116940 }, { "epoch": 4.24994549022458, "grad_norm": 0.5960811376571655, "learning_rate": 1.1506032924522857e-05, "loss": 0.0622, "step": 116950 }, { "epoch": 4.250308888727378, "grad_norm": 1.1241512298583984, "learning_rate": 1.150166474424415e-05, "loss": 0.1427, "step": 116960 }, { "epoch": 4.250672287230176, "grad_norm": 0.6698485612869263, "learning_rate": 1.1497297145560682e-05, "loss": 0.0537, "step": 116970 }, { "epoch": 4.251035685732975, "grad_norm": 0.8490001559257507, 
"learning_rate": 1.1492930128660634e-05, "loss": 0.0599, "step": 116980 }, { "epoch": 4.251399084235773, "grad_norm": 0.25859925150871277, "learning_rate": 1.148856369373218e-05, "loss": 1.3398, "step": 116990 }, { "epoch": 4.251762482738571, "grad_norm": 3.991356134414673, "learning_rate": 1.1484197840963434e-05, "loss": 0.075, "step": 117000 }, { "epoch": 4.251762482738571, "eval_loss": 0.29347988963127136, "eval_runtime": 178.752, "eval_samples_per_second": 41.476, "eval_steps_per_second": 5.186, "eval_wer": 0.1268266560168461, "step": 117000 }, { "epoch": 4.252125881241369, "grad_norm": 1.0394984483718872, "learning_rate": 1.1479832570542512e-05, "loss": 0.0679, "step": 117010 }, { "epoch": 4.252489279744167, "grad_norm": 1.0671586990356445, "learning_rate": 1.1475467882657502e-05, "loss": 0.06, "step": 117020 }, { "epoch": 4.252852678246966, "grad_norm": 0.838779091835022, "learning_rate": 1.1471103777496466e-05, "loss": 0.0584, "step": 117030 }, { "epoch": 4.253216076749764, "grad_norm": 0.6783778071403503, "learning_rate": 1.1466740255247423e-05, "loss": 0.0523, "step": 117040 }, { "epoch": 4.253579475252562, "grad_norm": 0.8431203365325928, "learning_rate": 1.1462377316098406e-05, "loss": 0.0776, "step": 117050 }, { "epoch": 4.25394287375536, "grad_norm": 0.4470868706703186, "learning_rate": 1.145801496023736e-05, "loss": 0.0601, "step": 117060 }, { "epoch": 4.254306272258158, "grad_norm": 1.690788745880127, "learning_rate": 1.1453653187852284e-05, "loss": 0.0796, "step": 117070 }, { "epoch": 4.254669670760957, "grad_norm": 0.4969995319843292, "learning_rate": 1.1449291999131087e-05, "loss": 0.0671, "step": 117080 }, { "epoch": 4.255033069263755, "grad_norm": 0.5570054650306702, "learning_rate": 1.1444931394261696e-05, "loss": 0.06, "step": 117090 }, { "epoch": 4.255396467766553, "grad_norm": 0.49795541167259216, "learning_rate": 1.1440571373431969e-05, "loss": 0.0692, "step": 117100 }, { "epoch": 4.255759866269351, "grad_norm": 1.130143642425537, 
"learning_rate": 1.143621193682978e-05, "loss": 0.0659, "step": 117110 }, { "epoch": 4.256123264772149, "grad_norm": 0.8319960236549377, "learning_rate": 1.1431853084642955e-05, "loss": 0.067, "step": 117120 }, { "epoch": 4.256486663274948, "grad_norm": 0.5573909282684326, "learning_rate": 1.1427494817059316e-05, "loss": 0.0506, "step": 117130 }, { "epoch": 4.256850061777746, "grad_norm": 0.8314167261123657, "learning_rate": 1.1423137134266627e-05, "loss": 0.0562, "step": 117140 }, { "epoch": 4.257213460280544, "grad_norm": 0.5405414700508118, "learning_rate": 1.141878003645266e-05, "loss": 0.0602, "step": 117150 }, { "epoch": 4.257576858783342, "grad_norm": 0.3804232180118561, "learning_rate": 1.1414423523805119e-05, "loss": 0.0591, "step": 117160 }, { "epoch": 4.2579402572861405, "grad_norm": 3.5269417762756348, "learning_rate": 1.1410067596511751e-05, "loss": 0.072, "step": 117170 }, { "epoch": 4.258303655788938, "grad_norm": 0.42852017283439636, "learning_rate": 1.1405712254760207e-05, "loss": 0.083, "step": 117180 }, { "epoch": 4.258667054291736, "grad_norm": 0.4363724887371063, "learning_rate": 1.1401357498738163e-05, "loss": 0.0449, "step": 117190 }, { "epoch": 4.259030452794534, "grad_norm": 0.6442729234695435, "learning_rate": 1.139700332863323e-05, "loss": 0.0696, "step": 117200 }, { "epoch": 4.259393851297332, "grad_norm": 1.4724963903427124, "learning_rate": 1.1392649744633024e-05, "loss": 0.0752, "step": 117210 }, { "epoch": 4.259757249800131, "grad_norm": 0.5399142503738403, "learning_rate": 1.1388296746925125e-05, "loss": 0.0695, "step": 117220 }, { "epoch": 4.260120648302929, "grad_norm": 0.37897446751594543, "learning_rate": 1.13839443356971e-05, "loss": 0.0724, "step": 117230 }, { "epoch": 4.260484046805727, "grad_norm": 0.7814234495162964, "learning_rate": 1.1379592511136459e-05, "loss": 0.0523, "step": 117240 }, { "epoch": 4.260847445308525, "grad_norm": 0.28157898783683777, "learning_rate": 1.1375241273430712e-05, "loss": 0.053, "step": 117250 
}, { "epoch": 4.261210843811323, "grad_norm": 0.6805558204650879, "learning_rate": 1.1370890622767343e-05, "loss": 0.0757, "step": 117260 }, { "epoch": 4.261574242314122, "grad_norm": 0.6317630410194397, "learning_rate": 1.1366540559333814e-05, "loss": 0.0698, "step": 117270 }, { "epoch": 4.26193764081692, "grad_norm": 0.19483137130737305, "learning_rate": 1.1362191083317533e-05, "loss": 0.4184, "step": 117280 }, { "epoch": 4.262301039319718, "grad_norm": 0.6760255694389343, "learning_rate": 1.1357842194905927e-05, "loss": 0.0509, "step": 117290 }, { "epoch": 4.262664437822516, "grad_norm": 1.1232966184616089, "learning_rate": 1.1353493894286344e-05, "loss": 0.058, "step": 117300 }, { "epoch": 4.263027836325314, "grad_norm": 0.4030855894088745, "learning_rate": 1.134914618164617e-05, "loss": 0.0867, "step": 117310 }, { "epoch": 4.263391234828113, "grad_norm": 0.3685227036476135, "learning_rate": 1.1344799057172709e-05, "loss": 0.0544, "step": 117320 }, { "epoch": 4.263754633330911, "grad_norm": 1.729017972946167, "learning_rate": 1.134045252105328e-05, "loss": 0.1786, "step": 117330 }, { "epoch": 4.264118031833709, "grad_norm": 0.544093668460846, "learning_rate": 1.1336106573475142e-05, "loss": 0.0524, "step": 117340 }, { "epoch": 4.264481430336507, "grad_norm": 4.73328971862793, "learning_rate": 1.1331761214625556e-05, "loss": 0.0701, "step": 117350 }, { "epoch": 4.264844828839305, "grad_norm": 0.449083536863327, "learning_rate": 1.132741644469175e-05, "loss": 0.0666, "step": 117360 }, { "epoch": 4.265208227342104, "grad_norm": 0.8569865226745605, "learning_rate": 1.1323072263860926e-05, "loss": 0.0601, "step": 117370 }, { "epoch": 4.265571625844902, "grad_norm": 0.3465893864631653, "learning_rate": 1.1318728672320251e-05, "loss": 0.0604, "step": 117380 }, { "epoch": 4.2659350243477, "grad_norm": 0.6943307518959045, "learning_rate": 1.1314385670256885e-05, "loss": 0.059, "step": 117390 }, { "epoch": 4.266298422850498, "grad_norm": 0.5685479044914246, 
"learning_rate": 1.131004325785793e-05, "loss": 0.0594, "step": 117400 }, { "epoch": 4.266661821353296, "grad_norm": 0.3086636960506439, "learning_rate": 1.130570143531052e-05, "loss": 0.0584, "step": 117410 }, { "epoch": 4.2670252198560945, "grad_norm": 0.511326789855957, "learning_rate": 1.1301360202801702e-05, "loss": 0.0608, "step": 117420 }, { "epoch": 4.2673886183588925, "grad_norm": 1.085497498512268, "learning_rate": 1.129745359818137e-05, "loss": 4.1029, "step": 117430 }, { "epoch": 4.2677520168616905, "grad_norm": 0.6486175060272217, "learning_rate": 1.1293113487261193e-05, "loss": 0.1174, "step": 117440 }, { "epoch": 4.2681154153644885, "grad_norm": 0.6041621565818787, "learning_rate": 1.1288773966921993e-05, "loss": 0.1025, "step": 117450 }, { "epoch": 4.2684788138672864, "grad_norm": 0.4472499489784241, "learning_rate": 1.1284435037350718e-05, "loss": 0.0587, "step": 117460 }, { "epoch": 4.268842212370085, "grad_norm": 0.680538535118103, "learning_rate": 1.1280096698734358e-05, "loss": 0.0543, "step": 117470 }, { "epoch": 4.269205610872883, "grad_norm": 0.43602830171585083, "learning_rate": 1.1275758951259807e-05, "loss": 0.0725, "step": 117480 }, { "epoch": 4.269569009375681, "grad_norm": 0.3344304859638214, "learning_rate": 1.1271421795113979e-05, "loss": 0.0581, "step": 117490 }, { "epoch": 4.269932407878479, "grad_norm": 0.3231044411659241, "learning_rate": 1.1267085230483731e-05, "loss": 0.0466, "step": 117500 }, { "epoch": 4.270295806381277, "grad_norm": 1.852975845336914, "learning_rate": 1.1262749257555918e-05, "loss": 0.0667, "step": 117510 }, { "epoch": 4.270659204884076, "grad_norm": 0.6052572727203369, "learning_rate": 1.1258413876517362e-05, "loss": 0.0585, "step": 117520 }, { "epoch": 4.271022603386874, "grad_norm": 1.2631534337997437, "learning_rate": 1.1254079087554869e-05, "loss": 0.0592, "step": 117530 }, { "epoch": 4.271386001889672, "grad_norm": 0.666578471660614, "learning_rate": 1.124974489085519e-05, "loss": 0.0371, "step": 
117540 }, { "epoch": 4.27174940039247, "grad_norm": 7.956512928009033, "learning_rate": 1.1245411286605081e-05, "loss": 0.1486, "step": 117550 }, { "epoch": 4.272112798895269, "grad_norm": 0.3498833477497101, "learning_rate": 1.1241078274991262e-05, "loss": 0.0698, "step": 117560 }, { "epoch": 4.272476197398067, "grad_norm": 1.615881323814392, "learning_rate": 1.1236745856200434e-05, "loss": 0.0541, "step": 117570 }, { "epoch": 4.272839595900865, "grad_norm": 0.6609883308410645, "learning_rate": 1.1232414030419248e-05, "loss": 0.0772, "step": 117580 }, { "epoch": 4.273202994403663, "grad_norm": 0.353644996881485, "learning_rate": 1.1228082797834366e-05, "loss": 0.0519, "step": 117590 }, { "epoch": 4.273566392906461, "grad_norm": 0.2837958037853241, "learning_rate": 1.1223752158632376e-05, "loss": 0.0646, "step": 117600 }, { "epoch": 4.273566392906461, "eval_loss": 0.31549566984176636, "eval_runtime": 179.8198, "eval_samples_per_second": 41.23, "eval_steps_per_second": 5.155, "eval_wer": 0.1261277615408354, "step": 117600 }, { "epoch": 4.27392979140926, "grad_norm": 0.4844183325767517, "learning_rate": 1.1219422112999909e-05, "loss": 0.0665, "step": 117610 }, { "epoch": 4.274293189912058, "grad_norm": 1.0689698457717896, "learning_rate": 1.12150926611235e-05, "loss": 0.076, "step": 117620 }, { "epoch": 4.274656588414856, "grad_norm": 0.6706063747406006, "learning_rate": 1.121076380318971e-05, "loss": 0.064, "step": 117630 }, { "epoch": 4.275019986917654, "grad_norm": 0.5715863108634949, "learning_rate": 1.120643553938504e-05, "loss": 0.0591, "step": 117640 }, { "epoch": 4.275383385420452, "grad_norm": 0.6068958640098572, "learning_rate": 1.1202107869895984e-05, "loss": 0.0556, "step": 117650 }, { "epoch": 4.275746783923251, "grad_norm": 0.4386001527309418, "learning_rate": 1.1197780794909005e-05, "loss": 0.0707, "step": 117660 }, { "epoch": 4.276110182426049, "grad_norm": 0.6598414778709412, "learning_rate": 1.1193454314610555e-05, "loss": 0.0733, "step": 117670 }, 
{ "epoch": 4.2764735809288466, "grad_norm": 0.4910728335380554, "learning_rate": 1.1189128429187023e-05, "loss": 0.0458, "step": 117680 }, { "epoch": 4.2768369794316445, "grad_norm": 1.046238899230957, "learning_rate": 1.1184803138824817e-05, "loss": 0.0553, "step": 117690 }, { "epoch": 4.2772003779344425, "grad_norm": 0.5634602904319763, "learning_rate": 1.1180478443710273e-05, "loss": 0.0646, "step": 117700 }, { "epoch": 4.277563776437241, "grad_norm": 0.5064831376075745, "learning_rate": 1.117615434402976e-05, "loss": 0.0783, "step": 117710 }, { "epoch": 4.277927174940039, "grad_norm": 0.3808704912662506, "learning_rate": 1.1171830839969564e-05, "loss": 0.0757, "step": 117720 }, { "epoch": 4.278290573442837, "grad_norm": 0.29641565680503845, "learning_rate": 1.1167507931715981e-05, "loss": 1.2, "step": 117730 }, { "epoch": 4.278653971945635, "grad_norm": 0.38750988245010376, "learning_rate": 1.116318561945525e-05, "loss": 0.0431, "step": 117740 }, { "epoch": 4.279017370448434, "grad_norm": 0.5684570670127869, "learning_rate": 1.1158863903373635e-05, "loss": 0.0632, "step": 117750 }, { "epoch": 4.279380768951232, "grad_norm": 0.43865761160850525, "learning_rate": 1.115454278365732e-05, "loss": 0.0657, "step": 117760 }, { "epoch": 4.27974416745403, "grad_norm": 0.5715042948722839, "learning_rate": 1.1150222260492502e-05, "loss": 0.0721, "step": 117770 }, { "epoch": 4.280107565956828, "grad_norm": 0.669643759727478, "learning_rate": 1.1145902334065322e-05, "loss": 0.0411, "step": 117780 }, { "epoch": 4.280470964459626, "grad_norm": 0.4652119576931, "learning_rate": 1.1141583004561918e-05, "loss": 0.0529, "step": 117790 }, { "epoch": 4.280834362962425, "grad_norm": 1.1076879501342773, "learning_rate": 1.113726427216839e-05, "loss": 0.0629, "step": 117800 }, { "epoch": 4.281197761465223, "grad_norm": 0.24515122175216675, "learning_rate": 1.1132946137070833e-05, "loss": 0.067, "step": 117810 }, { "epoch": 4.281561159968021, "grad_norm": 1.3377306461334229, 
"learning_rate": 1.1128628599455277e-05, "loss": 0.0648, "step": 117820 }, { "epoch": 4.281924558470819, "grad_norm": 0.7239679098129272, "learning_rate": 1.1124311659507772e-05, "loss": 0.0448, "step": 117830 }, { "epoch": 4.282287956973617, "grad_norm": 0.37537556886672974, "learning_rate": 1.1119995317414286e-05, "loss": 0.0567, "step": 117840 }, { "epoch": 4.282651355476416, "grad_norm": 0.5312022566795349, "learning_rate": 1.1115679573360837e-05, "loss": 0.0473, "step": 117850 }, { "epoch": 4.283014753979214, "grad_norm": 0.6750919818878174, "learning_rate": 1.1111364427533344e-05, "loss": 0.0669, "step": 117860 }, { "epoch": 4.283378152482012, "grad_norm": 1.3904988765716553, "learning_rate": 1.1107049880117754e-05, "loss": 0.1327, "step": 117870 }, { "epoch": 4.28374155098481, "grad_norm": 0.6002905368804932, "learning_rate": 1.110273593129994e-05, "loss": 0.0573, "step": 117880 }, { "epoch": 4.284104949487608, "grad_norm": 0.3468700349330902, "learning_rate": 1.1098422581265791e-05, "loss": 0.0668, "step": 117890 }, { "epoch": 4.284468347990407, "grad_norm": 0.41571733355522156, "learning_rate": 1.109410983020115e-05, "loss": 0.0632, "step": 117900 }, { "epoch": 4.284831746493205, "grad_norm": 0.45861104130744934, "learning_rate": 1.108979767829185e-05, "loss": 0.0725, "step": 117910 }, { "epoch": 4.285195144996003, "grad_norm": 0.44667941331863403, "learning_rate": 1.1085486125723663e-05, "loss": 0.0724, "step": 117920 }, { "epoch": 4.285558543498801, "grad_norm": 0.3753122091293335, "learning_rate": 1.1081175172682372e-05, "loss": 0.058, "step": 117930 }, { "epoch": 4.285921942001599, "grad_norm": 0.5387061834335327, "learning_rate": 1.1076864819353722e-05, "loss": 0.0574, "step": 117940 }, { "epoch": 4.2862853405043975, "grad_norm": 3.1030807495117188, "learning_rate": 1.1072555065923434e-05, "loss": 0.0732, "step": 117950 }, { "epoch": 4.2866487390071955, "grad_norm": 0.2658712565898895, "learning_rate": 1.1068245912577188e-05, "loss": 0.3439, "step": 
117960 }, { "epoch": 4.287012137509993, "grad_norm": 0.5664216876029968, "learning_rate": 1.1063937359500661e-05, "loss": 0.0531, "step": 117970 }, { "epoch": 4.287375536012791, "grad_norm": 0.6745922565460205, "learning_rate": 1.1059629406879474e-05, "loss": 0.0607, "step": 117980 }, { "epoch": 4.287738934515589, "grad_norm": 0.7925942540168762, "learning_rate": 1.1055322054899276e-05, "loss": 0.0445, "step": 117990 }, { "epoch": 4.288102333018388, "grad_norm": 0.41084855794906616, "learning_rate": 1.1051015303745621e-05, "loss": 0.0614, "step": 118000 }, { "epoch": 4.288465731521186, "grad_norm": 0.6917276382446289, "learning_rate": 1.1046709153604099e-05, "loss": 0.0571, "step": 118010 }, { "epoch": 4.288829130023984, "grad_norm": 0.710117757320404, "learning_rate": 1.1042403604660224e-05, "loss": 0.0899, "step": 118020 }, { "epoch": 4.289192528526782, "grad_norm": 0.49841251969337463, "learning_rate": 1.1038098657099514e-05, "loss": 0.0465, "step": 118030 }, { "epoch": 4.28955592702958, "grad_norm": 0.35992223024368286, "learning_rate": 1.1033794311107456e-05, "loss": 0.0502, "step": 118040 }, { "epoch": 4.289919325532379, "grad_norm": 3.4158763885498047, "learning_rate": 1.1029490566869522e-05, "loss": 0.0941, "step": 118050 }, { "epoch": 4.290282724035177, "grad_norm": 0.31620514392852783, "learning_rate": 1.1025187424571121e-05, "loss": 0.0761, "step": 118060 }, { "epoch": 4.290646122537975, "grad_norm": 0.6288778185844421, "learning_rate": 1.102088488439767e-05, "loss": 0.0804, "step": 118070 }, { "epoch": 4.291009521040773, "grad_norm": 0.4508550763130188, "learning_rate": 1.1016582946534554e-05, "loss": 0.0547, "step": 118080 }, { "epoch": 4.291372919543571, "grad_norm": 2.0510289669036865, "learning_rate": 1.1012281611167132e-05, "loss": 0.0452, "step": 118090 }, { "epoch": 4.29173631804637, "grad_norm": 1.4700452089309692, "learning_rate": 1.1007980878480723e-05, "loss": 0.0524, "step": 118100 }, { "epoch": 4.292099716549168, "grad_norm": 
0.46162959933280945, "learning_rate": 1.100368074866064e-05, "loss": 0.0615, "step": 118110 }, { "epoch": 4.292463115051966, "grad_norm": 0.5406383872032166, "learning_rate": 1.0999381221892135e-05, "loss": 0.0654, "step": 118120 }, { "epoch": 4.292826513554764, "grad_norm": 0.6364983916282654, "learning_rate": 1.0995082298360498e-05, "loss": 0.0556, "step": 118130 }, { "epoch": 4.293189912057563, "grad_norm": 0.33140653371810913, "learning_rate": 1.0990783978250926e-05, "loss": 0.0553, "step": 118140 }, { "epoch": 4.293553310560361, "grad_norm": 0.6522350311279297, "learning_rate": 1.0986486261748635e-05, "loss": 0.0553, "step": 118150 }, { "epoch": 4.293916709063159, "grad_norm": 2.0446338653564453, "learning_rate": 1.0982189149038786e-05, "loss": 0.0698, "step": 118160 }, { "epoch": 4.294280107565957, "grad_norm": 0.6550914645195007, "learning_rate": 1.0977892640306525e-05, "loss": 0.0742, "step": 118170 }, { "epoch": 4.294643506068755, "grad_norm": 0.4958548843860626, "learning_rate": 1.0973596735736986e-05, "loss": 0.0391, "step": 118180 }, { "epoch": 4.2950069045715535, "grad_norm": 0.39033403992652893, "learning_rate": 1.0969730938336505e-05, "loss": 2.0204, "step": 118190 }, { "epoch": 4.2953703030743515, "grad_norm": 0.8210968375205994, "learning_rate": 1.0965436182186037e-05, "loss": 0.0694, "step": 118200 }, { "epoch": 4.2953703030743515, "eval_loss": 0.32131335139274597, "eval_runtime": 179.8102, "eval_samples_per_second": 41.232, "eval_steps_per_second": 5.155, "eval_wer": 0.12632744567683846, "step": 118200 }, { "epoch": 4.2957337015771495, "grad_norm": 1.2557179927825928, "learning_rate": 1.0961142030735e-05, "loss": 0.0697, "step": 118210 }, { "epoch": 4.2960971000799475, "grad_norm": 1.0582549571990967, "learning_rate": 1.0956848484168391e-05, "loss": 0.0799, "step": 118220 }, { "epoch": 4.2964604985827455, "grad_norm": 0.43808823823928833, "learning_rate": 1.0952555542671225e-05, "loss": 0.0501, "step": 118230 }, { "epoch": 4.296823897085544, 
"grad_norm": 0.5373784303665161, "learning_rate": 1.0948263206428442e-05, "loss": 0.0528, "step": 118240 }, { "epoch": 4.297187295588342, "grad_norm": 0.5531120896339417, "learning_rate": 1.0943971475625017e-05, "loss": 0.0543, "step": 118250 }, { "epoch": 4.29755069409114, "grad_norm": 9.014130592346191, "learning_rate": 1.0939680350445843e-05, "loss": 0.1086, "step": 118260 }, { "epoch": 4.297914092593938, "grad_norm": 0.986786961555481, "learning_rate": 1.0935389831075826e-05, "loss": 0.0611, "step": 118270 }, { "epoch": 4.298277491096736, "grad_norm": 0.8131172060966492, "learning_rate": 1.0931099917699796e-05, "loss": 0.0475, "step": 118280 }, { "epoch": 4.298640889599535, "grad_norm": 0.4684634506702423, "learning_rate": 1.0926810610502636e-05, "loss": 0.0471, "step": 118290 }, { "epoch": 4.299004288102333, "grad_norm": 0.4415627717971802, "learning_rate": 1.0922521909669126e-05, "loss": 0.0704, "step": 118300 }, { "epoch": 4.299367686605131, "grad_norm": 1.0379295349121094, "learning_rate": 1.0918233815384066e-05, "loss": 0.0805, "step": 118310 }, { "epoch": 4.299731085107929, "grad_norm": 1.0131906270980835, "learning_rate": 1.0913946327832198e-05, "loss": 0.0725, "step": 118320 }, { "epoch": 4.300094483610728, "grad_norm": 0.4846254587173462, "learning_rate": 1.0909659447198264e-05, "loss": 0.0839, "step": 118330 }, { "epoch": 4.300457882113526, "grad_norm": 0.41148313879966736, "learning_rate": 1.0905373173666974e-05, "loss": 0.0631, "step": 118340 }, { "epoch": 4.300821280616324, "grad_norm": 89.82099151611328, "learning_rate": 1.0901087507423011e-05, "loss": 1.5804, "step": 118350 }, { "epoch": 4.301184679119122, "grad_norm": 0.5065470933914185, "learning_rate": 1.0896802448651017e-05, "loss": 0.0677, "step": 118360 }, { "epoch": 4.30154807762192, "grad_norm": 0.5171691179275513, "learning_rate": 1.0892517997535622e-05, "loss": 0.072, "step": 118370 }, { "epoch": 4.301911476124719, "grad_norm": 0.44612202048301697, "learning_rate": 
1.0888234154261437e-05, "loss": 0.0676, "step": 118380 }, { "epoch": 4.302274874627517, "grad_norm": 0.29291561245918274, "learning_rate": 1.0883950919013039e-05, "loss": 0.0754, "step": 118390 }, { "epoch": 4.302638273130315, "grad_norm": 0.6537386178970337, "learning_rate": 1.087966829197496e-05, "loss": 0.0754, "step": 118400 }, { "epoch": 4.303001671633113, "grad_norm": 1.1553950309753418, "learning_rate": 1.0875386273331744e-05, "loss": 0.0632, "step": 118410 }, { "epoch": 4.303365070135911, "grad_norm": 0.5298495292663574, "learning_rate": 1.0871104863267873e-05, "loss": 0.0699, "step": 118420 }, { "epoch": 4.30372846863871, "grad_norm": 0.3563203811645508, "learning_rate": 1.0866824061967817e-05, "loss": 0.0589, "step": 118430 }, { "epoch": 4.304091867141508, "grad_norm": 0.45000791549682617, "learning_rate": 1.0862543869616029e-05, "loss": 0.0629, "step": 118440 }, { "epoch": 4.304455265644306, "grad_norm": 0.834939181804657, "learning_rate": 1.0858264286396932e-05, "loss": 0.0604, "step": 118450 }, { "epoch": 4.304818664147104, "grad_norm": 0.9879086017608643, "learning_rate": 1.0853985312494905e-05, "loss": 0.063, "step": 118460 }, { "epoch": 4.3051820626499016, "grad_norm": 0.3103133738040924, "learning_rate": 1.0849706948094315e-05, "loss": 0.0715, "step": 118470 }, { "epoch": 4.3055454611527, "grad_norm": 0.679695725440979, "learning_rate": 1.0845429193379508e-05, "loss": 0.0677, "step": 118480 }, { "epoch": 4.305908859655498, "grad_norm": 0.5237340927124023, "learning_rate": 1.0841152048534802e-05, "loss": 0.1327, "step": 118490 }, { "epoch": 4.306272258158296, "grad_norm": 0.7196406722068787, "learning_rate": 1.083687551374447e-05, "loss": 0.0733, "step": 118500 }, { "epoch": 4.306635656661094, "grad_norm": 7.049716949462891, "learning_rate": 1.083259958919278e-05, "loss": 0.0961, "step": 118510 }, { "epoch": 4.306999055163892, "grad_norm": 3.882913827896118, "learning_rate": 1.0828324275063965e-05, "loss": 0.072, "step": 118520 }, { "epoch": 
4.307362453666691, "grad_norm": 0.7456324696540833, "learning_rate": 1.0824049571542241e-05, "loss": 0.0459, "step": 118530 }, { "epoch": 4.307725852169489, "grad_norm": 1.826056957244873, "learning_rate": 1.0819775478811775e-05, "loss": 0.0506, "step": 118540 }, { "epoch": 4.308089250672287, "grad_norm": 0.4404772222042084, "learning_rate": 1.081550199705674e-05, "loss": 0.0567, "step": 118550 }, { "epoch": 4.308452649175085, "grad_norm": 0.4826473593711853, "learning_rate": 1.0811229126461245e-05, "loss": 0.0744, "step": 118560 }, { "epoch": 4.308816047677883, "grad_norm": 0.41651955246925354, "learning_rate": 1.0806956867209406e-05, "loss": 0.0601, "step": 118570 }, { "epoch": 4.309179446180682, "grad_norm": 0.7595980167388916, "learning_rate": 1.0802685219485293e-05, "loss": 0.1158, "step": 118580 }, { "epoch": 4.30954284468348, "grad_norm": 0.3744035065174103, "learning_rate": 1.079841418347297e-05, "loss": 0.0744, "step": 118590 }, { "epoch": 4.309906243186278, "grad_norm": 1.2552756071090698, "learning_rate": 1.0794143759356443e-05, "loss": 0.0621, "step": 118600 }, { "epoch": 4.310269641689076, "grad_norm": 0.5260960459709167, "learning_rate": 1.0789873947319715e-05, "loss": 0.0592, "step": 118610 }, { "epoch": 4.310633040191874, "grad_norm": 0.7075775265693665, "learning_rate": 1.0785604747546759e-05, "loss": 0.0676, "step": 118620 }, { "epoch": 4.310996438694673, "grad_norm": 0.2839989960193634, "learning_rate": 1.0781336160221528e-05, "loss": 0.0557, "step": 118630 }, { "epoch": 4.311359837197471, "grad_norm": 0.5634669661521912, "learning_rate": 1.0777068185527925e-05, "loss": 0.049, "step": 118640 }, { "epoch": 4.311723235700269, "grad_norm": 0.7401905059814453, "learning_rate": 1.0772800823649848e-05, "loss": 0.0688, "step": 118650 }, { "epoch": 4.312086634203067, "grad_norm": 0.3461298942565918, "learning_rate": 1.0768534074771161e-05, "loss": 0.06, "step": 118660 }, { "epoch": 4.312450032705865, "grad_norm": 0.915807843208313, "learning_rate": 
1.0764267939075714e-05, "loss": 0.0641, "step": 118670 }, { "epoch": 4.312813431208664, "grad_norm": 0.36104288697242737, "learning_rate": 1.0760002416747302e-05, "loss": 0.0718, "step": 118680 }, { "epoch": 4.313176829711462, "grad_norm": 0.36635956168174744, "learning_rate": 1.0755737507969721e-05, "loss": 0.0812, "step": 118690 }, { "epoch": 4.31354022821426, "grad_norm": 17.671672821044922, "learning_rate": 1.0751473212926739e-05, "loss": 0.0651, "step": 118700 }, { "epoch": 4.313903626717058, "grad_norm": 0.3592807352542877, "learning_rate": 1.074720953180207e-05, "loss": 0.0684, "step": 118710 }, { "epoch": 4.3142670252198565, "grad_norm": 0.6981629729270935, "learning_rate": 1.074294646477943e-05, "loss": 0.0812, "step": 118720 }, { "epoch": 4.3146304237226545, "grad_norm": 0.30696114897727966, "learning_rate": 1.073868401204251e-05, "loss": 0.0387, "step": 118730 }, { "epoch": 4.3149938222254525, "grad_norm": 0.3968905210494995, "learning_rate": 1.0734422173774944e-05, "loss": 1.4487, "step": 118740 }, { "epoch": 4.3153572207282505, "grad_norm": 0.8053102493286133, "learning_rate": 1.073016095016037e-05, "loss": 0.0656, "step": 118750 }, { "epoch": 4.315720619231048, "grad_norm": 0.7892598509788513, "learning_rate": 1.072590034138239e-05, "loss": 0.0556, "step": 118760 }, { "epoch": 4.316084017733847, "grad_norm": 0.8279727697372437, "learning_rate": 1.0721640347624583e-05, "loss": 0.0763, "step": 118770 }, { "epoch": 4.316447416236645, "grad_norm": 0.26920953392982483, "learning_rate": 1.0717380969070483e-05, "loss": 0.0507, "step": 118780 }, { "epoch": 4.316810814739443, "grad_norm": 0.5866697430610657, "learning_rate": 1.0713122205903619e-05, "loss": 0.0547, "step": 118790 }, { "epoch": 4.317174213242241, "grad_norm": 0.4768913686275482, "learning_rate": 1.0708864058307497e-05, "loss": 0.0701, "step": 118800 }, { "epoch": 4.317174213242241, "eval_loss": 0.3066200017929077, "eval_runtime": 179.5455, "eval_samples_per_second": 41.293, 
"eval_steps_per_second": 5.163, "eval_wer": 0.12600976636956088, "step": 118800 }, { "epoch": 4.317537611745039, "grad_norm": 0.3298153579235077, "learning_rate": 1.0704606526465563e-05, "loss": 0.0569, "step": 118810 }, { "epoch": 4.317901010247838, "grad_norm": 0.6012352108955383, "learning_rate": 1.0700349610561273e-05, "loss": 0.0757, "step": 118820 }, { "epoch": 4.318264408750636, "grad_norm": null, "learning_rate": 1.0696518913025693e-05, "loss": 3.1513, "step": 118830 }, { "epoch": 4.318627807253434, "grad_norm": 0.8840529322624207, "learning_rate": 1.069226316790821e-05, "loss": 0.0563, "step": 118840 }, { "epoch": 4.318991205756232, "grad_norm": 0.3720659017562866, "learning_rate": 1.0688008039260206e-05, "loss": 0.0593, "step": 118850 }, { "epoch": 4.31935460425903, "grad_norm": 1.3442655801773071, "learning_rate": 1.0683753527265005e-05, "loss": 0.0581, "step": 118860 }, { "epoch": 4.319718002761829, "grad_norm": 0.41103148460388184, "learning_rate": 1.067949963210593e-05, "loss": 0.0526, "step": 118870 }, { "epoch": 4.320081401264627, "grad_norm": 0.36499980092048645, "learning_rate": 1.067524635396626e-05, "loss": 0.06, "step": 118880 }, { "epoch": 4.320444799767425, "grad_norm": 0.7986207604408264, "learning_rate": 1.0670993693029268e-05, "loss": 0.0547, "step": 118890 }, { "epoch": 4.320808198270223, "grad_norm": 0.33180713653564453, "learning_rate": 1.0666741649478165e-05, "loss": 0.0506, "step": 118900 }, { "epoch": 4.321171596773022, "grad_norm": 0.6820605397224426, "learning_rate": 1.0662490223496166e-05, "loss": 0.0746, "step": 118910 }, { "epoch": 4.32153499527582, "grad_norm": 0.9797353148460388, "learning_rate": 1.0658239415266451e-05, "loss": 0.068, "step": 118920 }, { "epoch": 4.321898393778618, "grad_norm": 0.3585362732410431, "learning_rate": 1.065398922497218e-05, "loss": 0.0559, "step": 118930 }, { "epoch": 4.322261792281416, "grad_norm": 2.2352147102355957, "learning_rate": 1.0649739652796462e-05, "loss": 0.0522, "step": 118940 }, { 
"epoch": 4.322625190784214, "grad_norm": 1.126900315284729, "learning_rate": 1.0645490698922404e-05, "loss": 0.2009, "step": 118950 }, { "epoch": 4.322988589287013, "grad_norm": 0.5328972339630127, "learning_rate": 1.0641242363533077e-05, "loss": 0.0546, "step": 118960 }, { "epoch": 4.323351987789811, "grad_norm": 0.3610680103302002, "learning_rate": 1.0636994646811538e-05, "loss": 0.107, "step": 118970 }, { "epoch": 4.3237153862926085, "grad_norm": 0.3678978383541107, "learning_rate": 1.0632747548940785e-05, "loss": 0.052, "step": 118980 }, { "epoch": 4.3240787847954065, "grad_norm": 0.3910360038280487, "learning_rate": 1.0628501070103824e-05, "loss": 0.048, "step": 118990 }, { "epoch": 4.3244421832982045, "grad_norm": 65.42230224609375, "learning_rate": 1.0624255210483624e-05, "loss": 0.2312, "step": 119000 }, { "epoch": 4.324805581801003, "grad_norm": 0.40207189321517944, "learning_rate": 1.0620009970263112e-05, "loss": 0.0936, "step": 119010 }, { "epoch": 4.325168980303801, "grad_norm": 1.1557154655456543, "learning_rate": 1.0615765349625206e-05, "loss": 0.0645, "step": 119020 }, { "epoch": 4.325532378806599, "grad_norm": 0.9678333401679993, "learning_rate": 1.0611521348752798e-05, "loss": 0.0645, "step": 119030 }, { "epoch": 4.325895777309397, "grad_norm": 0.24323670566082, "learning_rate": 1.0607277967828736e-05, "loss": 0.0622, "step": 119040 }, { "epoch": 4.326259175812195, "grad_norm": 0.42628422379493713, "learning_rate": 1.0603035207035853e-05, "loss": 0.0534, "step": 119050 }, { "epoch": 4.326622574314994, "grad_norm": 0.4441395103931427, "learning_rate": 1.0598793066556959e-05, "loss": 0.0716, "step": 119060 }, { "epoch": 4.326985972817792, "grad_norm": 3.301652431488037, "learning_rate": 1.0594551546574841e-05, "loss": 0.077, "step": 119070 }, { "epoch": 4.32734937132059, "grad_norm": 0.34682294726371765, "learning_rate": 1.0590310647272234e-05, "loss": 0.0522, "step": 119080 }, { "epoch": 4.327712769823388, "grad_norm": 1.717201828956604, 
"learning_rate": 1.058607036883187e-05, "loss": 0.1052, "step": 119090 }, { "epoch": 4.328076168326186, "grad_norm": 0.6550588607788086, "learning_rate": 1.058183071143646e-05, "loss": 0.0701, "step": 119100 }, { "epoch": 4.328439566828985, "grad_norm": 0.2946367859840393, "learning_rate": 1.0577591675268656e-05, "loss": 0.1096, "step": 119110 }, { "epoch": 4.328802965331783, "grad_norm": 0.5222314596176147, "learning_rate": 1.057335326051111e-05, "loss": 0.0804, "step": 119120 }, { "epoch": 4.329166363834581, "grad_norm": 0.5417972803115845, "learning_rate": 1.056911546734644e-05, "loss": 0.0692, "step": 119130 }, { "epoch": 4.329529762337379, "grad_norm": 0.399072527885437, "learning_rate": 1.0564878295957253e-05, "loss": 0.0457, "step": 119140 }, { "epoch": 4.329893160840177, "grad_norm": 0.2701396942138672, "learning_rate": 1.0560641746526087e-05, "loss": 0.0395, "step": 119150 }, { "epoch": 4.330256559342976, "grad_norm": 0.21145252883434296, "learning_rate": 1.0556405819235496e-05, "loss": 0.0613, "step": 119160 }, { "epoch": 4.330619957845774, "grad_norm": 0.8462580442428589, "learning_rate": 1.0552170514267983e-05, "loss": 0.0668, "step": 119170 }, { "epoch": 4.330983356348572, "grad_norm": 0.45559296011924744, "learning_rate": 1.054793583180605e-05, "loss": 0.0454, "step": 119180 }, { "epoch": 4.33134675485137, "grad_norm": 0.5273690819740295, "learning_rate": 1.0543701772032133e-05, "loss": 0.0739, "step": 119190 }, { "epoch": 4.331710153354168, "grad_norm": 1.413089394569397, "learning_rate": 1.0539468335128669e-05, "loss": 0.0713, "step": 119200 }, { "epoch": 4.332073551856967, "grad_norm": 0.42065924406051636, "learning_rate": 1.0535235521278072e-05, "loss": 0.0591, "step": 119210 }, { "epoch": 4.332436950359765, "grad_norm": 1.570407748222351, "learning_rate": 1.0531003330662704e-05, "loss": 0.0781, "step": 119220 }, { "epoch": 4.332800348862563, "grad_norm": 0.23802921175956726, "learning_rate": 1.0526771763464919e-05, "loss": 0.0453, "step": 119230 
}, { "epoch": 4.333163747365361, "grad_norm": 0.36468029022216797, "learning_rate": 1.0522540819867053e-05, "loss": 0.0561, "step": 119240 }, { "epoch": 4.333527145868159, "grad_norm": 0.5816068649291992, "learning_rate": 1.0518310500051382e-05, "loss": 0.0528, "step": 119250 }, { "epoch": 4.333890544370957, "grad_norm": 0.37400707602500916, "learning_rate": 1.0514080804200189e-05, "loss": 0.0717, "step": 119260 }, { "epoch": 4.334253942873755, "grad_norm": 0.4630601108074188, "learning_rate": 1.0509851732495709e-05, "loss": 0.058, "step": 119270 }, { "epoch": 4.334617341376553, "grad_norm": 0.31610459089279175, "learning_rate": 1.0505623285120173e-05, "loss": 0.0529, "step": 119280 }, { "epoch": 4.334980739879351, "grad_norm": 0.20980675518512726, "learning_rate": 1.0501395462255748e-05, "loss": 0.0502, "step": 119290 }, { "epoch": 4.33534413838215, "grad_norm": 0.4645346999168396, "learning_rate": 1.0497168264084606e-05, "loss": 2.34, "step": 119300 }, { "epoch": 4.335707536884948, "grad_norm": 0.32523655891418457, "learning_rate": 1.0492941690788885e-05, "loss": 0.0589, "step": 119310 }, { "epoch": 4.336070935387746, "grad_norm": 0.7687360048294067, "learning_rate": 1.0488715742550697e-05, "loss": 0.063, "step": 119320 }, { "epoch": 4.336434333890544, "grad_norm": 1.7641674280166626, "learning_rate": 1.0484490419552109e-05, "loss": 0.069, "step": 119330 }, { "epoch": 4.336797732393342, "grad_norm": 0.295184463262558, "learning_rate": 1.0480265721975191e-05, "loss": 0.0469, "step": 119340 }, { "epoch": 4.337161130896141, "grad_norm": 0.8231440782546997, "learning_rate": 1.047604165000195e-05, "loss": 0.0415, "step": 119350 }, { "epoch": 4.337524529398939, "grad_norm": 0.3827292323112488, "learning_rate": 1.0471818203814402e-05, "loss": 0.0567, "step": 119360 }, { "epoch": 4.337887927901737, "grad_norm": 2.2949953079223633, "learning_rate": 1.0467595383594514e-05, "loss": 0.0684, "step": 119370 }, { "epoch": 4.338251326404535, "grad_norm": 0.4049169421195984, 
"learning_rate": 1.0463373189524242e-05, "loss": 0.0524, "step": 119380 }, { "epoch": 4.338614724907333, "grad_norm": 0.9538078904151917, "learning_rate": 1.0459151621785493e-05, "loss": 0.061, "step": 119390 }, { "epoch": 4.338978123410132, "grad_norm": 0.4654026925563812, "learning_rate": 1.045493068056016e-05, "loss": 0.0543, "step": 119400 }, { "epoch": 4.338978123410132, "eval_loss": 0.30670055747032166, "eval_runtime": 179.7467, "eval_samples_per_second": 41.247, "eval_steps_per_second": 5.157, "eval_wer": 0.12517472361900267, "step": 119400 }, { "epoch": 4.33934152191293, "grad_norm": 1.0376760959625244, "learning_rate": 1.0450710366030115e-05, "loss": 0.065, "step": 119410 }, { "epoch": 4.339704920415728, "grad_norm": 0.7893726229667664, "learning_rate": 1.0446490678377202e-05, "loss": 0.0768, "step": 119420 }, { "epoch": 4.340068318918526, "grad_norm": 0.3951668441295624, "learning_rate": 1.0442271617783215e-05, "loss": 0.1219, "step": 119430 }, { "epoch": 4.340431717421324, "grad_norm": 0.46963751316070557, "learning_rate": 1.0438053184429946e-05, "loss": 0.0556, "step": 119440 }, { "epoch": 4.340795115924123, "grad_norm": 1.2681185007095337, "learning_rate": 1.0433835378499155e-05, "loss": 0.1118, "step": 119450 }, { "epoch": 4.341158514426921, "grad_norm": 1.335719347000122, "learning_rate": 1.0429618200172581e-05, "loss": 0.0599, "step": 119460 }, { "epoch": 4.341521912929719, "grad_norm": 0.39294785261154175, "learning_rate": 1.0425401649631908e-05, "loss": 0.0801, "step": 119470 }, { "epoch": 4.341885311432517, "grad_norm": 0.6418009400367737, "learning_rate": 1.0421185727058826e-05, "loss": 0.0646, "step": 119480 }, { "epoch": 4.3422487099353155, "grad_norm": 0.49082010984420776, "learning_rate": 1.0416970432634968e-05, "loss": 0.0474, "step": 119490 }, { "epoch": 4.3426121084381135, "grad_norm": 1.266304850578308, "learning_rate": 1.0412755766541981e-05, "loss": 0.1087, "step": 119500 }, { "epoch": 4.3429755069409115, "grad_norm": 
0.32737457752227783, "learning_rate": 1.0408541728961441e-05, "loss": 0.0648, "step": 119510 }, { "epoch": 4.3433389054437095, "grad_norm": 3.866938829421997, "learning_rate": 1.0404328320074928e-05, "loss": 0.0588, "step": 119520 }, { "epoch": 4.3437023039465075, "grad_norm": 0.3310281038284302, "learning_rate": 1.0400115540063965e-05, "loss": 0.054, "step": 119530 }, { "epoch": 4.344065702449306, "grad_norm": 0.6156519651412964, "learning_rate": 1.0395903389110081e-05, "loss": 0.0653, "step": 119540 }, { "epoch": 4.344429100952104, "grad_norm": 0.7114567160606384, "learning_rate": 1.0391691867394757e-05, "loss": 0.0519, "step": 119550 }, { "epoch": 4.344792499454902, "grad_norm": 0.27406829595565796, "learning_rate": 1.0387480975099462e-05, "loss": 0.072, "step": 119560 }, { "epoch": 4.3451558979577, "grad_norm": 0.6738885045051575, "learning_rate": 1.038327071240561e-05, "loss": 0.1765, "step": 119570 }, { "epoch": 4.345519296460498, "grad_norm": 0.7371698021888733, "learning_rate": 1.037906107949462e-05, "loss": 0.055, "step": 119580 }, { "epoch": 4.345882694963297, "grad_norm": 0.3803616166114807, "learning_rate": 1.0374852076547863e-05, "loss": 0.063, "step": 119590 }, { "epoch": 4.346246093466095, "grad_norm": 0.4684884548187256, "learning_rate": 1.0370643703746705e-05, "loss": 0.0515, "step": 119600 }, { "epoch": 4.346609491968893, "grad_norm": 0.6796711683273315, "learning_rate": 1.036643596127245e-05, "loss": 0.0586, "step": 119610 }, { "epoch": 4.346972890471691, "grad_norm": 0.2606639862060547, "learning_rate": 1.036222884930641e-05, "loss": 0.0711, "step": 119620 }, { "epoch": 4.347336288974489, "grad_norm": 0.2645287811756134, "learning_rate": 1.0358022368029829e-05, "loss": 0.0763, "step": 119630 }, { "epoch": 4.347699687477288, "grad_norm": 2.693612813949585, "learning_rate": 1.0353816517623988e-05, "loss": 0.0577, "step": 119640 }, { "epoch": 4.348063085980086, "grad_norm": 0.5992051959037781, "learning_rate": 1.0349611298270074e-05, "loss": 
0.0518, "step": 119650 }, { "epoch": 4.348426484482884, "grad_norm": 0.9718080759048462, "learning_rate": 1.0345406710149288e-05, "loss": 0.0675, "step": 119660 }, { "epoch": 4.348789882985682, "grad_norm": 0.8428995013237, "learning_rate": 1.034120275344278e-05, "loss": 0.0547, "step": 119670 }, { "epoch": 4.34915328148848, "grad_norm": 0.3205854892730713, "learning_rate": 1.0336999428331687e-05, "loss": 0.0654, "step": 119680 }, { "epoch": 4.349516679991279, "grad_norm": 1.1903361082077026, "learning_rate": 1.0332796734997119e-05, "loss": 0.0446, "step": 119690 }, { "epoch": 4.349880078494077, "grad_norm": 0.5820839405059814, "learning_rate": 1.0328594673620163e-05, "loss": 0.0654, "step": 119700 }, { "epoch": 4.350243476996875, "grad_norm": 0.4552536904811859, "learning_rate": 1.032439324438185e-05, "loss": 0.0705, "step": 119710 }, { "epoch": 4.350606875499673, "grad_norm": 0.882331371307373, "learning_rate": 1.0320192447463228e-05, "loss": 0.0611, "step": 119720 }, { "epoch": 4.350970274002471, "grad_norm": 0.6142471432685852, "learning_rate": 1.0315992283045264e-05, "loss": 0.0446, "step": 119730 }, { "epoch": 4.35133367250527, "grad_norm": 0.7002348303794861, "learning_rate": 1.0311792751308963e-05, "loss": 3.1284, "step": 119740 }, { "epoch": 4.351697071008068, "grad_norm": 0.7333692312240601, "learning_rate": 1.0307593852435244e-05, "loss": 0.0584, "step": 119750 }, { "epoch": 4.352060469510866, "grad_norm": 0.8283504247665405, "learning_rate": 1.030339558660504e-05, "loss": 0.0679, "step": 119760 }, { "epoch": 4.3524238680136635, "grad_norm": 1.5086302757263184, "learning_rate": 1.0299197953999207e-05, "loss": 0.0751, "step": 119770 }, { "epoch": 4.3527872665164615, "grad_norm": 0.532910168170929, "learning_rate": 1.029500095479865e-05, "loss": 0.064, "step": 119780 }, { "epoch": 4.35315066501926, "grad_norm": 0.47405874729156494, "learning_rate": 1.0290804589184172e-05, "loss": 0.0422, "step": 119790 }, { "epoch": 4.353514063522058, "grad_norm": 
0.6326292753219604, "learning_rate": 1.0286608857336596e-05, "loss": 0.06, "step": 119800 }, { "epoch": 4.353877462024856, "grad_norm": 3.041429042816162, "learning_rate": 1.0282413759436688e-05, "loss": 0.0762, "step": 119810 }, { "epoch": 4.354240860527654, "grad_norm": 0.4619033932685852, "learning_rate": 1.0278219295665203e-05, "loss": 0.1377, "step": 119820 }, { "epoch": 4.354604259030452, "grad_norm": 0.45761334896087646, "learning_rate": 1.0274025466202872e-05, "loss": 0.0496, "step": 119830 }, { "epoch": 4.354967657533251, "grad_norm": 1.9688977003097534, "learning_rate": 1.0269832271230396e-05, "loss": 0.0471, "step": 119840 }, { "epoch": 4.355331056036049, "grad_norm": 0.5251762866973877, "learning_rate": 1.0265639710928426e-05, "loss": 0.0529, "step": 119850 }, { "epoch": 4.355694454538847, "grad_norm": 0.25119343400001526, "learning_rate": 1.0261447785477626e-05, "loss": 0.0672, "step": 119860 }, { "epoch": 4.356057853041645, "grad_norm": 7.577336311340332, "learning_rate": 1.0257256495058584e-05, "loss": 0.0561, "step": 119870 }, { "epoch": 4.356421251544444, "grad_norm": 0.5575109720230103, "learning_rate": 1.0253065839851922e-05, "loss": 0.0787, "step": 119880 }, { "epoch": 4.356784650047242, "grad_norm": 0.5600306987762451, "learning_rate": 1.0248875820038173e-05, "loss": 0.0778, "step": 119890 }, { "epoch": 4.35714804855004, "grad_norm": 0.49824684858322144, "learning_rate": 1.024468643579789e-05, "loss": 0.0553, "step": 119900 }, { "epoch": 4.357511447052838, "grad_norm": 1.0549505949020386, "learning_rate": 1.0240497687311563e-05, "loss": 0.0715, "step": 119910 }, { "epoch": 4.357874845555636, "grad_norm": 0.590259313583374, "learning_rate": 1.0236309574759673e-05, "loss": 0.0778, "step": 119920 }, { "epoch": 4.358238244058435, "grad_norm": 1.4435093402862549, "learning_rate": 1.0232122098322675e-05, "loss": 0.0574, "step": 119930 }, { "epoch": 4.358601642561233, "grad_norm": 0.4872845411300659, "learning_rate": 1.0227935258180999e-05, "loss": 
0.0519, "step": 119940 }, { "epoch": 4.358965041064031, "grad_norm": 0.725078284740448, "learning_rate": 1.0223749054515024e-05, "loss": 0.0558, "step": 119950 }, { "epoch": 4.359328439566829, "grad_norm": 0.6495826840400696, "learning_rate": 1.0219563487505132e-05, "loss": 0.0634, "step": 119960 }, { "epoch": 4.359691838069627, "grad_norm": 0.6960912942886353, "learning_rate": 1.0215378557331657e-05, "loss": 0.0681, "step": 119970 }, { "epoch": 4.360055236572426, "grad_norm": 0.41013091802597046, "learning_rate": 1.0211194264174926e-05, "loss": 0.102, "step": 119980 }, { "epoch": 4.360418635075224, "grad_norm": 0.5867893099784851, "learning_rate": 1.0207010608215206e-05, "loss": 0.0565, "step": 119990 }, { "epoch": 4.360782033578022, "grad_norm": 0.5199207067489624, "learning_rate": 1.0202827589632778e-05, "loss": 0.0576, "step": 120000 }, { "epoch": 4.360782033578022, "eval_loss": 0.31102028489112854, "eval_runtime": 180.0694, "eval_samples_per_second": 41.173, "eval_steps_per_second": 5.148, "eval_wer": 0.12710802911757765, "step": 120000 }, { "epoch": 4.36114543208082, "grad_norm": 0.340093195438385, "learning_rate": 1.0198645208607837e-05, "loss": 0.065, "step": 120010 }, { "epoch": 4.361508830583618, "grad_norm": 0.45320606231689453, "learning_rate": 1.019446346532063e-05, "loss": 0.0509, "step": 120020 }, { "epoch": 4.3618722290864165, "grad_norm": 1.0077732801437378, "learning_rate": 1.0190282359951308e-05, "loss": 0.0729, "step": 120030 }, { "epoch": 4.3622356275892145, "grad_norm": 2.358867883682251, "learning_rate": 1.0186101892680034e-05, "loss": 0.1418, "step": 120040 }, { "epoch": 4.3625990260920124, "grad_norm": 0.7889578938484192, "learning_rate": 1.018192206368691e-05, "loss": 0.0456, "step": 120050 }, { "epoch": 4.36296242459481, "grad_norm": 0.5588453412055969, "learning_rate": 1.0177742873152044e-05, "loss": 0.0544, "step": 120060 }, { "epoch": 4.363325823097609, "grad_norm": 0.5798775553703308, "learning_rate": 1.0173564321255499e-05, "loss": 
0.0525, "step": 120070 }, { "epoch": 4.363689221600407, "grad_norm": 0.47772547602653503, "learning_rate": 1.0169386408177323e-05, "loss": 0.0589, "step": 120080 }, { "epoch": 4.364052620103205, "grad_norm": 0.7030138969421387, "learning_rate": 1.0165209134097511e-05, "loss": 0.0515, "step": 120090 }, { "epoch": 4.364416018606003, "grad_norm": 0.49324220418930054, "learning_rate": 1.0161032499196063e-05, "loss": 0.0724, "step": 120100 }, { "epoch": 4.364779417108801, "grad_norm": 2.162036657333374, "learning_rate": 1.0156856503652911e-05, "loss": 0.0676, "step": 120110 }, { "epoch": 4.3651428156116, "grad_norm": 1.029558539390564, "learning_rate": 1.0152681147648015e-05, "loss": 0.077, "step": 120120 }, { "epoch": 4.365506214114398, "grad_norm": 0.7200583815574646, "learning_rate": 1.0148506431361257e-05, "loss": 0.0513, "step": 120130 }, { "epoch": 4.365869612617196, "grad_norm": 0.18625468015670776, "learning_rate": 1.0144332354972522e-05, "loss": 0.0545, "step": 120140 }, { "epoch": 4.366233011119994, "grad_norm": 0.37879735231399536, "learning_rate": 1.014015891866163e-05, "loss": 0.0566, "step": 120150 }, { "epoch": 4.366596409622792, "grad_norm": 0.408992737531662, "learning_rate": 1.013598612260844e-05, "loss": 0.059, "step": 120160 }, { "epoch": 4.366959808125591, "grad_norm": 0.5125990509986877, "learning_rate": 1.0131813966992708e-05, "loss": 0.0676, "step": 120170 }, { "epoch": 4.367323206628389, "grad_norm": 0.4787319004535675, "learning_rate": 1.0127642451994224e-05, "loss": 0.0896, "step": 120180 }, { "epoch": 4.367686605131187, "grad_norm": 0.5579050183296204, "learning_rate": 1.0123471577792701e-05, "loss": 0.0603, "step": 120190 }, { "epoch": 4.368050003633985, "grad_norm": 0.5333271026611328, "learning_rate": 1.0119301344567856e-05, "loss": 0.0684, "step": 120200 }, { "epoch": 4.368413402136783, "grad_norm": 0.37088432908058167, "learning_rate": 1.0115131752499368e-05, "loss": 0.0569, "step": 120210 }, { "epoch": 4.368776800639582, "grad_norm": 
1.721410870552063, "learning_rate": 1.0110962801766905e-05, "loss": 0.0595, "step": 120220 }, { "epoch": 4.36914019914238, "grad_norm": 0.8621863126754761, "learning_rate": 1.010679449255007e-05, "loss": 0.0817, "step": 120230 }, { "epoch": 4.369503597645178, "grad_norm": 0.33434170484542847, "learning_rate": 1.0102626825028478e-05, "loss": 0.0447, "step": 120240 }, { "epoch": 4.369866996147976, "grad_norm": 0.8776593208312988, "learning_rate": 1.0098459799381673e-05, "loss": 0.0529, "step": 120250 }, { "epoch": 4.370230394650774, "grad_norm": 0.5067620277404785, "learning_rate": 1.0094293415789233e-05, "loss": 0.0604, "step": 120260 }, { "epoch": 4.3705937931535725, "grad_norm": 2.770481586456299, "learning_rate": 1.0090127674430647e-05, "loss": 0.0809, "step": 120270 }, { "epoch": 4.3709571916563705, "grad_norm": 0.423946738243103, "learning_rate": 1.0085962575485418e-05, "loss": 2.5159, "step": 120280 }, { "epoch": 4.3713205901591685, "grad_norm": 1.2640351057052612, "learning_rate": 1.008179811913299e-05, "loss": 0.0707, "step": 120290 }, { "epoch": 4.3716839886619665, "grad_norm": 2.004375696182251, "learning_rate": 1.0077634305552802e-05, "loss": 0.0857, "step": 120300 }, { "epoch": 4.3720473871647645, "grad_norm": 0.6926215291023254, "learning_rate": 1.0073471134924256e-05, "loss": 0.0561, "step": 120310 }, { "epoch": 4.372410785667563, "grad_norm": 0.8502501845359802, "learning_rate": 1.006930860742674e-05, "loss": 0.0848, "step": 120320 }, { "epoch": 4.372774184170361, "grad_norm": 0.5317420363426208, "learning_rate": 1.0065146723239582e-05, "loss": 0.0828, "step": 120330 }, { "epoch": 4.373137582673159, "grad_norm": 0.38525134325027466, "learning_rate": 1.0060985482542123e-05, "loss": 0.0572, "step": 120340 }, { "epoch": 4.373500981175957, "grad_norm": 1.866957426071167, "learning_rate": 1.005682488551363e-05, "loss": 0.065, "step": 120350 }, { "epoch": 4.373864379678755, "grad_norm": 0.8129653930664062, "learning_rate": 1.0052664932333403e-05, "loss": 
0.0737, "step": 120360 }, { "epoch": 4.374227778181554, "grad_norm": 2.701793670654297, "learning_rate": 1.0048505623180651e-05, "loss": 0.0667, "step": 120370 }, { "epoch": 4.374591176684352, "grad_norm": 0.5643318295478821, "learning_rate": 1.0044346958234601e-05, "loss": 0.0596, "step": 120380 }, { "epoch": 4.37495457518715, "grad_norm": 0.9791012406349182, "learning_rate": 1.0040188937674414e-05, "loss": 0.083, "step": 120390 }, { "epoch": 4.375317973689948, "grad_norm": 1.1770576238632202, "learning_rate": 1.0036031561679274e-05, "loss": 0.0686, "step": 120400 }, { "epoch": 4.375681372192746, "grad_norm": 0.3363044857978821, "learning_rate": 1.0031874830428284e-05, "loss": 0.0766, "step": 120410 }, { "epoch": 4.376044770695545, "grad_norm": 0.6147244572639465, "learning_rate": 1.0027718744100561e-05, "loss": 0.0555, "step": 120420 }, { "epoch": 4.376408169198343, "grad_norm": 0.4810098707675934, "learning_rate": 1.0023563302875155e-05, "loss": 0.0488, "step": 120430 }, { "epoch": 4.376771567701141, "grad_norm": 1.6959590911865234, "learning_rate": 1.0019823957482765e-05, "loss": 2.7914, "step": 120440 }, { "epoch": 4.377134966203939, "grad_norm": 3.5046002864837646, "learning_rate": 1.0015669742445025e-05, "loss": 0.0566, "step": 120450 }, { "epoch": 4.377498364706738, "grad_norm": 0.4760388135910034, "learning_rate": 1.001151617302876e-05, "loss": 0.0566, "step": 120460 }, { "epoch": 4.377861763209536, "grad_norm": 0.5679770112037659, "learning_rate": 1.0007363249412933e-05, "loss": 0.065, "step": 120470 }, { "epoch": 4.378225161712334, "grad_norm": 0.9972585439682007, "learning_rate": 1.0003210971776489e-05, "loss": 0.0603, "step": 120480 }, { "epoch": 4.378588560215132, "grad_norm": 0.4830745458602905, "learning_rate": 9.999059340298314e-06, "loss": 0.0484, "step": 120490 }, { "epoch": 4.37895195871793, "grad_norm": 0.3061172068119049, "learning_rate": 9.994908355157301e-06, "loss": 0.0489, "step": 120500 }, { "epoch": 4.379315357220729, "grad_norm": 
0.6901132464408875, "learning_rate": 9.9907580165323e-06, "loss": 1.9897, "step": 120510 }, { "epoch": 4.379678755723527, "grad_norm": 0.6878951191902161, "learning_rate": 9.98660832460214e-06, "loss": 0.0739, "step": 120520 }, { "epoch": 4.380042154226325, "grad_norm": 0.4450702667236328, "learning_rate": 9.982459279545603e-06, "loss": 0.0581, "step": 120530 }, { "epoch": 4.380405552729123, "grad_norm": 0.5035352110862732, "learning_rate": 9.978310881541469e-06, "loss": 0.0553, "step": 120540 }, { "epoch": 4.380768951231921, "grad_norm": 0.3807744085788727, "learning_rate": 9.974163130768455e-06, "loss": 0.0494, "step": 120550 }, { "epoch": 4.381132349734719, "grad_norm": 0.3851553797721863, "learning_rate": 9.970016027405307e-06, "loss": 2.1352, "step": 120560 }, { "epoch": 4.381495748237517, "grad_norm": 0.3695625066757202, "learning_rate": 9.965869571630685e-06, "loss": 0.0547, "step": 120570 }, { "epoch": 4.381859146740315, "grad_norm": 0.7372445464134216, "learning_rate": 9.961723763623258e-06, "loss": 0.0518, "step": 120580 }, { "epoch": 4.382222545243113, "grad_norm": 0.47810813784599304, "learning_rate": 9.95757860356164e-06, "loss": 0.0554, "step": 120590 }, { "epoch": 4.382585943745911, "grad_norm": 0.5655418634414673, "learning_rate": 9.953434091624436e-06, "loss": 0.0667, "step": 120600 }, { "epoch": 4.382585943745911, "eval_loss": 0.29421278834342957, "eval_runtime": 178.8805, "eval_samples_per_second": 41.447, "eval_steps_per_second": 5.182, "eval_wer": 0.126445440848113, "step": 120600 }, { "epoch": 4.38294934224871, "grad_norm": 0.3682198226451874, "learning_rate": 9.949290227990225e-06, "loss": 0.0585, "step": 120610 }, { "epoch": 4.383312740751508, "grad_norm": 0.3467046618461609, "learning_rate": 9.945147012837554e-06, "loss": 0.0534, "step": 120620 }, { "epoch": 4.383676139254306, "grad_norm": 0.23876287043094635, "learning_rate": 9.941004446344926e-06, "loss": 0.1076, "step": 120630 }, { "epoch": 4.384039537757104, "grad_norm": 
0.23189418017864227, "learning_rate": 9.936862528690843e-06, "loss": 0.0452, "step": 120640 }, { "epoch": 4.384402936259903, "grad_norm": 1.7186256647109985, "learning_rate": 9.932721260053743e-06, "loss": 0.0663, "step": 120650 }, { "epoch": 4.384766334762701, "grad_norm": 0.3706608712673187, "learning_rate": 9.928580640612089e-06, "loss": 0.0804, "step": 120660 }, { "epoch": 4.385129733265499, "grad_norm": 0.48258817195892334, "learning_rate": 9.924440670544267e-06, "loss": 0.0705, "step": 120670 }, { "epoch": 4.385493131768297, "grad_norm": 0.6055228114128113, "learning_rate": 9.920301350028662e-06, "loss": 0.0551, "step": 120680 }, { "epoch": 4.385856530271095, "grad_norm": 0.30372968316078186, "learning_rate": 9.9161626792436e-06, "loss": 0.0548, "step": 120690 }, { "epoch": 4.386219928773894, "grad_norm": 0.5794808268547058, "learning_rate": 9.91202465836744e-06, "loss": 0.0473, "step": 120700 }, { "epoch": 4.386583327276692, "grad_norm": 0.48584842681884766, "learning_rate": 9.907887287578446e-06, "loss": 0.0775, "step": 120710 }, { "epoch": 4.38694672577949, "grad_norm": 1.3762316703796387, "learning_rate": 9.903750567054899e-06, "loss": 0.0721, "step": 120720 }, { "epoch": 4.387310124282288, "grad_norm": 0.31192561984062195, "learning_rate": 9.899614496975019e-06, "loss": 0.0563, "step": 120730 }, { "epoch": 4.387673522785086, "grad_norm": 0.7657603621482849, "learning_rate": 9.895479077517023e-06, "loss": 0.0461, "step": 120740 }, { "epoch": 4.388036921287885, "grad_norm": 0.7203567624092102, "learning_rate": 9.891344308859089e-06, "loss": 0.0749, "step": 120750 }, { "epoch": 4.388400319790683, "grad_norm": 0.6209551095962524, "learning_rate": 9.887210191179383e-06, "loss": 0.0684, "step": 120760 }, { "epoch": 4.388763718293481, "grad_norm": 0.532970666885376, "learning_rate": 9.883076724656007e-06, "loss": 0.076, "step": 120770 }, { "epoch": 4.389127116796279, "grad_norm": 5.468721866607666, "learning_rate": 9.87894390946708e-06, "loss": 0.051, "step": 
120780 }, { "epoch": 4.389490515299077, "grad_norm": 0.34060055017471313, "learning_rate": 9.87481174579064e-06, "loss": 0.0554, "step": 120790 }, { "epoch": 4.3898539138018755, "grad_norm": 1.880702257156372, "learning_rate": 9.870680233804767e-06, "loss": 0.0679, "step": 120800 }, { "epoch": 4.3902173123046735, "grad_norm": 0.7434289455413818, "learning_rate": 9.866549373687442e-06, "loss": 0.7246, "step": 120810 }, { "epoch": 4.3905807108074715, "grad_norm": 0.324386864900589, "learning_rate": 9.862419165616665e-06, "loss": 0.0885, "step": 120820 }, { "epoch": 4.3909441093102695, "grad_norm": 0.33165642619132996, "learning_rate": 9.858289609770382e-06, "loss": 0.0658, "step": 120830 }, { "epoch": 4.3913075078130674, "grad_norm": 1.4159249067306519, "learning_rate": 9.854160706326523e-06, "loss": 0.0501, "step": 120840 }, { "epoch": 4.391670906315866, "grad_norm": 0.40814369916915894, "learning_rate": 9.85003245546299e-06, "loss": 0.0553, "step": 120850 }, { "epoch": 4.392034304818664, "grad_norm": 2.6607532501220703, "learning_rate": 9.845904857357665e-06, "loss": 2.0686, "step": 120860 }, { "epoch": 4.392397703321462, "grad_norm": 0.4138577878475189, "learning_rate": 9.841777912188372e-06, "loss": 0.1482, "step": 120870 }, { "epoch": 4.39276110182426, "grad_norm": 0.7508553266525269, "learning_rate": 9.837651620132938e-06, "loss": 0.0554, "step": 120880 }, { "epoch": 4.393124500327058, "grad_norm": 0.4384622871875763, "learning_rate": 9.833525981369144e-06, "loss": 0.0544, "step": 120890 }, { "epoch": 4.393487898829857, "grad_norm": 0.6955190896987915, "learning_rate": 9.829400996074762e-06, "loss": 0.056, "step": 120900 }, { "epoch": 4.393851297332655, "grad_norm": 0.44200626015663147, "learning_rate": 9.825276664427508e-06, "loss": 0.0551, "step": 120910 }, { "epoch": 4.394214695835453, "grad_norm": 2.2168397903442383, "learning_rate": 9.821152986605095e-06, "loss": 0.0629, "step": 120920 }, { "epoch": 4.394578094338251, "grad_norm": 0.2910119593143463, 
"learning_rate": 9.817029962785179e-06, "loss": 0.0544, "step": 120930 }, { "epoch": 4.394941492841049, "grad_norm": 0.32496803998947144, "learning_rate": 9.812907593145438e-06, "loss": 0.0377, "step": 120940 }, { "epoch": 4.395304891343848, "grad_norm": 0.7183531522750854, "learning_rate": 9.808785877863463e-06, "loss": 0.1254, "step": 120950 }, { "epoch": 4.395668289846646, "grad_norm": 0.7459282279014587, "learning_rate": 9.804664817116866e-06, "loss": 0.0601, "step": 120960 }, { "epoch": 4.396031688349444, "grad_norm": 0.5424075126647949, "learning_rate": 9.800544411083184e-06, "loss": 0.0614, "step": 120970 }, { "epoch": 4.396395086852242, "grad_norm": 2.5938775539398193, "learning_rate": 9.796424659939965e-06, "loss": 0.0813, "step": 120980 }, { "epoch": 4.39675848535504, "grad_norm": 0.6978410482406616, "learning_rate": 9.792305563864715e-06, "loss": 0.0579, "step": 120990 }, { "epoch": 4.397121883857839, "grad_norm": 0.5140237212181091, "learning_rate": 9.788187123034914e-06, "loss": 0.0607, "step": 121000 }, { "epoch": 4.397485282360637, "grad_norm": 0.4372212886810303, "learning_rate": 9.784069337627999e-06, "loss": 0.0581, "step": 121010 }, { "epoch": 4.397848680863435, "grad_norm": 0.458351194858551, "learning_rate": 9.779952207821407e-06, "loss": 0.0619, "step": 121020 }, { "epoch": 4.398212079366233, "grad_norm": 0.6185303330421448, "learning_rate": 9.775835733792504e-06, "loss": 0.0573, "step": 121030 }, { "epoch": 4.398575477869032, "grad_norm": 0.28265702724456787, "learning_rate": 9.771719915718688e-06, "loss": 0.0559, "step": 121040 }, { "epoch": 4.39893887637183, "grad_norm": 118.88215637207031, "learning_rate": 9.767604753777271e-06, "loss": 0.5752, "step": 121050 }, { "epoch": 4.3993022748746275, "grad_norm": 0.9920980334281921, "learning_rate": 9.763490248145576e-06, "loss": 0.0751, "step": 121060 }, { "epoch": 4.3996656733774255, "grad_norm": 0.9971364140510559, "learning_rate": 9.759376399000858e-06, "loss": 0.0887, "step": 121070 }, { 
"epoch": 4.4000290718802235, "grad_norm": 0.6670841574668884, "learning_rate": 9.755263206520402e-06, "loss": 0.0554, "step": 121080 }, { "epoch": 4.400392470383022, "grad_norm": 0.27042630314826965, "learning_rate": 9.751150670881405e-06, "loss": 0.0509, "step": 121090 }, { "epoch": 4.40075586888582, "grad_norm": 1.5506465435028076, "learning_rate": 9.747038792261077e-06, "loss": 0.0775, "step": 121100 }, { "epoch": 4.401119267388618, "grad_norm": 0.8040229678153992, "learning_rate": 9.742927570836569e-06, "loss": 0.0741, "step": 121110 }, { "epoch": 4.401482665891416, "grad_norm": 2.6243174076080322, "learning_rate": 9.738817006785027e-06, "loss": 0.0655, "step": 121120 }, { "epoch": 4.401846064394214, "grad_norm": 0.4748077392578125, "learning_rate": 9.73470710028356e-06, "loss": 0.0707, "step": 121130 }, { "epoch": 4.402209462897013, "grad_norm": 1.0175915956497192, "learning_rate": 9.73059785150926e-06, "loss": 0.0541, "step": 121140 }, { "epoch": 4.402572861399811, "grad_norm": 0.41001373529434204, "learning_rate": 9.726489260639162e-06, "loss": 0.0527, "step": 121150 }, { "epoch": 4.402936259902609, "grad_norm": 0.3499217629432678, "learning_rate": 9.722381327850306e-06, "loss": 0.0763, "step": 121160 }, { "epoch": 4.403299658405407, "grad_norm": 0.7092549204826355, "learning_rate": 9.718274053319666e-06, "loss": 0.0587, "step": 121170 }, { "epoch": 4.403663056908205, "grad_norm": 0.7665150165557861, "learning_rate": 9.714167437224237e-06, "loss": 0.0679, "step": 121180 }, { "epoch": 4.404026455411004, "grad_norm": 0.6386712789535522, "learning_rate": 9.710061479740939e-06, "loss": 0.0401, "step": 121190 }, { "epoch": 4.404389853913802, "grad_norm": 0.5466418862342834, "learning_rate": 9.705956181046699e-06, "loss": 0.0718, "step": 121200 }, { "epoch": 4.404389853913802, "eval_loss": 0.29599717259407043, "eval_runtime": 180.2222, "eval_samples_per_second": 41.138, "eval_steps_per_second": 5.144, "eval_wer": 0.12556501533937225, "step": 121200 }, { "epoch": 
4.4047532524166, "grad_norm": 0.5336459875106812, "learning_rate": 9.701851541318382e-06, "loss": 0.0853, "step": 121210 }, { "epoch": 4.405116650919398, "grad_norm": 0.5972842574119568, "learning_rate": 9.697747560732854e-06, "loss": 0.0791, "step": 121220 }, { "epoch": 4.405480049422197, "grad_norm": 0.42219623923301697, "learning_rate": 9.693644239466934e-06, "loss": 0.0732, "step": 121230 }, { "epoch": 4.405843447924995, "grad_norm": 0.5730012655258179, "learning_rate": 9.689541577697434e-06, "loss": 0.0563, "step": 121240 }, { "epoch": 4.406206846427793, "grad_norm": 0.8514838218688965, "learning_rate": 9.685439575601104e-06, "loss": 0.1801, "step": 121250 }, { "epoch": 4.406570244930591, "grad_norm": 0.49094006419181824, "learning_rate": 9.681338233354701e-06, "loss": 0.0576, "step": 121260 }, { "epoch": 4.406933643433389, "grad_norm": 0.45783770084381104, "learning_rate": 9.677237551134914e-06, "loss": 0.0423, "step": 121270 }, { "epoch": 4.407297041936188, "grad_norm": 1.286647081375122, "learning_rate": 9.673137529118457e-06, "loss": 0.0641, "step": 121280 }, { "epoch": 4.407660440438986, "grad_norm": 0.43167805671691895, "learning_rate": 9.669038167481964e-06, "loss": 0.0439, "step": 121290 }, { "epoch": 4.408023838941784, "grad_norm": 0.33967655897140503, "learning_rate": 9.664939466402077e-06, "loss": 0.0546, "step": 121300 }, { "epoch": 4.408387237444582, "grad_norm": 0.5627890825271606, "learning_rate": 9.66084142605537e-06, "loss": 0.0764, "step": 121310 }, { "epoch": 4.40875063594738, "grad_norm": 6.8122239112854, "learning_rate": 9.656744046618443e-06, "loss": 0.0509, "step": 121320 }, { "epoch": 4.4091140344501785, "grad_norm": 0.8651514053344727, "learning_rate": 9.652647328267817e-06, "loss": 0.0528, "step": 121330 }, { "epoch": 4.4094774329529765, "grad_norm": 0.2621031105518341, "learning_rate": 9.648551271180021e-06, "loss": 0.0467, "step": 121340 }, { "epoch": 4.409840831455774, "grad_norm": 0.6462423205375671, "learning_rate": 
9.644455875531519e-06, "loss": 0.0521, "step": 121350 }, { "epoch": 4.410204229958572, "grad_norm": 0.9707630276679993, "learning_rate": 9.64036114149878e-06, "loss": 0.0694, "step": 121360 }, { "epoch": 4.41056762846137, "grad_norm": 1.069124460220337, "learning_rate": 9.636267069258231e-06, "loss": 0.066, "step": 121370 }, { "epoch": 4.410931026964169, "grad_norm": 0.36638501286506653, "learning_rate": 9.632173658986277e-06, "loss": 0.044, "step": 121380 }, { "epoch": 4.411294425466967, "grad_norm": 1.7517961263656616, "learning_rate": 9.628080910859272e-06, "loss": 0.0554, "step": 121390 }, { "epoch": 4.411657823969765, "grad_norm": 0.2911940813064575, "learning_rate": 9.623988825053576e-06, "loss": 0.0607, "step": 121400 }, { "epoch": 4.412021222472563, "grad_norm": 0.6068363189697266, "learning_rate": 9.619897401745476e-06, "loss": 0.0616, "step": 121410 }, { "epoch": 4.412384620975361, "grad_norm": 1.797816276550293, "learning_rate": 9.615806641111291e-06, "loss": 0.0647, "step": 121420 }, { "epoch": 4.41274801947816, "grad_norm": 7.071191787719727, "learning_rate": 9.611716543327251e-06, "loss": 0.0511, "step": 121430 }, { "epoch": 4.413111417980958, "grad_norm": 0.30585846304893494, "learning_rate": 9.607627108569605e-06, "loss": 0.046, "step": 121440 }, { "epoch": 4.413474816483756, "grad_norm": 0.3486677408218384, "learning_rate": 9.603538337014531e-06, "loss": 0.0579, "step": 121450 }, { "epoch": 4.413838214986554, "grad_norm": 0.4719579219818115, "learning_rate": 9.599450228838209e-06, "loss": 0.0623, "step": 121460 }, { "epoch": 4.414201613489352, "grad_norm": 0.4528675377368927, "learning_rate": 9.595362784216783e-06, "loss": 0.056, "step": 121470 }, { "epoch": 4.414565011992151, "grad_norm": 0.45884427428245544, "learning_rate": 9.591276003326371e-06, "loss": 0.0673, "step": 121480 }, { "epoch": 4.414928410494949, "grad_norm": 0.7520114779472351, "learning_rate": 9.587189886343042e-06, "loss": 0.0498, "step": 121490 }, { "epoch": 4.415291808997747, 
"grad_norm": 0.6762428879737854, "learning_rate": 9.583104433442865e-06, "loss": 0.0529, "step": 121500 }, { "epoch": 4.415655207500545, "grad_norm": 0.4355708658695221, "learning_rate": 9.579019644801862e-06, "loss": 0.0508, "step": 121510 }, { "epoch": 4.416018606003343, "grad_norm": 0.5865467190742493, "learning_rate": 9.574935520596046e-06, "loss": 0.0646, "step": 121520 }, { "epoch": 4.416382004506142, "grad_norm": 0.32523924112319946, "learning_rate": 9.570852061001365e-06, "loss": 0.0623, "step": 121530 }, { "epoch": 4.41674540300894, "grad_norm": 1.222623586654663, "learning_rate": 9.566769266193773e-06, "loss": 0.0511, "step": 121540 }, { "epoch": 4.417108801511738, "grad_norm": 0.5185289978981018, "learning_rate": 9.56268713634918e-06, "loss": 0.0578, "step": 121550 }, { "epoch": 4.417472200014536, "grad_norm": 0.5509432554244995, "learning_rate": 9.55860567164348e-06, "loss": 0.0664, "step": 121560 }, { "epoch": 4.417835598517334, "grad_norm": 0.439535915851593, "learning_rate": 9.554524872252515e-06, "loss": 0.0612, "step": 121570 }, { "epoch": 4.4181989970201325, "grad_norm": 2.6066372394561768, "learning_rate": 9.550444738352126e-06, "loss": 0.0743, "step": 121580 }, { "epoch": 4.4185623955229305, "grad_norm": 0.6362573504447937, "learning_rate": 9.546365270118094e-06, "loss": 0.0656, "step": 121590 }, { "epoch": 4.4189257940257285, "grad_norm": 0.5322681069374084, "learning_rate": 9.5422864677262e-06, "loss": 0.0587, "step": 121600 }, { "epoch": 4.4192891925285265, "grad_norm": 0.22413715720176697, "learning_rate": 9.53820833135218e-06, "loss": 0.0747, "step": 121610 }, { "epoch": 4.419652591031325, "grad_norm": 0.5755869150161743, "learning_rate": 9.534130861171761e-06, "loss": 0.0579, "step": 121620 }, { "epoch": 4.420015989534123, "grad_norm": 0.65174400806427, "learning_rate": 9.530054057360607e-06, "loss": 0.1269, "step": 121630 }, { "epoch": 4.420379388036921, "grad_norm": 1.3365408182144165, "learning_rate": 9.525977920094381e-06, "loss": 
0.0617, "step": 121640 }, { "epoch": 4.420742786539719, "grad_norm": 1.1761038303375244, "learning_rate": 9.521902449548708e-06, "loss": 0.0701, "step": 121650 }, { "epoch": 4.421106185042517, "grad_norm": 0.4623169004917145, "learning_rate": 9.5178276458992e-06, "loss": 0.075, "step": 121660 }, { "epoch": 4.421469583545316, "grad_norm": 3.6203951835632324, "learning_rate": 9.513753509321399e-06, "loss": 0.0894, "step": 121670 }, { "epoch": 4.421832982048114, "grad_norm": 0.6498029828071594, "learning_rate": 9.509680039990862e-06, "loss": 0.0506, "step": 121680 }, { "epoch": 4.422196380550912, "grad_norm": 0.363116592168808, "learning_rate": 9.505607238083098e-06, "loss": 0.0399, "step": 121690 }, { "epoch": 4.42255977905371, "grad_norm": 1.8000086545944214, "learning_rate": 9.501535103773596e-06, "loss": 0.0579, "step": 121700 }, { "epoch": 4.422923177556508, "grad_norm": 0.30083876848220825, "learning_rate": 9.497463637237796e-06, "loss": 0.052, "step": 121710 }, { "epoch": 4.423286576059307, "grad_norm": 0.41300225257873535, "learning_rate": 9.493392838651127e-06, "loss": 0.0589, "step": 121720 }, { "epoch": 4.423649974562105, "grad_norm": 3.8217811584472656, "learning_rate": 9.489322708189e-06, "loss": 0.0596, "step": 121730 }, { "epoch": 4.424013373064903, "grad_norm": 0.5146976709365845, "learning_rate": 9.485253246026757e-06, "loss": 0.0643, "step": 121740 }, { "epoch": 4.424376771567701, "grad_norm": 0.4420959949493408, "learning_rate": 9.481184452339752e-06, "loss": 0.0637, "step": 121750 }, { "epoch": 4.424740170070499, "grad_norm": 0.3984520733356476, "learning_rate": 9.477116327303304e-06, "loss": 0.056, "step": 121760 }, { "epoch": 4.425103568573298, "grad_norm": 0.6233524680137634, "learning_rate": 9.473048871092671e-06, "loss": 0.0717, "step": 121770 }, { "epoch": 4.425466967076096, "grad_norm": 1.2787010669708252, "learning_rate": 9.468982083883119e-06, "loss": 0.0527, "step": 121780 }, { "epoch": 4.425830365578894, "grad_norm": 0.2306261509656906, 
"learning_rate": 9.464915965849868e-06, "loss": 0.0463, "step": 121790 }, { "epoch": 4.426193764081692, "grad_norm": 0.9940856099128723, "learning_rate": 9.460850517168124e-06, "loss": 0.0562, "step": 121800 }, { "epoch": 4.426193764081692, "eval_loss": 0.3148193657398224, "eval_runtime": 179.5808, "eval_samples_per_second": 41.285, "eval_steps_per_second": 5.162, "eval_wer": 0.1269264980848476, "step": 121800 }, { "epoch": 4.426557162584491, "grad_norm": 0.4759156405925751, "learning_rate": 9.456785738013032e-06, "loss": 0.0684, "step": 121810 }, { "epoch": 4.426920561087289, "grad_norm": 0.8884357810020447, "learning_rate": 9.452721628559744e-06, "loss": 0.0632, "step": 121820 }, { "epoch": 4.427283959590087, "grad_norm": 0.4367246925830841, "learning_rate": 9.448658188983367e-06, "loss": 0.0483, "step": 121830 }, { "epoch": 4.427647358092885, "grad_norm": 0.27785640954971313, "learning_rate": 9.444595419458971e-06, "loss": 0.0691, "step": 121840 }, { "epoch": 4.4280107565956826, "grad_norm": 0.927058756351471, "learning_rate": 9.440533320161612e-06, "loss": 0.0649, "step": 121850 }, { "epoch": 4.428374155098481, "grad_norm": 0.7171421051025391, "learning_rate": 9.43647189126631e-06, "loss": 0.0614, "step": 121860 }, { "epoch": 4.428737553601279, "grad_norm": 0.2706540524959564, "learning_rate": 9.432411132948072e-06, "loss": 0.0591, "step": 121870 }, { "epoch": 4.429100952104077, "grad_norm": 0.5892338156700134, "learning_rate": 9.428351045381834e-06, "loss": 0.0684, "step": 121880 }, { "epoch": 4.429464350606875, "grad_norm": 0.7264726161956787, "learning_rate": 9.424291628742549e-06, "loss": 0.0517, "step": 121890 }, { "epoch": 4.429827749109673, "grad_norm": 0.29440802335739136, "learning_rate": 9.420232883205126e-06, "loss": 0.0597, "step": 121900 }, { "epoch": 4.430191147612472, "grad_norm": 0.3240821361541748, "learning_rate": 9.416174808944427e-06, "loss": 0.0533, "step": 121910 }, { "epoch": 4.43055454611527, "grad_norm": 0.4936390519142151, 
"learning_rate": 9.41211740613531e-06, "loss": 0.0577, "step": 121920 }, { "epoch": 4.430917944618068, "grad_norm": 1.849422812461853, "learning_rate": 9.40806067495259e-06, "loss": 0.0435, "step": 121930 }, { "epoch": 4.431281343120866, "grad_norm": 0.725542426109314, "learning_rate": 9.404004615571068e-06, "loss": 0.0573, "step": 121940 }, { "epoch": 4.431644741623664, "grad_norm": 0.5076490640640259, "learning_rate": 9.399949228165489e-06, "loss": 0.0702, "step": 121950 }, { "epoch": 4.432008140126463, "grad_norm": 0.4323822855949402, "learning_rate": 9.39589451291059e-06, "loss": 0.0563, "step": 121960 }, { "epoch": 4.432371538629261, "grad_norm": 0.6941072940826416, "learning_rate": 9.39184046998109e-06, "loss": 0.0715, "step": 121970 }, { "epoch": 4.432734937132059, "grad_norm": 0.4006383717060089, "learning_rate": 9.387787099551638e-06, "loss": 0.0529, "step": 121980 }, { "epoch": 4.433098335634857, "grad_norm": 0.3894495964050293, "learning_rate": 9.38373440179689e-06, "loss": 0.4697, "step": 121990 }, { "epoch": 4.433461734137655, "grad_norm": 0.5610470175743103, "learning_rate": 9.379682376891466e-06, "loss": 0.0577, "step": 122000 }, { "epoch": 4.433825132640454, "grad_norm": 0.49981486797332764, "learning_rate": 9.37563102500996e-06, "loss": 0.0569, "step": 122010 }, { "epoch": 4.434188531143252, "grad_norm": 0.5308887362480164, "learning_rate": 9.371580346326914e-06, "loss": 0.06, "step": 122020 }, { "epoch": 4.43455192964605, "grad_norm": 0.44779539108276367, "learning_rate": 9.36753034101686e-06, "loss": 0.0472, "step": 122030 }, { "epoch": 4.434915328148848, "grad_norm": 0.38655567169189453, "learning_rate": 9.363481009254307e-06, "loss": 0.0495, "step": 122040 }, { "epoch": 4.435278726651646, "grad_norm": 0.6829718351364136, "learning_rate": 9.35943235121373e-06, "loss": 0.0543, "step": 122050 }, { "epoch": 4.435642125154445, "grad_norm": 0.4029079079627991, "learning_rate": 9.355384367069556e-06, "loss": 0.0726, "step": 122060 }, { "epoch": 
4.436005523657243, "grad_norm": 0.6119958758354187, "learning_rate": 9.351337056996206e-06, "loss": 0.0738, "step": 122070 }, { "epoch": 4.436368922160041, "grad_norm": 1.8577392101287842, "learning_rate": 9.347290421168074e-06, "loss": 0.1148, "step": 122080 }, { "epoch": 4.436732320662839, "grad_norm": 0.46738559007644653, "learning_rate": 9.343244459759493e-06, "loss": 0.0582, "step": 122090 }, { "epoch": 4.437095719165637, "grad_norm": 1.4975764751434326, "learning_rate": 9.339199172944801e-06, "loss": 0.0667, "step": 122100 }, { "epoch": 4.4374591176684355, "grad_norm": 0.3371270000934601, "learning_rate": 9.335154560898307e-06, "loss": 0.0573, "step": 122110 }, { "epoch": 4.4378225161712335, "grad_norm": 0.6232864260673523, "learning_rate": 9.331110623794256e-06, "loss": 0.0587, "step": 122120 }, { "epoch": 4.4381859146740315, "grad_norm": 0.3525649905204773, "learning_rate": 9.327067361806902e-06, "loss": 0.0648, "step": 122130 }, { "epoch": 4.438549313176829, "grad_norm": 1.0711743831634521, "learning_rate": 9.323024775110447e-06, "loss": 0.0512, "step": 122140 }, { "epoch": 4.438912711679627, "grad_norm": 6.2273335456848145, "learning_rate": 9.318982863879087e-06, "loss": 0.0681, "step": 122150 }, { "epoch": 4.439276110182426, "grad_norm": 0.8364354968070984, "learning_rate": 9.314941628286953e-06, "loss": 1.8858, "step": 122160 }, { "epoch": 4.439639508685224, "grad_norm": 0.5039761066436768, "learning_rate": 9.310901068508174e-06, "loss": 0.0615, "step": 122170 }, { "epoch": 4.440002907188022, "grad_norm": 1.4762966632843018, "learning_rate": 9.306861184716848e-06, "loss": 0.0817, "step": 122180 }, { "epoch": 4.44036630569082, "grad_norm": 0.7664554715156555, "learning_rate": 9.302821977087046e-06, "loss": 0.0687, "step": 122190 }, { "epoch": 4.440729704193619, "grad_norm": 2.065042495727539, "learning_rate": 9.298783445792786e-06, "loss": 0.06, "step": 122200 }, { "epoch": 4.441093102696417, "grad_norm": 0.47532665729522705, "learning_rate": 
9.294745591008088e-06, "loss": 0.0854, "step": 122210 }, { "epoch": 4.441456501199215, "grad_norm": 0.7480117082595825, "learning_rate": 9.290708412906918e-06, "loss": 0.0991, "step": 122220 }, { "epoch": 4.441819899702013, "grad_norm": 0.40353912115097046, "learning_rate": 9.286671911663227e-06, "loss": 0.0471, "step": 122230 }, { "epoch": 4.442183298204811, "grad_norm": 0.4551263451576233, "learning_rate": 9.282636087450936e-06, "loss": 0.0624, "step": 122240 }, { "epoch": 4.44254669670761, "grad_norm": 0.3604438006877899, "learning_rate": 9.27860094044394e-06, "loss": 0.0547, "step": 122250 }, { "epoch": 4.442910095210408, "grad_norm": 0.4923056662082672, "learning_rate": 9.274566470816087e-06, "loss": 0.0665, "step": 122260 }, { "epoch": 4.443273493713206, "grad_norm": 0.6668517589569092, "learning_rate": 9.270532678741212e-06, "loss": 0.0706, "step": 122270 }, { "epoch": 4.443636892216004, "grad_norm": 0.23227757215499878, "learning_rate": 9.266499564393117e-06, "loss": 0.0657, "step": 122280 }, { "epoch": 4.444000290718802, "grad_norm": 0.40541398525238037, "learning_rate": 9.262467127945585e-06, "loss": 0.606, "step": 122290 }, { "epoch": 4.444363689221601, "grad_norm": 0.2569025456905365, "learning_rate": 9.258435369572344e-06, "loss": 0.0561, "step": 122300 }, { "epoch": 4.444727087724399, "grad_norm": 0.5046670436859131, "learning_rate": 9.254404289447111e-06, "loss": 0.0857, "step": 122310 }, { "epoch": 4.445090486227197, "grad_norm": 0.7304697036743164, "learning_rate": 9.250373887743575e-06, "loss": 0.0764, "step": 122320 }, { "epoch": 4.445453884729995, "grad_norm": 0.726542592048645, "learning_rate": 9.2463441646354e-06, "loss": 0.0872, "step": 122330 }, { "epoch": 4.445817283232793, "grad_norm": 0.24902032315731049, "learning_rate": 9.242315120296194e-06, "loss": 0.0689, "step": 122340 }, { "epoch": 4.4461806817355916, "grad_norm": 0.5349747538566589, "learning_rate": 9.238286754899572e-06, "loss": 0.0893, "step": 122350 }, { "epoch": 
4.4465440802383895, "grad_norm": 0.34126025438308716, "learning_rate": 9.234259068619078e-06, "loss": 0.0549, "step": 122360 }, { "epoch": 4.4469074787411875, "grad_norm": 2.2075178623199463, "learning_rate": 9.230232061628284e-06, "loss": 0.0771, "step": 122370 }, { "epoch": 4.4472708772439855, "grad_norm": 0.4811003804206848, "learning_rate": 9.226205734100669e-06, "loss": 0.0467, "step": 122380 }, { "epoch": 4.447634275746784, "grad_norm": 0.26153671741485596, "learning_rate": 9.222180086209736e-06, "loss": 0.0506, "step": 122390 }, { "epoch": 4.447997674249582, "grad_norm": 1.9838602542877197, "learning_rate": 9.218155118128915e-06, "loss": 0.0665, "step": 122400 }, { "epoch": 4.447997674249582, "eval_loss": 0.3039458692073822, "eval_runtime": 178.9086, "eval_samples_per_second": 41.44, "eval_steps_per_second": 5.181, "eval_wer": 0.12512026430918366, "step": 122400 }, { "epoch": 4.44836107275238, "grad_norm": 0.9466204047203064, "learning_rate": 9.214130830031642e-06, "loss": 0.0703, "step": 122410 }, { "epoch": 4.448724471255178, "grad_norm": 0.9471811652183533, "learning_rate": 9.210107222091303e-06, "loss": 0.0822, "step": 122420 }, { "epoch": 4.449087869757976, "grad_norm": 0.4464567005634308, "learning_rate": 9.206084294481271e-06, "loss": 0.0577, "step": 122430 }, { "epoch": 4.449451268260775, "grad_norm": 1.0034137964248657, "learning_rate": 9.202062047374863e-06, "loss": 0.0679, "step": 122440 }, { "epoch": 4.449814666763573, "grad_norm": 1.1435924768447876, "learning_rate": 9.198040480945402e-06, "loss": 0.0577, "step": 122450 }, { "epoch": 4.450178065266371, "grad_norm": 0.5850510001182556, "learning_rate": 9.194019595366136e-06, "loss": 0.0698, "step": 122460 }, { "epoch": 4.450541463769169, "grad_norm": 0.9886276721954346, "learning_rate": 9.189999390810344e-06, "loss": 0.0627, "step": 122470 }, { "epoch": 4.450904862271967, "grad_norm": 0.4550401270389557, "learning_rate": 9.18597986745122e-06, "loss": 2.8013, "step": 122480 }, { "epoch": 
4.451268260774766, "grad_norm": 1.6501051187515259, "learning_rate": 9.181961025461962e-06, "loss": 0.1491, "step": 122490 }, { "epoch": 4.451631659277564, "grad_norm": 1.2472875118255615, "learning_rate": 9.177942865015706e-06, "loss": 0.0649, "step": 122500 }, { "epoch": 4.451995057780362, "grad_norm": 0.678860068321228, "learning_rate": 9.173925386285617e-06, "loss": 0.0604, "step": 122510 }, { "epoch": 4.45235845628316, "grad_norm": 0.7805083990097046, "learning_rate": 9.169908589444765e-06, "loss": 0.0442, "step": 122520 }, { "epoch": 4.452721854785958, "grad_norm": 0.7497388124465942, "learning_rate": 9.165892474666235e-06, "loss": 0.0682, "step": 122530 }, { "epoch": 4.453085253288757, "grad_norm": 0.7967200875282288, "learning_rate": 9.161877042123052e-06, "loss": 0.0511, "step": 122540 }, { "epoch": 4.453448651791555, "grad_norm": 3.856332540512085, "learning_rate": 9.157862291988234e-06, "loss": 0.0552, "step": 122550 }, { "epoch": 4.453812050294353, "grad_norm": 0.45662280917167664, "learning_rate": 9.153848224434764e-06, "loss": 0.0538, "step": 122560 }, { "epoch": 4.454175448797151, "grad_norm": 1.0618711709976196, "learning_rate": 9.149834839635602e-06, "loss": 0.0453, "step": 122570 }, { "epoch": 4.454538847299949, "grad_norm": 0.3563428521156311, "learning_rate": 9.145822137763654e-06, "loss": 0.0483, "step": 122580 }, { "epoch": 4.454902245802748, "grad_norm": 0.4878004789352417, "learning_rate": 9.141810118991829e-06, "loss": 0.0521, "step": 122590 }, { "epoch": 4.455265644305546, "grad_norm": 0.3980761766433716, "learning_rate": 9.137798783492966e-06, "loss": 0.0623, "step": 122600 }, { "epoch": 4.455629042808344, "grad_norm": 0.6049517393112183, "learning_rate": 9.13378813143993e-06, "loss": 0.0675, "step": 122610 }, { "epoch": 4.455992441311142, "grad_norm": 0.4014378786087036, "learning_rate": 9.129778163005504e-06, "loss": 0.055, "step": 122620 }, { "epoch": 4.45635583981394, "grad_norm": 0.6160863637924194, "learning_rate": 
9.12576887836248e-06, "loss": 0.0562, "step": 122630 }, { "epoch": 4.456719238316738, "grad_norm": 0.6602206826210022, "learning_rate": 9.121760277683584e-06, "loss": 0.0445, "step": 122640 }, { "epoch": 4.457082636819536, "grad_norm": 0.29972442984580994, "learning_rate": 9.117752361141543e-06, "loss": 0.0481, "step": 122650 }, { "epoch": 4.457446035322334, "grad_norm": 0.7123286128044128, "learning_rate": 9.113745128909047e-06, "loss": 0.0698, "step": 122660 }, { "epoch": 4.457809433825132, "grad_norm": 0.9615245461463928, "learning_rate": 9.109738581158752e-06, "loss": 0.09, "step": 122670 }, { "epoch": 4.45817283232793, "grad_norm": 0.39651069045066833, "learning_rate": 9.105732718063281e-06, "loss": 0.0529, "step": 122680 }, { "epoch": 4.458536230830729, "grad_norm": 0.6359738111495972, "learning_rate": 9.101727539795232e-06, "loss": 0.0604, "step": 122690 }, { "epoch": 4.458899629333527, "grad_norm": 0.46020254492759705, "learning_rate": 9.097723046527178e-06, "loss": 0.0622, "step": 122700 }, { "epoch": 4.459263027836325, "grad_norm": 0.5293834209442139, "learning_rate": 9.093719238431666e-06, "loss": 0.0574, "step": 122710 }, { "epoch": 4.459626426339123, "grad_norm": 2.3157310485839844, "learning_rate": 9.089716115681191e-06, "loss": 0.0672, "step": 122720 }, { "epoch": 4.459989824841921, "grad_norm": 0.43088576197624207, "learning_rate": 9.085713678448246e-06, "loss": 0.0534, "step": 122730 }, { "epoch": 4.46035322334472, "grad_norm": 0.4464167356491089, "learning_rate": 9.081711926905259e-06, "loss": 0.0525, "step": 122740 }, { "epoch": 4.460716621847518, "grad_norm": 1.689940094947815, "learning_rate": 9.077710861224683e-06, "loss": 0.0606, "step": 122750 }, { "epoch": 4.461080020350316, "grad_norm": 0.5056033134460449, "learning_rate": 9.073710481578886e-06, "loss": 0.0646, "step": 122760 }, { "epoch": 4.461443418853114, "grad_norm": 0.7784010767936707, "learning_rate": 9.069710788140249e-06, "loss": 0.0712, "step": 122770 }, { "epoch": 
4.461806817355913, "grad_norm": 0.8776401877403259, "learning_rate": 9.065711781081085e-06, "loss": 3.867, "step": 122780 }, { "epoch": 4.462170215858711, "grad_norm": 0.4149848520755768, "learning_rate": 9.061713460573703e-06, "loss": 0.058, "step": 122790 }, { "epoch": 4.462533614361509, "grad_norm": 0.5918300151824951, "learning_rate": 9.057715826790384e-06, "loss": 0.053, "step": 122800 }, { "epoch": 4.462897012864307, "grad_norm": 0.4016144573688507, "learning_rate": 9.053718879903372e-06, "loss": 0.4509, "step": 122810 }, { "epoch": 4.463260411367105, "grad_norm": 0.7509815096855164, "learning_rate": 9.04972262008487e-06, "loss": 0.0708, "step": 122820 }, { "epoch": 4.463623809869904, "grad_norm": 0.4195763170719147, "learning_rate": 9.045727047507077e-06, "loss": 0.0464, "step": 122830 }, { "epoch": 4.463987208372702, "grad_norm": 2.7467033863067627, "learning_rate": 9.041732162342122e-06, "loss": 0.054, "step": 122840 }, { "epoch": 4.4643506068755, "grad_norm": 0.5175169110298157, "learning_rate": 9.037737964762166e-06, "loss": 0.0673, "step": 122850 }, { "epoch": 4.464714005378298, "grad_norm": 0.6345258951187134, "learning_rate": 9.03374445493928e-06, "loss": 0.0765, "step": 122860 }, { "epoch": 4.465077403881096, "grad_norm": 1.1035598516464233, "learning_rate": 9.029751633045544e-06, "loss": 0.0696, "step": 122870 }, { "epoch": 4.4654408023838945, "grad_norm": 0.4406812787055969, "learning_rate": 9.025759499252973e-06, "loss": 0.0682, "step": 122880 }, { "epoch": 4.4658042008866925, "grad_norm": 1.8794528245925903, "learning_rate": 9.021768053733603e-06, "loss": 0.0542, "step": 122890 }, { "epoch": 4.4661675993894905, "grad_norm": 2.102262020111084, "learning_rate": 9.017777296659389e-06, "loss": 0.0671, "step": 122900 }, { "epoch": 4.4665309978922885, "grad_norm": 0.729246199131012, "learning_rate": 9.013787228202297e-06, "loss": 0.0818, "step": 122910 }, { "epoch": 4.4668943963950865, "grad_norm": 0.6433634161949158, "learning_rate": 
9.009797848534223e-06, "loss": 0.0795, "step": 122920 }, { "epoch": 4.467257794897885, "grad_norm": 1.5871676206588745, "learning_rate": 9.005809157827067e-06, "loss": 0.0474, "step": 122930 }, { "epoch": 4.467621193400683, "grad_norm": 0.8328747749328613, "learning_rate": 9.001821156252688e-06, "loss": 0.0361, "step": 122940 }, { "epoch": 4.467984591903481, "grad_norm": 0.6324238181114197, "learning_rate": 8.997833843982923e-06, "loss": 0.0535, "step": 122950 }, { "epoch": 4.468347990406279, "grad_norm": 0.3543473184108734, "learning_rate": 8.99384722118955e-06, "loss": 0.0591, "step": 122960 }, { "epoch": 4.468711388909078, "grad_norm": 0.6472491025924683, "learning_rate": 8.98986128804436e-06, "loss": 0.0791, "step": 122970 }, { "epoch": 4.469074787411876, "grad_norm": 0.3557899594306946, "learning_rate": 8.985876044719066e-06, "loss": 0.1064, "step": 122980 }, { "epoch": 4.469438185914674, "grad_norm": 0.33114179968833923, "learning_rate": 8.981891491385414e-06, "loss": 0.0617, "step": 122990 }, { "epoch": 4.469801584417472, "grad_norm": 0.5591074824333191, "learning_rate": 8.977907628215055e-06, "loss": 0.0641, "step": 123000 }, { "epoch": 4.469801584417472, "eval_loss": 0.29875436425209045, "eval_runtime": 179.1068, "eval_samples_per_second": 41.394, "eval_steps_per_second": 5.176, "eval_wer": 0.12442136983317298, "step": 123000 }, { "epoch": 4.47016498292027, "grad_norm": 0.3731163442134857, "learning_rate": 8.973924455379657e-06, "loss": 0.0748, "step": 123010 }, { "epoch": 4.470528381423069, "grad_norm": 0.38922783732414246, "learning_rate": 8.969941973050824e-06, "loss": 0.0762, "step": 123020 }, { "epoch": 4.470891779925867, "grad_norm": 0.27188175916671753, "learning_rate": 8.96596018140016e-06, "loss": 0.0492, "step": 123030 }, { "epoch": 4.471255178428665, "grad_norm": 0.43850070238113403, "learning_rate": 8.96197908059922e-06, "loss": 0.0431, "step": 123040 }, { "epoch": 4.471618576931463, "grad_norm": 0.2648842930793762, "learning_rate": 
8.957998670819546e-06, "loss": 0.0842, "step": 123050 }, { "epoch": 4.471981975434261, "grad_norm": 0.4260450601577759, "learning_rate": 8.954018952232623e-06, "loss": 0.0661, "step": 123060 }, { "epoch": 4.47234537393706, "grad_norm": 0.5455771684646606, "learning_rate": 8.950039925009932e-06, "loss": 0.0485, "step": 123070 }, { "epoch": 4.472708772439858, "grad_norm": 0.24678508937358856, "learning_rate": 8.946061589322916e-06, "loss": 0.0585, "step": 123080 }, { "epoch": 4.473072170942656, "grad_norm": 0.45582863688468933, "learning_rate": 8.942083945342993e-06, "loss": 0.0426, "step": 123090 }, { "epoch": 4.473435569445454, "grad_norm": 1.2508575916290283, "learning_rate": 8.93810699324153e-06, "loss": 0.0782, "step": 123100 }, { "epoch": 4.473798967948252, "grad_norm": 0.7792302370071411, "learning_rate": 8.934130733189896e-06, "loss": 0.0565, "step": 123110 }, { "epoch": 4.474162366451051, "grad_norm": 0.8651388883590698, "learning_rate": 8.930155165359391e-06, "loss": 0.0841, "step": 123120 }, { "epoch": 4.474525764953849, "grad_norm": 0.5148172974586487, "learning_rate": 8.92618028992134e-06, "loss": 0.0488, "step": 123130 }, { "epoch": 4.474889163456647, "grad_norm": 0.24792881309986115, "learning_rate": 8.922206107046983e-06, "loss": 0.0542, "step": 123140 }, { "epoch": 4.4752525619594445, "grad_norm": 0.29278936982154846, "learning_rate": 8.918232616907565e-06, "loss": 0.0605, "step": 123150 }, { "epoch": 4.4756159604622425, "grad_norm": 1.8992652893066406, "learning_rate": 8.914259819674276e-06, "loss": 0.0626, "step": 123160 }, { "epoch": 4.475979358965041, "grad_norm": 0.4565250873565674, "learning_rate": 8.9102877155183e-06, "loss": 0.0821, "step": 123170 }, { "epoch": 4.476342757467839, "grad_norm": 0.2567780315876007, "learning_rate": 8.906316304610778e-06, "loss": 0.0623, "step": 123180 }, { "epoch": 4.476706155970637, "grad_norm": 0.4977375566959381, "learning_rate": 8.902345587122835e-06, "loss": 0.0461, "step": 123190 }, { "epoch": 
4.477069554473435, "grad_norm": 1.1738780736923218, "learning_rate": 8.898375563225533e-06, "loss": 0.0795, "step": 123200 }, { "epoch": 4.477432952976233, "grad_norm": 0.6591735482215881, "learning_rate": 8.894406233089949e-06, "loss": 0.0642, "step": 123210 }, { "epoch": 4.477796351479032, "grad_norm": 0.47659456729888916, "learning_rate": 8.89043759688708e-06, "loss": 0.0763, "step": 123220 }, { "epoch": 4.47815974998183, "grad_norm": 4.389726638793945, "learning_rate": 8.886469654787951e-06, "loss": 0.0622, "step": 123230 }, { "epoch": 4.478523148484628, "grad_norm": 0.23761561512947083, "learning_rate": 8.882502406963509e-06, "loss": 0.0614, "step": 123240 }, { "epoch": 4.478886546987426, "grad_norm": 0.5141186714172363, "learning_rate": 8.878535853584697e-06, "loss": 0.0582, "step": 123250 }, { "epoch": 4.479249945490224, "grad_norm": 0.7577188014984131, "learning_rate": 8.874569994822399e-06, "loss": 0.079, "step": 123260 }, { "epoch": 4.479613343993023, "grad_norm": 0.4065344035625458, "learning_rate": 8.87060483084752e-06, "loss": 0.0608, "step": 123270 }, { "epoch": 4.479976742495821, "grad_norm": 0.5756973624229431, "learning_rate": 8.866640361830883e-06, "loss": 0.0474, "step": 123280 }, { "epoch": 4.480340140998619, "grad_norm": 0.6659473180770874, "learning_rate": 8.86267658794332e-06, "loss": 0.0429, "step": 123290 }, { "epoch": 4.480703539501417, "grad_norm": 0.3194977045059204, "learning_rate": 8.858713509355598e-06, "loss": 0.0621, "step": 123300 }, { "epoch": 4.481066938004215, "grad_norm": 0.30986449122428894, "learning_rate": 8.854751126238478e-06, "loss": 0.0505, "step": 123310 }, { "epoch": 4.481430336507014, "grad_norm": 0.8367308974266052, "learning_rate": 8.850789438762689e-06, "loss": 0.0883, "step": 123320 }, { "epoch": 4.481793735009812, "grad_norm": 0.2709091305732727, "learning_rate": 8.846828447098931e-06, "loss": 0.0432, "step": 123330 }, { "epoch": 4.48215713351261, "grad_norm": 5.889291286468506, "learning_rate": 
8.842868151417851e-06, "loss": 0.0582, "step": 123340 }, { "epoch": 4.482520532015408, "grad_norm": 4.822190284729004, "learning_rate": 8.838908551890107e-06, "loss": 0.0747, "step": 123350 }, { "epoch": 4.482883930518207, "grad_norm": 4.108373641967773, "learning_rate": 8.834949648686274e-06, "loss": 0.075, "step": 123360 }, { "epoch": 4.483247329021005, "grad_norm": 0.3938525915145874, "learning_rate": 8.830991441976967e-06, "loss": 0.0589, "step": 123370 }, { "epoch": 4.483610727523803, "grad_norm": 0.9209822416305542, "learning_rate": 8.827033931932693e-06, "loss": 0.0603, "step": 123380 }, { "epoch": 4.483974126026601, "grad_norm": 0.3776785135269165, "learning_rate": 8.823077118723996e-06, "loss": 0.0497, "step": 123390 }, { "epoch": 4.484337524529399, "grad_norm": 0.5516955852508545, "learning_rate": 8.819121002521342e-06, "loss": 0.055, "step": 123400 }, { "epoch": 4.4847009230321975, "grad_norm": 0.6123373508453369, "learning_rate": 8.815165583495189e-06, "loss": 0.0685, "step": 123410 }, { "epoch": 4.4850643215349955, "grad_norm": 0.6224818229675293, "learning_rate": 8.811210861815963e-06, "loss": 0.0803, "step": 123420 }, { "epoch": 4.485427720037793, "grad_norm": 0.5297130346298218, "learning_rate": 8.807256837654074e-06, "loss": 0.0922, "step": 123430 }, { "epoch": 4.485791118540591, "grad_norm": 0.48976364731788635, "learning_rate": 8.803303511179861e-06, "loss": 0.0573, "step": 123440 }, { "epoch": 4.486154517043389, "grad_norm": 0.9344358444213867, "learning_rate": 8.799350882563676e-06, "loss": 0.0686, "step": 123450 }, { "epoch": 4.486517915546188, "grad_norm": 0.32611021399497986, "learning_rate": 8.795398951975816e-06, "loss": 0.0776, "step": 123460 }, { "epoch": 4.486881314048986, "grad_norm": 1.0592504739761353, "learning_rate": 8.791447719586571e-06, "loss": 0.0649, "step": 123470 }, { "epoch": 4.487244712551784, "grad_norm": 0.6356394290924072, "learning_rate": 8.787497185566162e-06, "loss": 0.0429, "step": 123480 }, { "epoch": 
4.487608111054582, "grad_norm": 0.5969418287277222, "learning_rate": 8.783547350084826e-06, "loss": 0.0479, "step": 123490 }, { "epoch": 4.48797150955738, "grad_norm": 0.7391321063041687, "learning_rate": 8.779598213312718e-06, "loss": 0.057, "step": 123500 }, { "epoch": 4.488334908060179, "grad_norm": 0.5683807730674744, "learning_rate": 8.775649775420025e-06, "loss": 0.067, "step": 123510 }, { "epoch": 4.488698306562977, "grad_norm": 0.6567642688751221, "learning_rate": 8.771702036576851e-06, "loss": 0.0546, "step": 123520 }, { "epoch": 4.489061705065775, "grad_norm": 0.4564456343650818, "learning_rate": 8.767754996953307e-06, "loss": 0.047, "step": 123530 }, { "epoch": 4.489425103568573, "grad_norm": 0.3566185534000397, "learning_rate": 8.763808656719432e-06, "loss": 0.0568, "step": 123540 }, { "epoch": 4.489788502071372, "grad_norm": 0.3047824800014496, "learning_rate": 8.759863016045275e-06, "loss": 0.2188, "step": 123550 }, { "epoch": 4.49015190057417, "grad_norm": 0.445416122674942, "learning_rate": 8.75591807510084e-06, "loss": 0.0683, "step": 123560 }, { "epoch": 4.490515299076968, "grad_norm": 0.5570577383041382, "learning_rate": 8.751973834056104e-06, "loss": 0.0701, "step": 123570 }, { "epoch": 4.490878697579766, "grad_norm": 0.6754629611968994, "learning_rate": 8.748030293081e-06, "loss": 0.0617, "step": 123580 }, { "epoch": 4.491242096082564, "grad_norm": 0.7759541273117065, "learning_rate": 8.744087452345451e-06, "loss": 0.0543, "step": 123590 }, { "epoch": 4.491605494585363, "grad_norm": 2.056215524673462, "learning_rate": 8.74014531201932e-06, "loss": 0.0719, "step": 123600 }, { "epoch": 4.491605494585363, "eval_loss": 0.3111928701400757, "eval_runtime": 179.0699, "eval_samples_per_second": 41.403, "eval_steps_per_second": 5.177, "eval_wer": 0.12540163740991522, "step": 123600 }, { "epoch": 4.491968893088161, "grad_norm": 0.2962838411331177, "learning_rate": 8.736203872272492e-06, "loss": 0.0583, "step": 123610 }, { "epoch": 4.492332291590959, 
"grad_norm": 0.5641674399375916, "learning_rate": 8.732263133274762e-06, "loss": 0.0767, "step": 123620 }, { "epoch": 4.492695690093757, "grad_norm": 2.1979660987854004, "learning_rate": 8.728323095195943e-06, "loss": 0.0833, "step": 123630 }, { "epoch": 4.493059088596555, "grad_norm": 0.6289680004119873, "learning_rate": 8.724383758205779e-06, "loss": 0.0513, "step": 123640 }, { "epoch": 4.4934224870993535, "grad_norm": 0.35623329877853394, "learning_rate": 8.720445122474008e-06, "loss": 0.0725, "step": 123650 }, { "epoch": 4.4937858856021515, "grad_norm": 0.3222827911376953, "learning_rate": 8.716507188170337e-06, "loss": 0.0538, "step": 123660 }, { "epoch": 4.4941492841049495, "grad_norm": 0.5709748864173889, "learning_rate": 8.712569955464439e-06, "loss": 0.0647, "step": 123670 }, { "epoch": 4.4945126826077475, "grad_norm": 0.3695675730705261, "learning_rate": 8.708633424525944e-06, "loss": 0.0768, "step": 123680 }, { "epoch": 4.4948760811105455, "grad_norm": 0.2032776027917862, "learning_rate": 8.704697595524469e-06, "loss": 0.0646, "step": 123690 }, { "epoch": 4.495239479613344, "grad_norm": 0.9944433569908142, "learning_rate": 8.700762468629597e-06, "loss": 0.0469, "step": 123700 }, { "epoch": 4.495602878116142, "grad_norm": 0.44947701692581177, "learning_rate": 8.696828044010884e-06, "loss": 0.0597, "step": 123710 }, { "epoch": 4.49596627661894, "grad_norm": 0.48200881481170654, "learning_rate": 8.692894321837839e-06, "loss": 0.0531, "step": 123720 }, { "epoch": 4.496329675121738, "grad_norm": 0.5654940009117126, "learning_rate": 8.688961302279963e-06, "loss": 0.066, "step": 123730 }, { "epoch": 4.496693073624536, "grad_norm": 0.5162728428840637, "learning_rate": 8.68502898550669e-06, "loss": 0.0531, "step": 123740 }, { "epoch": 4.497056472127335, "grad_norm": 0.40529751777648926, "learning_rate": 8.68109737168749e-06, "loss": 0.0479, "step": 123750 }, { "epoch": 4.497419870630133, "grad_norm": 1.0609490871429443, "learning_rate": 8.677166460991731e-06, 
"loss": 0.0633, "step": 123760 }, { "epoch": 4.497783269132931, "grad_norm": 0.8281724452972412, "learning_rate": 8.673236253588802e-06, "loss": 0.0683, "step": 123770 }, { "epoch": 4.498146667635729, "grad_norm": 0.4784390330314636, "learning_rate": 8.669306749648024e-06, "loss": 0.0435, "step": 123780 }, { "epoch": 4.498510066138527, "grad_norm": 0.27055010199546814, "learning_rate": 8.665377949338712e-06, "loss": 0.0511, "step": 123790 }, { "epoch": 4.498873464641326, "grad_norm": 0.3888944089412689, "learning_rate": 8.661449852830145e-06, "loss": 0.0516, "step": 123800 }, { "epoch": 4.499236863144124, "grad_norm": 0.4067471921443939, "learning_rate": 8.657522460291581e-06, "loss": 0.0855, "step": 123810 }, { "epoch": 4.499600261646922, "grad_norm": 0.48232707381248474, "learning_rate": 8.653595771892217e-06, "loss": 0.0859, "step": 123820 }, { "epoch": 4.49996366014972, "grad_norm": 0.4570091664791107, "learning_rate": 8.649669787801263e-06, "loss": 0.0612, "step": 123830 }, { "epoch": 4.500327058652518, "grad_norm": 0.7011997103691101, "learning_rate": 8.645744508187842e-06, "loss": 0.0482, "step": 123840 }, { "epoch": 4.500690457155317, "grad_norm": 0.5194302201271057, "learning_rate": 8.64181993322112e-06, "loss": 0.0492, "step": 123850 }, { "epoch": 4.501053855658115, "grad_norm": 0.3431602716445923, "learning_rate": 8.637896063070166e-06, "loss": 0.0807, "step": 123860 }, { "epoch": 4.501417254160913, "grad_norm": 0.4593067169189453, "learning_rate": 8.633972897904063e-06, "loss": 0.0629, "step": 123870 }, { "epoch": 4.501780652663711, "grad_norm": 0.5573797821998596, "learning_rate": 8.630050437891821e-06, "loss": 0.0614, "step": 123880 }, { "epoch": 4.502144051166509, "grad_norm": 0.2954452335834503, "learning_rate": 8.626128683202476e-06, "loss": 0.0508, "step": 123890 }, { "epoch": 4.502507449669308, "grad_norm": 0.9056379795074463, "learning_rate": 8.62220763400498e-06, "loss": 0.0517, "step": 123900 }, { "epoch": 4.502870848172106, "grad_norm": 
0.4388275146484375, "learning_rate": 8.618287290468294e-06, "loss": 0.0702, "step": 123910 }, { "epoch": 4.503234246674904, "grad_norm": 0.5206169486045837, "learning_rate": 8.614367652761312e-06, "loss": 0.5081, "step": 123920 }, { "epoch": 4.503597645177702, "grad_norm": 0.35538193583488464, "learning_rate": 8.61044872105293e-06, "loss": 0.054, "step": 123930 }, { "epoch": 4.5039610436804995, "grad_norm": 0.7723787426948547, "learning_rate": 8.606530495511997e-06, "loss": 0.0511, "step": 123940 }, { "epoch": 4.504324442183298, "grad_norm": 0.8132055401802063, "learning_rate": 8.602612976307345e-06, "loss": 0.0511, "step": 123950 }, { "epoch": 4.504687840686096, "grad_norm": 0.43475204706192017, "learning_rate": 8.59869616360775e-06, "loss": 0.0777, "step": 123960 }, { "epoch": 4.505051239188894, "grad_norm": 0.6687668561935425, "learning_rate": 8.594780057581989e-06, "loss": 0.0789, "step": 123970 }, { "epoch": 4.505414637691692, "grad_norm": 0.38912665843963623, "learning_rate": 8.59086465839877e-06, "loss": 0.0539, "step": 123980 }, { "epoch": 4.505778036194491, "grad_norm": 0.4856990873813629, "learning_rate": 8.586949966226824e-06, "loss": 3.0037, "step": 123990 }, { "epoch": 4.506141434697289, "grad_norm": 0.353549063205719, "learning_rate": 8.583035981234799e-06, "loss": 0.0602, "step": 124000 }, { "epoch": 4.506504833200087, "grad_norm": 0.5612092018127441, "learning_rate": 8.579122703591349e-06, "loss": 0.0501, "step": 124010 }, { "epoch": 4.506868231702885, "grad_norm": 0.6677350401878357, "learning_rate": 8.57521013346507e-06, "loss": 0.0569, "step": 124020 }, { "epoch": 4.507231630205683, "grad_norm": 0.5060855746269226, "learning_rate": 8.571298271024544e-06, "loss": 0.0651, "step": 124030 }, { "epoch": 4.507595028708482, "grad_norm": 0.26367509365081787, "learning_rate": 8.567387116438322e-06, "loss": 0.0576, "step": 124040 }, { "epoch": 4.50795842721128, "grad_norm": 0.24013179540634155, "learning_rate": 8.563476669874932e-06, "loss": 0.0705, 
"step": 124050 }, { "epoch": 4.508321825714078, "grad_norm": 0.6652961373329163, "learning_rate": 8.559566931502841e-06, "loss": 0.0652, "step": 124060 }, { "epoch": 4.508685224216876, "grad_norm": 0.7214508056640625, "learning_rate": 8.555657901490519e-06, "loss": 0.0754, "step": 124070 }, { "epoch": 4.509048622719675, "grad_norm": 1.4731504917144775, "learning_rate": 8.551749580006385e-06, "loss": 0.7939, "step": 124080 }, { "epoch": 4.509412021222473, "grad_norm": 0.9404433369636536, "learning_rate": 8.547841967218847e-06, "loss": 0.0499, "step": 124090 }, { "epoch": 4.509775419725271, "grad_norm": 0.34830185770988464, "learning_rate": 8.543935063296255e-06, "loss": 0.0602, "step": 124100 }, { "epoch": 4.510138818228069, "grad_norm": 0.41602763533592224, "learning_rate": 8.540028868406949e-06, "loss": 0.0513, "step": 124110 }, { "epoch": 4.510502216730867, "grad_norm": 0.5137819647789001, "learning_rate": 8.536123382719236e-06, "loss": 0.0719, "step": 124120 }, { "epoch": 4.510865615233666, "grad_norm": 0.4242905378341675, "learning_rate": 8.532218606401396e-06, "loss": 0.0803, "step": 124130 }, { "epoch": 4.511229013736464, "grad_norm": 0.3295011818408966, "learning_rate": 8.528314539621654e-06, "loss": 0.053, "step": 124140 }, { "epoch": 4.511592412239262, "grad_norm": 0.3779134452342987, "learning_rate": 8.524411182548242e-06, "loss": 0.0628, "step": 124150 }, { "epoch": 4.51195581074206, "grad_norm": 0.3721155524253845, "learning_rate": 8.520508535349323e-06, "loss": 0.064, "step": 124160 }, { "epoch": 4.512319209244858, "grad_norm": 2.0881879329681396, "learning_rate": 8.516606598193055e-06, "loss": 0.0528, "step": 124170 }, { "epoch": 4.5126826077476565, "grad_norm": 1.5842095613479614, "learning_rate": 8.512705371247562e-06, "loss": 0.0676, "step": 124180 }, { "epoch": 4.5130460062504545, "grad_norm": 0.31822651624679565, "learning_rate": 8.508804854680941e-06, "loss": 6.2595, "step": 124190 }, { "epoch": 4.5134094047532525, "grad_norm": 
45.849029541015625, "learning_rate": 8.504905048661236e-06, "loss": 0.536, "step": 124200 }, { "epoch": 4.5134094047532525, "eval_loss": 0.27491259574890137, "eval_runtime": 179.6859, "eval_samples_per_second": 41.261, "eval_steps_per_second": 5.159, "eval_wer": 0.1245030587979015, "step": 124200 }, { "epoch": 4.5137728032560505, "grad_norm": 0.453096479177475, "learning_rate": 8.501005953356481e-06, "loss": 0.0585, "step": 124210 }, { "epoch": 4.514136201758848, "grad_norm": 0.5666411519050598, "learning_rate": 8.497107568934676e-06, "loss": 0.0566, "step": 124220 }, { "epoch": 4.514499600261647, "grad_norm": 1.2568055391311646, "learning_rate": 8.493209895563795e-06, "loss": 0.0521, "step": 124230 }, { "epoch": 4.514862998764445, "grad_norm": 0.4471326172351837, "learning_rate": 8.489312933411761e-06, "loss": 0.0558, "step": 124240 }, { "epoch": 4.515226397267243, "grad_norm": 0.3415360152721405, "learning_rate": 8.48541668264649e-06, "loss": 0.0576, "step": 124250 }, { "epoch": 4.515589795770041, "grad_norm": 0.3588046729564667, "learning_rate": 8.481521143435852e-06, "loss": 0.0849, "step": 124260 }, { "epoch": 4.515953194272839, "grad_norm": 0.5977602005004883, "learning_rate": 8.477626315947707e-06, "loss": 0.0599, "step": 124270 }, { "epoch": 4.516316592775638, "grad_norm": 0.46335968375205994, "learning_rate": 8.473732200349846e-06, "loss": 0.0561, "step": 124280 }, { "epoch": 4.516679991278436, "grad_norm": 1.0987800359725952, "learning_rate": 8.469838796810065e-06, "loss": 0.0459, "step": 124290 }, { "epoch": 4.517043389781234, "grad_norm": 0.41976675391197205, "learning_rate": 8.465946105496123e-06, "loss": 0.0543, "step": 124300 }, { "epoch": 4.517406788284032, "grad_norm": 1.19144606590271, "learning_rate": 8.462054126575725e-06, "loss": 0.1122, "step": 124310 }, { "epoch": 4.51777018678683, "grad_norm": 2.3775861263275146, "learning_rate": 8.458162860216575e-06, "loss": 0.0866, "step": 124320 }, { "epoch": 4.518133585289629, "grad_norm": 
1.845650315284729, "learning_rate": 8.454272306586338e-06, "loss": 0.0531, "step": 124330 }, { "epoch": 4.518496983792427, "grad_norm": 0.5290558338165283, "learning_rate": 8.450382465852633e-06, "loss": 0.0491, "step": 124340 }, { "epoch": 4.518860382295225, "grad_norm": 0.6978418827056885, "learning_rate": 8.44649333818306e-06, "loss": 0.0664, "step": 124350 }, { "epoch": 4.519223780798023, "grad_norm": 0.2968754768371582, "learning_rate": 8.442604923745193e-06, "loss": 0.053, "step": 124360 }, { "epoch": 4.519587179300821, "grad_norm": 0.9654338359832764, "learning_rate": 8.438717222706577e-06, "loss": 0.0743, "step": 124370 }, { "epoch": 4.51995057780362, "grad_norm": 0.3819337785243988, "learning_rate": 8.4348302352347e-06, "loss": 0.0714, "step": 124380 }, { "epoch": 4.520313976306418, "grad_norm": 0.6333800554275513, "learning_rate": 8.430943961497051e-06, "loss": 0.0484, "step": 124390 }, { "epoch": 4.520677374809216, "grad_norm": 0.45619499683380127, "learning_rate": 8.427058401661083e-06, "loss": 0.0749, "step": 124400 }, { "epoch": 4.521040773312014, "grad_norm": 0.3392558991909027, "learning_rate": 8.423173555894193e-06, "loss": 0.0671, "step": 124410 }, { "epoch": 4.521404171814812, "grad_norm": 0.4107264578342438, "learning_rate": 8.419289424363772e-06, "loss": 0.0752, "step": 124420 }, { "epoch": 4.521767570317611, "grad_norm": 0.45977672934532166, "learning_rate": 8.415406007237175e-06, "loss": 0.0665, "step": 124430 }, { "epoch": 4.5221309688204085, "grad_norm": 0.41177213191986084, "learning_rate": 8.411523304681738e-06, "loss": 0.0428, "step": 124440 }, { "epoch": 4.5224943673232065, "grad_norm": 0.7198271751403809, "learning_rate": 8.407641316864725e-06, "loss": 0.1033, "step": 124450 }, { "epoch": 4.5228577658260045, "grad_norm": 0.6751904487609863, "learning_rate": 8.403760043953419e-06, "loss": 0.0641, "step": 124460 }, { "epoch": 4.5232211643288025, "grad_norm": 0.39268776774406433, "learning_rate": 8.399879486115044e-06, "loss": 0.0636, 
"step": 124470 }, { "epoch": 4.523584562831601, "grad_norm": 1.1051416397094727, "learning_rate": 8.395999643516796e-06, "loss": 0.0782, "step": 124480 }, { "epoch": 4.523947961334399, "grad_norm": 0.9526166915893555, "learning_rate": 8.392120516325843e-06, "loss": 0.0577, "step": 124490 }, { "epoch": 4.524311359837197, "grad_norm": 0.5902212262153625, "learning_rate": 8.388242104709329e-06, "loss": 0.0551, "step": 124500 }, { "epoch": 4.524674758339995, "grad_norm": 1.0079960823059082, "learning_rate": 8.384364408834364e-06, "loss": 0.0746, "step": 124510 }, { "epoch": 4.525038156842793, "grad_norm": 1.2905350923538208, "learning_rate": 8.380487428868009e-06, "loss": 0.062, "step": 124520 }, { "epoch": 4.525401555345592, "grad_norm": 1.5737414360046387, "learning_rate": 8.376611164977319e-06, "loss": 0.0694, "step": 124530 }, { "epoch": 4.52576495384839, "grad_norm": 0.49750620126724243, "learning_rate": 8.372735617329316e-06, "loss": 0.0502, "step": 124540 }, { "epoch": 4.526128352351188, "grad_norm": 0.4365397095680237, "learning_rate": 8.368860786090967e-06, "loss": 0.0375, "step": 124550 }, { "epoch": 4.526491750853986, "grad_norm": 0.47341033816337585, "learning_rate": 8.364986671429235e-06, "loss": 0.0678, "step": 124560 }, { "epoch": 4.526855149356785, "grad_norm": 0.7484754323959351, "learning_rate": 8.361113273511037e-06, "loss": 0.0719, "step": 124570 }, { "epoch": 4.527218547859583, "grad_norm": 0.9976155161857605, "learning_rate": 8.357240592503273e-06, "loss": 0.0439, "step": 124580 }, { "epoch": 4.527581946362381, "grad_norm": 1.9797228574752808, "learning_rate": 8.353368628572792e-06, "loss": 0.0621, "step": 124590 }, { "epoch": 4.527945344865179, "grad_norm": 0.3955487310886383, "learning_rate": 8.34949738188643e-06, "loss": 0.0573, "step": 124600 }, { "epoch": 4.528308743367977, "grad_norm": 0.4459396302700043, "learning_rate": 8.345626852610977e-06, "loss": 0.0669, "step": 124610 }, { "epoch": 4.528672141870776, "grad_norm": 0.6977239847183228, 
"learning_rate": 8.34175704091322e-06, "loss": 0.07, "step": 124620 }, { "epoch": 4.529035540373574, "grad_norm": 0.5316757559776306, "learning_rate": 8.337887946959872e-06, "loss": 0.058, "step": 124630 }, { "epoch": 4.529398938876372, "grad_norm": 0.21470944583415985, "learning_rate": 8.334019570917654e-06, "loss": 0.0447, "step": 124640 }, { "epoch": 4.52976233737917, "grad_norm": 0.40101158618927, "learning_rate": 8.33015191295323e-06, "loss": 0.0503, "step": 124650 }, { "epoch": 4.530125735881969, "grad_norm": 0.6712108850479126, "learning_rate": 8.326284973233248e-06, "loss": 0.0667, "step": 124660 }, { "epoch": 4.530489134384767, "grad_norm": 0.9560113549232483, "learning_rate": 8.322418751924318e-06, "loss": 0.0635, "step": 124670 }, { "epoch": 4.530852532887565, "grad_norm": 0.46889641880989075, "learning_rate": 8.318553249193038e-06, "loss": 0.0629, "step": 124680 }, { "epoch": 4.531215931390363, "grad_norm": 4.488816738128662, "learning_rate": 8.314688465205937e-06, "loss": 0.0389, "step": 124690 }, { "epoch": 4.531579329893161, "grad_norm": 0.5281501412391663, "learning_rate": 8.31082440012954e-06, "loss": 0.0601, "step": 124700 }, { "epoch": 4.5319427283959595, "grad_norm": 0.4834880828857422, "learning_rate": 8.30696105413034e-06, "loss": 0.0784, "step": 124710 }, { "epoch": 4.5323061268987574, "grad_norm": 0.5478177666664124, "learning_rate": 8.303098427374807e-06, "loss": 0.0852, "step": 124720 }, { "epoch": 4.532669525401555, "grad_norm": 0.32610076665878296, "learning_rate": 8.29923652002934e-06, "loss": 0.054, "step": 124730 }, { "epoch": 4.533032923904353, "grad_norm": 1.545202612876892, "learning_rate": 8.295375332260354e-06, "loss": 0.0465, "step": 124740 }, { "epoch": 4.533396322407151, "grad_norm": 0.3422263264656067, "learning_rate": 8.29151486423421e-06, "loss": 0.0541, "step": 124750 }, { "epoch": 4.53375972090995, "grad_norm": 1.2290678024291992, "learning_rate": 8.287655116117249e-06, "loss": 0.0534, "step": 124760 }, { "epoch": 
4.534123119412748, "grad_norm": 0.76679527759552, "learning_rate": 8.283796088075757e-06, "loss": 0.0518, "step": 124770 }, { "epoch": 4.534486517915546, "grad_norm": 1.319697380065918, "learning_rate": 8.279937780276024e-06, "loss": 0.0717, "step": 124780 }, { "epoch": 4.534849916418344, "grad_norm": 0.32244163751602173, "learning_rate": 8.276080192884273e-06, "loss": 0.0568, "step": 124790 }, { "epoch": 4.535213314921142, "grad_norm": 0.4346468448638916, "learning_rate": 8.272223326066725e-06, "loss": 0.06, "step": 124800 }, { "epoch": 4.535213314921142, "eval_loss": 0.3044581711292267, "eval_runtime": 179.1886, "eval_samples_per_second": 41.375, "eval_steps_per_second": 5.173, "eval_wer": 0.12486612086336159, "step": 124800 }, { "epoch": 4.535576713423941, "grad_norm": 0.7160072922706604, "learning_rate": 8.268367179989553e-06, "loss": 2.4351, "step": 124810 }, { "epoch": 4.535940111926739, "grad_norm": 0.47949934005737305, "learning_rate": 8.264511754818919e-06, "loss": 0.0578, "step": 124820 }, { "epoch": 4.536303510429537, "grad_norm": 1.0472005605697632, "learning_rate": 8.260657050720918e-06, "loss": 0.0735, "step": 124830 }, { "epoch": 4.536666908932335, "grad_norm": 0.3444649577140808, "learning_rate": 8.256803067861646e-06, "loss": 0.0514, "step": 124840 }, { "epoch": 4.537030307435133, "grad_norm": 0.6875000596046448, "learning_rate": 8.25294980640716e-06, "loss": 0.0511, "step": 124850 }, { "epoch": 4.537393705937932, "grad_norm": 0.3071330189704895, "learning_rate": 8.249097266523486e-06, "loss": 0.0771, "step": 124860 }, { "epoch": 4.53775710444073, "grad_norm": 0.4984486997127533, "learning_rate": 8.245245448376604e-06, "loss": 0.0566, "step": 124870 }, { "epoch": 4.538120502943528, "grad_norm": 0.6224886775016785, "learning_rate": 8.241394352132481e-06, "loss": 0.0582, "step": 124880 }, { "epoch": 4.538483901446326, "grad_norm": 0.2929985523223877, "learning_rate": 8.237543977957049e-06, "loss": 0.6875, "step": 124890 }, { "epoch": 
4.538847299949124, "grad_norm": 0.5459185838699341, "learning_rate": 8.233694326016214e-06, "loss": 0.0523, "step": 124900 }, { "epoch": 4.539210698451923, "grad_norm": 0.3112967014312744, "learning_rate": 8.229845396475828e-06, "loss": 0.0583, "step": 124910 }, { "epoch": 4.539574096954721, "grad_norm": 0.7101778388023376, "learning_rate": 8.225997189501742e-06, "loss": 0.0652, "step": 124920 }, { "epoch": 4.539937495457519, "grad_norm": 0.3670494556427002, "learning_rate": 8.22214970525974e-06, "loss": 0.0527, "step": 124930 }, { "epoch": 4.540300893960317, "grad_norm": 0.4395025372505188, "learning_rate": 8.218302943915626e-06, "loss": 0.0487, "step": 124940 }, { "epoch": 4.540664292463115, "grad_norm": 0.44454246759414673, "learning_rate": 8.214456905635124e-06, "loss": 0.0449, "step": 124950 }, { "epoch": 4.5410276909659135, "grad_norm": 0.35817548632621765, "learning_rate": 8.210611590583955e-06, "loss": 0.1497, "step": 124960 }, { "epoch": 4.5413910894687115, "grad_norm": 0.7225530743598938, "learning_rate": 8.206766998927792e-06, "loss": 0.0599, "step": 124970 }, { "epoch": 4.5417544879715095, "grad_norm": 0.45392826199531555, "learning_rate": 8.202923130832286e-06, "loss": 0.0438, "step": 124980 }, { "epoch": 4.5421178864743075, "grad_norm": 0.43107280135154724, "learning_rate": 8.199079986463058e-06, "loss": 1.4127, "step": 124990 }, { "epoch": 4.5424812849771055, "grad_norm": 0.386159211397171, "learning_rate": 8.195237565985703e-06, "loss": 0.0551, "step": 125000 }, { "epoch": 4.542844683479904, "grad_norm": 0.3740684986114502, "learning_rate": 8.191395869565762e-06, "loss": 0.0666, "step": 125010 }, { "epoch": 4.543208081982702, "grad_norm": 0.4995236396789551, "learning_rate": 8.187554897368776e-06, "loss": 0.0801, "step": 125020 }, { "epoch": 4.5435714804855, "grad_norm": 0.41527360677719116, "learning_rate": 8.183714649560215e-06, "loss": 0.0463, "step": 125030 }, { "epoch": 4.543934878988298, "grad_norm": 0.4730013310909271, "learning_rate": 
8.179875126305575e-06, "loss": 0.0505, "step": 125040 }, { "epoch": 4.544298277491096, "grad_norm": 0.5069946646690369, "learning_rate": 8.176036327770259e-06, "loss": 0.0458, "step": 125050 }, { "epoch": 4.544661675993895, "grad_norm": 0.6612190008163452, "learning_rate": 8.172198254119687e-06, "loss": 0.0597, "step": 125060 }, { "epoch": 4.545025074496693, "grad_norm": 0.614518940448761, "learning_rate": 8.168360905519202e-06, "loss": 0.0549, "step": 125070 }, { "epoch": 4.545388472999491, "grad_norm": 0.4206506609916687, "learning_rate": 8.164524282134173e-06, "loss": 0.0572, "step": 125080 }, { "epoch": 4.545751871502289, "grad_norm": 0.6965306997299194, "learning_rate": 8.160688384129886e-06, "loss": 0.0493, "step": 125090 }, { "epoch": 4.546115270005087, "grad_norm": 0.2887246012687683, "learning_rate": 8.15685321167163e-06, "loss": 0.0598, "step": 125100 }, { "epoch": 4.546478668507886, "grad_norm": 0.6751300096511841, "learning_rate": 8.153018764924633e-06, "loss": 0.065, "step": 125110 }, { "epoch": 4.546842067010684, "grad_norm": 0.7887641191482544, "learning_rate": 8.149185044054115e-06, "loss": 0.0484, "step": 125120 }, { "epoch": 4.547205465513482, "grad_norm": 0.5545349717140198, "learning_rate": 8.145352049225257e-06, "loss": 0.0555, "step": 125130 }, { "epoch": 4.54756886401628, "grad_norm": 2.4700214862823486, "learning_rate": 8.141519780603221e-06, "loss": 0.0495, "step": 125140 }, { "epoch": 4.547932262519079, "grad_norm": 1.5873525142669678, "learning_rate": 8.137688238353105e-06, "loss": 0.0601, "step": 125150 }, { "epoch": 4.548295661021877, "grad_norm": 0.2735642194747925, "learning_rate": 8.133857422640016e-06, "loss": 0.0696, "step": 125160 }, { "epoch": 4.548659059524675, "grad_norm": 4.075057506561279, "learning_rate": 8.13002733362898e-06, "loss": 0.0621, "step": 125170 }, { "epoch": 4.549022458027473, "grad_norm": 0.6919664740562439, "learning_rate": 8.126197971485064e-06, "loss": 0.0704, "step": 125180 }, { "epoch": 4.549385856530271, 
"grad_norm": 0.6229852437973022, "learning_rate": 8.122369336373232e-06, "loss": 0.044, "step": 125190 }, { "epoch": 4.54974925503307, "grad_norm": 0.4134267568588257, "learning_rate": 8.11854142845846e-06, "loss": 0.0621, "step": 125200 }, { "epoch": 4.550112653535868, "grad_norm": 0.7249795794487, "learning_rate": 8.114714247905666e-06, "loss": 0.0698, "step": 125210 }, { "epoch": 4.550476052038666, "grad_norm": 5.056555271148682, "learning_rate": 8.110887794879756e-06, "loss": 0.0681, "step": 125220 }, { "epoch": 4.5508394505414635, "grad_norm": 2.870107412338257, "learning_rate": 8.107062069545601e-06, "loss": 0.0442, "step": 125230 }, { "epoch": 4.551202849044262, "grad_norm": 1.6329604387283325, "learning_rate": 8.103237072068045e-06, "loss": 0.0588, "step": 125240 }, { "epoch": 4.55156624754706, "grad_norm": 0.7168179750442505, "learning_rate": 8.099412802611875e-06, "loss": 0.0821, "step": 125250 }, { "epoch": 4.551929646049858, "grad_norm": 0.39133715629577637, "learning_rate": 8.095589261341876e-06, "loss": 0.0526, "step": 125260 }, { "epoch": 4.552293044552656, "grad_norm": 0.3881663978099823, "learning_rate": 8.091766448422788e-06, "loss": 0.0517, "step": 125270 }, { "epoch": 4.552656443055454, "grad_norm": 0.9824138879776001, "learning_rate": 8.087944364019334e-06, "loss": 0.0551, "step": 125280 }, { "epoch": 4.553019841558253, "grad_norm": 0.33601242303848267, "learning_rate": 8.084123008296177e-06, "loss": 0.0509, "step": 125290 }, { "epoch": 4.553383240061051, "grad_norm": 5.748452663421631, "learning_rate": 8.080302381417979e-06, "loss": 0.0391, "step": 125300 }, { "epoch": 4.553746638563849, "grad_norm": 0.6535000205039978, "learning_rate": 8.076482483549333e-06, "loss": 0.078, "step": 125310 }, { "epoch": 4.554110037066647, "grad_norm": 1.300309181213379, "learning_rate": 8.07266331485486e-06, "loss": 0.0398, "step": 125320 }, { "epoch": 4.554473435569445, "grad_norm": 0.4497128427028656, "learning_rate": 8.06884487549909e-06, "loss": 0.0436, 
"step": 125330 }, { "epoch": 4.554836834072244, "grad_norm": 0.6887450218200684, "learning_rate": 8.06502716564656e-06, "loss": 1.1866, "step": 125340 }, { "epoch": 4.555200232575042, "grad_norm": 0.8471201658248901, "learning_rate": 8.061210185461749e-06, "loss": 0.0684, "step": 125350 }, { "epoch": 4.55556363107784, "grad_norm": 0.4937605559825897, "learning_rate": 8.05739393510912e-06, "loss": 0.07, "step": 125360 }, { "epoch": 4.555927029580638, "grad_norm": 0.4995588958263397, "learning_rate": 8.053578414753105e-06, "loss": 0.0641, "step": 125370 }, { "epoch": 4.556290428083436, "grad_norm": 1.386864423751831, "learning_rate": 8.04976362455811e-06, "loss": 0.0639, "step": 125380 }, { "epoch": 4.556653826586235, "grad_norm": 0.5394189953804016, "learning_rate": 8.045949564688481e-06, "loss": 0.0526, "step": 125390 }, { "epoch": 4.557017225089033, "grad_norm": 0.2875339984893799, "learning_rate": 8.042136235308572e-06, "loss": 0.0593, "step": 125400 }, { "epoch": 4.557017225089033, "eval_loss": 0.3054760992527008, "eval_runtime": 179.4921, "eval_samples_per_second": 41.305, "eval_steps_per_second": 5.165, "eval_wer": 0.12472089603717755, "step": 125400 }, { "epoch": 4.557380623591831, "grad_norm": 0.3843660056591034, "learning_rate": 8.03832363658266e-06, "loss": 0.0643, "step": 125410 }, { "epoch": 4.557744022094629, "grad_norm": 0.40120866894721985, "learning_rate": 8.034511768675048e-06, "loss": 0.0661, "step": 125420 }, { "epoch": 4.558107420597427, "grad_norm": 0.3208017945289612, "learning_rate": 8.030700631749955e-06, "loss": 0.3983, "step": 125430 }, { "epoch": 4.558470819100226, "grad_norm": 0.4574812650680542, "learning_rate": 8.0268902259716e-06, "loss": 0.062, "step": 125440 }, { "epoch": 4.558834217603024, "grad_norm": 0.6221907138824463, "learning_rate": 8.02308055150414e-06, "loss": 0.0562, "step": 125450 }, { "epoch": 4.559197616105822, "grad_norm": 0.7196207046508789, "learning_rate": 8.01927160851175e-06, "loss": 0.2367, "step": 125460 }, { 
"epoch": 4.55956101460862, "grad_norm": 0.8422996997833252, "learning_rate": 8.015463397158521e-06, "loss": 0.0651, "step": 125470 }, { "epoch": 4.559924413111418, "grad_norm": 0.4113635718822479, "learning_rate": 8.01165591760855e-06, "loss": 0.0446, "step": 125480 }, { "epoch": 4.5602878116142165, "grad_norm": 0.26394638419151306, "learning_rate": 8.007849170025878e-06, "loss": 0.0394, "step": 125490 }, { "epoch": 4.5606512101170145, "grad_norm": 0.7490254640579224, "learning_rate": 8.004043154574523e-06, "loss": 0.0723, "step": 125500 }, { "epoch": 4.5610146086198124, "grad_norm": 0.4459429979324341, "learning_rate": 8.000237871418475e-06, "loss": 0.0692, "step": 125510 }, { "epoch": 4.56137800712261, "grad_norm": 2.0403530597686768, "learning_rate": 7.996433320721703e-06, "loss": 0.0634, "step": 125520 }, { "epoch": 4.561741405625408, "grad_norm": 1.2519432306289673, "learning_rate": 7.99262950264811e-06, "loss": 0.0479, "step": 125530 }, { "epoch": 4.562104804128207, "grad_norm": 0.7229874730110168, "learning_rate": 7.988826417361609e-06, "loss": 0.0479, "step": 125540 }, { "epoch": 4.562468202631005, "grad_norm": 1.2834559679031372, "learning_rate": 7.985024065026035e-06, "loss": 0.0596, "step": 125550 }, { "epoch": 4.562831601133803, "grad_norm": 1.3731257915496826, "learning_rate": 7.98122244580525e-06, "loss": 0.0668, "step": 125560 }, { "epoch": 4.563194999636601, "grad_norm": 0.6043723225593567, "learning_rate": 7.97742155986303e-06, "loss": 0.2091, "step": 125570 }, { "epoch": 4.563558398139399, "grad_norm": 0.6116291880607605, "learning_rate": 7.973621407363156e-06, "loss": 0.0577, "step": 125580 }, { "epoch": 4.563921796642198, "grad_norm": 0.2291097193956375, "learning_rate": 7.969821988469345e-06, "loss": 0.0563, "step": 125590 }, { "epoch": 4.564285195144996, "grad_norm": 0.42240583896636963, "learning_rate": 7.966023303345313e-06, "loss": 0.0759, "step": 125600 }, { "epoch": 4.564648593647794, "grad_norm": 3.611849546432495, "learning_rate": 
7.962225352154732e-06, "loss": 0.0646, "step": 125610 }, { "epoch": 4.565011992150592, "grad_norm": 1.5979939699172974, "learning_rate": 7.958428135061246e-06, "loss": 0.0614, "step": 125620 }, { "epoch": 4.56537539065339, "grad_norm": 0.4119722545146942, "learning_rate": 7.954631652228447e-06, "loss": 3.1419, "step": 125630 }, { "epoch": 4.565738789156189, "grad_norm": 0.6527985334396362, "learning_rate": 7.950835903819928e-06, "loss": 0.0541, "step": 125640 }, { "epoch": 4.566102187658987, "grad_norm": 0.6249573230743408, "learning_rate": 7.947040889999225e-06, "loss": 0.0565, "step": 125650 }, { "epoch": 4.566465586161785, "grad_norm": 0.24648499488830566, "learning_rate": 7.943246610929863e-06, "loss": 0.0744, "step": 125660 }, { "epoch": 4.566828984664583, "grad_norm": 0.42753612995147705, "learning_rate": 7.93945306677531e-06, "loss": 0.0819, "step": 125670 }, { "epoch": 4.567192383167381, "grad_norm": 0.41722577810287476, "learning_rate": 7.935660257699032e-06, "loss": 0.0695, "step": 125680 }, { "epoch": 4.56755578167018, "grad_norm": 19.217390060424805, "learning_rate": 7.931868183864419e-06, "loss": 0.1349, "step": 125690 }, { "epoch": 4.567919180172978, "grad_norm": 3.756126880645752, "learning_rate": 7.928076845434895e-06, "loss": 0.1057, "step": 125700 }, { "epoch": 4.568282578675776, "grad_norm": 0.47838127613067627, "learning_rate": 7.924286242573792e-06, "loss": 0.0592, "step": 125710 }, { "epoch": 4.568645977178574, "grad_norm": 0.7187171578407288, "learning_rate": 7.920496375444444e-06, "loss": 0.0654, "step": 125720 }, { "epoch": 4.5690093756813726, "grad_norm": 0.40447700023651123, "learning_rate": 7.916707244210131e-06, "loss": 0.0421, "step": 125730 }, { "epoch": 4.5693727741841705, "grad_norm": 0.5948053002357483, "learning_rate": 7.912918849034121e-06, "loss": 0.0602, "step": 125740 }, { "epoch": 4.5697361726869685, "grad_norm": 1.1611541509628296, "learning_rate": 7.909131190079641e-06, "loss": 0.0586, "step": 125750 }, { "epoch": 
4.5700995711897665, "grad_norm": 0.3957841694355011, "learning_rate": 7.905344267509895e-06, "loss": 0.0535, "step": 125760 }, { "epoch": 4.5704629696925645, "grad_norm": 0.7486786246299744, "learning_rate": 7.901558081488036e-06, "loss": 0.0674, "step": 125770 }, { "epoch": 4.570826368195363, "grad_norm": 0.5237159132957458, "learning_rate": 7.897772632177212e-06, "loss": 0.1308, "step": 125780 }, { "epoch": 4.571189766698161, "grad_norm": 0.5817849040031433, "learning_rate": 7.893987919740494e-06, "loss": 0.4336, "step": 125790 }, { "epoch": 4.571553165200959, "grad_norm": 0.5413478016853333, "learning_rate": 7.890203944340993e-06, "loss": 0.0603, "step": 125800 }, { "epoch": 4.571916563703757, "grad_norm": 0.37014704942703247, "learning_rate": 7.88642070614172e-06, "loss": 0.0706, "step": 125810 }, { "epoch": 4.572279962206556, "grad_norm": 0.6111258864402771, "learning_rate": 7.882638205305696e-06, "loss": 0.0712, "step": 125820 }, { "epoch": 4.572643360709354, "grad_norm": 0.8887606263160706, "learning_rate": 7.878856441995878e-06, "loss": 0.0483, "step": 125830 }, { "epoch": 4.573006759212152, "grad_norm": 0.36010318994522095, "learning_rate": 7.875075416375219e-06, "loss": 0.0532, "step": 125840 }, { "epoch": 4.57337015771495, "grad_norm": 0.7919467091560364, "learning_rate": 7.871295128606632e-06, "loss": 0.1206, "step": 125850 }, { "epoch": 4.573733556217748, "grad_norm": 0.4224424362182617, "learning_rate": 7.867515578852999e-06, "loss": 0.0622, "step": 125860 }, { "epoch": 4.574096954720547, "grad_norm": 1.468988299369812, "learning_rate": 7.863736767277156e-06, "loss": 0.0523, "step": 125870 }, { "epoch": 4.574460353223345, "grad_norm": 0.44766831398010254, "learning_rate": 7.859958694041925e-06, "loss": 0.0478, "step": 125880 }, { "epoch": 4.574823751726143, "grad_norm": 0.37048444151878357, "learning_rate": 7.856181359310088e-06, "loss": 0.0451, "step": 125890 }, { "epoch": 4.575187150228941, "grad_norm": 0.49744728207588196, "learning_rate": 
7.852404763244406e-06, "loss": 0.0805, "step": 125900 }, { "epoch": 4.575550548731739, "grad_norm": 0.35201868414878845, "learning_rate": 7.848628906007585e-06, "loss": 0.0714, "step": 125910 }, { "epoch": 4.575913947234538, "grad_norm": 0.5504136085510254, "learning_rate": 7.844853787762327e-06, "loss": 0.0602, "step": 125920 }, { "epoch": 4.576277345737336, "grad_norm": 0.5599771738052368, "learning_rate": 7.841079408671265e-06, "loss": 0.0656, "step": 125930 }, { "epoch": 4.576640744240134, "grad_norm": 1.252106785774231, "learning_rate": 7.837305768897055e-06, "loss": 0.0488, "step": 125940 }, { "epoch": 4.577004142742932, "grad_norm": 2.146782636642456, "learning_rate": 7.833532868602267e-06, "loss": 0.0767, "step": 125950 }, { "epoch": 4.57736754124573, "grad_norm": 0.39708051085472107, "learning_rate": 7.829760707949476e-06, "loss": 0.0701, "step": 125960 }, { "epoch": 4.577730939748529, "grad_norm": 0.9206411242485046, "learning_rate": 7.825989287101198e-06, "loss": 0.0633, "step": 125970 }, { "epoch": 4.578094338251327, "grad_norm": 1.8759825229644775, "learning_rate": 7.822218606219936e-06, "loss": 0.057, "step": 125980 }, { "epoch": 4.578457736754125, "grad_norm": 0.36060214042663574, "learning_rate": 7.818448665468156e-06, "loss": 0.0423, "step": 125990 }, { "epoch": 4.578821135256923, "grad_norm": 0.6770915985107422, "learning_rate": 7.814679465008299e-06, "loss": 0.0716, "step": 126000 }, { "epoch": 4.578821135256923, "eval_loss": 0.3099243938922882, "eval_runtime": 178.7365, "eval_samples_per_second": 41.48, "eval_steps_per_second": 5.186, "eval_wer": 0.12349556156624975, "step": 126000 }, { "epoch": 4.579184533759721, "grad_norm": 0.3488273322582245, "learning_rate": 7.810911005002746e-06, "loss": 0.0625, "step": 126010 }, { "epoch": 4.579547932262519, "grad_norm": 0.5793322920799255, "learning_rate": 7.807143285613892e-06, "loss": 0.0634, "step": 126020 }, { "epoch": 4.579911330765317, "grad_norm": 0.3971642255783081, "learning_rate": 
7.803376307004042e-06, "loss": 0.1003, "step": 126030 }, { "epoch": 4.580274729268115, "grad_norm": 0.3912244141101837, "learning_rate": 7.799610069335538e-06, "loss": 0.0518, "step": 126040 }, { "epoch": 4.580638127770913, "grad_norm": 0.7339867949485779, "learning_rate": 7.795844572770631e-06, "loss": 0.0585, "step": 126050 }, { "epoch": 4.581001526273711, "grad_norm": 0.7593865990638733, "learning_rate": 7.792079817471576e-06, "loss": 0.0661, "step": 126060 }, { "epoch": 4.58136492477651, "grad_norm": 1.096570372581482, "learning_rate": 7.78831580360056e-06, "loss": 0.0652, "step": 126070 }, { "epoch": 4.581728323279308, "grad_norm": 0.524372935295105, "learning_rate": 7.784552531319792e-06, "loss": 0.0466, "step": 126080 }, { "epoch": 4.582091721782106, "grad_norm": 0.23096977174282074, "learning_rate": 7.780790000791394e-06, "loss": 0.0489, "step": 126090 }, { "epoch": 4.582455120284904, "grad_norm": 0.5077711939811707, "learning_rate": 7.777028212177495e-06, "loss": 0.0626, "step": 126100 }, { "epoch": 4.582818518787702, "grad_norm": 0.6009909510612488, "learning_rate": 7.773267165640166e-06, "loss": 0.0524, "step": 126110 }, { "epoch": 4.583181917290501, "grad_norm": 9.999988555908203, "learning_rate": 7.76950686134146e-06, "loss": 0.0771, "step": 126120 }, { "epoch": 4.583545315793299, "grad_norm": 0.40103012323379517, "learning_rate": 7.765747299443399e-06, "loss": 0.0955, "step": 126130 }, { "epoch": 4.583908714296097, "grad_norm": 2.226865768432617, "learning_rate": 7.761988480107973e-06, "loss": 0.0532, "step": 126140 }, { "epoch": 4.584272112798895, "grad_norm": 1.181962013244629, "learning_rate": 7.758230403497124e-06, "loss": 0.0528, "step": 126150 }, { "epoch": 4.584635511301693, "grad_norm": 0.6523319482803345, "learning_rate": 7.754473069772788e-06, "loss": 0.103, "step": 126160 }, { "epoch": 4.584998909804492, "grad_norm": 0.941371500492096, "learning_rate": 7.750716479096831e-06, "loss": 0.0648, "step": 126170 }, { "epoch": 4.58536230830729, 
"grad_norm": 0.5867627263069153, "learning_rate": 7.746960631631145e-06, "loss": 0.0625, "step": 126180 }, { "epoch": 4.585725706810088, "grad_norm": 0.5842635035514832, "learning_rate": 7.743205527537531e-06, "loss": 0.0402, "step": 126190 }, { "epoch": 4.586089105312886, "grad_norm": 0.5364586710929871, "learning_rate": 7.739451166977799e-06, "loss": 0.0589, "step": 126200 }, { "epoch": 4.586452503815684, "grad_norm": 0.34649497270584106, "learning_rate": 7.735697550113694e-06, "loss": 0.062, "step": 126210 }, { "epoch": 4.586815902318483, "grad_norm": 0.7509855031967163, "learning_rate": 7.731944677106954e-06, "loss": 0.0842, "step": 126220 }, { "epoch": 4.587179300821281, "grad_norm": 0.41522151231765747, "learning_rate": 7.728192548119279e-06, "loss": 0.0542, "step": 126230 }, { "epoch": 4.587542699324079, "grad_norm": 0.44053012132644653, "learning_rate": 7.724441163312344e-06, "loss": 0.054, "step": 126240 }, { "epoch": 4.587906097826877, "grad_norm": 0.44163236021995544, "learning_rate": 7.72069052284776e-06, "loss": 0.0554, "step": 126250 }, { "epoch": 4.588269496329675, "grad_norm": 0.41186171770095825, "learning_rate": 7.716940626887145e-06, "loss": 0.0634, "step": 126260 }, { "epoch": 4.5886328948324735, "grad_norm": 1.269209384918213, "learning_rate": 7.713191475592064e-06, "loss": 0.0679, "step": 126270 }, { "epoch": 4.5889962933352715, "grad_norm": 0.4185671806335449, "learning_rate": 7.709443069124062e-06, "loss": 0.0498, "step": 126280 }, { "epoch": 4.5893596918380695, "grad_norm": 0.31529852747917175, "learning_rate": 7.705695407644634e-06, "loss": 0.0425, "step": 126290 }, { "epoch": 4.5897230903408675, "grad_norm": 0.4557393193244934, "learning_rate": 7.701948491315262e-06, "loss": 0.0481, "step": 126300 }, { "epoch": 4.590086488843666, "grad_norm": 0.3848503828048706, "learning_rate": 7.698202320297368e-06, "loss": 0.0801, "step": 126310 }, { "epoch": 4.590449887346464, "grad_norm": 0.48702195286750793, "learning_rate": 7.694456894752392e-06, 
"loss": 0.0601, "step": 126320 }, { "epoch": 4.590813285849262, "grad_norm": 0.5936059951782227, "learning_rate": 7.69071221484169e-06, "loss": 0.0671, "step": 126330 }, { "epoch": 4.59117668435206, "grad_norm": 0.4030866026878357, "learning_rate": 7.686968280726614e-06, "loss": 0.0777, "step": 126340 }, { "epoch": 4.591540082854858, "grad_norm": 0.859853982925415, "learning_rate": 7.683225092568471e-06, "loss": 0.0532, "step": 126350 }, { "epoch": 4.591903481357657, "grad_norm": 1.11529541015625, "learning_rate": 7.679482650528545e-06, "loss": 0.0597, "step": 126360 }, { "epoch": 4.592266879860455, "grad_norm": 0.7762242555618286, "learning_rate": 7.675740954768085e-06, "loss": 0.058, "step": 126370 }, { "epoch": 4.592630278363253, "grad_norm": 0.5146819353103638, "learning_rate": 7.672000005448313e-06, "loss": 0.0478, "step": 126380 }, { "epoch": 4.592993676866051, "grad_norm": 0.8194442391395569, "learning_rate": 7.668259802730402e-06, "loss": 0.0482, "step": 126390 }, { "epoch": 4.59335707536885, "grad_norm": 0.7553524971008301, "learning_rate": 7.664520346775516e-06, "loss": 0.065, "step": 126400 }, { "epoch": 4.593720473871648, "grad_norm": 0.2427300363779068, "learning_rate": 7.660781637744754e-06, "loss": 0.0684, "step": 126410 }, { "epoch": 4.594083872374446, "grad_norm": 0.8332762122154236, "learning_rate": 7.657043675799234e-06, "loss": 0.2142, "step": 126420 }, { "epoch": 4.594447270877244, "grad_norm": 0.802563488483429, "learning_rate": 7.653306461099988e-06, "loss": 0.0705, "step": 126430 }, { "epoch": 4.594810669380042, "grad_norm": 0.4586730897426605, "learning_rate": 7.649569993808054e-06, "loss": 0.0606, "step": 126440 }, { "epoch": 4.595174067882841, "grad_norm": 0.3121570944786072, "learning_rate": 7.645834274084399e-06, "loss": 0.0522, "step": 126450 }, { "epoch": 4.595537466385639, "grad_norm": 0.4224018156528473, "learning_rate": 7.642099302090016e-06, "loss": 0.0538, "step": 126460 }, { "epoch": 4.595900864888437, "grad_norm": 
0.5237678289413452, "learning_rate": 7.638365077985807e-06, "loss": 0.0728, "step": 126470 }, { "epoch": 4.596264263391235, "grad_norm": 0.6423215866088867, "learning_rate": 7.63463160193268e-06, "loss": 0.12, "step": 126480 }, { "epoch": 4.596627661894033, "grad_norm": 0.590101957321167, "learning_rate": 7.630898874091485e-06, "loss": 0.0509, "step": 126490 }, { "epoch": 4.596991060396832, "grad_norm": 14.327346801757812, "learning_rate": 7.627166894623055e-06, "loss": 0.0542, "step": 126500 }, { "epoch": 4.59735445889963, "grad_norm": 1.1073505878448486, "learning_rate": 7.623435663688194e-06, "loss": 0.054, "step": 126510 }, { "epoch": 4.5977178574024276, "grad_norm": 1.0423232316970825, "learning_rate": 7.619705181447673e-06, "loss": 0.0606, "step": 126520 }, { "epoch": 4.5980812559052255, "grad_norm": 0.4513705372810364, "learning_rate": 7.615975448062207e-06, "loss": 0.0473, "step": 126530 }, { "epoch": 4.5984446544080235, "grad_norm": 1.1003657579421997, "learning_rate": 7.612246463692513e-06, "loss": 0.0544, "step": 126540 }, { "epoch": 4.598808052910822, "grad_norm": 0.5768654346466064, "learning_rate": 7.608518228499237e-06, "loss": 0.0444, "step": 126550 }, { "epoch": 4.59917145141362, "grad_norm": 0.4278873801231384, "learning_rate": 7.6047907426430506e-06, "loss": 0.0721, "step": 126560 }, { "epoch": 4.599534849916418, "grad_norm": 1.1273231506347656, "learning_rate": 7.601064006284528e-06, "loss": 0.055, "step": 126570 }, { "epoch": 4.599898248419216, "grad_norm": 0.3639676570892334, "learning_rate": 7.5973380195842605e-06, "loss": 0.0725, "step": 126580 }, { "epoch": 4.600261646922014, "grad_norm": 0.4167264997959137, "learning_rate": 7.593612782702769e-06, "loss": 0.0602, "step": 126590 }, { "epoch": 4.600625045424813, "grad_norm": 0.47782692313194275, "learning_rate": 7.5898882958005736e-06, "loss": 0.0727, "step": 126600 }, { "epoch": 4.600625045424813, "eval_loss": 0.3074301779270172, "eval_runtime": 179.8002, "eval_samples_per_second": 41.235, 
"eval_steps_per_second": 5.156, "eval_wer": 0.12394031259643837, "step": 126600 }, { "epoch": 4.600988443927611, "grad_norm": 1.153037667274475, "learning_rate": 7.586164559038145e-06, "loss": 0.0487, "step": 126610 }, { "epoch": 4.601351842430409, "grad_norm": 0.35221338272094727, "learning_rate": 7.582441572575935e-06, "loss": 0.0615, "step": 126620 }, { "epoch": 4.601715240933207, "grad_norm": 0.39618822932243347, "learning_rate": 7.578719336574339e-06, "loss": 0.0534, "step": 126630 }, { "epoch": 4.602078639436005, "grad_norm": 3.276002883911133, "learning_rate": 7.574997851193738e-06, "loss": 0.0591, "step": 126640 }, { "epoch": 4.602442037938804, "grad_norm": 0.7974953651428223, "learning_rate": 7.571277116594486e-06, "loss": 0.0587, "step": 126650 }, { "epoch": 4.602805436441602, "grad_norm": 0.6264301538467407, "learning_rate": 7.567557132936895e-06, "loss": 0.0712, "step": 126660 }, { "epoch": 4.6031688349444, "grad_norm": 0.7257146239280701, "learning_rate": 7.563837900381238e-06, "loss": 0.0847, "step": 126670 }, { "epoch": 4.603532233447198, "grad_norm": 0.38312992453575134, "learning_rate": 7.560119419087766e-06, "loss": 0.0582, "step": 126680 }, { "epoch": 4.603895631949996, "grad_norm": 0.6692748069763184, "learning_rate": 7.556401689216696e-06, "loss": 0.1701, "step": 126690 }, { "epoch": 4.604259030452795, "grad_norm": 0.41986310482025146, "learning_rate": 7.5526847109282206e-06, "loss": 0.0488, "step": 126700 }, { "epoch": 4.604622428955593, "grad_norm": 0.7212282419204712, "learning_rate": 7.5489684843824765e-06, "loss": 0.0595, "step": 126710 }, { "epoch": 4.604985827458391, "grad_norm": 0.5916080474853516, "learning_rate": 7.5452530097395975e-06, "loss": 0.0568, "step": 126720 }, { "epoch": 4.605349225961189, "grad_norm": 0.4187203049659729, "learning_rate": 7.541538287159655e-06, "loss": 0.0591, "step": 126730 }, { "epoch": 4.605712624463987, "grad_norm": 0.3530846834182739, "learning_rate": 7.537824316802708e-06, "loss": 0.6324, "step": 
126740 }, { "epoch": 4.606076022966786, "grad_norm": 0.5991231799125671, "learning_rate": 7.5341110988287834e-06, "loss": 0.0625, "step": 126750 }, { "epoch": 4.606439421469584, "grad_norm": 0.3676665723323822, "learning_rate": 7.530398633397873e-06, "loss": 0.0559, "step": 126760 }, { "epoch": 4.606802819972382, "grad_norm": 1.858966588973999, "learning_rate": 7.526686920669923e-06, "loss": 0.101, "step": 126770 }, { "epoch": 4.60716621847518, "grad_norm": 0.8563647866249084, "learning_rate": 7.522975960804862e-06, "loss": 0.0683, "step": 126780 }, { "epoch": 4.607529616977978, "grad_norm": 0.332241415977478, "learning_rate": 7.519265753962582e-06, "loss": 0.0584, "step": 126790 }, { "epoch": 4.6078930154807765, "grad_norm": 1.265038251876831, "learning_rate": 7.515556300302953e-06, "loss": 0.0562, "step": 126800 }, { "epoch": 4.608256413983574, "grad_norm": 0.47690728306770325, "learning_rate": 7.511847599985786e-06, "loss": 0.0541, "step": 126810 }, { "epoch": 4.608619812486372, "grad_norm": 0.49429044127464294, "learning_rate": 7.508139653170881e-06, "loss": 0.0644, "step": 126820 }, { "epoch": 4.60898321098917, "grad_norm": 0.39433813095092773, "learning_rate": 7.504432460018005e-06, "loss": 0.0577, "step": 126830 }, { "epoch": 4.609346609491968, "grad_norm": 0.9581649899482727, "learning_rate": 7.50072602068689e-06, "loss": 0.05, "step": 126840 }, { "epoch": 4.609710007994767, "grad_norm": 0.6286817789077759, "learning_rate": 7.4970203353372234e-06, "loss": 0.0629, "step": 126850 }, { "epoch": 4.610073406497565, "grad_norm": 0.4168870449066162, "learning_rate": 7.493315404128673e-06, "loss": 0.0639, "step": 126860 }, { "epoch": 4.610436805000363, "grad_norm": 4.045650482177734, "learning_rate": 7.48961122722088e-06, "loss": 0.0507, "step": 126870 }, { "epoch": 4.610800203503161, "grad_norm": 0.406231552362442, "learning_rate": 7.485907804773429e-06, "loss": 0.0592, "step": 126880 }, { "epoch": 4.61116360200596, "grad_norm": 1.1003469228744507, 
"learning_rate": 7.482205136945897e-06, "loss": 0.0405, "step": 126890 }, { "epoch": 4.611527000508758, "grad_norm": 0.3843145966529846, "learning_rate": 7.478503223897823e-06, "loss": 0.0502, "step": 126900 }, { "epoch": 4.611890399011556, "grad_norm": 0.5560893416404724, "learning_rate": 7.474802065788697e-06, "loss": 0.0717, "step": 126910 }, { "epoch": 4.612253797514354, "grad_norm": 1.3374834060668945, "learning_rate": 7.471101662777996e-06, "loss": 0.1144, "step": 126920 }, { "epoch": 4.612617196017152, "grad_norm": 0.3146264851093292, "learning_rate": 7.467402015025154e-06, "loss": 0.0612, "step": 126930 }, { "epoch": 4.612980594519951, "grad_norm": 0.4979982078075409, "learning_rate": 7.4637031226895885e-06, "loss": 0.0448, "step": 126940 }, { "epoch": 4.613343993022749, "grad_norm": 1.7161424160003662, "learning_rate": 7.460004985930652e-06, "loss": 0.0924, "step": 126950 }, { "epoch": 4.613707391525547, "grad_norm": 0.5294406414031982, "learning_rate": 7.456307604907692e-06, "loss": 0.0524, "step": 126960 }, { "epoch": 4.614070790028345, "grad_norm": 0.3305225074291229, "learning_rate": 7.452610979780026e-06, "loss": 0.054, "step": 126970 }, { "epoch": 4.614434188531144, "grad_norm": 0.44015273451805115, "learning_rate": 7.448915110706911e-06, "loss": 0.0528, "step": 126980 }, { "epoch": 4.614797587033942, "grad_norm": 0.40709996223449707, "learning_rate": 7.445219997847597e-06, "loss": 0.044, "step": 126990 }, { "epoch": 4.61516098553674, "grad_norm": 0.5878754258155823, "learning_rate": 7.441525641361291e-06, "loss": 0.0533, "step": 127000 }, { "epoch": 4.615524384039538, "grad_norm": 0.47025102376937866, "learning_rate": 7.4378320414071804e-06, "loss": 0.0592, "step": 127010 }, { "epoch": 4.615887782542336, "grad_norm": 0.3487691879272461, "learning_rate": 7.434139198144396e-06, "loss": 0.0487, "step": 127020 }, { "epoch": 4.6162511810451345, "grad_norm": 0.7760711312294006, "learning_rate": 7.43044711173205e-06, "loss": 0.058, "step": 127030 }, { 
"epoch": 4.6166145795479325, "grad_norm": 0.3424449563026428, "learning_rate": 7.426755782329236e-06, "loss": 0.0735, "step": 127040 }, { "epoch": 4.6169779780507305, "grad_norm": 0.8006175756454468, "learning_rate": 7.423065210094982e-06, "loss": 0.0588, "step": 127050 }, { "epoch": 4.6173413765535285, "grad_norm": 0.49378591775894165, "learning_rate": 7.4193753951883076e-06, "loss": 0.0639, "step": 127060 }, { "epoch": 4.6177047750563265, "grad_norm": 0.6729814410209656, "learning_rate": 7.4156863377681935e-06, "loss": 0.0798, "step": 127070 }, { "epoch": 4.618068173559125, "grad_norm": 0.20370444655418396, "learning_rate": 7.411998037993601e-06, "loss": 0.0446, "step": 127080 }, { "epoch": 4.618431572061923, "grad_norm": 0.2908482849597931, "learning_rate": 7.408310496023427e-06, "loss": 0.0436, "step": 127090 }, { "epoch": 4.618794970564721, "grad_norm": 0.6903005242347717, "learning_rate": 7.404623712016559e-06, "loss": 0.0697, "step": 127100 }, { "epoch": 4.619158369067519, "grad_norm": 0.36000099778175354, "learning_rate": 7.400937686131862e-06, "loss": 0.0661, "step": 127110 }, { "epoch": 4.619521767570317, "grad_norm": 0.9105935096740723, "learning_rate": 7.397252418528133e-06, "loss": 0.0707, "step": 127120 }, { "epoch": 4.619885166073116, "grad_norm": 1.8391764163970947, "learning_rate": 7.393567909364166e-06, "loss": 0.0571, "step": 127130 }, { "epoch": 4.620248564575914, "grad_norm": 0.413710355758667, "learning_rate": 7.389884158798715e-06, "loss": 0.0585, "step": 127140 }, { "epoch": 4.620611963078712, "grad_norm": 0.4115528166294098, "learning_rate": 7.386201166990506e-06, "loss": 0.0653, "step": 127150 }, { "epoch": 4.62097536158151, "grad_norm": 0.5686254501342773, "learning_rate": 7.382518934098212e-06, "loss": 0.0565, "step": 127160 }, { "epoch": 4.621338760084308, "grad_norm": 0.4464244246482849, "learning_rate": 7.378837460280491e-06, "loss": 0.0809, "step": 127170 }, { "epoch": 4.621702158587107, "grad_norm": 0.27027398347854614, 
"learning_rate": 7.375156745695972e-06, "loss": 0.0404, "step": 127180 }, { "epoch": 4.622065557089905, "grad_norm": 0.8303388953208923, "learning_rate": 7.371476790503243e-06, "loss": 0.0647, "step": 127190 }, { "epoch": 4.622428955592703, "grad_norm": 0.9346821308135986, "learning_rate": 7.367797594860851e-06, "loss": 0.0523, "step": 127200 }, { "epoch": 4.622428955592703, "eval_loss": 0.3119446039199829, "eval_runtime": 179.1801, "eval_samples_per_second": 41.377, "eval_steps_per_second": 5.174, "eval_wer": 0.12345925535970374, "step": 127200 }, { "epoch": 4.622792354095501, "grad_norm": 0.37024086713790894, "learning_rate": 7.364119158927332e-06, "loss": 0.0644, "step": 127210 }, { "epoch": 4.623155752598299, "grad_norm": 0.8160788416862488, "learning_rate": 7.360441482861161e-06, "loss": 0.081, "step": 127220 }, { "epoch": 4.623519151101098, "grad_norm": 0.27264857292175293, "learning_rate": 7.356764566820806e-06, "loss": 0.0486, "step": 127230 }, { "epoch": 4.623882549603896, "grad_norm": 0.4614958167076111, "learning_rate": 7.353088410964693e-06, "loss": 0.2458, "step": 127240 }, { "epoch": 4.624245948106694, "grad_norm": 0.7993619441986084, "learning_rate": 7.349413015451218e-06, "loss": 0.0559, "step": 127250 }, { "epoch": 4.624609346609492, "grad_norm": 0.2986343502998352, "learning_rate": 7.345738380438727e-06, "loss": 0.056, "step": 127260 }, { "epoch": 4.62497274511229, "grad_norm": 0.8213078379631042, "learning_rate": 7.342064506085556e-06, "loss": 0.0545, "step": 127270 }, { "epoch": 4.625336143615089, "grad_norm": 0.47884809970855713, "learning_rate": 7.338391392549998e-06, "loss": 0.0742, "step": 127280 }, { "epoch": 4.625699542117887, "grad_norm": 0.43907973170280457, "learning_rate": 7.334719039990323e-06, "loss": 0.0455, "step": 127290 }, { "epoch": 4.626062940620685, "grad_norm": 0.9261974692344666, "learning_rate": 7.331047448564743e-06, "loss": 0.0673, "step": 127300 }, { "epoch": 4.626426339123483, "grad_norm": 0.7862799167633057, 
"learning_rate": 7.327376618431462e-06, "loss": 0.0494, "step": 127310 }, { "epoch": 4.6267897376262805, "grad_norm": 0.4399845004081726, "learning_rate": 7.323706549748643e-06, "loss": 0.0778, "step": 127320 }, { "epoch": 4.627153136129079, "grad_norm": 2.8615126609802246, "learning_rate": 7.320037242674424e-06, "loss": 0.0668, "step": 127330 }, { "epoch": 4.627516534631877, "grad_norm": 0.4806099534034729, "learning_rate": 7.3163686973668875e-06, "loss": 0.0532, "step": 127340 }, { "epoch": 4.627879933134675, "grad_norm": 0.39327314496040344, "learning_rate": 7.3127009139841105e-06, "loss": 0.0606, "step": 127350 }, { "epoch": 4.628243331637473, "grad_norm": 0.5119825601577759, "learning_rate": 7.309033892684117e-06, "loss": 0.0653, "step": 127360 }, { "epoch": 4.628606730140271, "grad_norm": 0.847950279712677, "learning_rate": 7.305367633624905e-06, "loss": 0.0516, "step": 127370 }, { "epoch": 4.62897012864307, "grad_norm": 0.2810831367969513, "learning_rate": 7.3017021369644444e-06, "loss": 0.1433, "step": 127380 }, { "epoch": 4.629333527145868, "grad_norm": 0.5190812945365906, "learning_rate": 7.298037402860675e-06, "loss": 0.0442, "step": 127390 }, { "epoch": 4.629696925648666, "grad_norm": 0.365928590297699, "learning_rate": 7.294373431471482e-06, "loss": 0.0516, "step": 127400 }, { "epoch": 4.630060324151464, "grad_norm": 0.5027018785476685, "learning_rate": 7.290710222954741e-06, "loss": 0.0683, "step": 127410 }, { "epoch": 4.630423722654263, "grad_norm": 0.8706980347633362, "learning_rate": 7.28704777746829e-06, "loss": 0.0766, "step": 127420 }, { "epoch": 4.630787121157061, "grad_norm": 0.3457561433315277, "learning_rate": 7.283386095169933e-06, "loss": 0.0573, "step": 127430 }, { "epoch": 4.631150519659859, "grad_norm": 0.27861452102661133, "learning_rate": 7.2797251762174255e-06, "loss": 0.0482, "step": 127440 }, { "epoch": 4.631513918162657, "grad_norm": 0.29536715149879456, "learning_rate": 7.276065020768511e-06, "loss": 0.0499, "step": 127450 }, { 
"epoch": 4.631877316665455, "grad_norm": 0.3605838716030121, "learning_rate": 7.272405628980891e-06, "loss": 0.0606, "step": 127460 }, { "epoch": 4.632240715168254, "grad_norm": 0.4679373502731323, "learning_rate": 7.268747001012249e-06, "loss": 0.0674, "step": 127470 }, { "epoch": 4.632604113671052, "grad_norm": 0.36606264114379883, "learning_rate": 7.265089137020201e-06, "loss": 0.0569, "step": 127480 }, { "epoch": 4.63296751217385, "grad_norm": 0.8651442527770996, "learning_rate": 7.261432037162369e-06, "loss": 0.052, "step": 127490 }, { "epoch": 4.633330910676648, "grad_norm": 0.22644130885601044, "learning_rate": 7.2577757015963e-06, "loss": 0.0452, "step": 127500 }, { "epoch": 4.633694309179446, "grad_norm": 2.050048828125, "learning_rate": 7.254120130479566e-06, "loss": 0.0588, "step": 127510 }, { "epoch": 4.634057707682245, "grad_norm": 0.9934787154197693, "learning_rate": 7.250465323969646e-06, "loss": 0.0659, "step": 127520 }, { "epoch": 4.634421106185043, "grad_norm": 0.747897207736969, "learning_rate": 7.246811282224031e-06, "loss": 0.0465, "step": 127530 }, { "epoch": 4.634784504687841, "grad_norm": 0.38413527607917786, "learning_rate": 7.243158005400144e-06, "loss": 0.0494, "step": 127540 }, { "epoch": 4.635147903190639, "grad_norm": 0.18081793189048767, "learning_rate": 7.239505493655399e-06, "loss": 0.0432, "step": 127550 }, { "epoch": 4.6355113016934375, "grad_norm": 0.46738603711128235, "learning_rate": 7.2358537471471725e-06, "loss": 0.0716, "step": 127560 }, { "epoch": 4.6358747001962355, "grad_norm": 0.41053059697151184, "learning_rate": 7.232202766032808e-06, "loss": 0.0714, "step": 127570 }, { "epoch": 4.6362380986990335, "grad_norm": 0.387729287147522, "learning_rate": 7.2285525504696024e-06, "loss": 0.0596, "step": 127580 }, { "epoch": 4.6366014972018315, "grad_norm": 0.20796868205070496, "learning_rate": 7.224903100614844e-06, "loss": 0.0501, "step": 127590 }, { "epoch": 4.636964895704629, "grad_norm": 0.6116434335708618, "learning_rate": 
7.22125441662575e-06, "loss": 0.0529, "step": 127600 }, { "epoch": 4.637328294207428, "grad_norm": 0.3101082444190979, "learning_rate": 7.217606498659562e-06, "loss": 0.0664, "step": 127610 }, { "epoch": 4.637691692710226, "grad_norm": 0.707249104976654, "learning_rate": 7.213959346873434e-06, "loss": 0.0589, "step": 127620 }, { "epoch": 4.638055091213024, "grad_norm": 0.3785873055458069, "learning_rate": 7.21031296142452e-06, "loss": 1.7327, "step": 127630 }, { "epoch": 4.638418489715822, "grad_norm": 0.48645132780075073, "learning_rate": 7.206667342469911e-06, "loss": 0.0395, "step": 127640 }, { "epoch": 4.63878188821862, "grad_norm": 0.9685658812522888, "learning_rate": 7.203022490166711e-06, "loss": 0.0493, "step": 127650 }, { "epoch": 4.639145286721419, "grad_norm": 1.025047779083252, "learning_rate": 7.1993784046719445e-06, "loss": 0.0677, "step": 127660 }, { "epoch": 4.639508685224217, "grad_norm": 0.5807701349258423, "learning_rate": 7.195735086142635e-06, "loss": 0.0759, "step": 127670 }, { "epoch": 4.639872083727015, "grad_norm": 0.4483569264411926, "learning_rate": 7.192092534735745e-06, "loss": 0.0611, "step": 127680 }, { "epoch": 4.640235482229813, "grad_norm": 0.7987115383148193, "learning_rate": 7.188450750608225e-06, "loss": 0.0531, "step": 127690 }, { "epoch": 4.640598880732611, "grad_norm": 0.5786421298980713, "learning_rate": 7.184809733916992e-06, "loss": 0.0541, "step": 127700 }, { "epoch": 4.64096227923541, "grad_norm": 0.2912478446960449, "learning_rate": 7.181169484818926e-06, "loss": 0.0471, "step": 127710 }, { "epoch": 4.641325677738208, "grad_norm": 1.787043571472168, "learning_rate": 7.177530003470861e-06, "loss": 0.0903, "step": 127720 }, { "epoch": 4.641689076241006, "grad_norm": 2.7219982147216797, "learning_rate": 7.173891290029622e-06, "loss": 0.0581, "step": 127730 }, { "epoch": 4.642052474743804, "grad_norm": 0.24585352838039398, "learning_rate": 7.170253344651967e-06, "loss": 0.0428, "step": 127740 }, { "epoch": 
4.642415873246602, "grad_norm": 0.5193399786949158, "learning_rate": 7.1666161674946715e-06, "loss": 0.0562, "step": 127750 }, { "epoch": 4.642779271749401, "grad_norm": 0.39260658621788025, "learning_rate": 7.162979758714428e-06, "loss": 0.0823, "step": 127760 }, { "epoch": 4.643142670252199, "grad_norm": 0.7129606604576111, "learning_rate": 7.1593441184679285e-06, "loss": 0.0573, "step": 127770 }, { "epoch": 4.643506068754997, "grad_norm": 0.7730976343154907, "learning_rate": 7.155709246911807e-06, "loss": 0.0756, "step": 127780 }, { "epoch": 4.643869467257795, "grad_norm": 0.458992600440979, "learning_rate": 7.152075144202683e-06, "loss": 0.0489, "step": 127790 }, { "epoch": 4.644232865760593, "grad_norm": 3.6063594818115234, "learning_rate": 7.148441810497139e-06, "loss": 0.0457, "step": 127800 }, { "epoch": 4.644232865760593, "eval_loss": 0.3046877980232239, "eval_runtime": 178.8953, "eval_samples_per_second": 41.443, "eval_steps_per_second": 5.182, "eval_wer": 0.12434875742008096, "step": 127800 }, { "epoch": 4.644596264263392, "grad_norm": 0.603912889957428, "learning_rate": 7.144809245951728e-06, "loss": 0.0532, "step": 127810 }, { "epoch": 4.6449596627661895, "grad_norm": 0.5774056911468506, "learning_rate": 7.14117745072295e-06, "loss": 0.0655, "step": 127820 }, { "epoch": 4.6453230612689875, "grad_norm": 0.2671217918395996, "learning_rate": 7.137546424967295e-06, "loss": 0.4686, "step": 127830 }, { "epoch": 4.6456864597717855, "grad_norm": 0.2115388661623001, "learning_rate": 7.133916168841209e-06, "loss": 0.0526, "step": 127840 }, { "epoch": 4.6460498582745835, "grad_norm": 2.462585210800171, "learning_rate": 7.130286682501114e-06, "loss": 0.0628, "step": 127850 }, { "epoch": 4.646413256777382, "grad_norm": 0.3898829519748688, "learning_rate": 7.126657966103378e-06, "loss": 0.0514, "step": 127860 }, { "epoch": 4.64677665528018, "grad_norm": 0.9937441349029541, "learning_rate": 7.123030019804366e-06, "loss": 0.0621, "step": 127870 }, { "epoch": 
4.647140053782978, "grad_norm": 0.44713079929351807, "learning_rate": 7.119765526698841e-06, "loss": 2.1326, "step": 127880 }, { "epoch": 4.647503452285776, "grad_norm": 0.2602557837963104, "learning_rate": 7.116139044018008e-06, "loss": 0.0546, "step": 127890 }, { "epoch": 4.647866850788574, "grad_norm": 0.6346684694290161, "learning_rate": 7.112513331889095e-06, "loss": 0.0652, "step": 127900 }, { "epoch": 4.648230249291373, "grad_norm": 183.4078369140625, "learning_rate": 7.1088883904683544e-06, "loss": 2.0049, "step": 127910 }, { "epoch": 4.648593647794171, "grad_norm": 0.4693729877471924, "learning_rate": 7.10526421991195e-06, "loss": 0.0582, "step": 127920 }, { "epoch": 4.648957046296969, "grad_norm": 0.2181413322687149, "learning_rate": 7.101640820376049e-06, "loss": 0.0613, "step": 127930 }, { "epoch": 4.649320444799767, "grad_norm": 0.2921404242515564, "learning_rate": 7.098018192016753e-06, "loss": 0.0515, "step": 127940 }, { "epoch": 4.649683843302565, "grad_norm": 0.48610949516296387, "learning_rate": 7.0943963349901585e-06, "loss": 0.0482, "step": 127950 }, { "epoch": 4.650047241805364, "grad_norm": 0.35596776008605957, "learning_rate": 7.090775249452317e-06, "loss": 0.0655, "step": 127960 }, { "epoch": 4.650410640308162, "grad_norm": 0.7509422898292542, "learning_rate": 7.087154935559256e-06, "loss": 0.0698, "step": 127970 }, { "epoch": 4.65077403881096, "grad_norm": 0.33018404245376587, "learning_rate": 7.08353539346695e-06, "loss": 0.0517, "step": 127980 }, { "epoch": 4.651137437313758, "grad_norm": 0.41769939661026, "learning_rate": 7.079916623331354e-06, "loss": 0.0402, "step": 127990 }, { "epoch": 4.651500835816557, "grad_norm": 0.822795033454895, "learning_rate": 7.076298625308392e-06, "loss": 0.0371, "step": 128000 }, { "epoch": 4.651864234319355, "grad_norm": 0.42674922943115234, "learning_rate": 7.072681399553957e-06, "loss": 0.0563, "step": 128010 }, { "epoch": 4.652227632822153, "grad_norm": 0.4882901906967163, "learning_rate": 
7.069064946223888e-06, "loss": 0.0532, "step": 128020 }, { "epoch": 4.652591031324951, "grad_norm": 0.4875841438770294, "learning_rate": 7.065449265474017e-06, "loss": 0.0718, "step": 128030 }, { "epoch": 4.652954429827749, "grad_norm": 0.4452211856842041, "learning_rate": 7.061834357460112e-06, "loss": 1.8634, "step": 128040 }, { "epoch": 4.653317828330548, "grad_norm": 0.46467992663383484, "learning_rate": 7.058220222337958e-06, "loss": 0.0548, "step": 128050 }, { "epoch": 4.653681226833346, "grad_norm": 0.725618302822113, "learning_rate": 7.054606860263246e-06, "loss": 0.0598, "step": 128060 }, { "epoch": 4.654044625336144, "grad_norm": 0.4581023156642914, "learning_rate": 7.050994271391684e-06, "loss": 0.0744, "step": 128070 }, { "epoch": 4.654408023838942, "grad_norm": 0.5942229628562927, "learning_rate": 7.047382455878909e-06, "loss": 0.0469, "step": 128080 }, { "epoch": 4.65477142234174, "grad_norm": 0.4230112135410309, "learning_rate": 7.043771413880548e-06, "loss": 0.0475, "step": 128090 }, { "epoch": 4.6551348208445384, "grad_norm": 0.35370883345603943, "learning_rate": 7.0401611455521875e-06, "loss": 0.0448, "step": 128100 }, { "epoch": 4.655498219347336, "grad_norm": 0.5353536605834961, "learning_rate": 7.036551651049392e-06, "loss": 0.1437, "step": 128110 }, { "epoch": 4.655861617850134, "grad_norm": 0.8198840618133545, "learning_rate": 7.032942930527661e-06, "loss": 0.0519, "step": 128120 }, { "epoch": 4.656225016352932, "grad_norm": 0.3922458589076996, "learning_rate": 7.029334984142499e-06, "loss": 0.0666, "step": 128130 }, { "epoch": 4.656588414855731, "grad_norm": 0.8562478423118591, "learning_rate": 7.0257278120493405e-06, "loss": 0.0478, "step": 128140 }, { "epoch": 4.656951813358529, "grad_norm": 0.29740044474601746, "learning_rate": 7.0221214144036316e-06, "loss": 0.0497, "step": 128150 }, { "epoch": 4.657315211861327, "grad_norm": 0.38621461391448975, "learning_rate": 7.018515791360739e-06, "loss": 0.06, "step": 128160 }, { "epoch": 
4.657678610364125, "grad_norm": 0.6945303082466125, "learning_rate": 7.0149109430760305e-06, "loss": 0.0637, "step": 128170 }, { "epoch": 4.658042008866923, "grad_norm": 0.48097801208496094, "learning_rate": 7.0113068697048e-06, "loss": 0.2878, "step": 128180 }, { "epoch": 4.658405407369722, "grad_norm": 0.6340762972831726, "learning_rate": 7.007703571402371e-06, "loss": 0.0447, "step": 128190 }, { "epoch": 4.65876880587252, "grad_norm": 0.33902111649513245, "learning_rate": 7.004101048323969e-06, "loss": 0.0515, "step": 128200 }, { "epoch": 4.659132204375318, "grad_norm": 0.4632921516895294, "learning_rate": 7.000499300624832e-06, "loss": 0.0621, "step": 128210 }, { "epoch": 4.659495602878116, "grad_norm": 0.4429139196872711, "learning_rate": 6.99689832846013e-06, "loss": 0.0709, "step": 128220 }, { "epoch": 4.659859001380914, "grad_norm": 1.389298677444458, "learning_rate": 6.993298131985024e-06, "loss": 0.0516, "step": 128230 }, { "epoch": 4.660222399883713, "grad_norm": 0.4037383496761322, "learning_rate": 6.989698711354631e-06, "loss": 0.0433, "step": 128240 }, { "epoch": 4.660585798386511, "grad_norm": 0.4419560432434082, "learning_rate": 6.98610006672405e-06, "loss": 0.0816, "step": 128250 }, { "epoch": 4.660949196889309, "grad_norm": 0.184518963098526, "learning_rate": 6.982502198248314e-06, "loss": 0.0723, "step": 128260 }, { "epoch": 4.661312595392107, "grad_norm": 0.45740950107574463, "learning_rate": 6.9789051060824585e-06, "loss": 0.0659, "step": 128270 }, { "epoch": 4.661675993894905, "grad_norm": 1.4633350372314453, "learning_rate": 6.975308790381452e-06, "loss": 0.0613, "step": 128280 }, { "epoch": 4.662039392397704, "grad_norm": 0.4328383207321167, "learning_rate": 6.9717132513002685e-06, "loss": 0.051, "step": 128290 }, { "epoch": 4.662402790900502, "grad_norm": 0.5073930621147156, "learning_rate": 6.968118488993811e-06, "loss": 0.0601, "step": 128300 }, { "epoch": 4.6627661894033, "grad_norm": 0.3514675796031952, "learning_rate": 
6.964524503616976e-06, "loss": 1.7311, "step": 128310 }, { "epoch": 4.663129587906098, "grad_norm": 0.33624792098999023, "learning_rate": 6.9609312953246045e-06, "loss": 0.0755, "step": 128320 }, { "epoch": 4.663492986408896, "grad_norm": 0.45933443307876587, "learning_rate": 6.957338864271518e-06, "loss": 0.0472, "step": 128330 }, { "epoch": 4.6638563849116945, "grad_norm": 0.21107351779937744, "learning_rate": 6.953747210612505e-06, "loss": 0.0446, "step": 128340 }, { "epoch": 4.6642197834144925, "grad_norm": 0.3575725257396698, "learning_rate": 6.950156334502322e-06, "loss": 0.0495, "step": 128350 }, { "epoch": 4.6645831819172905, "grad_norm": 0.5739941000938416, "learning_rate": 6.946566236095675e-06, "loss": 0.0639, "step": 128360 }, { "epoch": 4.6649465804200885, "grad_norm": 1.234292984008789, "learning_rate": 6.942976915547253e-06, "loss": 0.0716, "step": 128370 }, { "epoch": 4.6653099789228865, "grad_norm": 0.6150774359703064, "learning_rate": 6.9393883730117095e-06, "loss": 0.0564, "step": 128380 }, { "epoch": 4.665673377425685, "grad_norm": 0.22023890912532806, "learning_rate": 6.935800608643669e-06, "loss": 0.047, "step": 128390 }, { "epoch": 4.666036775928483, "grad_norm": 0.6178179979324341, "learning_rate": 6.932213622597703e-06, "loss": 0.0546, "step": 128400 }, { "epoch": 4.666036775928483, "eval_loss": 0.30033043026924133, "eval_runtime": 178.9843, "eval_samples_per_second": 41.423, "eval_steps_per_second": 5.179, "eval_wer": 0.12424891535207944, "step": 128400 }, { "epoch": 4.666400174431281, "grad_norm": 4.590987205505371, "learning_rate": 6.928627415028369e-06, "loss": 0.0694, "step": 128410 }, { "epoch": 4.666763572934079, "grad_norm": 0.6074592471122742, "learning_rate": 6.9250419860901694e-06, "loss": 0.0667, "step": 128420 }, { "epoch": 4.667126971436877, "grad_norm": 0.35892540216445923, "learning_rate": 6.921457335937614e-06, "loss": 0.0481, "step": 128430 }, { "epoch": 4.667490369939676, "grad_norm": 0.5705949068069458, "learning_rate": 
6.91787346472513e-06, "loss": 0.0403, "step": 128440 }, { "epoch": 4.667853768442474, "grad_norm": 0.8110564947128296, "learning_rate": 6.914290372607152e-06, "loss": 0.0685, "step": 128450 }, { "epoch": 4.668217166945272, "grad_norm": 0.686643660068512, "learning_rate": 6.910708059738041e-06, "loss": 0.0731, "step": 128460 }, { "epoch": 4.66858056544807, "grad_norm": 0.4402810037136078, "learning_rate": 6.907126526272162e-06, "loss": 0.0787, "step": 128470 }, { "epoch": 4.668943963950868, "grad_norm": 0.8963078856468201, "learning_rate": 6.903545772363826e-06, "loss": 0.0355, "step": 128480 }, { "epoch": 4.669307362453667, "grad_norm": 0.2838088870048523, "learning_rate": 6.899965798167321e-06, "loss": 0.0538, "step": 128490 }, { "epoch": 4.669670760956465, "grad_norm": 0.5873184204101562, "learning_rate": 6.8963866038368825e-06, "loss": 0.0689, "step": 128500 }, { "epoch": 4.670034159459263, "grad_norm": 0.5566408634185791, "learning_rate": 6.8928081895267396e-06, "loss": 0.091, "step": 128510 }, { "epoch": 4.670397557962061, "grad_norm": 0.703948438167572, "learning_rate": 6.889230555391052e-06, "loss": 0.085, "step": 128520 }, { "epoch": 4.670760956464859, "grad_norm": 0.4254933297634125, "learning_rate": 6.885653701583994e-06, "loss": 0.0568, "step": 128530 }, { "epoch": 4.671124354967658, "grad_norm": 0.8750547766685486, "learning_rate": 6.882077628259662e-06, "loss": 0.0543, "step": 128540 }, { "epoch": 4.671487753470456, "grad_norm": 0.6421937942504883, "learning_rate": 6.878502335572149e-06, "loss": 0.0523, "step": 128550 }, { "epoch": 4.671851151973254, "grad_norm": 0.6861023306846619, "learning_rate": 6.874927823675478e-06, "loss": 0.0604, "step": 128560 }, { "epoch": 4.672214550476052, "grad_norm": 8.858723640441895, "learning_rate": 6.871354092723692e-06, "loss": 0.0822, "step": 128570 }, { "epoch": 4.672577948978851, "grad_norm": 0.2819288969039917, "learning_rate": 6.867781142870749e-06, "loss": 0.0446, "step": 128580 }, { "epoch": 4.672941347481649, 
"grad_norm": 0.8527876138687134, "learning_rate": 6.864208974270611e-06, "loss": 1.1282, "step": 128590 }, { "epoch": 4.673304745984447, "grad_norm": 0.31138837337493896, "learning_rate": 6.860637587077173e-06, "loss": 0.3786, "step": 128600 }, { "epoch": 4.6736681444872445, "grad_norm": 0.3040253818035126, "learning_rate": 6.857066981444321e-06, "loss": 0.0513, "step": 128610 }, { "epoch": 4.6740315429900425, "grad_norm": 1.909954309463501, "learning_rate": 6.8534971575258985e-06, "loss": 0.1079, "step": 128620 }, { "epoch": 4.674394941492841, "grad_norm": 0.3345434367656708, "learning_rate": 6.849928115475726e-06, "loss": 0.0528, "step": 128630 }, { "epoch": 4.674758339995639, "grad_norm": 0.32240140438079834, "learning_rate": 6.846359855447568e-06, "loss": 0.0433, "step": 128640 }, { "epoch": 4.675121738498437, "grad_norm": 0.6456748843193054, "learning_rate": 6.842792377595178e-06, "loss": 0.0462, "step": 128650 }, { "epoch": 4.675485137001235, "grad_norm": 0.45580944418907166, "learning_rate": 6.839225682072248e-06, "loss": 0.0611, "step": 128660 }, { "epoch": 4.675848535504033, "grad_norm": 1.2019174098968506, "learning_rate": 6.835659769032479e-06, "loss": 0.0677, "step": 128670 }, { "epoch": 4.676211934006832, "grad_norm": 0.4169817864894867, "learning_rate": 6.832094638629497e-06, "loss": 0.0579, "step": 128680 }, { "epoch": 4.67657533250963, "grad_norm": 0.36477628350257874, "learning_rate": 6.828530291016921e-06, "loss": 0.0665, "step": 128690 }, { "epoch": 4.676938731012428, "grad_norm": 0.6020234823226929, "learning_rate": 6.824966726348314e-06, "loss": 0.0544, "step": 128700 }, { "epoch": 4.677302129515226, "grad_norm": 0.5708634257316589, "learning_rate": 6.821403944777222e-06, "loss": 0.0646, "step": 128710 }, { "epoch": 4.677665528018025, "grad_norm": 0.5916482210159302, "learning_rate": 6.817841946457157e-06, "loss": 0.0588, "step": 128720 }, { "epoch": 4.678028926520823, "grad_norm": 0.32066741585731506, "learning_rate": 6.814280731541594e-06, 
"loss": 0.045, "step": 128730 }, { "epoch": 4.678392325023621, "grad_norm": 11.839031219482422, "learning_rate": 6.810720300183965e-06, "loss": 0.18, "step": 128740 }, { "epoch": 4.678755723526419, "grad_norm": 0.3452649414539337, "learning_rate": 6.807160652537681e-06, "loss": 0.0594, "step": 128750 }, { "epoch": 4.679119122029217, "grad_norm": 0.37864238023757935, "learning_rate": 6.803601788756111e-06, "loss": 0.0609, "step": 128760 }, { "epoch": 4.679482520532016, "grad_norm": 0.8271676898002625, "learning_rate": 6.80004370899261e-06, "loss": 0.0638, "step": 128770 }, { "epoch": 4.679845919034814, "grad_norm": 0.39286208152770996, "learning_rate": 6.7964864134004575e-06, "loss": 0.0637, "step": 128780 }, { "epoch": 4.680209317537612, "grad_norm": 0.27077484130859375, "learning_rate": 6.792929902132947e-06, "loss": 0.0526, "step": 128790 }, { "epoch": 4.68057271604041, "grad_norm": 0.8405056595802307, "learning_rate": 6.789374175343288e-06, "loss": 0.0578, "step": 128800 }, { "epoch": 4.680936114543208, "grad_norm": 0.38048580288887024, "learning_rate": 6.78581923318472e-06, "loss": 0.0848, "step": 128810 }, { "epoch": 4.681299513046007, "grad_norm": 0.4703914225101471, "learning_rate": 6.782265075810388e-06, "loss": 0.0476, "step": 128820 }, { "epoch": 4.681662911548805, "grad_norm": 0.3932929039001465, "learning_rate": 6.778711703373439e-06, "loss": 0.0428, "step": 128830 }, { "epoch": 4.682026310051603, "grad_norm": 0.4011209011077881, "learning_rate": 6.7751591160269664e-06, "loss": 0.0573, "step": 128840 }, { "epoch": 4.682389708554401, "grad_norm": 0.3561095893383026, "learning_rate": 6.771607313924041e-06, "loss": 0.0456, "step": 128850 }, { "epoch": 4.682753107057199, "grad_norm": 3.1855311393737793, "learning_rate": 6.768056297217703e-06, "loss": 0.0754, "step": 128860 }, { "epoch": 4.6831165055599975, "grad_norm": 0.41924577951431274, "learning_rate": 6.764506066060955e-06, "loss": 0.0719, "step": 128870 }, { "epoch": 4.6834799040627955, "grad_norm": 
1.3218019008636475, "learning_rate": 6.760956620606751e-06, "loss": 0.0385, "step": 128880 }, { "epoch": 4.6838433025655934, "grad_norm": 0.4003591239452362, "learning_rate": 6.757407961008041e-06, "loss": 0.1464, "step": 128890 }, { "epoch": 4.684206701068391, "grad_norm": 0.7760177850723267, "learning_rate": 6.753860087417699e-06, "loss": 0.0581, "step": 128900 }, { "epoch": 4.684570099571189, "grad_norm": 0.2737358510494232, "learning_rate": 6.750312999988617e-06, "loss": 0.1105, "step": 128910 }, { "epoch": 4.684933498073988, "grad_norm": 1.2209004163742065, "learning_rate": 6.746766698873613e-06, "loss": 0.0761, "step": 128920 }, { "epoch": 4.685296896576786, "grad_norm": 0.2809113562107086, "learning_rate": 6.74322118422549e-06, "loss": 0.0524, "step": 128930 }, { "epoch": 4.685660295079584, "grad_norm": 0.3102165162563324, "learning_rate": 6.739676456196997e-06, "loss": 0.0545, "step": 128940 }, { "epoch": 4.686023693582382, "grad_norm": 0.4045947790145874, "learning_rate": 6.736132514940887e-06, "loss": 0.0537, "step": 128950 }, { "epoch": 4.68638709208518, "grad_norm": 0.4421917200088501, "learning_rate": 6.732589360609839e-06, "loss": 0.2583, "step": 128960 }, { "epoch": 4.686750490587979, "grad_norm": 0.7428149580955505, "learning_rate": 6.729046993356525e-06, "loss": 0.0502, "step": 128970 }, { "epoch": 4.687113889090777, "grad_norm": 0.32052141427993774, "learning_rate": 6.7255054133335636e-06, "loss": 0.0422, "step": 128980 }, { "epoch": 4.687477287593575, "grad_norm": 0.32129278779029846, "learning_rate": 6.721964620693549e-06, "loss": 0.0577, "step": 128990 }, { "epoch": 4.687840686096373, "grad_norm": 0.5244696736335754, "learning_rate": 6.718424615589047e-06, "loss": 0.0599, "step": 129000 }, { "epoch": 4.687840686096373, "eval_loss": 0.29525643587112427, "eval_runtime": 179.696, "eval_samples_per_second": 41.259, "eval_steps_per_second": 5.159, "eval_wer": 0.12320511191388168, "step": 129000 }, { "epoch": 4.688204084599171, "grad_norm": 
0.3688945174217224, "learning_rate": 6.714885398172591e-06, "loss": 0.0493, "step": 129010 }, { "epoch": 4.68856748310197, "grad_norm": 1.4156838655471802, "learning_rate": 6.711346968596655e-06, "loss": 0.0652, "step": 129020 }, { "epoch": 4.688930881604768, "grad_norm": 0.5096936821937561, "learning_rate": 6.707809327013717e-06, "loss": 0.0819, "step": 129030 }, { "epoch": 4.689294280107566, "grad_norm": 0.3473042845726013, "learning_rate": 6.704272473576173e-06, "loss": 0.0493, "step": 129040 }, { "epoch": 4.689657678610364, "grad_norm": 0.3818475604057312, "learning_rate": 6.700736408436447e-06, "loss": 0.0619, "step": 129050 }, { "epoch": 4.690021077113162, "grad_norm": 0.4471918046474457, "learning_rate": 6.697201131746875e-06, "loss": 0.0628, "step": 129060 }, { "epoch": 4.690384475615961, "grad_norm": 0.5936034917831421, "learning_rate": 6.693666643659793e-06, "loss": 0.0543, "step": 129070 }, { "epoch": 4.690747874118759, "grad_norm": 0.9600713849067688, "learning_rate": 6.69013294432747e-06, "loss": 0.0688, "step": 129080 }, { "epoch": 4.691111272621557, "grad_norm": 0.38676294684410095, "learning_rate": 6.686600033902174e-06, "loss": 0.0981, "step": 129090 }, { "epoch": 4.691474671124355, "grad_norm": 1.2095938920974731, "learning_rate": 6.683067912536123e-06, "loss": 0.0625, "step": 129100 }, { "epoch": 4.691838069627153, "grad_norm": 0.5480767488479614, "learning_rate": 6.679536580381512e-06, "loss": 0.0603, "step": 129110 }, { "epoch": 4.6922014681299515, "grad_norm": 0.44632649421691895, "learning_rate": 6.676006037590477e-06, "loss": 0.0768, "step": 129120 }, { "epoch": 4.6925648666327495, "grad_norm": 0.33365398645401, "learning_rate": 6.672476284315151e-06, "loss": 0.069, "step": 129130 }, { "epoch": 4.6929282651355475, "grad_norm": 0.37417006492614746, "learning_rate": 6.6689473207076e-06, "loss": 0.0553, "step": 129140 }, { "epoch": 4.6932916636383455, "grad_norm": 0.4316401779651642, "learning_rate": 6.665419146919902e-06, "loss": 0.0531, 
"step": 129150 }, { "epoch": 4.693655062141144, "grad_norm": 0.5395501852035522, "learning_rate": 6.661891763104047e-06, "loss": 0.0637, "step": 129160 }, { "epoch": 4.694018460643942, "grad_norm": 0.5121721625328064, "learning_rate": 6.65836516941204e-06, "loss": 0.0997, "step": 129170 }, { "epoch": 4.69438185914674, "grad_norm": 0.3345545828342438, "learning_rate": 6.654839365995802e-06, "loss": 0.0598, "step": 129180 }, { "epoch": 4.694745257649538, "grad_norm": 0.36829403042793274, "learning_rate": 6.651314353007276e-06, "loss": 0.0526, "step": 129190 }, { "epoch": 4.695108656152336, "grad_norm": 0.3336644172668457, "learning_rate": 6.647790130598325e-06, "loss": 0.0518, "step": 129200 }, { "epoch": 4.695472054655135, "grad_norm": 0.3858420252799988, "learning_rate": 6.644266698920809e-06, "loss": 0.0807, "step": 129210 }, { "epoch": 4.695835453157933, "grad_norm": 0.7446789145469666, "learning_rate": 6.640744058126519e-06, "loss": 0.063, "step": 129220 }, { "epoch": 4.696198851660731, "grad_norm": 0.46862494945526123, "learning_rate": 6.63722220836725e-06, "loss": 0.0611, "step": 129230 }, { "epoch": 4.696562250163529, "grad_norm": 0.4275161027908325, "learning_rate": 6.633701149794738e-06, "loss": 0.0473, "step": 129240 }, { "epoch": 4.696925648666327, "grad_norm": 2.1498615741729736, "learning_rate": 6.630180882560702e-06, "loss": 0.0867, "step": 129250 }, { "epoch": 4.697289047169126, "grad_norm": 0.2726826071739197, "learning_rate": 6.626661406816806e-06, "loss": 0.0563, "step": 129260 }, { "epoch": 4.697652445671924, "grad_norm": 0.910847008228302, "learning_rate": 6.623142722714706e-06, "loss": 0.0781, "step": 129270 }, { "epoch": 4.698015844174722, "grad_norm": 0.4067237079143524, "learning_rate": 6.619624830405985e-06, "loss": 0.0505, "step": 129280 }, { "epoch": 4.69837924267752, "grad_norm": 0.59093177318573, "learning_rate": 6.616107730042248e-06, "loss": 0.0469, "step": 129290 }, { "epoch": 4.698742641180319, "grad_norm": 1.6943495273590088, 
"learning_rate": 6.612591421775011e-06, "loss": 0.0465, "step": 129300 }, { "epoch": 4.699106039683117, "grad_norm": 0.8201996684074402, "learning_rate": 6.609075905755793e-06, "loss": 0.0515, "step": 129310 }, { "epoch": 4.699469438185915, "grad_norm": 3.282536745071411, "learning_rate": 6.6055611821360554e-06, "loss": 0.0635, "step": 129320 }, { "epoch": 4.699832836688713, "grad_norm": 0.30091121792793274, "learning_rate": 6.602047251067234e-06, "loss": 0.0473, "step": 129330 }, { "epoch": 4.700196235191511, "grad_norm": 0.6387266516685486, "learning_rate": 6.598534112700741e-06, "loss": 0.0457, "step": 129340 }, { "epoch": 4.70055963369431, "grad_norm": 0.7519434690475464, "learning_rate": 6.595021767187945e-06, "loss": 0.0498, "step": 129350 }, { "epoch": 4.700923032197108, "grad_norm": 0.43606069684028625, "learning_rate": 6.591510214680169e-06, "loss": 0.0576, "step": 129360 }, { "epoch": 4.701286430699906, "grad_norm": 0.49613672494888306, "learning_rate": 6.58799945532872e-06, "loss": 0.0817, "step": 129370 }, { "epoch": 4.701649829202704, "grad_norm": 0.3905036449432373, "learning_rate": 6.5844894892848645e-06, "loss": 0.0619, "step": 129380 }, { "epoch": 4.702013227705502, "grad_norm": 0.73691725730896, "learning_rate": 6.580980316699842e-06, "loss": 0.0528, "step": 129390 }, { "epoch": 4.7023766262083, "grad_norm": 2.8484795093536377, "learning_rate": 6.577471937724836e-06, "loss": 0.0601, "step": 129400 }, { "epoch": 4.702740024711098, "grad_norm": 0.346022367477417, "learning_rate": 6.573964352511017e-06, "loss": 0.0795, "step": 129410 }, { "epoch": 4.703103423213896, "grad_norm": 0.5846353769302368, "learning_rate": 6.570457561209512e-06, "loss": 0.065, "step": 129420 }, { "epoch": 4.703466821716694, "grad_norm": 0.2490587681531906, "learning_rate": 6.566951563971427e-06, "loss": 0.0499, "step": 129430 }, { "epoch": 4.703830220219492, "grad_norm": 0.46338027715682983, "learning_rate": 6.5634463609478055e-06, "loss": 0.0544, "step": 129440 }, { 
"epoch": 4.704193618722291, "grad_norm": 0.38640037178993225, "learning_rate": 6.559941952289691e-06, "loss": 0.0474, "step": 129450 }, { "epoch": 4.704557017225089, "grad_norm": 0.4975273013114929, "learning_rate": 6.55643833814806e-06, "loss": 0.0624, "step": 129460 }, { "epoch": 4.704920415727887, "grad_norm": 0.5440640449523926, "learning_rate": 6.552935518673881e-06, "loss": 0.065, "step": 129470 }, { "epoch": 4.705283814230685, "grad_norm": 0.5180317163467407, "learning_rate": 6.549433494018073e-06, "loss": 0.0621, "step": 129480 }, { "epoch": 4.705647212733483, "grad_norm": 0.5355479717254639, "learning_rate": 6.545932264331536e-06, "loss": 0.0625, "step": 129490 }, { "epoch": 4.706010611236282, "grad_norm": 0.34078866243362427, "learning_rate": 6.542431829765111e-06, "loss": 0.0506, "step": 129500 }, { "epoch": 4.70637400973908, "grad_norm": 0.5945143699645996, "learning_rate": 6.538932190469627e-06, "loss": 0.0618, "step": 129510 }, { "epoch": 4.706737408241878, "grad_norm": 0.44419458508491516, "learning_rate": 6.535433346595868e-06, "loss": 0.0752, "step": 129520 }, { "epoch": 4.707100806744676, "grad_norm": 0.5059182643890381, "learning_rate": 6.531935298294597e-06, "loss": 0.0439, "step": 129530 }, { "epoch": 4.707464205247474, "grad_norm": 0.3021637499332428, "learning_rate": 6.528438045716517e-06, "loss": 0.1007, "step": 129540 }, { "epoch": 4.707827603750273, "grad_norm": 0.4170551002025604, "learning_rate": 6.524941589012321e-06, "loss": 0.0489, "step": 129550 }, { "epoch": 4.708191002253071, "grad_norm": 0.2920963764190674, "learning_rate": 6.521445928332657e-06, "loss": 0.0593, "step": 129560 }, { "epoch": 4.708554400755869, "grad_norm": 0.5099427103996277, "learning_rate": 6.5179510638281485e-06, "loss": 0.0673, "step": 129570 }, { "epoch": 4.708917799258667, "grad_norm": 0.23309321701526642, "learning_rate": 6.5144569956493626e-06, "loss": 0.0532, "step": 129580 }, { "epoch": 4.709281197761465, "grad_norm": 0.20432619750499725, "learning_rate": 
6.510963723946858e-06, "loss": 0.0468, "step": 129590 }, { "epoch": 4.709644596264264, "grad_norm": 0.6698244214057922, "learning_rate": 6.507471248871136e-06, "loss": 0.0568, "step": 129600 }, { "epoch": 4.709644596264264, "eval_loss": 0.3111569881439209, "eval_runtime": 179.1998, "eval_samples_per_second": 41.373, "eval_steps_per_second": 5.173, "eval_wer": 0.12284204984842159, "step": 129600 }, { "epoch": 4.710007994767062, "grad_norm": 0.3534618318080902, "learning_rate": 6.5039795705726825e-06, "loss": 0.0671, "step": 129610 }, { "epoch": 4.71037139326986, "grad_norm": 0.5126802325248718, "learning_rate": 6.500488689201939e-06, "loss": 0.0779, "step": 129620 }, { "epoch": 4.710734791772658, "grad_norm": 0.44337987899780273, "learning_rate": 6.496998604909324e-06, "loss": 0.0568, "step": 129630 }, { "epoch": 4.711098190275456, "grad_norm": 1.2513412237167358, "learning_rate": 6.493509317845198e-06, "loss": 0.0448, "step": 129640 }, { "epoch": 4.7114615887782545, "grad_norm": 0.7345016002655029, "learning_rate": 6.49002082815991e-06, "loss": 0.0555, "step": 129650 }, { "epoch": 4.7118249872810525, "grad_norm": 0.43675392866134644, "learning_rate": 6.486533136003764e-06, "loss": 0.0534, "step": 129660 }, { "epoch": 4.7121883857838505, "grad_norm": 0.500683605670929, "learning_rate": 6.483046241527044e-06, "loss": 0.0713, "step": 129670 }, { "epoch": 4.7125517842866484, "grad_norm": 0.4286242127418518, "learning_rate": 6.479560144879967e-06, "loss": 0.0491, "step": 129680 }, { "epoch": 4.712915182789446, "grad_norm": 0.33650335669517517, "learning_rate": 6.476074846212751e-06, "loss": 0.0418, "step": 129690 }, { "epoch": 4.713278581292245, "grad_norm": 0.32824280858039856, "learning_rate": 6.472590345675567e-06, "loss": 0.0461, "step": 129700 }, { "epoch": 4.713641979795043, "grad_norm": 0.6823800802230835, "learning_rate": 6.469106643418538e-06, "loss": 0.0631, "step": 129710 }, { "epoch": 4.714005378297841, "grad_norm": 0.8632909059524536, "learning_rate": 
6.46562373959177e-06, "loss": 0.0627, "step": 129720 }, { "epoch": 4.714368776800639, "grad_norm": 0.832693874835968, "learning_rate": 6.462141634345328e-06, "loss": 0.0653, "step": 129730 }, { "epoch": 4.714732175303438, "grad_norm": 0.4526560306549072, "learning_rate": 6.458660327829255e-06, "loss": 0.0564, "step": 129740 }, { "epoch": 4.715095573806236, "grad_norm": 0.3915243446826935, "learning_rate": 6.45517982019353e-06, "loss": 0.0581, "step": 129750 }, { "epoch": 4.715458972309034, "grad_norm": 0.5198982954025269, "learning_rate": 6.451700111588122e-06, "loss": 0.0596, "step": 129760 }, { "epoch": 4.715822370811832, "grad_norm": 0.9538114070892334, "learning_rate": 6.448221202162971e-06, "loss": 0.0681, "step": 129770 }, { "epoch": 4.71618576931463, "grad_norm": 0.32724645733833313, "learning_rate": 6.444743092067951e-06, "loss": 0.0724, "step": 129780 }, { "epoch": 4.716549167817429, "grad_norm": 0.38839659094810486, "learning_rate": 6.441265781452935e-06, "loss": 0.0491, "step": 129790 }, { "epoch": 4.716912566320227, "grad_norm": 0.8698714971542358, "learning_rate": 6.43778927046774e-06, "loss": 0.0577, "step": 129800 }, { "epoch": 4.717275964823025, "grad_norm": 0.8276525139808655, "learning_rate": 6.43431355926217e-06, "loss": 3.1565, "step": 129810 }, { "epoch": 4.717639363325823, "grad_norm": 0.32100144028663635, "learning_rate": 6.4308386479859654e-06, "loss": 0.0488, "step": 129820 }, { "epoch": 4.718002761828621, "grad_norm": 1.3341399431228638, "learning_rate": 6.427364536788854e-06, "loss": 0.0514, "step": 129830 }, { "epoch": 4.71836616033142, "grad_norm": 0.47460418939590454, "learning_rate": 6.42389122582053e-06, "loss": 0.0499, "step": 129840 }, { "epoch": 4.718729558834218, "grad_norm": 3.089568853378296, "learning_rate": 6.420418715230631e-06, "loss": 0.0553, "step": 129850 }, { "epoch": 4.719092957337016, "grad_norm": 0.5936382412910461, "learning_rate": 6.416947005168785e-06, "loss": 0.0539, "step": 129860 }, { "epoch": 
4.719456355839814, "grad_norm": 0.5608541369438171, "learning_rate": 6.413476095784571e-06, "loss": 0.0577, "step": 129870 }, { "epoch": 4.719819754342613, "grad_norm": 0.18805667757987976, "learning_rate": 6.410005987227552e-06, "loss": 0.0708, "step": 129880 }, { "epoch": 4.720183152845411, "grad_norm": 1.2388358116149902, "learning_rate": 6.406536679647221e-06, "loss": 0.0553, "step": 129890 }, { "epoch": 4.7205465513482086, "grad_norm": 1.1845836639404297, "learning_rate": 6.4030681731930695e-06, "loss": 0.061, "step": 129900 }, { "epoch": 4.7209099498510065, "grad_norm": 0.25244084000587463, "learning_rate": 6.399600468014552e-06, "loss": 0.1068, "step": 129910 }, { "epoch": 4.7212733483538045, "grad_norm": 1.2392243146896362, "learning_rate": 6.396133564261059e-06, "loss": 0.0569, "step": 129920 }, { "epoch": 4.721636746856603, "grad_norm": 0.23514771461486816, "learning_rate": 6.392667462081981e-06, "loss": 0.0523, "step": 129930 }, { "epoch": 4.722000145359401, "grad_norm": 0.5710695385932922, "learning_rate": 6.3892021616266564e-06, "loss": 0.0552, "step": 129940 }, { "epoch": 4.722363543862199, "grad_norm": 0.4440504312515259, "learning_rate": 6.385737663044397e-06, "loss": 0.0508, "step": 129950 }, { "epoch": 4.722726942364997, "grad_norm": 2.0856614112854004, "learning_rate": 6.38227396648447e-06, "loss": 0.0787, "step": 129960 }, { "epoch": 4.723090340867795, "grad_norm": 0.9323398470878601, "learning_rate": 6.3788110720961105e-06, "loss": 0.0683, "step": 129970 }, { "epoch": 4.723453739370594, "grad_norm": 0.4498850107192993, "learning_rate": 6.375348980028539e-06, "loss": 0.0486, "step": 129980 }, { "epoch": 4.723817137873392, "grad_norm": 0.2748260796070099, "learning_rate": 6.3718876904309026e-06, "loss": 0.0446, "step": 129990 }, { "epoch": 4.72418053637619, "grad_norm": 0.46685680747032166, "learning_rate": 6.368427203452348e-06, "loss": 0.0682, "step": 130000 }, { "epoch": 4.724543934878988, "grad_norm": 0.6914308667182922, "learning_rate": 
6.364967519241977e-06, "loss": 0.0617, "step": 130010 }, { "epoch": 4.724907333381786, "grad_norm": 0.4431219696998596, "learning_rate": 6.361508637948854e-06, "loss": 0.0677, "step": 130020 }, { "epoch": 4.725270731884585, "grad_norm": 0.3967810869216919, "learning_rate": 6.358050559722003e-06, "loss": 0.0464, "step": 130030 }, { "epoch": 4.725634130387383, "grad_norm": 0.996138334274292, "learning_rate": 6.354593284710425e-06, "loss": 0.0414, "step": 130040 }, { "epoch": 4.725997528890181, "grad_norm": 0.4194527864456177, "learning_rate": 6.3511368130630824e-06, "loss": 0.0591, "step": 130050 }, { "epoch": 4.726360927392979, "grad_norm": 0.6759289503097534, "learning_rate": 6.347681144928908e-06, "loss": 0.0704, "step": 130060 }, { "epoch": 4.726724325895777, "grad_norm": 0.6853132843971252, "learning_rate": 6.34422628045678e-06, "loss": 0.0649, "step": 130070 }, { "epoch": 4.727087724398576, "grad_norm": 0.7420358061790466, "learning_rate": 6.340772219795574e-06, "loss": 0.06, "step": 130080 }, { "epoch": 4.727451122901374, "grad_norm": 0.5346302390098572, "learning_rate": 6.337318963094094e-06, "loss": 0.0739, "step": 130090 }, { "epoch": 4.727814521404172, "grad_norm": 0.5945446491241455, "learning_rate": 6.333866510501138e-06, "loss": 0.0459, "step": 130100 }, { "epoch": 4.72817791990697, "grad_norm": 0.45598119497299194, "learning_rate": 6.33041486216546e-06, "loss": 0.0697, "step": 130110 }, { "epoch": 4.728541318409768, "grad_norm": 1.1875252723693848, "learning_rate": 6.326964018235787e-06, "loss": 0.074, "step": 130120 }, { "epoch": 4.728904716912567, "grad_norm": 0.3237745761871338, "learning_rate": 6.323513978860787e-06, "loss": 0.0668, "step": 130130 }, { "epoch": 4.729268115415365, "grad_norm": 0.42035388946533203, "learning_rate": 6.3200647441891194e-06, "loss": 0.0481, "step": 130140 }, { "epoch": 4.729631513918163, "grad_norm": 0.704536497592926, "learning_rate": 6.316616314369397e-06, "loss": 0.0495, "step": 130150 }, { "epoch": 
4.729994912420961, "grad_norm": 0.2975251078605652, "learning_rate": 6.313168689550212e-06, "loss": 0.0667, "step": 130160 }, { "epoch": 4.730358310923759, "grad_norm": 0.6002227663993835, "learning_rate": 6.309721869880092e-06, "loss": 0.0721, "step": 130170 }, { "epoch": 4.7307217094265575, "grad_norm": 0.5422096848487854, "learning_rate": 6.306275855507554e-06, "loss": 0.0538, "step": 130180 }, { "epoch": 4.731085107929355, "grad_norm": 0.22503329813480377, "learning_rate": 6.302830646581079e-06, "loss": 0.0419, "step": 130190 }, { "epoch": 4.731448506432153, "grad_norm": 1.041131854057312, "learning_rate": 6.2993862432491145e-06, "loss": 0.0625, "step": 130200 }, { "epoch": 4.731448506432153, "eval_loss": 0.3110126554965973, "eval_runtime": 180.2988, "eval_samples_per_second": 41.121, "eval_steps_per_second": 5.141, "eval_wer": 0.12221576778550293, "step": 130200 }, { "epoch": 4.731811904934951, "grad_norm": 0.3097676932811737, "learning_rate": 6.295942645660053e-06, "loss": 0.0567, "step": 130210 }, { "epoch": 4.732175303437749, "grad_norm": 0.7841809988021851, "learning_rate": 6.292499853962277e-06, "loss": 0.0524, "step": 130220 }, { "epoch": 4.732538701940548, "grad_norm": 0.27510207891464233, "learning_rate": 6.289057868304116e-06, "loss": 0.0416, "step": 130230 }, { "epoch": 4.732902100443346, "grad_norm": 0.24780768156051636, "learning_rate": 6.2856166888338785e-06, "loss": 0.0495, "step": 130240 }, { "epoch": 4.733265498946144, "grad_norm": 0.2533951699733734, "learning_rate": 6.282176315699831e-06, "loss": 0.0438, "step": 130250 }, { "epoch": 4.733628897448942, "grad_norm": 0.6860194206237793, "learning_rate": 6.278736749050215e-06, "loss": 0.0582, "step": 130260 }, { "epoch": 4.73399229595174, "grad_norm": 0.6110296845436096, "learning_rate": 6.275297989033216e-06, "loss": 0.0677, "step": 130270 }, { "epoch": 4.734355694454539, "grad_norm": 0.21672260761260986, "learning_rate": 6.271860035797003e-06, "loss": 0.0545, "step": 130280 }, { "epoch": 
4.734719092957337, "grad_norm": 0.7487679719924927, "learning_rate": 6.268422889489706e-06, "loss": 0.0477, "step": 130290 }, { "epoch": 4.735082491460135, "grad_norm": 0.3150898516178131, "learning_rate": 6.264986550259427e-06, "loss": 0.0376, "step": 130300 }, { "epoch": 4.735445889962933, "grad_norm": 0.58106929063797, "learning_rate": 6.261551018254211e-06, "loss": 0.0548, "step": 130310 }, { "epoch": 4.735809288465732, "grad_norm": 0.8996534943580627, "learning_rate": 6.258116293622096e-06, "loss": 0.0626, "step": 130320 }, { "epoch": 4.73617268696853, "grad_norm": 0.5048168897628784, "learning_rate": 6.254682376511053e-06, "loss": 0.1198, "step": 130330 }, { "epoch": 4.736536085471328, "grad_norm": 0.22452345490455627, "learning_rate": 6.251249267069065e-06, "loss": 0.0355, "step": 130340 }, { "epoch": 4.736899483974126, "grad_norm": 0.5849490165710449, "learning_rate": 6.247816965444028e-06, "loss": 0.054, "step": 130350 }, { "epoch": 4.737262882476924, "grad_norm": 0.24893200397491455, "learning_rate": 6.244385471783848e-06, "loss": 0.0523, "step": 130360 }, { "epoch": 4.737626280979723, "grad_norm": 0.5670130252838135, "learning_rate": 6.24095478623635e-06, "loss": 0.0574, "step": 130370 }, { "epoch": 4.737989679482521, "grad_norm": 0.32030385732650757, "learning_rate": 6.237524908949377e-06, "loss": 0.0445, "step": 130380 }, { "epoch": 4.738353077985319, "grad_norm": 0.40795063972473145, "learning_rate": 6.2340958400706945e-06, "loss": 0.0461, "step": 130390 }, { "epoch": 4.738716476488117, "grad_norm": 0.37213465571403503, "learning_rate": 6.230667579748059e-06, "loss": 0.0538, "step": 130400 }, { "epoch": 4.739079874990915, "grad_norm": 0.297336220741272, "learning_rate": 6.227240128129169e-06, "loss": 0.0651, "step": 130410 }, { "epoch": 4.7394432734937135, "grad_norm": 0.3883402347564697, "learning_rate": 6.2238134853617095e-06, "loss": 0.0617, "step": 130420 }, { "epoch": 4.7398066719965115, "grad_norm": 0.3898281455039978, "learning_rate": 
6.22038765159332e-06, "loss": 0.0641, "step": 130430 }, { "epoch": 4.7401700704993095, "grad_norm": 0.41251251101493835, "learning_rate": 6.216962626971615e-06, "loss": 0.0402, "step": 130440 }, { "epoch": 4.7405334690021075, "grad_norm": 0.7306762933731079, "learning_rate": 6.2135384116441546e-06, "loss": 0.0508, "step": 130450 }, { "epoch": 4.740896867504906, "grad_norm": 3.0864450931549072, "learning_rate": 6.210115005758488e-06, "loss": 0.069, "step": 130460 }, { "epoch": 4.741260266007704, "grad_norm": 0.4342913329601288, "learning_rate": 6.206692409462095e-06, "loss": 0.0597, "step": 130470 }, { "epoch": 4.741623664510502, "grad_norm": 1.2082334756851196, "learning_rate": 6.203270622902477e-06, "loss": 0.0632, "step": 130480 }, { "epoch": 4.7419870630133, "grad_norm": 0.35210108757019043, "learning_rate": 6.199849646227038e-06, "loss": 0.0431, "step": 130490 }, { "epoch": 4.742350461516098, "grad_norm": 0.3718969523906708, "learning_rate": 6.196429479583196e-06, "loss": 0.0548, "step": 130500 }, { "epoch": 4.742713860018897, "grad_norm": 0.5681741237640381, "learning_rate": 6.193010123118298e-06, "loss": 0.0833, "step": 130510 }, { "epoch": 4.743077258521695, "grad_norm": 0.4673754870891571, "learning_rate": 6.189591576979676e-06, "loss": 0.0552, "step": 130520 }, { "epoch": 4.743440657024493, "grad_norm": 0.32294797897338867, "learning_rate": 6.186173841314627e-06, "loss": 0.0489, "step": 130530 }, { "epoch": 4.743804055527291, "grad_norm": 0.7136963605880737, "learning_rate": 6.182756916270413e-06, "loss": 0.0483, "step": 130540 }, { "epoch": 4.744167454030089, "grad_norm": 0.500449001789093, "learning_rate": 6.179340801994246e-06, "loss": 0.0714, "step": 130550 }, { "epoch": 4.744530852532888, "grad_norm": 0.3116528391838074, "learning_rate": 6.17592549863332e-06, "loss": 0.0515, "step": 130560 }, { "epoch": 4.744894251035686, "grad_norm": 0.8031904697418213, "learning_rate": 6.172511006334786e-06, "loss": 0.0718, "step": 130570 }, { "epoch": 
4.745257649538484, "grad_norm": 0.31882643699645996, "learning_rate": 6.169097325245774e-06, "loss": 0.039, "step": 130580 }, { "epoch": 4.745621048041282, "grad_norm": 0.22889333963394165, "learning_rate": 6.165684455513349e-06, "loss": 0.0491, "step": 130590 }, { "epoch": 4.74598444654408, "grad_norm": 0.34061500430107117, "learning_rate": 6.162272397284574e-06, "loss": 0.0473, "step": 130600 }, { "epoch": 4.746347845046879, "grad_norm": 1.1783151626586914, "learning_rate": 6.158861150706444e-06, "loss": 0.0716, "step": 130610 }, { "epoch": 4.746711243549677, "grad_norm": 0.34315845370292664, "learning_rate": 6.155450715925964e-06, "loss": 0.0536, "step": 130620 }, { "epoch": 4.747074642052475, "grad_norm": 0.36053702235221863, "learning_rate": 6.152041093090055e-06, "loss": 0.0559, "step": 130630 }, { "epoch": 4.747438040555273, "grad_norm": 0.5911653637886047, "learning_rate": 6.148632282345643e-06, "loss": 0.0428, "step": 130640 }, { "epoch": 4.747801439058071, "grad_norm": 0.7819790244102478, "learning_rate": 6.145224283839584e-06, "loss": 0.0533, "step": 130650 }, { "epoch": 4.74816483756087, "grad_norm": 0.3394904136657715, "learning_rate": 6.141817097718728e-06, "loss": 0.0708, "step": 130660 }, { "epoch": 4.748528236063668, "grad_norm": 0.683914065361023, "learning_rate": 6.138410724129873e-06, "loss": 0.0626, "step": 130670 }, { "epoch": 4.748891634566466, "grad_norm": 0.5896586179733276, "learning_rate": 6.135005163219798e-06, "loss": 0.0626, "step": 130680 }, { "epoch": 4.7492550330692636, "grad_norm": 0.7747721672058105, "learning_rate": 6.131600415135222e-06, "loss": 0.1326, "step": 130690 }, { "epoch": 4.7496184315720615, "grad_norm": 0.4595179259777069, "learning_rate": 6.128196480022857e-06, "loss": 0.0471, "step": 130700 }, { "epoch": 4.74998183007486, "grad_norm": 0.3833105266094208, "learning_rate": 6.124793358029343e-06, "loss": 0.0676, "step": 130710 }, { "epoch": 4.750345228577658, "grad_norm": 1.146299958229065, "learning_rate": 
6.121391049301342e-06, "loss": 0.0751, "step": 130720 }, { "epoch": 4.750708627080456, "grad_norm": 0.609552800655365, "learning_rate": 6.117989553985423e-06, "loss": 0.0614, "step": 130730 }, { "epoch": 4.751072025583254, "grad_norm": 0.3877856731414795, "learning_rate": 6.114588872228158e-06, "loss": 0.0444, "step": 130740 }, { "epoch": 4.751435424086052, "grad_norm": 0.8963897228240967, "learning_rate": 6.111189004176052e-06, "loss": 0.0728, "step": 130750 }, { "epoch": 4.751798822588851, "grad_norm": 0.5534847974777222, "learning_rate": 6.107789949975618e-06, "loss": 0.0557, "step": 130760 }, { "epoch": 4.752162221091649, "grad_norm": 0.6594924926757812, "learning_rate": 6.104391709773288e-06, "loss": 0.0488, "step": 130770 }, { "epoch": 4.752525619594447, "grad_norm": 0.36533448100090027, "learning_rate": 6.100994283715494e-06, "loss": 0.0637, "step": 130780 }, { "epoch": 4.752889018097245, "grad_norm": 0.3370342254638672, "learning_rate": 6.09759767194861e-06, "loss": 0.0606, "step": 130790 }, { "epoch": 4.753252416600043, "grad_norm": 0.2772499918937683, "learning_rate": 6.094201874618985e-06, "loss": 0.0461, "step": 130800 }, { "epoch": 4.753252416600043, "eval_loss": 0.311342716217041, "eval_runtime": 181.1509, "eval_samples_per_second": 40.927, "eval_steps_per_second": 5.117, "eval_wer": 0.12366801604734329, "step": 130800 }, { "epoch": 4.753615815102842, "grad_norm": 2.380986452102661, "learning_rate": 6.090806891872935e-06, "loss": 0.5858, "step": 130810 }, { "epoch": 4.75397921360564, "grad_norm": 0.3085069954395294, "learning_rate": 6.087412723856745e-06, "loss": 0.0557, "step": 130820 }, { "epoch": 4.754342612108438, "grad_norm": 0.45930215716362, "learning_rate": 6.084019370716643e-06, "loss": 0.0682, "step": 130830 }, { "epoch": 4.754706010611236, "grad_norm": 0.9580581188201904, "learning_rate": 6.080626832598849e-06, "loss": 0.0538, "step": 130840 }, { "epoch": 4.755069409114034, "grad_norm": 0.9226281046867371, "learning_rate": 
6.077235109649515e-06, "loss": 0.0619, "step": 130850 }, { "epoch": 4.755432807616833, "grad_norm": 0.3445203900337219, "learning_rate": 6.0738442020148055e-06, "loss": 0.0529, "step": 130860 }, { "epoch": 4.755796206119631, "grad_norm": 0.8632385730743408, "learning_rate": 6.070454109840804e-06, "loss": 0.0721, "step": 130870 }, { "epoch": 4.756159604622429, "grad_norm": 120.1999740600586, "learning_rate": 6.067064833273592e-06, "loss": 1.5171, "step": 130880 }, { "epoch": 4.756523003125227, "grad_norm": 0.8258788585662842, "learning_rate": 6.0636763724591845e-06, "loss": 0.0514, "step": 130890 }, { "epoch": 4.756886401628026, "grad_norm": 0.6023174524307251, "learning_rate": 6.060288727543584e-06, "loss": 0.0829, "step": 130900 }, { "epoch": 4.757249800130824, "grad_norm": 1.7511353492736816, "learning_rate": 6.056901898672756e-06, "loss": 0.0752, "step": 130910 }, { "epoch": 4.757613198633622, "grad_norm": 0.4650043845176697, "learning_rate": 6.053515885992636e-06, "loss": 0.0701, "step": 130920 }, { "epoch": 4.75797659713642, "grad_norm": 0.796540379524231, "learning_rate": 6.050130689649095e-06, "loss": 0.0525, "step": 130930 }, { "epoch": 4.758339995639218, "grad_norm": 0.3653056025505066, "learning_rate": 6.046746309787998e-06, "loss": 0.0838, "step": 130940 }, { "epoch": 4.7587033941420165, "grad_norm": 0.3232150673866272, "learning_rate": 6.043362746555167e-06, "loss": 0.0666, "step": 130950 }, { "epoch": 4.7590667926448145, "grad_norm": 0.9111303687095642, "learning_rate": 6.039980000096393e-06, "loss": 0.0674, "step": 130960 }, { "epoch": 4.7594301911476125, "grad_norm": 8.4576416015625, "learning_rate": 6.036598070557417e-06, "loss": 0.0822, "step": 130970 }, { "epoch": 4.75979358965041, "grad_norm": 0.7691876292228699, "learning_rate": 6.033216958083962e-06, "loss": 0.0844, "step": 130980 }, { "epoch": 4.760156988153208, "grad_norm": 0.34943076968193054, "learning_rate": 6.029836662821692e-06, "loss": 0.0566, "step": 130990 }, { "epoch": 
4.760520386656007, "grad_norm": 1.459910273551941, "learning_rate": 6.026457184916279e-06, "loss": 0.0621, "step": 131000 }, { "epoch": 4.760883785158805, "grad_norm": 0.49941325187683105, "learning_rate": 6.023078524513309e-06, "loss": 0.0644, "step": 131010 }, { "epoch": 4.761247183661603, "grad_norm": 0.8072572946548462, "learning_rate": 6.019700681758373e-06, "loss": 0.075, "step": 131020 }, { "epoch": 4.761610582164401, "grad_norm": 9.545544624328613, "learning_rate": 6.016323656796996e-06, "loss": 0.0519, "step": 131030 }, { "epoch": 4.7619739806672, "grad_norm": 0.4941442608833313, "learning_rate": 6.012947449774689e-06, "loss": 0.0699, "step": 131040 }, { "epoch": 4.762337379169998, "grad_norm": 0.3336218297481537, "learning_rate": 6.009572060836918e-06, "loss": 0.057, "step": 131050 }, { "epoch": 4.762700777672796, "grad_norm": 0.9082197546958923, "learning_rate": 6.006197490129129e-06, "loss": 0.1432, "step": 131060 }, { "epoch": 4.763064176175594, "grad_norm": 0.3561367988586426, "learning_rate": 6.0028237377966976e-06, "loss": 0.0504, "step": 131070 }, { "epoch": 4.763427574678392, "grad_norm": 0.4948752820491791, "learning_rate": 5.99945080398501e-06, "loss": 0.0462, "step": 131080 }, { "epoch": 4.763790973181191, "grad_norm": 0.5399004220962524, "learning_rate": 5.996078688839368e-06, "loss": 0.054, "step": 131090 }, { "epoch": 4.764154371683989, "grad_norm": 1.4239376783370972, "learning_rate": 5.992707392505095e-06, "loss": 0.067, "step": 131100 }, { "epoch": 4.764517770186787, "grad_norm": 0.5691198706626892, "learning_rate": 5.989336915127425e-06, "loss": 0.0719, "step": 131110 }, { "epoch": 4.764881168689585, "grad_norm": 0.9730380177497864, "learning_rate": 5.985967256851593e-06, "loss": 0.0727, "step": 131120 }, { "epoch": 4.765244567192383, "grad_norm": 0.46243080496788025, "learning_rate": 5.982598417822765e-06, "loss": 0.055, "step": 131130 }, { "epoch": 4.765607965695182, "grad_norm": 0.30193954706192017, "learning_rate": 
5.979230398186123e-06, "loss": 0.0457, "step": 131140 }, { "epoch": 4.76597136419798, "grad_norm": 0.37593069672584534, "learning_rate": 5.975863198086759e-06, "loss": 0.064, "step": 131150 }, { "epoch": 4.766334762700778, "grad_norm": 0.6618345379829407, "learning_rate": 5.972496817669767e-06, "loss": 0.0469, "step": 131160 }, { "epoch": 4.766698161203576, "grad_norm": 0.37825489044189453, "learning_rate": 5.969131257080182e-06, "loss": 0.0662, "step": 131170 }, { "epoch": 4.767061559706374, "grad_norm": 0.2928861081600189, "learning_rate": 5.96576651646302e-06, "loss": 0.0515, "step": 131180 }, { "epoch": 4.767424958209173, "grad_norm": 0.32887423038482666, "learning_rate": 5.962402595963254e-06, "loss": 0.0906, "step": 131190 }, { "epoch": 4.7677883567119705, "grad_norm": 0.31427526473999023, "learning_rate": 5.959039495725829e-06, "loss": 0.0599, "step": 131200 }, { "epoch": 4.7681517552147685, "grad_norm": 2.128185749053955, "learning_rate": 5.955677215895639e-06, "loss": 0.0634, "step": 131210 }, { "epoch": 4.7685151537175665, "grad_norm": 0.5010257363319397, "learning_rate": 5.9523157566175655e-06, "loss": 0.0717, "step": 131220 }, { "epoch": 4.7688785522203645, "grad_norm": 0.5313354134559631, "learning_rate": 5.94895511803642e-06, "loss": 0.0572, "step": 131230 }, { "epoch": 4.769241950723163, "grad_norm": 0.27394428849220276, "learning_rate": 5.945595300297027e-06, "loss": 0.0553, "step": 131240 }, { "epoch": 4.769605349225961, "grad_norm": 1.3612923622131348, "learning_rate": 5.942236303544133e-06, "loss": 0.0405, "step": 131250 }, { "epoch": 4.769968747728759, "grad_norm": 0.42567941546440125, "learning_rate": 5.938878127922473e-06, "loss": 0.0621, "step": 131260 }, { "epoch": 4.770332146231557, "grad_norm": 1.045571208000183, "learning_rate": 5.935520773576728e-06, "loss": 0.0489, "step": 131270 }, { "epoch": 4.770695544734355, "grad_norm": 0.3013080954551697, "learning_rate": 5.932164240651561e-06, "loss": 0.0523, "step": 131280 }, { "epoch": 
4.771058943237154, "grad_norm": 1.1697763204574585, "learning_rate": 5.9288085292915934e-06, "loss": 0.0469, "step": 131290 }, { "epoch": 4.771422341739952, "grad_norm": 0.42474037408828735, "learning_rate": 5.92545363964142e-06, "loss": 0.0612, "step": 131300 }, { "epoch": 4.77178574024275, "grad_norm": 0.42094117403030396, "learning_rate": 5.922099571845574e-06, "loss": 0.1137, "step": 131310 }, { "epoch": 4.772149138745548, "grad_norm": 0.8938696980476379, "learning_rate": 5.918746326048577e-06, "loss": 0.0644, "step": 131320 }, { "epoch": 4.772512537248346, "grad_norm": 0.47095391154289246, "learning_rate": 5.915393902394908e-06, "loss": 0.0459, "step": 131330 }, { "epoch": 4.772875935751145, "grad_norm": 2.828216552734375, "learning_rate": 5.912042301029022e-06, "loss": 1.4387, "step": 131340 }, { "epoch": 4.773239334253943, "grad_norm": 0.5628988742828369, "learning_rate": 5.908691522095311e-06, "loss": 0.0521, "step": 131350 }, { "epoch": 4.773602732756741, "grad_norm": 1.2877994775772095, "learning_rate": 5.905341565738162e-06, "loss": 0.0702, "step": 131360 }, { "epoch": 4.773966131259539, "grad_norm": 2.8811938762664795, "learning_rate": 5.901992432101891e-06, "loss": 0.0909, "step": 131370 }, { "epoch": 4.774329529762337, "grad_norm": 0.4034007787704468, "learning_rate": 5.898644121330829e-06, "loss": 0.0486, "step": 131380 }, { "epoch": 4.774692928265136, "grad_norm": 0.2713332176208496, "learning_rate": 5.895296633569222e-06, "loss": 0.045, "step": 131390 }, { "epoch": 4.775056326767934, "grad_norm": 0.7791275978088379, "learning_rate": 5.891949968961316e-06, "loss": 0.0581, "step": 131400 }, { "epoch": 4.775056326767934, "eval_loss": 0.3022659122943878, "eval_runtime": 180.5518, "eval_samples_per_second": 41.063, "eval_steps_per_second": 5.134, "eval_wer": 0.12192531813313486, "step": 131400 }, { "epoch": 4.775419725270732, "grad_norm": 0.3656981587409973, "learning_rate": 5.888604127651293e-06, "loss": 0.0778, "step": 131410 }, { "epoch": 
4.77578312377353, "grad_norm": 0.959745466709137, "learning_rate": 5.8852591097833174e-06, "loss": 0.0798, "step": 131420 }, { "epoch": 4.776146522276328, "grad_norm": 0.40228071808815, "learning_rate": 5.8819149155015184e-06, "loss": 0.0531, "step": 131430 }, { "epoch": 4.776509920779127, "grad_norm": 0.4559895694255829, "learning_rate": 5.87857154494999e-06, "loss": 0.078, "step": 131440 }, { "epoch": 4.776873319281925, "grad_norm": 0.7085505127906799, "learning_rate": 5.875228998272772e-06, "loss": 0.0651, "step": 131450 }, { "epoch": 4.777236717784723, "grad_norm": 0.4186992645263672, "learning_rate": 5.871887275613897e-06, "loss": 0.0545, "step": 131460 }, { "epoch": 4.777600116287521, "grad_norm": 0.8590594530105591, "learning_rate": 5.868546377117326e-06, "loss": 0.0674, "step": 131470 }, { "epoch": 4.777963514790319, "grad_norm": 0.44466155767440796, "learning_rate": 5.86520630292704e-06, "loss": 0.0571, "step": 131480 }, { "epoch": 4.778326913293117, "grad_norm": 0.33942288160324097, "learning_rate": 5.86186705318692e-06, "loss": 0.0579, "step": 131490 }, { "epoch": 4.778690311795915, "grad_norm": 0.7904481887817383, "learning_rate": 5.858528628040866e-06, "loss": 0.0588, "step": 131500 }, { "epoch": 4.779053710298713, "grad_norm": 0.7502536177635193, "learning_rate": 5.855191027632701e-06, "loss": 0.0486, "step": 131510 }, { "epoch": 4.779417108801511, "grad_norm": 0.42741015553474426, "learning_rate": 5.851854252106234e-06, "loss": 0.0631, "step": 131520 }, { "epoch": 4.77978050730431, "grad_norm": 0.3122212886810303, "learning_rate": 5.848518301605241e-06, "loss": 0.0586, "step": 131530 }, { "epoch": 4.780143905807108, "grad_norm": 0.7013571262359619, "learning_rate": 5.845183176273458e-06, "loss": 0.0529, "step": 131540 }, { "epoch": 4.780507304309906, "grad_norm": 1.1957616806030273, "learning_rate": 5.8418488762545745e-06, "loss": 0.0628, "step": 131550 }, { "epoch": 4.780870702812704, "grad_norm": 0.3330773115158081, "learning_rate": 
5.838515401692254e-06, "loss": 0.0674, "step": 131560 }, { "epoch": 4.781234101315502, "grad_norm": 2.1025235652923584, "learning_rate": 5.835182752730131e-06, "loss": 0.0651, "step": 131570 }, { "epoch": 4.781597499818301, "grad_norm": 0.3245607614517212, "learning_rate": 5.8318509295118e-06, "loss": 0.0789, "step": 131580 }, { "epoch": 4.781960898321099, "grad_norm": 0.5522608757019043, "learning_rate": 5.828519932180807e-06, "loss": 0.6258, "step": 131590 }, { "epoch": 4.782324296823897, "grad_norm": 1.0999367237091064, "learning_rate": 5.825189760880684e-06, "loss": 0.0562, "step": 131600 }, { "epoch": 4.782687695326695, "grad_norm": 0.5096704959869385, "learning_rate": 5.8218604157548954e-06, "loss": 0.0615, "step": 131610 }, { "epoch": 4.783051093829494, "grad_norm": 1.028265357017517, "learning_rate": 5.818531896946918e-06, "loss": 0.0505, "step": 131620 }, { "epoch": 4.783414492332292, "grad_norm": 0.5174747109413147, "learning_rate": 5.81520420460015e-06, "loss": 0.0548, "step": 131630 }, { "epoch": 4.78377789083509, "grad_norm": 16.3861026763916, "learning_rate": 5.8118773388579766e-06, "loss": 0.2035, "step": 131640 }, { "epoch": 4.784141289337888, "grad_norm": 0.44235244393348694, "learning_rate": 5.808551299863732e-06, "loss": 0.049, "step": 131650 }, { "epoch": 4.784504687840686, "grad_norm": 10.77482795715332, "learning_rate": 5.805226087760729e-06, "loss": 0.0537, "step": 131660 }, { "epoch": 4.784868086343485, "grad_norm": 0.5570608377456665, "learning_rate": 5.801901702692236e-06, "loss": 0.0652, "step": 131670 }, { "epoch": 4.785231484846283, "grad_norm": 1.9826894998550415, "learning_rate": 5.7985781448015e-06, "loss": 0.0593, "step": 131680 }, { "epoch": 4.785594883349081, "grad_norm": 0.26268863677978516, "learning_rate": 5.795255414231706e-06, "loss": 0.055, "step": 131690 }, { "epoch": 4.785958281851879, "grad_norm": 0.6368954181671143, "learning_rate": 5.791933511126036e-06, "loss": 0.0563, "step": 131700 }, { "epoch": 4.786321680354677, 
"grad_norm": 1.2241665124893188, "learning_rate": 5.788612435627588e-06, "loss": 0.0602, "step": 131710 }, { "epoch": 4.7866850788574755, "grad_norm": 0.698296070098877, "learning_rate": 5.7852921878794955e-06, "loss": 0.0732, "step": 131720 }, { "epoch": 4.7870484773602735, "grad_norm": 0.252273291349411, "learning_rate": 5.781972768024785e-06, "loss": 0.0646, "step": 131730 }, { "epoch": 4.7874118758630715, "grad_norm": 0.6165762543678284, "learning_rate": 5.778654176206499e-06, "loss": 0.0484, "step": 131740 }, { "epoch": 4.7877752743658695, "grad_norm": 2.4464125633239746, "learning_rate": 5.775336412567603e-06, "loss": 0.0613, "step": 131750 }, { "epoch": 4.7881386728686675, "grad_norm": 0.5546942353248596, "learning_rate": 5.77201947725107e-06, "loss": 0.0656, "step": 131760 }, { "epoch": 4.788502071371466, "grad_norm": 0.6739498972892761, "learning_rate": 5.7687033703997975e-06, "loss": 0.072, "step": 131770 }, { "epoch": 4.788865469874264, "grad_norm": 0.6554346680641174, "learning_rate": 5.7653880921566785e-06, "loss": 0.039, "step": 131780 }, { "epoch": 4.789228868377062, "grad_norm": 0.458868145942688, "learning_rate": 5.762073642664545e-06, "loss": 0.0445, "step": 131790 }, { "epoch": 4.78959226687986, "grad_norm": Infinity, "learning_rate": 5.759091346821757e-06, "loss": 1.9795, "step": 131800 }, { "epoch": 4.789955665382658, "grad_norm": 0.2927743196487427, "learning_rate": 5.75577847234991e-06, "loss": 0.0703, "step": 131810 }, { "epoch": 4.790319063885457, "grad_norm": 1.2780346870422363, "learning_rate": 5.752466427043096e-06, "loss": 0.0846, "step": 131820 }, { "epoch": 4.790682462388255, "grad_norm": 0.4276678264141083, "learning_rate": 5.749155211044021e-06, "loss": 0.0499, "step": 131830 }, { "epoch": 4.791045860891053, "grad_norm": 0.6398546695709229, "learning_rate": 5.745844824495361e-06, "loss": 0.043, "step": 131840 }, { "epoch": 4.791409259393851, "grad_norm": 0.4557580351829529, "learning_rate": 5.742535267539733e-06, "loss": 0.0585, 
"step": 131850 }, { "epoch": 4.791772657896649, "grad_norm": 0.41562730073928833, "learning_rate": 5.739226540319745e-06, "loss": 0.0749, "step": 131860 }, { "epoch": 4.792136056399448, "grad_norm": 0.7240803241729736, "learning_rate": 5.735918642977953e-06, "loss": 0.0571, "step": 131870 }, { "epoch": 4.792499454902246, "grad_norm": 0.3443463444709778, "learning_rate": 5.73261157565689e-06, "loss": 0.0769, "step": 131880 }, { "epoch": 4.792862853405044, "grad_norm": 0.5987421870231628, "learning_rate": 5.729305338499039e-06, "loss": 0.0463, "step": 131890 }, { "epoch": 4.793226251907842, "grad_norm": 0.5730900168418884, "learning_rate": 5.7259999316468585e-06, "loss": 0.0644, "step": 131900 }, { "epoch": 4.79358965041064, "grad_norm": 0.94599848985672, "learning_rate": 5.722695355242749e-06, "loss": 0.0605, "step": 131910 }, { "epoch": 4.793953048913439, "grad_norm": 0.5644907355308533, "learning_rate": 5.719391609429126e-06, "loss": 0.0627, "step": 131920 }, { "epoch": 4.794316447416237, "grad_norm": 0.2820222079753876, "learning_rate": 5.716088694348307e-06, "loss": 0.072, "step": 131930 }, { "epoch": 4.794679845919035, "grad_norm": 1.1738067865371704, "learning_rate": 5.712786610142623e-06, "loss": 0.0425, "step": 131940 }, { "epoch": 4.795043244421833, "grad_norm": 0.4355091154575348, "learning_rate": 5.709485356954331e-06, "loss": 0.0721, "step": 131950 }, { "epoch": 4.795406642924631, "grad_norm": 0.6250638365745544, "learning_rate": 5.706184934925679e-06, "loss": 0.0733, "step": 131960 }, { "epoch": 4.79577004142743, "grad_norm": 0.6460831761360168, "learning_rate": 5.702885344198872e-06, "loss": 0.0583, "step": 131970 }, { "epoch": 4.796133439930228, "grad_norm": 0.3038908541202545, "learning_rate": 5.699586584916083e-06, "loss": 0.0516, "step": 131980 }, { "epoch": 4.7964968384330255, "grad_norm": 0.6024642586708069, "learning_rate": 5.696288657219431e-06, "loss": 0.0508, "step": 131990 }, { "epoch": 4.7968602369358235, "grad_norm": 0.5624711513519287, 
"learning_rate": 5.692991561251024e-06, "loss": 0.0478, "step": 132000 }, { "epoch": 4.7968602369358235, "eval_loss": 0.3121514618396759, "eval_runtime": 179.3387, "eval_samples_per_second": 41.341, "eval_steps_per_second": 5.169, "eval_wer": 0.1220977726142284, "step": 132000 }, { "epoch": 4.7972236354386215, "grad_norm": 0.34073111414909363, "learning_rate": 5.6896952971529e-06, "loss": 1.2873, "step": 132010 }, { "epoch": 4.79758703394142, "grad_norm": 0.3385801613330841, "learning_rate": 5.686399865067116e-06, "loss": 0.0491, "step": 132020 }, { "epoch": 4.797950432444218, "grad_norm": 0.6304453611373901, "learning_rate": 5.683105265135635e-06, "loss": 0.0541, "step": 132030 }, { "epoch": 4.798313830947016, "grad_norm": 0.32110467553138733, "learning_rate": 5.679811497500429e-06, "loss": 0.0462, "step": 132040 }, { "epoch": 4.798677229449814, "grad_norm": 0.6101498007774353, "learning_rate": 5.676518562303388e-06, "loss": 0.0819, "step": 132050 }, { "epoch": 4.799040627952613, "grad_norm": 0.40379151701927185, "learning_rate": 5.673226459686421e-06, "loss": 0.0612, "step": 132060 }, { "epoch": 4.799404026455411, "grad_norm": 1.8730882406234741, "learning_rate": 5.669935189791356e-06, "loss": 0.075, "step": 132070 }, { "epoch": 4.799767424958209, "grad_norm": 0.8211833238601685, "learning_rate": 5.666644752760011e-06, "loss": 0.0422, "step": 132080 }, { "epoch": 4.800130823461007, "grad_norm": 0.38069042563438416, "learning_rate": 5.663355148734153e-06, "loss": 0.0499, "step": 132090 }, { "epoch": 4.800494221963805, "grad_norm": 1.0929598808288574, "learning_rate": 5.660066377855519e-06, "loss": 0.09, "step": 132100 }, { "epoch": 4.800857620466604, "grad_norm": 0.33361852169036865, "learning_rate": 5.6567784402658145e-06, "loss": 0.0666, "step": 132110 }, { "epoch": 4.801221018969402, "grad_norm": 0.814060628414154, "learning_rate": 5.6534913361067084e-06, "loss": 0.0507, "step": 132120 }, { "epoch": 4.8015844174722, "grad_norm": 0.6368971467018127, 
"learning_rate": 5.6502050655198205e-06, "loss": 0.0559, "step": 132130 }, { "epoch": 4.801947815974998, "grad_norm": 0.29044511914253235, "learning_rate": 5.646919628646754e-06, "loss": 0.046, "step": 132140 }, { "epoch": 4.802311214477796, "grad_norm": 0.7863609790802002, "learning_rate": 5.6436350256290515e-06, "loss": 0.0608, "step": 132150 }, { "epoch": 4.802674612980595, "grad_norm": 0.25330251455307007, "learning_rate": 5.6403512566082565e-06, "loss": 0.0651, "step": 132160 }, { "epoch": 4.803038011483393, "grad_norm": 0.59688800573349, "learning_rate": 5.637068321725838e-06, "loss": 0.0702, "step": 132170 }, { "epoch": 4.803401409986191, "grad_norm": 0.45993706583976746, "learning_rate": 5.63378622112326e-06, "loss": 0.065, "step": 132180 }, { "epoch": 4.803764808488989, "grad_norm": 0.8543741106987, "learning_rate": 5.630504954941923e-06, "loss": 0.0554, "step": 132190 }, { "epoch": 4.804128206991788, "grad_norm": 5.9259233474731445, "learning_rate": 5.627224523323208e-06, "loss": 1.0605, "step": 132200 }, { "epoch": 4.804491605494586, "grad_norm": 0.606934666633606, "learning_rate": 5.6239449264084615e-06, "loss": 0.0702, "step": 132210 }, { "epoch": 4.804855003997384, "grad_norm": 0.6586235761642456, "learning_rate": 5.620666164338992e-06, "loss": 0.0595, "step": 132220 }, { "epoch": 4.805218402500182, "grad_norm": 0.28526026010513306, "learning_rate": 5.617388237256061e-06, "loss": 0.066, "step": 132230 }, { "epoch": 4.80558180100298, "grad_norm": 0.40299439430236816, "learning_rate": 5.614111145300907e-06, "loss": 0.0497, "step": 132240 }, { "epoch": 4.8059451995057785, "grad_norm": 0.7809686660766602, "learning_rate": 5.6108348886147274e-06, "loss": 0.0749, "step": 132250 }, { "epoch": 4.8063085980085765, "grad_norm": 0.33365485072135925, "learning_rate": 5.607559467338694e-06, "loss": 0.052, "step": 132260 }, { "epoch": 4.8066719965113744, "grad_norm": 0.9140803813934326, "learning_rate": 5.604284881613916e-06, "loss": 0.0539, "step": 132270 }, { 
"epoch": 4.807035395014172, "grad_norm": 1.8128081560134888, "learning_rate": 5.601011131581493e-06, "loss": 0.0581, "step": 132280 }, { "epoch": 4.80739879351697, "grad_norm": 0.6614809632301331, "learning_rate": 5.597738217382481e-06, "loss": 0.0489, "step": 132290 }, { "epoch": 4.807762192019769, "grad_norm": 0.3625745475292206, "learning_rate": 5.5944661391579e-06, "loss": 0.0599, "step": 132300 }, { "epoch": 4.808125590522567, "grad_norm": 0.525924026966095, "learning_rate": 5.591194897048721e-06, "loss": 0.084, "step": 132310 }, { "epoch": 4.808488989025365, "grad_norm": 8.123866081237793, "learning_rate": 5.587924491195906e-06, "loss": 0.0579, "step": 132320 }, { "epoch": 4.808852387528163, "grad_norm": 0.9061564803123474, "learning_rate": 5.584654921740348e-06, "loss": 0.0554, "step": 132330 }, { "epoch": 4.809215786030961, "grad_norm": 0.5745807886123657, "learning_rate": 5.581386188822929e-06, "loss": 0.0441, "step": 132340 }, { "epoch": 4.80957918453376, "grad_norm": 0.5543896555900574, "learning_rate": 5.578118292584486e-06, "loss": 0.0638, "step": 132350 }, { "epoch": 4.809942583036558, "grad_norm": 0.7645826935768127, "learning_rate": 5.5748512331658315e-06, "loss": 0.0705, "step": 132360 }, { "epoch": 4.810305981539356, "grad_norm": 0.5520281195640564, "learning_rate": 5.571585010707717e-06, "loss": 0.0966, "step": 132370 }, { "epoch": 4.810669380042154, "grad_norm": 0.8521866202354431, "learning_rate": 5.568319625350877e-06, "loss": 0.0877, "step": 132380 }, { "epoch": 4.811032778544952, "grad_norm": 0.30126193165779114, "learning_rate": 5.5650550772360036e-06, "loss": 0.0496, "step": 132390 }, { "epoch": 4.811396177047751, "grad_norm": 0.3532141447067261, "learning_rate": 5.561791366503766e-06, "loss": 0.0497, "step": 132400 }, { "epoch": 4.811759575550549, "grad_norm": 0.742138147354126, "learning_rate": 5.558528493294771e-06, "loss": 0.0817, "step": 132410 }, { "epoch": 4.812122974053347, "grad_norm": 1.2228798866271973, "learning_rate": 
5.555266457749609e-06, "loss": 0.0868, "step": 132420 }, { "epoch": 4.812486372556145, "grad_norm": 0.44440752267837524, "learning_rate": 5.552005260008838e-06, "loss": 0.0412, "step": 132430 }, { "epoch": 4.812849771058943, "grad_norm": 0.32134416699409485, "learning_rate": 5.548744900212957e-06, "loss": 0.0575, "step": 132440 }, { "epoch": 4.813213169561742, "grad_norm": 0.5850133895874023, "learning_rate": 5.545485378502449e-06, "loss": 0.0479, "step": 132450 }, { "epoch": 4.81357656806454, "grad_norm": 0.2757713496685028, "learning_rate": 5.542226695017766e-06, "loss": 0.0596, "step": 132460 }, { "epoch": 4.813939966567338, "grad_norm": 0.5057161450386047, "learning_rate": 5.5389688498992955e-06, "loss": 0.0746, "step": 132470 }, { "epoch": 4.814303365070136, "grad_norm": 0.2524872422218323, "learning_rate": 5.535711843287417e-06, "loss": 0.3915, "step": 132480 }, { "epoch": 4.814666763572934, "grad_norm": 0.4024585485458374, "learning_rate": 5.53245567532246e-06, "loss": 0.0651, "step": 132490 }, { "epoch": 4.8150301620757325, "grad_norm": 0.654587984085083, "learning_rate": 5.529200346144728e-06, "loss": 0.0596, "step": 132500 }, { "epoch": 4.8153935605785305, "grad_norm": 0.4986790418624878, "learning_rate": 5.525945855894468e-06, "loss": 0.0602, "step": 132510 }, { "epoch": 4.8157569590813285, "grad_norm": 0.3489739000797272, "learning_rate": 5.522692204711913e-06, "loss": 0.0589, "step": 132520 }, { "epoch": 4.8161203575841265, "grad_norm": 0.38818836212158203, "learning_rate": 5.51943939273725e-06, "loss": 0.0483, "step": 132530 }, { "epoch": 4.8164837560869245, "grad_norm": 0.26392772793769836, "learning_rate": 5.5161874201106415e-06, "loss": 1.2781, "step": 132540 }, { "epoch": 4.816847154589723, "grad_norm": 3.239645004272461, "learning_rate": 5.512936286972181e-06, "loss": 0.0857, "step": 132550 }, { "epoch": 4.817210553092521, "grad_norm": 0.5371155142784119, "learning_rate": 5.509685993461966e-06, "loss": 0.0696, "step": 132560 }, { "epoch": 
4.817573951595319, "grad_norm": 0.8646908402442932, "learning_rate": 5.506436539720039e-06, "loss": 0.0527, "step": 132570 }, { "epoch": 4.817937350098117, "grad_norm": 0.3295968472957611, "learning_rate": 5.5031879258863965e-06, "loss": 0.0644, "step": 132580 }, { "epoch": 4.818300748600915, "grad_norm": 5.000588417053223, "learning_rate": 5.499940152101016e-06, "loss": 0.0485, "step": 132590 }, { "epoch": 4.818664147103714, "grad_norm": 0.5105762481689453, "learning_rate": 5.496693218503835e-06, "loss": 0.0705, "step": 132600 }, { "epoch": 4.818664147103714, "eval_loss": 0.3079814910888672, "eval_runtime": 179.9955, "eval_samples_per_second": 41.19, "eval_steps_per_second": 5.15, "eval_wer": 0.12189808847822535, "step": 132600 }, { "epoch": 4.819027545606512, "grad_norm": 0.7122285962104797, "learning_rate": 5.493447125234755e-06, "loss": 0.0535, "step": 132610 }, { "epoch": 4.81939094410931, "grad_norm": 30.25271987915039, "learning_rate": 5.490201872433629e-06, "loss": 1.2154, "step": 132620 }, { "epoch": 4.819754342612108, "grad_norm": 0.5388603210449219, "learning_rate": 5.486957460240286e-06, "loss": 0.0495, "step": 132630 }, { "epoch": 4.820117741114907, "grad_norm": 0.3952212631702423, "learning_rate": 5.4837138887945255e-06, "loss": 0.0627, "step": 132640 }, { "epoch": 4.820481139617705, "grad_norm": 0.676246166229248, "learning_rate": 5.480471158236089e-06, "loss": 0.0568, "step": 132650 }, { "epoch": 4.820844538120503, "grad_norm": 0.3212704658508301, "learning_rate": 5.477229268704698e-06, "loss": 0.0698, "step": 132660 }, { "epoch": 4.821207936623301, "grad_norm": 0.45353221893310547, "learning_rate": 5.473988220340038e-06, "loss": 0.0622, "step": 132670 }, { "epoch": 4.821571335126099, "grad_norm": 0.8591037392616272, "learning_rate": 5.470748013281757e-06, "loss": 0.0614, "step": 132680 }, { "epoch": 4.821934733628898, "grad_norm": 0.5293501019477844, "learning_rate": 5.46750864766945e-06, "loss": 0.0395, "step": 132690 }, { "epoch": 
4.822298132131696, "grad_norm": 0.8341845273971558, "learning_rate": 5.464270123642701e-06, "loss": 0.0589, "step": 132700 }, { "epoch": 4.822661530634494, "grad_norm": 0.49425092339515686, "learning_rate": 5.461032441341052e-06, "loss": 0.0722, "step": 132710 }, { "epoch": 4.823024929137292, "grad_norm": 0.8270822763442993, "learning_rate": 5.457795600903986e-06, "loss": 0.099, "step": 132720 }, { "epoch": 4.82338832764009, "grad_norm": 0.5040695071220398, "learning_rate": 5.454559602470977e-06, "loss": 0.0484, "step": 132730 }, { "epoch": 4.823751726142889, "grad_norm": 0.39929285645484924, "learning_rate": 5.451324446181452e-06, "loss": 1.0094, "step": 132740 }, { "epoch": 4.824115124645687, "grad_norm": 0.6781763434410095, "learning_rate": 5.44809013217481e-06, "loss": 0.0511, "step": 132750 }, { "epoch": 4.824478523148485, "grad_norm": 0.48135650157928467, "learning_rate": 5.444856660590389e-06, "loss": 0.0654, "step": 132760 }, { "epoch": 4.824841921651283, "grad_norm": 0.473336786031723, "learning_rate": 5.441624031567519e-06, "loss": 0.059, "step": 132770 }, { "epoch": 4.825205320154081, "grad_norm": 0.3868461847305298, "learning_rate": 5.438392245245485e-06, "loss": 0.0472, "step": 132780 }, { "epoch": 4.825568718656879, "grad_norm": 0.41346660256385803, "learning_rate": 5.435161301763525e-06, "loss": 0.0501, "step": 132790 }, { "epoch": 4.825932117159677, "grad_norm": 0.9812760949134827, "learning_rate": 5.431931201260851e-06, "loss": 0.0578, "step": 132800 }, { "epoch": 4.826295515662475, "grad_norm": 1.0222513675689697, "learning_rate": 5.428701943876646e-06, "loss": 0.0637, "step": 132810 }, { "epoch": 4.826658914165273, "grad_norm": 0.6054512858390808, "learning_rate": 5.425473529750031e-06, "loss": 0.0703, "step": 132820 }, { "epoch": 4.827022312668072, "grad_norm": 0.502322256565094, "learning_rate": 5.422245959020117e-06, "loss": 0.0577, "step": 132830 }, { "epoch": 4.82738571117087, "grad_norm": 0.6554481983184814, "learning_rate": 
5.419019231825964e-06, "loss": 0.0499, "step": 132840 }, { "epoch": 4.827749109673668, "grad_norm": 0.636435866355896, "learning_rate": 5.4157933483066106e-06, "loss": 0.0674, "step": 132850 }, { "epoch": 4.828112508176466, "grad_norm": 0.4140775501728058, "learning_rate": 5.412568308601038e-06, "loss": 0.0501, "step": 132860 }, { "epoch": 4.828475906679264, "grad_norm": 1.1406289339065552, "learning_rate": 5.4093441128482e-06, "loss": 0.0738, "step": 132870 }, { "epoch": 4.828839305182063, "grad_norm": 0.3797268569469452, "learning_rate": 5.406120761187022e-06, "loss": 0.0553, "step": 132880 }, { "epoch": 4.829202703684861, "grad_norm": 0.5640438795089722, "learning_rate": 5.402898253756392e-06, "loss": 0.0482, "step": 132890 }, { "epoch": 4.829566102187659, "grad_norm": 0.4556131064891815, "learning_rate": 5.399676590695143e-06, "loss": 0.0541, "step": 132900 }, { "epoch": 4.829929500690457, "grad_norm": 0.4256148636341095, "learning_rate": 5.396455772142092e-06, "loss": 0.0748, "step": 132910 }, { "epoch": 4.830292899193255, "grad_norm": 0.6198804378509521, "learning_rate": 5.3932357982360096e-06, "loss": 0.0733, "step": 132920 }, { "epoch": 4.830656297696054, "grad_norm": 0.5393226742744446, "learning_rate": 5.390016669115644e-06, "loss": 0.047, "step": 132930 }, { "epoch": 4.831019696198852, "grad_norm": 0.34723788499832153, "learning_rate": 5.386798384919684e-06, "loss": 1.6248, "step": 132940 }, { "epoch": 4.83138309470165, "grad_norm": 0.45461782813072205, "learning_rate": 5.383580945786801e-06, "loss": 0.0551, "step": 132950 }, { "epoch": 4.831746493204448, "grad_norm": 0.5043591260910034, "learning_rate": 5.380364351855613e-06, "loss": 0.0745, "step": 132960 }, { "epoch": 4.832109891707246, "grad_norm": 0.6043432354927063, "learning_rate": 5.377148603264717e-06, "loss": 0.0518, "step": 132970 }, { "epoch": 4.832473290210045, "grad_norm": 1.0558514595031738, "learning_rate": 5.373933700152669e-06, "loss": 0.0649, "step": 132980 }, { "epoch": 
4.832836688712843, "grad_norm": 0.5405622124671936, "learning_rate": 5.370719642657998e-06, "loss": 0.0481, "step": 132990 }, { "epoch": 4.833200087215641, "grad_norm": 0.633976936340332, "learning_rate": 5.367506430919169e-06, "loss": 0.0534, "step": 133000 }, { "epoch": 4.833563485718439, "grad_norm": 0.42149069905281067, "learning_rate": 5.364294065074635e-06, "loss": 0.0577, "step": 133010 }, { "epoch": 4.833926884221237, "grad_norm": 0.6711609959602356, "learning_rate": 5.361082545262805e-06, "loss": 0.0519, "step": 133020 }, { "epoch": 4.8342902827240355, "grad_norm": 0.6866138577461243, "learning_rate": 5.3578718716220586e-06, "loss": 0.0577, "step": 133030 }, { "epoch": 4.8346536812268335, "grad_norm": 0.1845683604478836, "learning_rate": 5.35466204429072e-06, "loss": 0.0454, "step": 133040 }, { "epoch": 4.8350170797296315, "grad_norm": 0.23281435668468475, "learning_rate": 5.351453063407097e-06, "loss": 0.0631, "step": 133050 }, { "epoch": 4.8353804782324294, "grad_norm": 0.32664600014686584, "learning_rate": 5.34824492910945e-06, "loss": 0.0638, "step": 133060 }, { "epoch": 4.835743876735227, "grad_norm": 0.6019701361656189, "learning_rate": 5.345037641536016e-06, "loss": 0.0687, "step": 133070 }, { "epoch": 4.836107275238026, "grad_norm": 0.5875816345214844, "learning_rate": 5.34183120082497e-06, "loss": 0.0487, "step": 133080 }, { "epoch": 4.836470673740824, "grad_norm": 1.6033200025558472, "learning_rate": 5.338625607114481e-06, "loss": 0.0784, "step": 133090 }, { "epoch": 4.836834072243622, "grad_norm": 0.529167890548706, "learning_rate": 5.3354208605426504e-06, "loss": 0.0773, "step": 133100 }, { "epoch": 4.83719747074642, "grad_norm": 0.4267202615737915, "learning_rate": 5.332216961247571e-06, "loss": 0.0648, "step": 133110 }, { "epoch": 4.837560869249218, "grad_norm": 0.43763381242752075, "learning_rate": 5.329013909367283e-06, "loss": 0.0626, "step": 133120 }, { "epoch": 4.837924267752017, "grad_norm": 0.3443093001842499, "learning_rate": 
5.3258117050398016e-06, "loss": 0.0614, "step": 133130 }, { "epoch": 4.838287666254815, "grad_norm": 0.3487081229686737, "learning_rate": 5.32261034840309e-06, "loss": 0.0576, "step": 133140 }, { "epoch": 4.838651064757613, "grad_norm": 0.48579782247543335, "learning_rate": 5.319409839595082e-06, "loss": 0.0499, "step": 133150 }, { "epoch": 4.839014463260411, "grad_norm": 0.653016984462738, "learning_rate": 5.316210178753681e-06, "loss": 0.0488, "step": 133160 }, { "epoch": 4.839377861763209, "grad_norm": 1.7668088674545288, "learning_rate": 5.3130113660167566e-06, "loss": 0.0625, "step": 133170 }, { "epoch": 4.839741260266008, "grad_norm": 0.6149731874465942, "learning_rate": 5.309813401522118e-06, "loss": 0.0538, "step": 133180 }, { "epoch": 4.840104658768806, "grad_norm": 1.0906972885131836, "learning_rate": 5.30661628540757e-06, "loss": 0.0487, "step": 133190 }, { "epoch": 4.840468057271604, "grad_norm": 0.30425411462783813, "learning_rate": 5.303420017810843e-06, "loss": 0.0543, "step": 133200 }, { "epoch": 4.840468057271604, "eval_loss": 0.30090659856796265, "eval_runtime": 179.3713, "eval_samples_per_second": 41.333, "eval_steps_per_second": 5.168, "eval_wer": 0.12187993537495234, "step": 133200 }, { "epoch": 4.840831455774402, "grad_norm": 0.49348288774490356, "learning_rate": 5.300224598869679e-06, "loss": 0.0533, "step": 133210 }, { "epoch": 4.841194854277201, "grad_norm": 0.509075939655304, "learning_rate": 5.297030028721739e-06, "loss": 0.0575, "step": 133220 }, { "epoch": 4.841558252779999, "grad_norm": 0.2496069073677063, "learning_rate": 5.293836307504679e-06, "loss": 0.0559, "step": 133230 }, { "epoch": 4.841921651282797, "grad_norm": 0.5446711778640747, "learning_rate": 5.290643435356082e-06, "loss": 0.0542, "step": 133240 }, { "epoch": 4.842285049785595, "grad_norm": 0.26632824540138245, "learning_rate": 5.2874514124135496e-06, "loss": 0.0556, "step": 133250 }, { "epoch": 4.842648448288393, "grad_norm": 0.37130212783813477, "learning_rate": 
5.284260238814592e-06, "loss": 0.0804, "step": 133260 }, { "epoch": 4.843011846791192, "grad_norm": 0.39752310514450073, "learning_rate": 5.281069914696715e-06, "loss": 0.0636, "step": 133270 }, { "epoch": 4.8433752452939896, "grad_norm": 0.42162778973579407, "learning_rate": 5.277880440197372e-06, "loss": 0.0504, "step": 133280 }, { "epoch": 4.8437386437967875, "grad_norm": 0.2999958395957947, "learning_rate": 5.274691815453989e-06, "loss": 0.1921, "step": 133290 }, { "epoch": 4.8441020422995855, "grad_norm": 0.6132074594497681, "learning_rate": 5.27150404060395e-06, "loss": 0.0709, "step": 133300 }, { "epoch": 4.844465440802384, "grad_norm": 0.5938759446144104, "learning_rate": 5.2683171157846155e-06, "loss": 0.0541, "step": 133310 }, { "epoch": 4.844828839305182, "grad_norm": 0.7031438946723938, "learning_rate": 5.265131041133287e-06, "loss": 0.067, "step": 133320 }, { "epoch": 4.84519223780798, "grad_norm": 0.28125789761543274, "learning_rate": 5.261945816787247e-06, "loss": 0.0445, "step": 133330 }, { "epoch": 4.845555636310778, "grad_norm": 0.9672883749008179, "learning_rate": 5.258761442883725e-06, "loss": 0.0447, "step": 133340 }, { "epoch": 4.845919034813576, "grad_norm": 0.3946448564529419, "learning_rate": 5.255577919559942e-06, "loss": 0.0648, "step": 133350 }, { "epoch": 4.846282433316375, "grad_norm": 0.3410206437110901, "learning_rate": 5.252395246953052e-06, "loss": 0.058, "step": 133360 }, { "epoch": 4.846645831819173, "grad_norm": 0.44879835844039917, "learning_rate": 5.249213425200195e-06, "loss": 0.0678, "step": 133370 }, { "epoch": 4.847009230321971, "grad_norm": 0.6794377565383911, "learning_rate": 5.2460324544384484e-06, "loss": 0.0445, "step": 133380 }, { "epoch": 4.847372628824769, "grad_norm": 0.6270850896835327, "learning_rate": 5.2428523348048815e-06, "loss": 0.0542, "step": 133390 }, { "epoch": 4.847736027327567, "grad_norm": 0.5464701652526855, "learning_rate": 5.239673066436512e-06, "loss": 0.077, "step": 133400 }, { "epoch": 
4.848099425830366, "grad_norm": 0.5443971157073975, "learning_rate": 5.236494649470328e-06, "loss": 0.056, "step": 133410 }, { "epoch": 4.848462824333164, "grad_norm": 1.9334089756011963, "learning_rate": 5.233317084043263e-06, "loss": 0.0486, "step": 133420 }, { "epoch": 4.848826222835962, "grad_norm": 0.5123773813247681, "learning_rate": 5.2301403702922355e-06, "loss": 0.0514, "step": 133430 }, { "epoch": 4.84918962133876, "grad_norm": 0.4335247576236725, "learning_rate": 5.226964508354118e-06, "loss": 0.0483, "step": 133440 }, { "epoch": 4.849553019841558, "grad_norm": 0.680203378200531, "learning_rate": 5.223789498365753e-06, "loss": 0.0513, "step": 133450 }, { "epoch": 4.849916418344357, "grad_norm": 0.36326515674591064, "learning_rate": 5.22061534046393e-06, "loss": 0.0561, "step": 133460 }, { "epoch": 4.850279816847155, "grad_norm": 1.3510302305221558, "learning_rate": 5.217442034785422e-06, "loss": 0.0549, "step": 133470 }, { "epoch": 4.850643215349953, "grad_norm": 0.6176362633705139, "learning_rate": 5.214269581466938e-06, "loss": 0.0611, "step": 133480 }, { "epoch": 4.851006613852751, "grad_norm": 9.4448881149292, "learning_rate": 5.211097980645191e-06, "loss": 0.0995, "step": 133490 }, { "epoch": 4.851370012355549, "grad_norm": 0.8972245454788208, "learning_rate": 5.207927232456816e-06, "loss": 0.0682, "step": 133500 }, { "epoch": 4.851733410858348, "grad_norm": 0.7225368022918701, "learning_rate": 5.204757337038446e-06, "loss": 0.0729, "step": 133510 }, { "epoch": 4.852096809361146, "grad_norm": 0.730660080909729, "learning_rate": 5.201588294526641e-06, "loss": 0.0729, "step": 133520 }, { "epoch": 4.852460207863944, "grad_norm": 0.8854877948760986, "learning_rate": 5.198420105057955e-06, "loss": 0.0531, "step": 133530 }, { "epoch": 4.852823606366742, "grad_norm": 1.4167195558547974, "learning_rate": 5.195252768768893e-06, "loss": 0.047, "step": 133540 }, { "epoch": 4.85318700486954, "grad_norm": 0.325641006231308, "learning_rate": 
5.192086285795927e-06, "loss": 0.0494, "step": 133550 }, { "epoch": 4.8535504033723385, "grad_norm": 0.7112864851951599, "learning_rate": 5.188920656275484e-06, "loss": 0.0571, "step": 133560 }, { "epoch": 4.853913801875136, "grad_norm": 10.70383071899414, "learning_rate": 5.185755880343965e-06, "loss": 0.0626, "step": 133570 }, { "epoch": 4.854277200377934, "grad_norm": 0.6314172744750977, "learning_rate": 5.182591958137714e-06, "loss": 0.0706, "step": 133580 }, { "epoch": 4.854640598880732, "grad_norm": 0.4412120282649994, "learning_rate": 5.179428889793078e-06, "loss": 0.0524, "step": 133590 }, { "epoch": 4.85500399738353, "grad_norm": 2.447517156600952, "learning_rate": 5.176266675446323e-06, "loss": 0.0683, "step": 133600 }, { "epoch": 4.855367395886329, "grad_norm": 0.6324079036712646, "learning_rate": 5.173105315233712e-06, "loss": 0.077, "step": 133610 }, { "epoch": 4.855730794389127, "grad_norm": 0.47590774297714233, "learning_rate": 5.169944809291438e-06, "loss": 0.0619, "step": 133620 }, { "epoch": 4.856094192891925, "grad_norm": 0.387892484664917, "learning_rate": 5.166785157755691e-06, "loss": 0.0612, "step": 133630 }, { "epoch": 4.856457591394723, "grad_norm": 0.3840842843055725, "learning_rate": 5.163626360762602e-06, "loss": 0.0415, "step": 133640 }, { "epoch": 4.856820989897521, "grad_norm": 39.84265899658203, "learning_rate": 5.160468418448283e-06, "loss": 0.5947, "step": 133650 }, { "epoch": 4.85718438840032, "grad_norm": 0.6043709516525269, "learning_rate": 5.157311330948783e-06, "loss": 0.0779, "step": 133660 }, { "epoch": 4.857547786903118, "grad_norm": 0.4519200325012207, "learning_rate": 5.154155098400138e-06, "loss": 0.0727, "step": 133670 }, { "epoch": 4.857911185405916, "grad_norm": 0.4794279634952545, "learning_rate": 5.150999720938337e-06, "loss": 0.0428, "step": 133680 }, { "epoch": 4.858274583908714, "grad_norm": 0.7044159173965454, "learning_rate": 5.147845198699342e-06, "loss": 0.0488, "step": 133690 }, { "epoch": 4.858637982411512, 
"grad_norm": 0.3337533175945282, "learning_rate": 5.144691531819057e-06, "loss": 0.067, "step": 133700 }, { "epoch": 4.859001380914311, "grad_norm": 0.4542635381221771, "learning_rate": 5.141538720433376e-06, "loss": 0.0665, "step": 133710 }, { "epoch": 4.859364779417109, "grad_norm": 0.5084032416343689, "learning_rate": 5.138386764678119e-06, "loss": 0.0784, "step": 133720 }, { "epoch": 4.859728177919907, "grad_norm": 0.19773313403129578, "learning_rate": 5.135235664689125e-06, "loss": 0.0448, "step": 133730 }, { "epoch": 4.860091576422705, "grad_norm": 0.4775860011577606, "learning_rate": 5.132085420602137e-06, "loss": 0.0517, "step": 133740 }, { "epoch": 4.860454974925503, "grad_norm": 0.6707072257995605, "learning_rate": 5.128936032552909e-06, "loss": 0.0587, "step": 133750 }, { "epoch": 4.860818373428302, "grad_norm": 0.6019258499145508, "learning_rate": 5.125787500677115e-06, "loss": 0.0693, "step": 133760 }, { "epoch": 4.8611817719311, "grad_norm": 5.205812454223633, "learning_rate": 5.122639825110426e-06, "loss": 0.0628, "step": 133770 }, { "epoch": 4.861545170433898, "grad_norm": 0.3823084235191345, "learning_rate": 5.119493005988465e-06, "loss": 0.0582, "step": 133780 }, { "epoch": 4.861908568936696, "grad_norm": 0.3912712037563324, "learning_rate": 5.11634704344682e-06, "loss": 0.048, "step": 133790 }, { "epoch": 4.8622719674394945, "grad_norm": 0.5319221615791321, "learning_rate": 5.113201937621029e-06, "loss": 0.0529, "step": 133800 }, { "epoch": 4.8622719674394945, "eval_loss": 0.3144836127758026, "eval_runtime": 179.4394, "eval_samples_per_second": 41.318, "eval_steps_per_second": 5.166, "eval_wer": 0.12277851398696607, "step": 133800 }, { "epoch": 4.8626353659422925, "grad_norm": 0.3594875931739807, "learning_rate": 5.110057688646616e-06, "loss": 0.0735, "step": 133810 }, { "epoch": 4.8629987644450905, "grad_norm": 2.0811986923217773, "learning_rate": 5.106914296659035e-06, "loss": 0.068, "step": 133820 }, { "epoch": 4.8633621629478885, "grad_norm": 
3.2955965995788574, "learning_rate": 5.103771761793749e-06, "loss": 0.0524, "step": 133830 }, { "epoch": 4.8637255614506865, "grad_norm": 0.16276855766773224, "learning_rate": 5.1006300841861425e-06, "loss": 0.2014, "step": 133840 }, { "epoch": 4.864088959953485, "grad_norm": 0.7564295530319214, "learning_rate": 5.097489263971589e-06, "loss": 0.054, "step": 133850 }, { "epoch": 4.864452358456283, "grad_norm": 0.5737159252166748, "learning_rate": 5.094349301285395e-06, "loss": 0.0592, "step": 133860 }, { "epoch": 4.864815756959081, "grad_norm": 0.446867436170578, "learning_rate": 5.09121019626288e-06, "loss": 0.0501, "step": 133870 }, { "epoch": 4.865179155461879, "grad_norm": 0.32116255164146423, "learning_rate": 5.088071949039275e-06, "loss": 0.0762, "step": 133880 }, { "epoch": 4.865542553964678, "grad_norm": 0.3649749755859375, "learning_rate": 5.0849345597498096e-06, "loss": 0.0394, "step": 133890 }, { "epoch": 4.865905952467476, "grad_norm": 1.7839421033859253, "learning_rate": 5.0817980285296466e-06, "loss": 0.0625, "step": 133900 }, { "epoch": 4.866269350970274, "grad_norm": 0.5425037741661072, "learning_rate": 5.078662355513942e-06, "loss": 0.0842, "step": 133910 }, { "epoch": 4.866632749473072, "grad_norm": 0.55345219373703, "learning_rate": 5.075527540837793e-06, "loss": 0.0555, "step": 133920 }, { "epoch": 4.86699614797587, "grad_norm": 1.1643956899642944, "learning_rate": 5.072393584636279e-06, "loss": 0.0774, "step": 133930 }, { "epoch": 4.867359546478669, "grad_norm": 0.45696577429771423, "learning_rate": 5.069260487044417e-06, "loss": 0.0521, "step": 133940 }, { "epoch": 4.867722944981467, "grad_norm": 3.843625545501709, "learning_rate": 5.066128248197211e-06, "loss": 0.0641, "step": 133950 }, { "epoch": 4.868086343484265, "grad_norm": 0.4715985953807831, "learning_rate": 5.062996868229602e-06, "loss": 0.0628, "step": 133960 }, { "epoch": 4.868449741987063, "grad_norm": 0.48865512013435364, "learning_rate": 5.059866347276535e-06, "loss": 0.0706, 
"step": 133970 }, { "epoch": 4.868813140489861, "grad_norm": 1.0587096214294434, "learning_rate": 5.056736685472871e-06, "loss": 0.0463, "step": 133980 }, { "epoch": 4.86917653899266, "grad_norm": 0.6733787059783936, "learning_rate": 5.0536078829534734e-06, "loss": 0.0453, "step": 133990 }, { "epoch": 4.869539937495458, "grad_norm": 1.23604154586792, "learning_rate": 5.050479939853137e-06, "loss": 0.0723, "step": 134000 }, { "epoch": 4.869903335998256, "grad_norm": 0.893674910068512, "learning_rate": 5.047352856306639e-06, "loss": 0.0635, "step": 134010 }, { "epoch": 4.870266734501054, "grad_norm": 0.33654364943504333, "learning_rate": 5.044226632448715e-06, "loss": 0.0646, "step": 134020 }, { "epoch": 4.870630133003852, "grad_norm": 0.3059425354003906, "learning_rate": 5.041101268414067e-06, "loss": 0.0602, "step": 134030 }, { "epoch": 4.870993531506651, "grad_norm": 0.5584850907325745, "learning_rate": 5.037976764337346e-06, "loss": 0.0589, "step": 134040 }, { "epoch": 4.871356930009449, "grad_norm": 3.6153693199157715, "learning_rate": 5.034853120353181e-06, "loss": 0.0726, "step": 134050 }, { "epoch": 4.871720328512247, "grad_norm": 0.3579678237438202, "learning_rate": 5.031730336596161e-06, "loss": 0.0556, "step": 134060 }, { "epoch": 4.8720837270150446, "grad_norm": 4.7592949867248535, "learning_rate": 5.028608413200836e-06, "loss": 0.0608, "step": 134070 }, { "epoch": 4.8724471255178425, "grad_norm": 0.3128441572189331, "learning_rate": 5.025487350301711e-06, "loss": 0.0546, "step": 134080 }, { "epoch": 4.872810524020641, "grad_norm": 0.5973222851753235, "learning_rate": 5.022367148033274e-06, "loss": 0.0383, "step": 134090 }, { "epoch": 4.873173922523439, "grad_norm": 0.3861252963542938, "learning_rate": 5.019247806529942e-06, "loss": 0.0477, "step": 134100 }, { "epoch": 4.873537321026237, "grad_norm": 0.4435428977012634, "learning_rate": 5.016129325926145e-06, "loss": 0.0661, "step": 134110 }, { "epoch": 4.873900719529035, "grad_norm": 1.2726280689239502, 
"learning_rate": 5.013011706356224e-06, "loss": 0.0674, "step": 134120 }, { "epoch": 4.874264118031833, "grad_norm": 0.29669496417045593, "learning_rate": 5.010206585038293e-06, "loss": 4.9748, "step": 134130 }, { "epoch": 4.874627516534632, "grad_norm": 0.2576645016670227, "learning_rate": 5.007090601802794e-06, "loss": 0.0446, "step": 134140 }, { "epoch": 4.87499091503743, "grad_norm": 1.2048192024230957, "learning_rate": 5.003975479990633e-06, "loss": 0.0579, "step": 134150 }, { "epoch": 4.875354313540228, "grad_norm": 0.4176105260848999, "learning_rate": 5.000861219736008e-06, "loss": 0.0714, "step": 134160 }, { "epoch": 4.875717712043026, "grad_norm": 1.1698194742202759, "learning_rate": 4.9977478211731295e-06, "loss": 0.0651, "step": 134170 }, { "epoch": 4.876081110545824, "grad_norm": 1.0174731016159058, "learning_rate": 4.994635284436125e-06, "loss": 0.0748, "step": 134180 }, { "epoch": 4.876444509048623, "grad_norm": 0.9534470438957214, "learning_rate": 4.99152360965911e-06, "loss": 0.0855, "step": 134190 }, { "epoch": 4.876807907551421, "grad_norm": 66.4375991821289, "learning_rate": 4.988412796976147e-06, "loss": 0.8669, "step": 134200 }, { "epoch": 4.877171306054219, "grad_norm": 0.4856536090373993, "learning_rate": 4.985302846521275e-06, "loss": 0.0569, "step": 134210 }, { "epoch": 4.877534704557017, "grad_norm": 0.3952353298664093, "learning_rate": 4.982193758428491e-06, "loss": 0.0588, "step": 134220 }, { "epoch": 4.877898103059815, "grad_norm": 0.32934287190437317, "learning_rate": 4.979085532831762e-06, "loss": 0.0608, "step": 134230 }, { "epoch": 4.878261501562614, "grad_norm": 3.58503794670105, "learning_rate": 4.9759781698649945e-06, "loss": 3.2491, "step": 134240 }, { "epoch": 4.878624900065412, "grad_norm": 0.5776059031486511, "learning_rate": 4.972871669662088e-06, "loss": 0.0628, "step": 134250 }, { "epoch": 4.87898829856821, "grad_norm": 0.3532797396183014, "learning_rate": 4.9697660323568735e-06, "loss": 0.0743, "step": 134260 }, { 
"epoch": 4.879351697071008, "grad_norm": 0.31687241792678833, "learning_rate": 4.966661258083183e-06, "loss": 0.0614, "step": 134270 }, { "epoch": 4.879715095573806, "grad_norm": 0.4512006342411041, "learning_rate": 4.963557346974776e-06, "loss": 0.043, "step": 134280 }, { "epoch": 4.880078494076605, "grad_norm": 0.34858423471450806, "learning_rate": 4.960454299165398e-06, "loss": 0.0479, "step": 134290 }, { "epoch": 4.880441892579403, "grad_norm": 2.416607141494751, "learning_rate": 4.957352114788738e-06, "loss": 0.0823, "step": 134300 }, { "epoch": 4.880805291082201, "grad_norm": 1.379605770111084, "learning_rate": 4.954250793978463e-06, "loss": 0.0573, "step": 134310 }, { "epoch": 4.881168689584999, "grad_norm": 0.6505770087242126, "learning_rate": 4.951150336868199e-06, "loss": 0.0596, "step": 134320 }, { "epoch": 4.881532088087797, "grad_norm": 0.6505630016326904, "learning_rate": 4.94805074359154e-06, "loss": 0.1076, "step": 134330 }, { "epoch": 4.8818954865905955, "grad_norm": 0.5218392610549927, "learning_rate": 4.94495201428202e-06, "loss": 0.039, "step": 134340 }, { "epoch": 4.8822588850933935, "grad_norm": 2.2523157596588135, "learning_rate": 4.941854149073164e-06, "loss": 0.0561, "step": 134350 }, { "epoch": 4.882622283596191, "grad_norm": 12.027015686035156, "learning_rate": 4.938757148098444e-06, "loss": 0.1109, "step": 134360 }, { "epoch": 4.882985682098989, "grad_norm": 2.0272796154022217, "learning_rate": 4.935661011491308e-06, "loss": 0.0491, "step": 134370 }, { "epoch": 4.883349080601788, "grad_norm": 0.5193353295326233, "learning_rate": 4.9325657393851445e-06, "loss": 0.0517, "step": 134380 }, { "epoch": 4.883712479104586, "grad_norm": 0.38418057560920715, "learning_rate": 4.929471331913327e-06, "loss": 0.0398, "step": 134390 }, { "epoch": 4.884075877607384, "grad_norm": 0.6307530999183655, "learning_rate": 4.926377789209169e-06, "loss": 0.055, "step": 134400 }, { "epoch": 4.884075877607384, "eval_loss": 0.313152015209198, "eval_runtime": 
181.0676, "eval_samples_per_second": 40.946, "eval_steps_per_second": 5.12, "eval_wer": 0.12240637536986948, "step": 134400 }, { "epoch": 4.884439276110182, "grad_norm": 0.44101715087890625, "learning_rate": 4.9232851114059785e-06, "loss": 0.0614, "step": 134410 }, { "epoch": 4.88480267461298, "grad_norm": 0.5291188359260559, "learning_rate": 4.920193298636997e-06, "loss": 0.0573, "step": 134420 }, { "epoch": 4.885166073115779, "grad_norm": 0.29965198040008545, "learning_rate": 4.917102351035446e-06, "loss": 0.0584, "step": 134430 }, { "epoch": 4.885529471618577, "grad_norm": 0.3013246953487396, "learning_rate": 4.914012268734494e-06, "loss": 0.0485, "step": 134440 }, { "epoch": 4.885892870121375, "grad_norm": 5.743584632873535, "learning_rate": 4.910923051867286e-06, "loss": 0.0553, "step": 134450 }, { "epoch": 4.886256268624173, "grad_norm": 0.46954047679901123, "learning_rate": 4.907834700566929e-06, "loss": 0.0597, "step": 134460 }, { "epoch": 4.886619667126972, "grad_norm": 0.44186410307884216, "learning_rate": 4.90474721496649e-06, "loss": 0.0649, "step": 134470 }, { "epoch": 4.88698306562977, "grad_norm": 0.5196414589881897, "learning_rate": 4.9016605951989895e-06, "loss": 0.0613, "step": 134480 }, { "epoch": 4.887346464132568, "grad_norm": 0.47843530774116516, "learning_rate": 4.898574841397433e-06, "loss": 0.0641, "step": 134490 }, { "epoch": 4.887709862635366, "grad_norm": 0.5206328630447388, "learning_rate": 4.8954899536947485e-06, "loss": 0.0552, "step": 134500 }, { "epoch": 4.888073261138164, "grad_norm": 0.5103901624679565, "learning_rate": 4.892405932223884e-06, "loss": 0.0697, "step": 134510 }, { "epoch": 4.888436659640963, "grad_norm": 7.005363464355469, "learning_rate": 4.889322777117697e-06, "loss": 0.0753, "step": 134520 }, { "epoch": 4.888800058143761, "grad_norm": 0.7842527031898499, "learning_rate": 4.8862404885090476e-06, "loss": 0.0596, "step": 134530 }, { "epoch": 4.889163456646559, "grad_norm": 1.1054315567016602, "learning_rate": 
4.883159066530721e-06, "loss": 2.2958, "step": 134540 }, { "epoch": 4.889526855149357, "grad_norm": 1.1994401216506958, "learning_rate": 4.880078511315495e-06, "loss": 0.046, "step": 134550 }, { "epoch": 4.889890253652155, "grad_norm": 0.469723641872406, "learning_rate": 4.876998822996098e-06, "loss": 0.052, "step": 134560 }, { "epoch": 4.890253652154954, "grad_norm": 10.84537124633789, "learning_rate": 4.873920001705232e-06, "loss": 0.1084, "step": 134570 }, { "epoch": 4.8906170506577515, "grad_norm": 1.3978731632232666, "learning_rate": 4.870842047575538e-06, "loss": 0.0437, "step": 134580 }, { "epoch": 4.8909804491605495, "grad_norm": 0.8992718458175659, "learning_rate": 4.86776496073964e-06, "loss": 0.0384, "step": 134590 }, { "epoch": 4.8913438476633475, "grad_norm": 0.2479039430618286, "learning_rate": 4.8646887413301174e-06, "loss": 0.0501, "step": 134600 }, { "epoch": 4.8917072461661455, "grad_norm": 0.4045531451702118, "learning_rate": 4.861613389479524e-06, "loss": 0.0566, "step": 134610 }, { "epoch": 4.892070644668944, "grad_norm": 4.182795524597168, "learning_rate": 4.8585389053203505e-06, "loss": 0.0733, "step": 134620 }, { "epoch": 4.892434043171742, "grad_norm": 0.41220977902412415, "learning_rate": 4.855465288985078e-06, "loss": 3.4312, "step": 134630 }, { "epoch": 4.89279744167454, "grad_norm": 0.35393601655960083, "learning_rate": 4.852392540606118e-06, "loss": 0.0363, "step": 134640 }, { "epoch": 4.893160840177338, "grad_norm": 1.1564468145370483, "learning_rate": 4.849320660315893e-06, "loss": 0.0559, "step": 134650 }, { "epoch": 4.893524238680136, "grad_norm": 0.2777707874774933, "learning_rate": 4.846249648246734e-06, "loss": 0.0614, "step": 134660 }, { "epoch": 4.893887637182935, "grad_norm": 7.759231090545654, "learning_rate": 4.8431795045309785e-06, "loss": 0.0696, "step": 134670 }, { "epoch": 4.894251035685733, "grad_norm": 0.38638877868652344, "learning_rate": 4.840110229300895e-06, "loss": 0.059, "step": 134680 }, { "epoch": 
4.894614434188531, "grad_norm": 0.4673669636249542, "learning_rate": 4.837041822688732e-06, "loss": 0.0502, "step": 134690 }, { "epoch": 4.894977832691329, "grad_norm": 21.516677856445312, "learning_rate": 4.8339742848266975e-06, "loss": 0.0782, "step": 134700 }, { "epoch": 4.895341231194127, "grad_norm": 0.45382869243621826, "learning_rate": 4.830907615846966e-06, "loss": 0.1023, "step": 134710 }, { "epoch": 4.895704629696926, "grad_norm": 1.4939895868301392, "learning_rate": 4.827841815881657e-06, "loss": 0.072, "step": 134720 }, { "epoch": 4.896068028199724, "grad_norm": 0.5316845774650574, "learning_rate": 4.824776885062876e-06, "loss": 0.1065, "step": 134730 }, { "epoch": 4.896431426702522, "grad_norm": 0.317969411611557, "learning_rate": 4.821712823522665e-06, "loss": 0.0625, "step": 134740 }, { "epoch": 4.89679482520532, "grad_norm": 0.6256914138793945, "learning_rate": 4.818649631393066e-06, "loss": 0.0579, "step": 134750 }, { "epoch": 4.897158223708118, "grad_norm": 0.5761133432388306, "learning_rate": 4.815587308806041e-06, "loss": 0.0574, "step": 134760 }, { "epoch": 4.897521622210917, "grad_norm": 1.8064147233963013, "learning_rate": 4.8125258558935515e-06, "loss": 0.0823, "step": 134770 }, { "epoch": 4.897885020713715, "grad_norm": 0.27050405740737915, "learning_rate": 4.80946527278748e-06, "loss": 0.0594, "step": 134780 }, { "epoch": 4.898248419216513, "grad_norm": 0.39903682470321655, "learning_rate": 4.806405559619728e-06, "loss": 3.724, "step": 134790 }, { "epoch": 4.898611817719311, "grad_norm": 0.9503474831581116, "learning_rate": 4.803346716522103e-06, "loss": 0.0636, "step": 134800 }, { "epoch": 4.898975216222109, "grad_norm": 0.32447028160095215, "learning_rate": 4.800288743626413e-06, "loss": 0.0647, "step": 134810 }, { "epoch": 4.899338614724908, "grad_norm": 2.7910213470458984, "learning_rate": 4.797231641064406e-06, "loss": 0.055, "step": 134820 }, { "epoch": 4.899702013227706, "grad_norm": 0.7851807475090027, "learning_rate": 
4.794175408967805e-06, "loss": 0.0686, "step": 134830 }, { "epoch": 4.900065411730504, "grad_norm": 0.29661354422569275, "learning_rate": 4.7911200474682925e-06, "loss": 0.0531, "step": 134840 }, { "epoch": 4.900428810233302, "grad_norm": 0.38068997859954834, "learning_rate": 4.788065556697521e-06, "loss": 0.0546, "step": 134850 }, { "epoch": 4.9007922087360996, "grad_norm": 0.651679515838623, "learning_rate": 4.785011936787084e-06, "loss": 0.0504, "step": 134860 }, { "epoch": 4.901155607238898, "grad_norm": 0.4893336594104767, "learning_rate": 4.781959187868565e-06, "loss": 0.0589, "step": 134870 }, { "epoch": 4.901519005741696, "grad_norm": 0.2626447379589081, "learning_rate": 4.778907310073475e-06, "loss": 0.0484, "step": 134880 }, { "epoch": 4.901882404244494, "grad_norm": 0.25698304176330566, "learning_rate": 4.7758563035333325e-06, "loss": 0.0409, "step": 134890 }, { "epoch": 4.902245802747292, "grad_norm": 0.6791503429412842, "learning_rate": 4.7728061683795815e-06, "loss": 0.1159, "step": 134900 }, { "epoch": 4.90260920125009, "grad_norm": 0.3163858950138092, "learning_rate": 4.769756904743647e-06, "loss": 0.0749, "step": 134910 }, { "epoch": 4.902972599752889, "grad_norm": 0.8377051949501038, "learning_rate": 4.7667085127569036e-06, "loss": 0.0584, "step": 134920 }, { "epoch": 4.903335998255687, "grad_norm": 0.4694441854953766, "learning_rate": 4.7636609925507e-06, "loss": 0.0616, "step": 134930 }, { "epoch": 4.903699396758485, "grad_norm": 0.3535062074661255, "learning_rate": 4.760614344256342e-06, "loss": 0.0647, "step": 134940 }, { "epoch": 4.904062795261283, "grad_norm": 0.6276897192001343, "learning_rate": 4.757568568005108e-06, "loss": 0.0653, "step": 134950 }, { "epoch": 4.904426193764082, "grad_norm": 0.3415055572986603, "learning_rate": 4.754523663928215e-06, "loss": 0.0549, "step": 134960 }, { "epoch": 4.90478959226688, "grad_norm": 0.47502434253692627, "learning_rate": 4.751479632156864e-06, "loss": 0.0474, "step": 134970 }, { "epoch": 
4.905152990769678, "grad_norm": 0.635908305644989, "learning_rate": 4.74843647282221e-06, "loss": 0.0532, "step": 134980 }, { "epoch": 4.905516389272476, "grad_norm": 0.29876193404197693, "learning_rate": 4.74539418605538e-06, "loss": 1.7914, "step": 134990 }, { "epoch": 4.905879787775274, "grad_norm": 1.087601661682129, "learning_rate": 4.742352771987441e-06, "loss": 0.0504, "step": 135000 }, { "epoch": 4.905879787775274, "eval_loss": 0.29693716764450073, "eval_runtime": 179.7829, "eval_samples_per_second": 41.239, "eval_steps_per_second": 5.156, "eval_wer": 0.12187993537495234, "step": 135000 } ], "logging_steps": 10, "max_steps": 165108, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.418859722241896e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }