diff --git "a/checkpoint-117000/trainer_state.json" "b/checkpoint-117000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-117000/trainer_state.json" @@ -0,0 +1,83688 @@ +{ + "best_metric": 0.1268266560168461, + "best_model_checkpoint": "./checkpoints/w2v-pa-v2/checkpoint-117000", + "epoch": 4.251762482738571, + "eval_steps": 600, + "global_step": 117000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00036339850279816846, + "grad_norm": 12.712770462036133, + "learning_rate": 3.633985027981685e-08, + "loss": 8.1015, + "step": 10 + }, + { + "epoch": 0.0007267970055963369, + "grad_norm": 14.929631233215332, + "learning_rate": 7.26797005596337e-08, + "loss": 8.5111, + "step": 20 + }, + { + "epoch": 0.0010901955083945055, + "grad_norm": 12.774781227111816, + "learning_rate": 1.0901955083945056e-07, + "loss": 8.71, + "step": 30 + }, + { + "epoch": 0.0014535940111926739, + "grad_norm": 15.27083969116211, + "learning_rate": 1.417254160912857e-07, + "loss": 9.2894, + "step": 40 + }, + { + "epoch": 0.0018169925139908424, + "grad_norm": 31.209775924682617, + "learning_rate": 1.7806526637110256e-07, + "loss": 8.6779, + "step": 50 + }, + { + "epoch": 0.002180391016789011, + "grad_norm": 12.960335731506348, + "learning_rate": 2.1440511665091943e-07, + "loss": 8.0399, + "step": 60 + }, + { + "epoch": 0.002543789519587179, + "grad_norm": 14.452157020568848, + "learning_rate": 2.5074496693073626e-07, + "loss": 7.9357, + "step": 70 + }, + { + "epoch": 0.0029071880223853477, + "grad_norm": 12.74867057800293, + "learning_rate": 2.8708481721055307e-07, + "loss": 8.0764, + "step": 80 + }, + { + "epoch": 0.0032705865251835163, + "grad_norm": 19.905397415161133, + "learning_rate": 3.2342466749036993e-07, + "loss": 8.2522, + "step": 90 + }, + { + "epoch": 0.003633985027981685, + "grad_norm": Infinity, + "learning_rate": 3.5613053274220513e-07, + "loss": 7.8161, + "step": 100 + }, + { + "epoch": 0.003997383530779853, + "grad_norm": 15.877684593200684, + "learning_rate": 3.92470383022022e-07, + "loss": 7.4866, + "step": 110 + }, + { + "epoch": 0.004360782033578022, + "grad_norm": 19.216800689697266, + "learning_rate": 4.2881023330183885e-07, + "loss": 7.0324, + "step": 120 + }, + { + "epoch": 0.0047241805363761906, + "grad_norm": 16.937118530273438, + "learning_rate": 4.651500835816557e-07, + "loss": 6.7873, + "step": 130 + }, + { + "epoch": 0.005087579039174358, + "grad_norm": 27.858692169189453, + "learning_rate": 5.014899338614725e-07, + "loss": 7.2063, + "step": 140 + }, + { + "epoch": 0.005450977541972527, + "grad_norm": Infinity, + "learning_rate": 5.341957991133076e-07, + "loss": 6.5827, + "step": 150 + }, + { + "epoch": 0.005814376044770695, + "grad_norm": 21.252164840698242, + "learning_rate": 5.669016643651428e-07, + "loss": 7.819, + "step": 160 + }, + { + "epoch": 0.006177774547568864, + "grad_norm": 20.977886199951172, + "learning_rate": 6.032415146449597e-07, + "loss": 5.4741, + "step": 170 + }, + { + "epoch": 0.0065411730503670325, + "grad_norm": 35.25390625, + "learning_rate": 6.395813649247765e-07, + "loss": 5.7225, + "step": 180 + }, + { + "epoch": 0.006904571553165201, + "grad_norm": 7.165033340454102, + "learning_rate": 6.759212152045934e-07, + "loss": 4.7475, + "step": 190 + }, + { + "epoch": 0.00726797005596337, + "grad_norm": 14.877301216125488, + "learning_rate": 7.122610654844103e-07, + "loss": 4.6375, + "step": 200 + }, + { + "epoch": 0.007631368558761538, + "grad_norm": 5.826667785644531, + "learning_rate": 7.486009157642272e-07, + "loss": 4.356, + "step": 210 + }, + { + "epoch": 0.007994767061559707, + "grad_norm": 6.022212982177734, + "learning_rate": 7.84940766044044e-07, + "loss": 4.2138, + "step": 220 + }, + { + "epoch": 0.008358165564357875, + "grad_norm": 4.790489196777344, + "learning_rate": 8.212806163238608e-07, + "loss": 4.0662, + "step": 230 + }, + { + "epoch": 0.008721564067156044, + "grad_norm": 4.448057174682617, + "learning_rate": 8.576204666036777e-07, + "loss": 3.9507, + "step": 240 + }, + { + "epoch": 0.009084962569954213, + "grad_norm": 47.487003326416016, + "learning_rate": 8.939603168834945e-07, + "loss": 3.9576, + "step": 250 + }, + { + "epoch": 0.009448361072752381, + "grad_norm": 8.64856243133545, + "learning_rate": 9.303001671633114e-07, + "loss": 3.7102, + "step": 260 + }, + { + "epoch": 0.009811759575550548, + "grad_norm": 8.821709632873535, + "learning_rate": 9.66640017443128e-07, + "loss": 3.6644, + "step": 270 + }, + { + "epoch": 0.010175158078348717, + "grad_norm": 14.071539878845215, + "learning_rate": 1.002979867722945e-06, + "loss": 3.6909, + "step": 280 + }, + { + "epoch": 0.010538556581146885, + "grad_norm": 6.68039083480835, + "learning_rate": 1.0393197180027619e-06, + "loss": 3.6458, + "step": 290 + }, + { + "epoch": 0.010901955083945054, + "grad_norm": 20.664649963378906, + "learning_rate": 1.0756595682825787e-06, + "loss": 3.6332, + "step": 300 + }, + { + "epoch": 0.011265353586743222, + "grad_norm": 2.5272624492645264, + "learning_rate": 1.1119994185623955e-06, + "loss": 3.4835, + "step": 310 + }, + { + "epoch": 0.01162875208954139, + "grad_norm": 8.353235244750977, + "learning_rate": 1.1483392688422123e-06, + "loss": 3.4892, + "step": 320 + }, + { + "epoch": 0.01199215059233956, + "grad_norm": 7.0964531898498535, + "learning_rate": 1.1846791191220293e-06, + "loss": 3.5586, + "step": 330 + }, + { + "epoch": 0.012355549095137728, + "grad_norm": 4.734161376953125, + "learning_rate": 1.2210189694018461e-06, + "loss": 3.4996, + "step": 340 + }, + { + "epoch": 0.012718947597935897, + "grad_norm": 47.409996032714844, + "learning_rate": 1.257358819681663e-06, + "loss": 3.5565, + "step": 350 + }, + { + "epoch": 0.013082346100734065, + "grad_norm": 2.880244016647339, + "learning_rate": 1.2936986699614797e-06, + "loss": 3.4154, + "step": 360 + }, + { + "epoch": 0.013445744603532234, + "grad_norm": 6.637233734130859, + "learning_rate": 1.3300385202412968e-06, + "loss": 3.4119, + "step": 370 + }, + { + "epoch": 0.013809143106330402, + "grad_norm": 11.791736602783203, + "learning_rate": 1.3663783705211136e-06, + "loss": 3.4347, + "step": 380 + }, + { + "epoch": 0.01417254160912857, + "grad_norm": 8.274836540222168, + "learning_rate": 1.4027182208009304e-06, + "loss": 3.4253, + "step": 390 + }, + { + "epoch": 0.01453594011192674, + "grad_norm": 10.09929084777832, + "learning_rate": 1.4390580710807472e-06, + "loss": 3.4702, + "step": 400 + }, + { + "epoch": 0.014899338614724908, + "grad_norm": 6.32951545715332, + "learning_rate": 1.4753979213605642e-06, + "loss": 3.3513, + "step": 410 + }, + { + "epoch": 0.015262737117523077, + "grad_norm": 2.4888486862182617, + "learning_rate": 1.511737771640381e-06, + "loss": 3.3421, + "step": 420 + }, + { + "epoch": 0.015626135620321245, + "grad_norm": 3.02103328704834, + "learning_rate": 1.5480776219201978e-06, + "loss": 3.397, + "step": 430 + }, + { + "epoch": 0.015989534123119414, + "grad_norm": 7.464268207550049, + "learning_rate": 1.5844174722000146e-06, + "loss": 3.3582, + "step": 440 + }, + { + "epoch": 0.016352932625917582, + "grad_norm": 18.908123016357422, + "learning_rate": 1.6207573224798317e-06, + "loss": 3.4034, + "step": 450 + }, + { + "epoch": 0.01671633112871575, + "grad_norm": 2.487326145172119, + "learning_rate": 1.6570971727596485e-06, + "loss": 3.2229, + "step": 460 + }, + { + "epoch": 0.01707972963151392, + "grad_norm": 2.3999946117401123, + "learning_rate": 1.6934370230394653e-06, + "loss": 3.2185, + "step": 470 + }, + { + "epoch": 0.017443128134312088, + "grad_norm": 5.007234573364258, + "learning_rate": 1.729776873319282e-06, + "loss": 3.2069, + "step": 480 + }, + { + "epoch": 0.017806526637110257, + "grad_norm": 6.393301963806152, + "learning_rate": 1.766116723599099e-06, + "loss": 3.0687, + "step": 490 + }, + { + "epoch": 0.018169925139908425, + "grad_norm": 45.44938278198242, + "learning_rate": 1.802456573878916e-06, + "loss": 3.112, + "step": 500 + }, + { + "epoch": 0.018533323642706594, + "grad_norm": 7.32182502746582, + "learning_rate": 1.8387964241587327e-06, + "loss": 2.951, + "step": 510 + }, + { + "epoch": 0.018896722145504762, + "grad_norm": 3.3864173889160156, + "learning_rate": 1.8751362744385495e-06, + "loss": 2.8879, + "step": 520 + }, + { + "epoch": 0.019260120648302927, + "grad_norm": 5.429958343505859, + "learning_rate": 1.911476124718366e-06, + "loss": 2.7393, + "step": 530 + }, + { + "epoch": 0.019623519151101096, + "grad_norm": 5.3577985763549805, + "learning_rate": 1.947815974998183e-06, + "loss": 2.4813, + "step": 540 + }, + { + "epoch": 0.019986917653899264, + "grad_norm": 13.970659255981445, + "learning_rate": 1.9841558252779998e-06, + "loss": 2.3787, + "step": 550 + }, + { + "epoch": 0.020350316156697433, + "grad_norm": 5.2666754722595215, + "learning_rate": 2.0204956755578166e-06, + "loss": 2.207, + "step": 560 + }, + { + "epoch": 0.0207137146594956, + "grad_norm": 4.184991359710693, + "learning_rate": 2.0568355258376334e-06, + "loss": 2.0383, + "step": 570 + }, + { + "epoch": 0.02107711316229377, + "grad_norm": 6.312343597412109, + "learning_rate": 2.09317537611745e-06, + "loss": 1.8416, + "step": 580 + }, + { + "epoch": 0.02144051166509194, + "grad_norm": 4.754147529602051, + "learning_rate": 2.1295152263972674e-06, + "loss": 1.6002, + "step": 590 + }, + { + "epoch": 0.021803910167890107, + "grad_norm": 21.47913360595703, + "learning_rate": 2.1658550766770842e-06, + "loss": 1.6015, + "step": 600 + }, + { + "epoch": 0.021803910167890107, + "eval_loss": 1.5154471397399902, + "eval_runtime": 180.9184, + "eval_samples_per_second": 40.98, + "eval_steps_per_second": 5.124, + "eval_wer": 0.7997531177954872, + "step": 600 + }, + { + "epoch": 0.022167308670688276, + "grad_norm": 4.2374348640441895, + "learning_rate": 2.202194926956901e-06, + "loss": 1.4842, + "step": 610 + }, + { + "epoch": 0.022530707173486444, + "grad_norm": 4.392132759094238, + "learning_rate": 2.238534777236718e-06, + "loss": 1.3776, + "step": 620 + }, + { + "epoch": 0.022894105676284613, + "grad_norm": 4.682064533233643, + "learning_rate": 2.2748746275165347e-06, + "loss": 1.3177, + "step": 630 + }, + { + "epoch": 0.02325750417908278, + "grad_norm": 4.8396077156066895, + "learning_rate": 2.3112144777963515e-06, + "loss": 1.0737, + "step": 640 + }, + { + "epoch": 0.02362090268188095, + "grad_norm": 33.27382278442383, + "learning_rate": 2.3475543280761683e-06, + "loss": 1.3046, + "step": 650 + }, + { + "epoch": 0.02398430118467912, + "grad_norm": 5.410325050354004, + "learning_rate": 2.383894178355985e-06, + "loss": 1.1021, + "step": 660 + }, + { + "epoch": 0.024347699687477287, + "grad_norm": 3.9523680210113525, + "learning_rate": 2.420234028635802e-06, + "loss": 1.0602, + "step": 670 + }, + { + "epoch": 0.024711098190275456, + "grad_norm": 9.141073226928711, + "learning_rate": 2.456573878915619e-06, + "loss": 1.0631, + "step": 680 + }, + { + "epoch": 0.025074496693073624, + "grad_norm": 5.3534626960754395, + "learning_rate": 2.492913729195436e-06, + "loss": 0.8968, + "step": 690 + }, + { + "epoch": 0.025437895195871793, + "grad_norm": 32.30677795410156, + "learning_rate": 2.5292535794752527e-06, + "loss": 1.0439, + "step": 700 + }, + { + "epoch": 0.02580129369866996, + "grad_norm": 4.310474872589111, + "learning_rate": 2.5655934297550696e-06, + "loss": 0.954, + "step": 710 + }, + { + "epoch": 0.02616469220146813, + "grad_norm": 5.586440563201904, + "learning_rate": 2.6019332800348864e-06, + "loss": 1.0031, + "step": 720 + }, + { + "epoch": 0.0265280907042663, + "grad_norm": 3.6927313804626465, + "learning_rate": 2.638273130314703e-06, + "loss": 0.7956, + "step": 730 + }, + { + "epoch": 0.026891489207064467, + "grad_norm": 4.270529747009277, + "learning_rate": 2.67461298059452e-06, + "loss": 0.8874, + "step": 740 + }, + { + "epoch": 0.027254887709862636, + "grad_norm": 23.553489685058594, + "learning_rate": 2.710952830874337e-06, + "loss": 0.8523, + "step": 750 + }, + { + "epoch": 0.027618286212660804, + "grad_norm": 5.342041492462158, + "learning_rate": 2.747292681154154e-06, + "loss": 0.9029, + "step": 760 + }, + { + "epoch": 0.027981684715458973, + "grad_norm": 3.3802621364593506, + "learning_rate": 2.783632531433971e-06, + "loss": 0.8378, + "step": 770 + }, + { + "epoch": 0.02834508321825714, + "grad_norm": 6.378807067871094, + "learning_rate": 2.8199723817137876e-06, + "loss": 0.8085, + "step": 780 + }, + { + "epoch": 0.02870848172105531, + "grad_norm": 4.007000923156738, + "learning_rate": 2.8563122319936045e-06, + "loss": 0.8218, + "step": 790 + }, + { + "epoch": 0.02907188022385348, + "grad_norm": 68.16226196289062, + "learning_rate": 2.8926520822734213e-06, + "loss": 1.2055, + "step": 800 + }, + { + "epoch": 0.029435278726651647, + "grad_norm": 6.70043420791626, + "learning_rate": 2.928991932553238e-06, + "loss": 0.7641, + "step": 810 + }, + { + "epoch": 0.029798677229449816, + "grad_norm": 5.498161315917969, + "learning_rate": 2.965331782833055e-06, + "loss": 0.7739, + "step": 820 + }, + { + "epoch": 0.030162075732247984, + "grad_norm": 9.515852928161621, + "learning_rate": 3.0016716331128717e-06, + "loss": 0.8293, + "step": 830 + }, + { + "epoch": 0.030525474235046153, + "grad_norm": 13.3881196975708, + "learning_rate": 3.0380114833926885e-06, + "loss": 0.5597, + "step": 840 + }, + { + "epoch": 0.03088887273784432, + "grad_norm": 13.670549392700195, + "learning_rate": 3.0743513336725057e-06, + "loss": 0.7658, + "step": 850 + }, + { + "epoch": 0.03125227124064249, + "grad_norm": 3.58305287361145, + "learning_rate": 3.1106911839523226e-06, + "loss": 0.7036, + "step": 860 + }, + { + "epoch": 0.031615669743440655, + "grad_norm": 4.119450569152832, + "learning_rate": 3.147031034232139e-06, + "loss": 0.6842, + "step": 870 + }, + { + "epoch": 0.03197906824623883, + "grad_norm": 6.412299156188965, + "learning_rate": 3.183370884511956e-06, + "loss": 0.7148, + "step": 880 + }, + { + "epoch": 0.03234246674903699, + "grad_norm": 8.700023651123047, + "learning_rate": 3.2197107347917726e-06, + "loss": 1.4861, + "step": 890 + }, + { + "epoch": 0.032705865251835164, + "grad_norm": 18.78075408935547, + "learning_rate": 3.25605058507159e-06, + "loss": 0.7162, + "step": 900 + }, + { + "epoch": 0.03306926375463333, + "grad_norm": 4.078335762023926, + "learning_rate": 3.292390435351406e-06, + "loss": 0.643, + "step": 910 + }, + { + "epoch": 0.0334326622574315, + "grad_norm": 6.603452682495117, + "learning_rate": 3.3287302856312234e-06, + "loss": 0.6623, + "step": 920 + }, + { + "epoch": 0.03379606076022967, + "grad_norm": 5.817732334136963, + "learning_rate": 3.3650701359110402e-06, + "loss": 0.6265, + "step": 930 + }, + { + "epoch": 0.03415945926302784, + "grad_norm": 8.310086250305176, + "learning_rate": 3.4014099861908575e-06, + "loss": 0.5343, + "step": 940 + }, + { + "epoch": 0.034522857765826004, + "grad_norm": NaN, + "learning_rate": 3.4341158514426923e-06, + "loss": 0.6999, + "step": 950 + }, + { + "epoch": 0.034886256268624176, + "grad_norm": 4.416926860809326, + "learning_rate": 3.4704557017225087e-06, + "loss": 0.6688, + "step": 960 + }, + { + "epoch": 0.03524965477142234, + "grad_norm": 3.2407495975494385, + "learning_rate": 3.506795552002326e-06, + "loss": 0.5808, + "step": 970 + }, + { + "epoch": 0.03561305327422051, + "grad_norm": NaN, + "learning_rate": 3.539501417254161e-06, + "loss": 3.0266, + "step": 980 + }, + { + "epoch": 0.03597645177701868, + "grad_norm": 8.086112022399902, + "learning_rate": 3.575841267533978e-06, + "loss": 0.5829, + "step": 990 + }, + { + "epoch": 0.03633985027981685, + "grad_norm": 252.45077514648438, + "learning_rate": 3.612181117813795e-06, + "loss": 0.7371, + "step": 1000 + }, + { + "epoch": 0.036703248782615015, + "grad_norm": 3.5969936847686768, + "learning_rate": 3.648520968093612e-06, + "loss": 0.6632, + "step": 1010 + }, + { + "epoch": 0.03706664728541319, + "grad_norm": 3.0116841793060303, + "learning_rate": 3.6848608183734285e-06, + "loss": 0.537, + "step": 1020 + }, + { + "epoch": 0.03743004578821135, + "grad_norm": 5.494657039642334, + "learning_rate": 3.7212006686532457e-06, + "loss": 0.5422, + "step": 1030 + }, + { + "epoch": 0.037793444291009524, + "grad_norm": 21.526798248291016, + "learning_rate": 3.757540518933062e-06, + "loss": 0.5003, + "step": 1040 + }, + { + "epoch": 0.03815684279380769, + "grad_norm": 80.90055084228516, + "learning_rate": 3.7938803692128793e-06, + "loss": 0.6566, + "step": 1050 + }, + { + "epoch": 0.038520241296605855, + "grad_norm": 3.7678096294403076, + "learning_rate": 3.830220219492696e-06, + "loss": 0.5758, + "step": 1060 + }, + { + "epoch": 0.03888363979940403, + "grad_norm": 4.526616096496582, + "learning_rate": 3.866560069772512e-06, + "loss": 0.5648, + "step": 1070 + }, + { + "epoch": 0.03924703830220219, + "grad_norm": 4.571674346923828, + "learning_rate": 3.90289992005233e-06, + "loss": 0.5864, + "step": 1080 + }, + { + "epoch": 0.039610436805000364, + "grad_norm": 5.295219421386719, + "learning_rate": 3.939239770332146e-06, + "loss": 0.4476, + "step": 1090 + }, + { + "epoch": 0.03997383530779853, + "grad_norm": 16.631162643432617, + "learning_rate": 3.975579620611963e-06, + "loss": 0.6198, + "step": 1100 + }, + { + "epoch": 0.0403372338105967, + "grad_norm": 4.685397624969482, + "learning_rate": 4.01191947089178e-06, + "loss": 0.7512, + "step": 1110 + }, + { + "epoch": 0.040700632313394866, + "grad_norm": 3.333232879638672, + "learning_rate": 4.048259321171597e-06, + "loss": 0.5087, + "step": 1120 + }, + { + "epoch": 0.04106403081619304, + "grad_norm": 5.501911640167236, + "learning_rate": 4.084599171451414e-06, + "loss": 0.5772, + "step": 1130 + }, + { + "epoch": 0.0414274293189912, + "grad_norm": 8.066693305969238, + "learning_rate": 4.120939021731231e-06, + "loss": 0.4641, + "step": 1140 + }, + { + "epoch": 0.041790827821789375, + "grad_norm": 13.463829040527344, + "learning_rate": 4.1572788720110474e-06, + "loss": 0.5192, + "step": 1150 + }, + { + "epoch": 0.04215422632458754, + "grad_norm": 4.132773399353027, + "learning_rate": 4.193618722290864e-06, + "loss": 0.4696, + "step": 1160 + }, + { + "epoch": 0.04251762482738571, + "grad_norm": 6.176777362823486, + "learning_rate": 4.229958572570681e-06, + "loss": 0.4851, + "step": 1170 + }, + { + "epoch": 0.04288102333018388, + "grad_norm": 8.26610279083252, + "learning_rate": 4.266298422850498e-06, + "loss": 0.4967, + "step": 1180 + }, + { + "epoch": 0.04324442183298205, + "grad_norm": 3.9725544452667236, + "learning_rate": 4.302638273130315e-06, + "loss": 0.431, + "step": 1190 + }, + { + "epoch": 0.043607820335780215, + "grad_norm": 22.353294372558594, + "learning_rate": 4.338978123410132e-06, + "loss": 0.6523, + "step": 1200 + }, + { + "epoch": 0.043607820335780215, + "eval_loss": 0.5945897102355957, + "eval_runtime": 180.5168, + "eval_samples_per_second": 41.071, + "eval_steps_per_second": 5.135, + "eval_wer": 0.3718390908925881, + "step": 1200 + }, + { + "epoch": 0.04397121883857839, + "grad_norm": 3.7954189777374268, + "learning_rate": 4.375317973689948e-06, + "loss": 0.4511, + "step": 1210 + }, + { + "epoch": 0.04433461734137655, + "grad_norm": 5.583435535430908, + "learning_rate": 4.411657823969766e-06, + "loss": 1.4019, + "step": 1220 + }, + { + "epoch": 0.044698015844174724, + "grad_norm": 8.544243812561035, + "learning_rate": 4.447997674249582e-06, + "loss": 0.4467, + "step": 1230 + }, + { + "epoch": 0.04506141434697289, + "grad_norm": 3.8716418743133545, + "learning_rate": 4.4843375245293996e-06, + "loss": 0.4347, + "step": 1240 + }, + { + "epoch": 0.04542481284977106, + "grad_norm": 19.459606170654297, + "learning_rate": 4.5206773748092155e-06, + "loss": 0.616, + "step": 1250 + }, + { + "epoch": 0.045788211352569226, + "grad_norm": 5.474793434143066, + "learning_rate": 4.557017225089033e-06, + "loss": 0.4689, + "step": 1260 + }, + { + "epoch": 0.0461516098553674, + "grad_norm": 4.705495834350586, + "learning_rate": 4.593357075368849e-06, + "loss": 0.4623, + "step": 1270 + }, + { + "epoch": 0.04651500835816556, + "grad_norm": 6.779942035675049, + "learning_rate": 4.629696925648667e-06, + "loss": 0.4418, + "step": 1280 + }, + { + "epoch": 0.046878406860963735, + "grad_norm": 6.802936553955078, + "learning_rate": 4.666036775928484e-06, + "loss": 0.4429, + "step": 1290 + }, + { + "epoch": 0.0472418053637619, + "grad_norm": 17.47754669189453, + "learning_rate": 4.7023766262083004e-06, + "loss": 0.596, + "step": 1300 + }, + { + "epoch": 0.04760520386656007, + "grad_norm": 4.036036968231201, + "learning_rate": 4.738716476488117e-06, + "loss": 0.4362, + "step": 1310 + }, + { + "epoch": 0.04796860236935824, + "grad_norm": 6.022701740264893, + "learning_rate": 4.775056326767934e-06, + "loss": 0.5092, + "step": 1320 + }, + { + "epoch": 0.04833200087215641, + "grad_norm": 5.533923625946045, + "learning_rate": 4.811396177047751e-06, + "loss": 0.4358, + "step": 1330 + }, + { + "epoch": 0.048695399374954575, + "grad_norm": 3.4037017822265625, + "learning_rate": 4.847736027327568e-06, + "loss": 0.3684, + "step": 1340 + }, + { + "epoch": 0.04905879787775275, + "grad_norm": 13.625974655151367, + "learning_rate": 4.8840758776073845e-06, + "loss": 0.583, + "step": 1350 + }, + { + "epoch": 0.04942219638055091, + "grad_norm": 3.597294330596924, + "learning_rate": 4.920415727887201e-06, + "loss": 0.4561, + "step": 1360 + }, + { + "epoch": 0.049785594883349084, + "grad_norm": 2.8846936225891113, + "learning_rate": 4.956755578167018e-06, + "loss": 0.409, + "step": 1370 + }, + { + "epoch": 0.05014899338614725, + "grad_norm": 5.500187397003174, + "learning_rate": 4.993095428446836e-06, + "loss": 0.4531, + "step": 1380 + }, + { + "epoch": 0.05051239188894542, + "grad_norm": 3.1203413009643555, + "learning_rate": 5.029435278726652e-06, + "loss": 0.4004, + "step": 1390 + }, + { + "epoch": 0.050875790391743586, + "grad_norm": 98.18115234375, + "learning_rate": 5.0657751290064685e-06, + "loss": 0.522, + "step": 1400 + }, + { + "epoch": 0.05123918889454175, + "grad_norm": 3.901418924331665, + "learning_rate": 5.102114979286285e-06, + "loss": 0.4041, + "step": 1410 + }, + { + "epoch": 0.05160258739733992, + "grad_norm": 4.045637130737305, + "learning_rate": 5.138454829566102e-06, + "loss": 0.4051, + "step": 1420 + }, + { + "epoch": 0.05196598590013809, + "grad_norm": 6.835183143615723, + "learning_rate": 5.174794679845919e-06, + "loss": 0.4937, + "step": 1430 + }, + { + "epoch": 0.05232938440293626, + "grad_norm": 7.708272457122803, + "learning_rate": 5.211134530125736e-06, + "loss": 0.3818, + "step": 1440 + }, + { + "epoch": 0.052692782905734425, + "grad_norm": 24.2607364654541, + "learning_rate": 5.247474380405553e-06, + "loss": 0.5445, + "step": 1450 + }, + { + "epoch": 0.0530561814085326, + "grad_norm": 3.3517005443573, + "learning_rate": 5.283814230685369e-06, + "loss": 0.4079, + "step": 1460 + }, + { + "epoch": 0.05341957991133076, + "grad_norm": 12.727778434753418, + "learning_rate": 5.320154080965187e-06, + "loss": 0.4285, + "step": 1470 + }, + { + "epoch": 0.053782978414128935, + "grad_norm": 4.984294891357422, + "learning_rate": 5.356493931245003e-06, + "loss": 0.5006, + "step": 1480 + }, + { + "epoch": 0.0541463769169271, + "grad_norm": 3.3041558265686035, + "learning_rate": 5.392833781524821e-06, + "loss": 0.3729, + "step": 1490 + }, + { + "epoch": 0.05450977541972527, + "grad_norm": 38.074546813964844, + "learning_rate": 5.429173631804637e-06, + "loss": 0.5401, + "step": 1500 + }, + { + "epoch": 0.05487317392252344, + "grad_norm": 5.649720668792725, + "learning_rate": 5.465513482084454e-06, + "loss": 0.3879, + "step": 1510 + }, + { + "epoch": 0.05523657242532161, + "grad_norm": 3.107583522796631, + "learning_rate": 5.501853332364271e-06, + "loss": 0.4144, + "step": 1520 + }, + { + "epoch": 0.055599970928119774, + "grad_norm": 19.246564865112305, + "learning_rate": 5.538193182644088e-06, + "loss": 0.4314, + "step": 1530 + }, + { + "epoch": 0.055963369430917946, + "grad_norm": 4.72367525100708, + "learning_rate": 5.574533032923905e-06, + "loss": 0.3576, + "step": 1540 + }, + { + "epoch": 0.05632676793371611, + "grad_norm": 25.88886260986328, + "learning_rate": 5.6108728832037215e-06, + "loss": 0.5385, + "step": 1550 + }, + { + "epoch": 0.05669016643651428, + "grad_norm": 3.1524956226348877, + "learning_rate": 5.647212733483538e-06, + "loss": 0.4075, + "step": 1560 + }, + { + "epoch": 0.05705356493931245, + "grad_norm": 3.883281707763672, + "learning_rate": 5.683552583763355e-06, + "loss": 0.4242, + "step": 1570 + }, + { + "epoch": 0.05741696344211062, + "grad_norm": 16.935935974121094, + "learning_rate": 5.719892434043172e-06, + "loss": 0.6194, + "step": 1580 + }, + { + "epoch": 0.057780361944908785, + "grad_norm": 4.23909330368042, + "learning_rate": 5.756232284322989e-06, + "loss": 0.4206, + "step": 1590 + }, + { + "epoch": 0.05814376044770696, + "grad_norm": 16.6039981842041, + "learning_rate": 5.7925721346028056e-06, + "loss": 0.4854, + "step": 1600 + }, + { + "epoch": 0.05850715895050512, + "grad_norm": 2.5220890045166016, + "learning_rate": 5.828911984882622e-06, + "loss": 0.4186, + "step": 1610 + }, + { + "epoch": 0.058870557453303295, + "grad_norm": 3.075101613998413, + "learning_rate": 5.865251835162439e-06, + "loss": 0.8877, + "step": 1620 + }, + { + "epoch": 0.05923395595610146, + "grad_norm": 5.511383056640625, + "learning_rate": 5.901591685442257e-06, + "loss": 0.4219, + "step": 1630 + }, + { + "epoch": 0.05959735445889963, + "grad_norm": 2.9449989795684814, + "learning_rate": 5.937931535722073e-06, + "loss": 0.2992, + "step": 1640 + }, + { + "epoch": 0.0599607529616978, + "grad_norm": 31.823612213134766, + "learning_rate": 5.9742713860018905e-06, + "loss": 0.5939, + "step": 1650 + }, + { + "epoch": 0.06032415146449597, + "grad_norm": 4.240995407104492, + "learning_rate": 6.010611236281706e-06, + "loss": 0.4176, + "step": 1660 + }, + { + "epoch": 0.060687549967294134, + "grad_norm": 2.6084980964660645, + "learning_rate": 6.046951086561524e-06, + "loss": 0.3542, + "step": 1670 + }, + { + "epoch": 0.061050948470092306, + "grad_norm": 8.318774223327637, + "learning_rate": 6.08329093684134e-06, + "loss": 0.3968, + "step": 1680 + }, + { + "epoch": 0.06141434697289047, + "grad_norm": 5.18604850769043, + "learning_rate": 6.119630787121158e-06, + "loss": 0.3879, + "step": 1690 + }, + { + "epoch": 0.06177774547568864, + "grad_norm": 51.732086181640625, + "learning_rate": 6.1559706374009745e-06, + "loss": 0.5025, + "step": 1700 + }, + { + "epoch": 0.06214114397848681, + "grad_norm": 2.5876500606536865, + "learning_rate": 6.192310487680791e-06, + "loss": 0.3558, + "step": 1710 + }, + { + "epoch": 0.06250454248128498, + "grad_norm": 5.071794033050537, + "learning_rate": 6.228650337960608e-06, + "loss": 0.3534, + "step": 1720 + }, + { + "epoch": 0.06286794098408315, + "grad_norm": 11.539891242980957, + "learning_rate": 6.264990188240424e-06, + "loss": 0.4628, + "step": 1730 + }, + { + "epoch": 0.06323133948688131, + "grad_norm": 3.275383710861206, + "learning_rate": 6.301330038520243e-06, + "loss": 0.3368, + "step": 1740 + }, + { + "epoch": 0.06359473798967948, + "grad_norm": 41.4942741394043, + "learning_rate": 6.3376698888000586e-06, + "loss": 0.53, + "step": 1750 + }, + { + "epoch": 0.06395813649247765, + "grad_norm": 3.0071399211883545, + "learning_rate": 6.374009739079875e-06, + "loss": 0.3623, + "step": 1760 + }, + { + "epoch": 0.06432153499527582, + "grad_norm": 3.385955333709717, + "learning_rate": 6.410349589359692e-06, + "loss": 0.3476, + "step": 1770 + }, + { + "epoch": 0.06468493349807398, + "grad_norm": 3.872527599334717, + "learning_rate": 6.446689439639508e-06, + "loss": 0.3168, + "step": 1780 + }, + { + "epoch": 0.06504833200087215, + "grad_norm": 4.668768882751465, + "learning_rate": 6.483029289919327e-06, + "loss": 0.3813, + "step": 1790 + }, + { + "epoch": 0.06541173050367033, + "grad_norm": 69.33656311035156, + "learning_rate": 6.519369140199143e-06, + "loss": 0.4557, + "step": 1800 + }, + { + "epoch": 0.06541173050367033, + "eval_loss": 0.5579342246055603, + "eval_runtime": 180.31, + "eval_samples_per_second": 41.118, + "eval_steps_per_second": 5.141, + "eval_wer": 0.34055221740156477, + "step": 1800 + }, + { + "epoch": 0.0657751290064685, + "grad_norm": 4.2848381996154785, + "learning_rate": 6.555708990478959e-06, + "loss": 0.5481, + "step": 1810 + }, + { + "epoch": 0.06613852750926666, + "grad_norm": 11.31700325012207, + "learning_rate": 6.592048840758775e-06, + "loss": 0.3479, + "step": 1820 + }, + { + "epoch": 0.06650192601206482, + "grad_norm": 6.088991165161133, + "learning_rate": 6.628388691038594e-06, + "loss": 0.3994, + "step": 1830 + }, + { + "epoch": 0.066865324514863, + "grad_norm": 4.342681407928467, + "learning_rate": 6.66472854131841e-06, + "loss": 0.2953, + "step": 1840 + }, + { + "epoch": 0.06722872301766117, + "grad_norm": 75.97467041015625, + "learning_rate": 6.701068391598227e-06, + "loss": 0.4807, + "step": 1850 + }, + { + "epoch": 0.06759212152045933, + "grad_norm": 3.8739049434661865, + "learning_rate": 6.7374082418780435e-06, + "loss": 0.3963, + "step": 1860 + }, + { + "epoch": 0.0679555200232575, + "grad_norm": 2.6209168434143066, + "learning_rate": 6.773748092157861e-06, + "loss": 0.422, + "step": 1870 + }, + { + "epoch": 0.06831891852605568, + "grad_norm": 17.530773162841797, + "learning_rate": 6.810087942437678e-06, + "loss": 0.3939, + "step": 1880 + }, + { + "epoch": 0.06868231702885384, + "grad_norm": 3.475748300552368, + "learning_rate": 6.846427792717494e-06, + "loss": 0.2996, + "step": 1890 + }, + { + "epoch": 0.06904571553165201, + "grad_norm": 20.979995727539062, + "learning_rate": 6.882767642997311e-06, + "loss": 0.4528, + "step": 1900 + }, + { + "epoch": 0.06940911403445017, + "grad_norm": 3.8432774543762207, + "learning_rate": 6.919107493277128e-06, + "loss": 0.2959, + "step": 1910 + }, + { + "epoch": 0.06977251253724835, + "grad_norm": 7.830467700958252, + "learning_rate": 6.955447343556945e-06, + "loss": 0.3378, + "step": 1920 + }, + { + "epoch": 0.07013591104004652, + "grad_norm": 15.633039474487305, + "learning_rate": 6.991787193836762e-06, + "loss": 0.4, + "step": 1930 + }, + { + "epoch": 0.07049930954284468, + "grad_norm": 13.628314971923828, + "learning_rate": 7.028127044116578e-06, + "loss": 0.3255, + "step": 1940 + }, + { + "epoch": 0.07086270804564285, + "grad_norm": 33.001773834228516, + "learning_rate": 7.064466894396396e-06, + "loss": 0.4367, + "step": 1950 + }, + { + "epoch": 0.07122610654844103, + "grad_norm": 3.5115041732788086, + "learning_rate": 7.100806744676212e-06, + "loss": 0.3279, + "step": 1960 + }, + { + "epoch": 0.07158950505123919, + "grad_norm": 3.0497541427612305, + "learning_rate": 7.137146594956029e-06, + "loss": 2.8797, + "step": 1970 + }, + { + "epoch": 0.07195290355403736, + "grad_norm": 6.17769718170166, + "learning_rate": 7.173486445235845e-06, + "loss": 0.3534, + "step": 1980 + }, + { + "epoch": 0.07231630205683552, + "grad_norm": 5.4114789962768555, + "learning_rate": 7.209826295515664e-06, + "loss": 0.3309, + "step": 1990 + }, + { + "epoch": 0.0726797005596337, + "grad_norm": 11.600439071655273, + "learning_rate": 7.24616614579548e-06, + "loss": 0.4382, + "step": 2000 + }, + { + "epoch": 0.07304309906243187, + "grad_norm": 3.4476027488708496, + "learning_rate": 7.2825059960752965e-06, + "loss": 0.3487, + "step": 2010 + }, + { + "epoch": 0.07340649756523003, + "grad_norm": 5.642564296722412, + "learning_rate": 7.318845846355113e-06, + "loss": 0.3513, + "step": 2020 + }, + { + "epoch": 0.0737698960680282, + "grad_norm": 7.132052898406982, + "learning_rate": 7.355185696634931e-06, + "loss": 0.4564, + "step": 2030 + }, + { + "epoch": 0.07413329457082637, + "grad_norm": 6.583246231079102, + "learning_rate": 7.391525546914748e-06, + "loss": 0.3376, + "step": 2040 + }, + { + "epoch": 0.07449669307362454, + "grad_norm": 23.98805809020996, + "learning_rate": 7.427865397194564e-06, + "loss": 0.4217, + "step": 2050 + }, + { + "epoch": 0.0748600915764227, + "grad_norm": 3.9135584831237793, + "learning_rate": 7.4642052474743805e-06, + "loss": 0.3324, + "step": 2060 + }, + { + "epoch": 0.07522349007922087, + "grad_norm": 3.4022698402404785, + "learning_rate": 7.500545097754198e-06, + "loss": 0.3391, + "step": 2070 + }, + { + "epoch": 0.07558688858201905, + "grad_norm": 8.37547779083252, + "learning_rate": 7.536884948034015e-06, + "loss": 0.3119, + "step": 2080 + }, + { + "epoch": 0.07595028708481721, + "grad_norm": 6.2167558670043945, + "learning_rate": 7.573224798313831e-06, + "loss": 0.3247, + "step": 2090 + }, + { + "epoch": 0.07631368558761538, + "grad_norm": 81.76036834716797, + "learning_rate": 7.609564648593648e-06, + "loss": 0.4281, + "step": 2100 + }, + { + "epoch": 0.07667708409041354, + "grad_norm": 2.8961973190307617, + "learning_rate": 7.645904498873465e-06, + "loss": 0.3368, + "step": 2110 + }, + { + "epoch": 0.07704048259321171, + "grad_norm": 4.699477195739746, + "learning_rate": 7.682244349153282e-06, + "loss": 0.3403, + "step": 2120 + }, + { + "epoch": 0.07740388109600989, + "grad_norm": 4.429138660430908, + "learning_rate": 7.718584199433098e-06, + "loss": 0.3182, + "step": 2130 + }, + { + "epoch": 0.07776727959880805, + "grad_norm": 2.7269580364227295, + "learning_rate": 7.754924049712916e-06, + "loss": 0.2828, + "step": 2140 + }, + { + "epoch": 0.07813067810160622, + "grad_norm": 15.126232147216797, + "learning_rate": 7.791263899992732e-06, + "loss": 0.4606, + "step": 2150 + }, + { + "epoch": 0.07849407660440438, + "grad_norm": 10.14072322845459, + "learning_rate": 7.82760375027255e-06, + "loss": 0.3451, + "step": 2160 + }, + { + "epoch": 0.07885747510720256, + "grad_norm": 4.95914363861084, + "learning_rate": 7.863943600552365e-06, + "loss": 0.3612, + "step": 2170 + }, + { + "epoch": 0.07922087361000073, + "grad_norm": 4.115192413330078, + "learning_rate": 7.900283450832183e-06, + "loss": 0.3222, + "step": 2180 + }, + { + "epoch": 0.07958427211279889, + "grad_norm": 5.405594825744629, + "learning_rate": 7.936623301111999e-06, + "loss": 0.3474, + "step": 2190 + }, + { + "epoch": 0.07994767061559706, + "grad_norm": 23.328718185424805, + "learning_rate": 7.972963151391817e-06, + "loss": 0.4797, + "step": 2200 + }, + { + "epoch": 0.08031106911839524, + "grad_norm": 3.5595099925994873, + "learning_rate": 8.009303001671634e-06, + "loss": 0.3305, + "step": 2210 + }, + { + "epoch": 0.0806744676211934, + "grad_norm": 3.048445463180542, + "learning_rate": 8.04564285195145e-06, + "loss": 0.318, + "step": 2220 + }, + { + "epoch": 0.08103786612399157, + "grad_norm": 5.857702732086182, + "learning_rate": 8.081982702231266e-06, + "loss": 0.3497, + "step": 2230 + }, + { + "epoch": 0.08140126462678973, + "grad_norm": 3.0092968940734863, + "learning_rate": 8.118322552511084e-06, + "loss": 0.2995, + "step": 2240 + }, + { + "epoch": 0.08176466312958791, + "grad_norm": 9.337843894958496, + "learning_rate": 8.154662402790902e-06, + "loss": 0.4517, + "step": 2250 + }, + { + "epoch": 0.08212806163238608, + "grad_norm": 3.136950969696045, + "learning_rate": 8.191002253070718e-06, + "loss": 0.2927, + "step": 2260 + }, + { + "epoch": 0.08249146013518424, + "grad_norm": 4.228198051452637, + "learning_rate": 8.227342103350534e-06, + "loss": 1.2185, + "step": 2270 + }, + { + "epoch": 0.0828548586379824, + "grad_norm": 7.404679298400879, + "learning_rate": 8.263681953630351e-06, + "loss": 0.3448, + "step": 2280 + }, + { + "epoch": 0.08321825714078059, + "grad_norm": 7.873497009277344, + "learning_rate": 8.300021803910169e-06, + "loss": 0.2965, + "step": 2290 + }, + { + "epoch": 0.08358165564357875, + "grad_norm": 12.266081809997559, + "learning_rate": 8.336361654189985e-06, + "loss": 0.4631, + "step": 2300 + }, + { + "epoch": 0.08394505414637692, + "grad_norm": 3.3576557636260986, + "learning_rate": 8.3727015044698e-06, + "loss": 0.3339, + "step": 2310 + }, + { + "epoch": 0.08430845264917508, + "grad_norm": 3.0854902267456055, + "learning_rate": 8.40904135474962e-06, + "loss": 0.3448, + "step": 2320 + }, + { + "epoch": 0.08467185115197326, + "grad_norm": 6.1308746337890625, + "learning_rate": 8.445381205029436e-06, + "loss": 0.386, + "step": 2330 + }, + { + "epoch": 0.08503524965477142, + "grad_norm": 4.458275318145752, + "learning_rate": 8.481721055309252e-06, + "loss": 0.2916, + "step": 2340 + }, + { + "epoch": 0.08539864815756959, + "grad_norm": 25.443647384643555, + "learning_rate": 8.51806090558907e-06, + "loss": 0.4232, + "step": 2350 + }, + { + "epoch": 0.08576204666036776, + "grad_norm": 324.4353332519531, + "learning_rate": 8.554400755868887e-06, + "loss": 2.4995, + "step": 2360 + }, + { + "epoch": 0.08612544516316593, + "grad_norm": 17.593692779541016, + "learning_rate": 8.590740606148703e-06, + "loss": 0.2952, + "step": 2370 + }, + { + "epoch": 0.0864888436659641, + "grad_norm": 3.4646732807159424, + "learning_rate": 8.62708045642852e-06, + "loss": 0.2961, + "step": 2380 + }, + { + "epoch": 0.08685224216876226, + "grad_norm": 2.9895999431610107, + "learning_rate": 8.663420306708337e-06, + "loss": 0.2852, + "step": 2390 + }, + { + "epoch": 0.08721564067156043, + "grad_norm": 24.221176147460938, + "learning_rate": 8.699760156988155e-06, + "loss": 0.4343, + "step": 2400 + }, + { + "epoch": 0.08721564067156043, + "eval_loss": 0.47036415338516235, + "eval_runtime": 180.1154, + "eval_samples_per_second": 41.162, + "eval_steps_per_second": 5.147, + "eval_wer": 0.28054713453264835, + "step": 2400 + }, + { + "epoch": 0.08757903917435861, + "grad_norm": 161.69967651367188, + "learning_rate": 8.73610000726797e-06, + "loss": 1.4598, + "step": 2410 + }, + { + "epoch": 0.08794243767715677, + "grad_norm": 10.37559700012207, + "learning_rate": 8.772439857547786e-06, + "loss": 0.3042, + "step": 2420 + }, + { + "epoch": 0.08830583617995494, + "grad_norm": 5.90106725692749, + "learning_rate": 8.808779707827604e-06, + "loss": 0.3385, + "step": 2430 + }, + { + "epoch": 0.0886692346827531, + "grad_norm": 9.207955360412598, + "learning_rate": 8.845119558107422e-06, + "loss": 0.2963, + "step": 2440 + }, + { + "epoch": 0.08903263318555127, + "grad_norm": 22.280956268310547, + "learning_rate": 8.881459408387238e-06, + "loss": 0.4505, + "step": 2450 + }, + { + "epoch": 0.08939603168834945, + "grad_norm": 3.090710401535034, + "learning_rate": 8.917799258667055e-06, + "loss": 0.3114, + "step": 2460 + }, + { + "epoch": 0.08975943019114761, + "grad_norm": 4.144134044647217, + "learning_rate": 8.954139108946871e-06, + "loss": 0.2855, + "step": 2470 + }, + { + "epoch": 0.09012282869394578, + "grad_norm": 4.343112468719482, + "learning_rate": 8.990478959226687e-06, + "loss": 0.2906, + "step": 2480 + }, + { + "epoch": 0.09048622719674394, + "grad_norm": 2.6925292015075684, + "learning_rate": 9.026818809506505e-06, + "loss": 0.284, + "step": 2490 + }, + { + "epoch": 0.09084962569954212, + "grad_norm": 29.639341354370117, + "learning_rate": 9.063158659786323e-06, + "loss": 0.3411, + "step": 2500 + }, + { + "epoch": 0.09121302420234029, + "grad_norm": 4.425374984741211, + "learning_rate": 9.099498510066139e-06, + "loss": 0.3041, + "step": 2510 + }, + { + "epoch": 0.09157642270513845, + "grad_norm": 5.6643195152282715, + "learning_rate": 9.135838360345955e-06, + "loss": 0.3123, + "step": 2520 + }, + { + "epoch": 0.09193982120793662, + "grad_norm": 3.9098479747772217, + "learning_rate": 9.172178210625772e-06, + "loss": 0.3664, + "step": 2530 + }, + { + "epoch": 0.0923032197107348, + "grad_norm": 3.133389949798584, + "learning_rate": 9.20851806090559e-06, + "loss": 0.2708, + "step": 2540 + }, + { + "epoch": 0.09266661821353296, + "grad_norm": 43.00468063354492, + "learning_rate": 9.244857911185406e-06, + "loss": 0.4215, + "step": 2550 + }, + { + "epoch": 0.09303001671633113, + "grad_norm": 3.1411876678466797, + "learning_rate": 9.281197761465222e-06, + "loss": 0.2983, + "step": 2560 + }, + { + "epoch": 0.09339341521912929, + "grad_norm": 3.263828754425049, + "learning_rate": 9.317537611745041e-06, + "loss": 0.3137, + "step": 2570 + }, + { + "epoch": 0.09375681372192747, + "grad_norm": 3.618751049041748, + "learning_rate": 9.353877462024857e-06, + "loss": 0.3279, + "step": 2580 + }, + { + "epoch": 0.09412021222472564, + "grad_norm": 3.6551568508148193, + "learning_rate": 9.390217312304673e-06, + "loss": 0.2409, + "step": 2590 + }, + { + "epoch": 0.0944836107275238, + "grad_norm": 8.680901527404785, + "learning_rate": 9.42655716258449e-06, + "loss": 0.373, + "step": 2600 + }, + { + "epoch": 0.09484700923032197, + "grad_norm": 4.761026382446289, + "learning_rate": 9.462897012864308e-06, + "loss": 0.2777, + "step": 2610 + }, + { + "epoch": 0.09521040773312014, + "grad_norm": 3.142723321914673, + "learning_rate": 9.499236863144124e-06, + "loss": 0.2882, + "step": 2620 + }, + { + "epoch": 0.09557380623591831, + "grad_norm": 2.969968795776367, + "learning_rate": 9.53557671342394e-06, + "loss": 0.3086, + "step": 2630 + }, + { + "epoch": 0.09593720473871648, + "grad_norm": 3.754549264907837, + "learning_rate": 9.571916563703758e-06, + "loss": 0.259, + "step": 2640 + }, + { + "epoch": 0.09630060324151464, + "grad_norm": 23.7288761138916, + "learning_rate": 9.608256413983576e-06, + "loss": 0.4284, + "step": 2650 + }, + { + "epoch": 0.09666400174431282, + "grad_norm": 2.7727372646331787, + "learning_rate": 9.644596264263392e-06, + "loss": 0.2602, + "step": 2660 + }, + { + "epoch": 0.09702740024711098, + "grad_norm": 14.707064628601074, + "learning_rate": 9.680936114543208e-06, + "loss": 0.3059, + "step": 2670 + }, + { + "epoch": 0.09739079874990915, + "grad_norm": 3.8396642208099365, + "learning_rate": 9.717275964823025e-06, + "loss": 0.2811, + "step": 2680 + }, + { + "epoch": 0.09775419725270731, + "grad_norm": 2.9460713863372803, + "learning_rate": 9.753615815102843e-06, + "loss": 0.2686, + "step": 2690 + }, + { + "epoch": 0.0981175957555055, + "grad_norm": 20.107336044311523, + "learning_rate": 9.789955665382659e-06, + "loss": 0.4306, + "step": 2700 + }, + { + "epoch": 0.09848099425830366, + "grad_norm": 3.1286280155181885, + "learning_rate": 9.826295515662477e-06, + "loss": 0.3059, + "step": 2710 + }, + { + "epoch": 0.09884439276110182, + "grad_norm": 6.160215854644775, + "learning_rate": 9.862635365942292e-06, + "loss": 0.3046, + "step": 2720 + }, + { + "epoch": 0.09920779126389999, + "grad_norm": 6.1921186447143555, + "learning_rate": 9.89897521622211e-06, + "loss": 0.285, + "step": 2730 + }, + { + "epoch": 0.09957118976669817, + "grad_norm": 13.759759902954102, + "learning_rate": 9.935315066501926e-06, + "loss": 0.2888, + "step": 2740 + }, + { + "epoch": 0.09993458826949633, + "grad_norm": 13.92764949798584, + "learning_rate": 9.971654916781744e-06, + "loss": 0.4266, + "step": 2750 + }, + { + "epoch": 0.1002979867722945, + "grad_norm": 3.3999857902526855, + "learning_rate": 1.000799476706156e-05, + "loss": 0.2858, + "step": 2760 + }, + { + "epoch": 0.10066138527509266, + "grad_norm": 4.103928089141846, + "learning_rate": 1.0044334617341377e-05, + "loss": 0.262, + "step": 2770 + }, + { + "epoch": 0.10102478377789084, + "grad_norm": 6.15985107421875, + "learning_rate": 1.0080674467621195e-05, + "loss": 0.2866, + "step": 2780 + }, + { + "epoch": 0.10138818228068901, + "grad_norm": 4.904097557067871, + "learning_rate": 1.0117014317901011e-05, + "loss": 0.5057, + "step": 2790 + }, + { + "epoch": 0.10175158078348717, + "grad_norm": 15.2875337600708, + "learning_rate": 1.0153354168180827e-05, + "loss": 0.4345, + "step": 2800 + }, + { + "epoch": 0.10211497928628534, + "grad_norm": 2.4697763919830322, + "learning_rate": 1.0189694018460643e-05, + "loss": 0.2693, + "step": 2810 + }, + { + "epoch": 0.1024783777890835, + "grad_norm": 5.04618501663208, + "learning_rate": 1.0226033868740462e-05, + "loss": 0.2868, + "step": 2820 + }, + { + "epoch": 0.10284177629188168, + "grad_norm": 5.851120948791504, + "learning_rate": 1.0262373719020278e-05, + "loss": 0.3425, + "step": 2830 + }, + { + "epoch": 0.10320517479467985, + "grad_norm": 2.1007258892059326, + "learning_rate": 1.0298713569300094e-05, + "loss": 0.2394, + "step": 2840 + }, + { + "epoch": 0.10356857329747801, + "grad_norm": 23.411701202392578, + "learning_rate": 1.0335053419579912e-05, + "loss": 0.4125, + "step": 2850 + }, + { + "epoch": 0.10393197180027618, + "grad_norm": 4.178852558135986, + "learning_rate": 1.037139326985973e-05, + "loss": 0.2951, + "step": 2860 + }, + { + "epoch": 0.10429537030307436, + "grad_norm": 1.7873708009719849, + "learning_rate": 1.0407733120139545e-05, + "loss": 0.3272, + "step": 2870 + }, + { + "epoch": 0.10465876880587252, + "grad_norm": 7.603367328643799, + "learning_rate": 1.0444072970419361e-05, + "loss": 0.2779, + "step": 2880 + }, + { + "epoch": 0.10502216730867069, + "grad_norm": 3.468761444091797, + "learning_rate": 1.0480412820699179e-05, + "loss": 0.3007, + "step": 2890 + }, + { + "epoch": 0.10538556581146885, + "grad_norm": 16.35407829284668, + "learning_rate": 1.0516752670978997e-05, + "loss": 0.3918, + "step": 2900 + }, + { + "epoch": 0.10574896431426703, + "grad_norm": 3.4226725101470947, + "learning_rate": 1.0553092521258813e-05, + "loss": 3.7156, + "step": 2910 + }, + { + "epoch": 0.1061123628170652, + "grad_norm": 9.006295204162598, + "learning_rate": 1.058943237153863e-05, + "loss": 0.4075, + "step": 2920 + }, + { + "epoch": 0.10647576131986336, + "grad_norm": 4.993385314941406, + "learning_rate": 1.0625772221818446e-05, + "loss": 0.3588, + "step": 2930 + }, + { + "epoch": 0.10683915982266153, + "grad_norm": 3.7684736251831055, + "learning_rate": 1.0662112072098264e-05, + "loss": 0.2429, + "step": 2940 + }, + { + "epoch": 0.1072025583254597, + "grad_norm": 40.301170349121094, + "learning_rate": 1.069845192237808e-05, + "loss": 0.4739, + "step": 2950 + }, + { + "epoch": 0.10756595682825787, + "grad_norm": 3.772693157196045, + "learning_rate": 1.0734791772657898e-05, + "loss": 0.3284, + "step": 2960 + }, + { + "epoch": 0.10792935533105603, + "grad_norm": 3.0183212757110596, + "learning_rate": 1.0771131622937714e-05, + "loss": 0.38, + "step": 2970 + }, + { + "epoch": 0.1082927538338542, + "grad_norm": 6.61776876449585, + "learning_rate": 1.0807471473217531e-05, + "loss": 0.2793, + "step": 2980 + }, + { + "epoch": 0.10865615233665238, + "grad_norm": 6.112472057342529, + "learning_rate": 1.0843811323497347e-05, + "loss": 0.2447, + "step": 2990 + }, + { + "epoch": 0.10901955083945054, + "grad_norm": 10.800559997558594, + "learning_rate": 1.0880151173777165e-05, + "loss": 0.373, + "step": 3000 + }, + { + "epoch": 0.10901955083945054, + "eval_loss": 0.4652940630912781, + "eval_runtime": 180.0765, + "eval_samples_per_second": 41.171, + "eval_steps_per_second": 5.148, + "eval_wer": 0.27681667181004593, + "step": 3000 + }, + { + "epoch": 0.10938294934224871, + "grad_norm": 7.778831958770752, + "learning_rate": 1.091649102405698e-05, + "loss": 0.29, + "step": 3010 + }, + { + "epoch": 0.10974634784504687, + "grad_norm": 2.855592966079712, + "learning_rate": 1.0952830874336798e-05, + "loss": 0.2411, + "step": 3020 + }, + { + "epoch": 0.11010974634784505, + "grad_norm": 4.229335784912109, + "learning_rate": 1.0989170724616616e-05, + "loss": 0.3247, + "step": 3030 + }, + { + "epoch": 0.11047314485064322, + "grad_norm": 3.8145949840545654, + "learning_rate": 1.1025510574896432e-05, + "loss": 0.2242, + "step": 3040 + }, + { + "epoch": 0.11083654335344138, + "grad_norm": 22.571304321289062, + "learning_rate": 1.1061850425176248e-05, + "loss": 0.3959, + "step": 3050 + }, + { + "epoch": 0.11119994185623955, + "grad_norm": 2.4706461429595947, + "learning_rate": 1.1098190275456066e-05, + "loss": 0.2466, + "step": 3060 + }, + { + "epoch": 0.11156334035903773, + "grad_norm": 4.497069358825684, + "learning_rate": 1.1134530125735883e-05, + "loss": 2.1968, + "step": 3070 + }, + { + "epoch": 0.11192673886183589, + "grad_norm": 5.060062885284424, + "learning_rate": 1.11708699760157e-05, + "loss": 0.2921, + "step": 3080 + }, + { + "epoch": 0.11229013736463406, + "grad_norm": 2.7882325649261475, + "learning_rate": 1.1207209826295515e-05, + "loss": 0.2534, + "step": 3090 + }, + { + "epoch": 0.11265353586743222, + "grad_norm": 9.96241569519043, + "learning_rate": 1.1243549676575333e-05, + "loss": 0.421, + "step": 3100 + }, + { + "epoch": 0.1130169343702304, + "grad_norm": 31.262916564941406, + "learning_rate": 1.127988952685515e-05, + "loss": 0.4048, + "step": 3110 + }, + { + "epoch": 0.11338033287302857, + "grad_norm": 3.472343921661377, + "learning_rate": 1.1316229377134967e-05, + "loss": 0.2798, + "step": 3120 + }, + { + "epoch": 0.11374373137582673, + "grad_norm": 4.074085235595703, + "learning_rate": 1.1352569227414783e-05, + "loss": 0.299, + "step": 3130 + }, + { + "epoch": 0.1141071298786249, + "grad_norm": 2.879512310028076, + "learning_rate": 1.1388909077694602e-05, + "loss": 0.2137, + "step": 3140 + }, + { + "epoch": 0.11447052838142306, + "grad_norm": 125.17889404296875, + "learning_rate": 1.1425248927974418e-05, + "loss": 0.5418, + "step": 3150 + }, + { + "epoch": 0.11483392688422124, + "grad_norm": 4.171487808227539, + "learning_rate": 1.1461588778254234e-05, + "loss": 0.2685, + "step": 3160 + }, + { + "epoch": 0.1151973253870194, + "grad_norm": 2.1496529579162598, + "learning_rate": 1.1497928628534051e-05, + "loss": 0.2421, + "step": 3170 + }, + { + "epoch": 0.11556072388981757, + "grad_norm": 2.6266047954559326, + "learning_rate": 1.1534268478813867e-05, + "loss": 0.3288, + "step": 3180 + }, + { + "epoch": 0.11592412239261574, + "grad_norm": 3.7677230834960938, + "learning_rate": 1.1570608329093685e-05, + "loss": 0.3093, + "step": 3190 + }, + { + "epoch": 0.11628752089541392, + "grad_norm": 9.4945707321167, + "learning_rate": 1.1606948179373501e-05, + "loss": 0.3066, + "step": 3200 + }, + { + "epoch": 0.11665091939821208, + "grad_norm": 2.5509915351867676, + "learning_rate": 1.1643288029653319e-05, + "loss": 0.2615, + "step": 3210 + }, + { + "epoch": 0.11701431790101025, + "grad_norm": 3.066624641418457, + "learning_rate": 1.1679627879933135e-05, + "loss": 0.3224, + "step": 3220 + }, + { + "epoch": 0.11737771640380841, + "grad_norm": 6.494440078735352, + "learning_rate": 1.1715967730212952e-05, + "loss": 0.3017, + "step": 3230 + }, + { + "epoch": 0.11774111490660659, + "grad_norm": 3.4675605297088623, + "learning_rate": 1.1752307580492768e-05, + "loss": 0.2152, + "step": 3240 + }, + { + "epoch": 0.11810451340940475, + "grad_norm": 15.5110445022583, + "learning_rate": 1.1788647430772586e-05, + "loss": 0.349, + "step": 3250 + }, + { + "epoch": 0.11846791191220292, + "grad_norm": 1.972530484199524, + "learning_rate": 1.1824987281052402e-05, + "loss": 0.2728, + "step": 3260 + }, + { + "epoch": 0.11883131041500108, + "grad_norm": 4.018677711486816, + "learning_rate": 1.186132713133222e-05, + "loss": 0.254, + "step": 3270 + }, + { + "epoch": 0.11919470891779926, + "grad_norm": 4.95416784286499, + "learning_rate": 1.1897666981612037e-05, + "loss": 0.2465, + "step": 3280 + }, + { + "epoch": 0.11955810742059743, + "grad_norm": 3.165599822998047, + "learning_rate": 1.1934006831891853e-05, + "loss": 0.2537, + "step": 3290 + }, + { + "epoch": 0.1199215059233956, + "grad_norm": 8.508636474609375, + "learning_rate": 1.1970346682171669e-05, + "loss": 0.3655, + "step": 3300 + }, + { + "epoch": 0.12028490442619376, + "grad_norm": 2.3892879486083984, + "learning_rate": 1.2006686532451487e-05, + "loss": 0.252, + "step": 3310 + }, + { + "epoch": 0.12064830292899194, + "grad_norm": 3.591564178466797, + "learning_rate": 1.2043026382731304e-05, + "loss": 0.2401, + "step": 3320 + }, + { + "epoch": 0.1210117014317901, + "grad_norm": 3.891261577606201, + "learning_rate": 1.207936623301112e-05, + "loss": 0.2909, + "step": 3330 + }, + { + "epoch": 0.12137509993458827, + "grad_norm": 4.691511154174805, + "learning_rate": 1.2115706083290936e-05, + "loss": 0.2304, + "step": 3340 + }, + { + "epoch": 0.12173849843738643, + "grad_norm": 18.415170669555664, + "learning_rate": 1.2152045933570754e-05, + "loss": 0.35, + "step": 3350 + }, + { + "epoch": 0.12210189694018461, + "grad_norm": 3.9105615615844727, + "learning_rate": 1.2188385783850572e-05, + "loss": 0.3112, + "step": 3360 + }, + { + "epoch": 0.12246529544298278, + "grad_norm": 3.215313196182251, + "learning_rate": 1.2224725634130388e-05, + "loss": 0.2492, + "step": 3370 + }, + { + "epoch": 0.12282869394578094, + "grad_norm": 9.30749225616455, + "learning_rate": 1.2261065484410204e-05, + "loss": 0.2696, + "step": 3380 + }, + { + "epoch": 0.12319209244857911, + "grad_norm": 4.9797682762146, + "learning_rate": 1.2297405334690023e-05, + "loss": 0.2197, + "step": 3390 + }, + { + "epoch": 0.12355549095137729, + "grad_norm": 19.632797241210938, + "learning_rate": 1.2333745184969839e-05, + "loss": 0.3411, + "step": 3400 + }, + { + "epoch": 0.12391888945417545, + "grad_norm": 4.509830474853516, + "learning_rate": 1.2370085035249655e-05, + "loss": 0.2394, + "step": 3410 + }, + { + "epoch": 0.12428228795697362, + "grad_norm": 2.253514051437378, + "learning_rate": 1.2406424885529473e-05, + "loss": 0.266, + "step": 3420 + }, + { + "epoch": 0.12464568645977178, + "grad_norm": 3.123828172683716, + "learning_rate": 1.244276473580929e-05, + "loss": 0.2675, + "step": 3430 + }, + { + "epoch": 0.12500908496256996, + "grad_norm": 34.37680435180664, + "learning_rate": 1.2479104586089106e-05, + "loss": 0.279, + "step": 3440 + }, + { + "epoch": 0.1253724834653681, + "grad_norm": 10.051690101623535, + "learning_rate": 1.2515444436368922e-05, + "loss": 0.3364, + "step": 3450 + }, + { + "epoch": 0.1257358819681663, + "grad_norm": 2.1765711307525635, + "learning_rate": 1.255178428664874e-05, + "loss": 0.2288, + "step": 3460 + }, + { + "epoch": 0.12609928047096447, + "grad_norm": 2.4910778999328613, + "learning_rate": 1.2588124136928556e-05, + "loss": 0.2866, + "step": 3470 + }, + { + "epoch": 0.12646267897376262, + "grad_norm": 7.379613876342773, + "learning_rate": 1.2624463987208373e-05, + "loss": 0.2618, + "step": 3480 + }, + { + "epoch": 0.1268260774765608, + "grad_norm": 2.681814432144165, + "learning_rate": 1.266080383748819e-05, + "loss": 0.2405, + "step": 3490 + }, + { + "epoch": 0.12718947597935895, + "grad_norm": 83.93474578857422, + "learning_rate": 1.2697143687768007e-05, + "loss": 0.338, + "step": 3500 + }, + { + "epoch": 0.12755287448215713, + "grad_norm": 1.5564826726913452, + "learning_rate": 1.2733483538047825e-05, + "loss": 0.2305, + "step": 3510 + }, + { + "epoch": 0.1279162729849553, + "grad_norm": 2.6026437282562256, + "learning_rate": 1.2769823388327639e-05, + "loss": 0.2618, + "step": 3520 + }, + { + "epoch": 0.12827967148775346, + "grad_norm": 8.228372573852539, + "learning_rate": 1.2806163238607458e-05, + "loss": 0.2586, + "step": 3530 + }, + { + "epoch": 0.12864306999055164, + "grad_norm": 2.643139362335205, + "learning_rate": 1.2842503088887276e-05, + "loss": 0.2197, + "step": 3540 + }, + { + "epoch": 0.12900646849334982, + "grad_norm": 400.0296325683594, + "learning_rate": 1.287884293916709e-05, + "loss": 0.3586, + "step": 3550 + }, + { + "epoch": 0.12936986699614797, + "grad_norm": 1.6349281072616577, + "learning_rate": 1.2915182789446908e-05, + "loss": 0.2364, + "step": 3560 + }, + { + "epoch": 0.12973326549894615, + "grad_norm": 2.6573753356933594, + "learning_rate": 1.2951522639726724e-05, + "loss": 0.2195, + "step": 3570 + }, + { + "epoch": 0.1300966640017443, + "grad_norm": 4.2721686363220215, + "learning_rate": 1.2987862490006542e-05, + "loss": 0.3092, + "step": 3580 + }, + { + "epoch": 0.13046006250454248, + "grad_norm": 2.9982502460479736, + "learning_rate": 1.302420234028636e-05, + "loss": 0.2826, + "step": 3590 + }, + { + "epoch": 0.13082346100734066, + "grad_norm": 8.903009414672852, + "learning_rate": 1.3060542190566175e-05, + "loss": 0.3367, + "step": 3600 + }, + { + "epoch": 0.13082346100734066, + "eval_loss": 0.4490436017513275, + "eval_runtime": 179.8743, + "eval_samples_per_second": 41.218, + "eval_steps_per_second": 5.154, + "eval_wer": 0.2664058670829778, + "step": 3600 + }, + { + "epoch": 0.1311868595101388, + "grad_norm": 2.9746363162994385, + "learning_rate": 1.3096882040845993e-05, + "loss": 0.2418, + "step": 3610 + }, + { + "epoch": 0.131550258012937, + "grad_norm": 2.274872303009033, + "learning_rate": 1.313322189112581e-05, + "loss": 0.3052, + "step": 3620 + }, + { + "epoch": 0.13191365651573517, + "grad_norm": 7.114847660064697, + "learning_rate": 1.3169561741405625e-05, + "loss": 0.2821, + "step": 3630 + }, + { + "epoch": 0.13227705501853332, + "grad_norm": 3.2101128101348877, + "learning_rate": 1.3205901591685444e-05, + "loss": 0.2223, + "step": 3640 + }, + { + "epoch": 0.1326404535213315, + "grad_norm": 18.914968490600586, + "learning_rate": 1.3242241441965258e-05, + "loss": 0.3809, + "step": 3650 + }, + { + "epoch": 0.13300385202412965, + "grad_norm": 2.399569272994995, + "learning_rate": 1.3278581292245076e-05, + "loss": 0.2221, + "step": 3660 + }, + { + "epoch": 0.13336725052692783, + "grad_norm": 5.76792573928833, + "learning_rate": 1.3314921142524894e-05, + "loss": 0.2487, + "step": 3670 + }, + { + "epoch": 0.133730649029726, + "grad_norm": 3.6859967708587646, + "learning_rate": 1.335126099280471e-05, + "loss": 0.2781, + "step": 3680 + }, + { + "epoch": 0.13409404753252416, + "grad_norm": 2.9653141498565674, + "learning_rate": 1.3387600843084527e-05, + "loss": 0.2258, + "step": 3690 + }, + { + "epoch": 0.13445744603532234, + "grad_norm": 19.170753479003906, + "learning_rate": 1.3423940693364345e-05, + "loss": 0.3902, + "step": 3700 + }, + { + "epoch": 0.13482084453812052, + "grad_norm": 2.2880115509033203, + "learning_rate": 1.3460280543644161e-05, + "loss": 0.2745, + "step": 3710 + }, + { + "epoch": 0.13518424304091867, + "grad_norm": 2.5196125507354736, + "learning_rate": 1.3496620393923979e-05, + "loss": 0.2293, + "step": 3720 + }, + { + "epoch": 0.13554764154371685, + "grad_norm": 3.827986001968384, + "learning_rate": 1.3532960244203793e-05, + "loss": 0.259, + "step": 3730 + }, + { + "epoch": 0.135911040046515, + "grad_norm": 3.4211530685424805, + "learning_rate": 1.356930009448361e-05, + "loss": 0.3256, + "step": 3740 + }, + { + "epoch": 0.13627443854931318, + "grad_norm": 26.879398345947266, + "learning_rate": 1.360563994476343e-05, + "loss": 0.3208, + "step": 3750 + }, + { + "epoch": 0.13663783705211135, + "grad_norm": 2.316091775894165, + "learning_rate": 1.3641979795043244e-05, + "loss": 0.2316, + "step": 3760 + }, + { + "epoch": 0.1370012355549095, + "grad_norm": 4.098924160003662, + "learning_rate": 1.3678319645323062e-05, + "loss": 0.2399, + "step": 3770 + }, + { + "epoch": 0.13736463405770769, + "grad_norm": 6.9372687339782715, + "learning_rate": 1.371465949560288e-05, + "loss": 0.2858, + "step": 3780 + }, + { + "epoch": 0.13772803256050586, + "grad_norm": 2.509535789489746, + "learning_rate": 1.3750999345882695e-05, + "loss": 0.2113, + "step": 3790 + }, + { + "epoch": 0.13809143106330402, + "grad_norm": 7.7181077003479, + "learning_rate": 1.3787339196162513e-05, + "loss": 0.3279, + "step": 3800 + }, + { + "epoch": 0.1384548295661022, + "grad_norm": 2.6843245029449463, + "learning_rate": 1.3823679046442329e-05, + "loss": 0.2267, + "step": 3810 + }, + { + "epoch": 0.13881822806890035, + "grad_norm": 3.05159068107605, + "learning_rate": 1.3860018896722147e-05, + "loss": 0.229, + "step": 3820 + }, + { + "epoch": 0.13918162657169852, + "grad_norm": 5.029635429382324, + "learning_rate": 1.3896358747001964e-05, + "loss": 0.2627, + "step": 3830 + }, + { + "epoch": 0.1395450250744967, + "grad_norm": 2.8287103176116943, + "learning_rate": 1.3932698597281779e-05, + "loss": 0.2294, + "step": 3840 + }, + { + "epoch": 0.13990842357729485, + "grad_norm": 24.862224578857422, + "learning_rate": 1.3969038447561598e-05, + "loss": 0.3198, + "step": 3850 + }, + { + "epoch": 0.14027182208009303, + "grad_norm": 5.624647617340088, + "learning_rate": 1.4005378297841412e-05, + "loss": 0.2641, + "step": 3860 + }, + { + "epoch": 0.14063522058289118, + "grad_norm": 1.6199389696121216, + "learning_rate": 1.404171814812123e-05, + "loss": 0.2279, + "step": 3870 + }, + { + "epoch": 0.14099861908568936, + "grad_norm": 2.864058017730713, + "learning_rate": 1.4078057998401047e-05, + "loss": 0.2448, + "step": 3880 + }, + { + "epoch": 0.14136201758848754, + "grad_norm": 3.897899627685547, + "learning_rate": 1.4114397848680863e-05, + "loss": 0.2438, + "step": 3890 + }, + { + "epoch": 0.1417254160912857, + "grad_norm": 42.4840087890625, + "learning_rate": 1.4150737698960681e-05, + "loss": 0.3604, + "step": 3900 + }, + { + "epoch": 0.14208881459408387, + "grad_norm": 1.6532913446426392, + "learning_rate": 1.4187077549240499e-05, + "loss": 0.2469, + "step": 3910 + }, + { + "epoch": 0.14245221309688205, + "grad_norm": 2.3755931854248047, + "learning_rate": 1.4223417399520315e-05, + "loss": 0.2233, + "step": 3920 + }, + { + "epoch": 0.1428156115996802, + "grad_norm": 5.866461277008057, + "learning_rate": 1.4259757249800132e-05, + "loss": 0.2952, + "step": 3930 + }, + { + "epoch": 0.14317901010247838, + "grad_norm": 3.171570301055908, + "learning_rate": 1.4296097100079947e-05, + "loss": 0.2329, + "step": 3940 + }, + { + "epoch": 0.14354240860527653, + "grad_norm": 23.302635192871094, + "learning_rate": 1.4332436950359764e-05, + "loss": 0.3592, + "step": 3950 + }, + { + "epoch": 0.1439058071080747, + "grad_norm": 2.3609213829040527, + "learning_rate": 1.4368776800639584e-05, + "loss": 1.1975, + "step": 3960 + }, + { + "epoch": 0.1442692056108729, + "grad_norm": 2.857872486114502, + "learning_rate": 1.4405116650919398e-05, + "loss": 0.265, + "step": 3970 + }, + { + "epoch": 0.14463260411367104, + "grad_norm": 6.918335914611816, + "learning_rate": 1.4441456501199216e-05, + "loss": 0.4057, + "step": 3980 + }, + { + "epoch": 0.14499600261646922, + "grad_norm": 3.8019461631774902, + "learning_rate": 1.4477796351479033e-05, + "loss": 0.299, + "step": 3990 + }, + { + "epoch": 0.1453594011192674, + "grad_norm": 30.963428497314453, + "learning_rate": 1.451413620175885e-05, + "loss": 0.335, + "step": 4000 + }, + { + "epoch": 0.14572279962206555, + "grad_norm": 2.3968963623046875, + "learning_rate": 1.4550476052038667e-05, + "loss": 0.224, + "step": 4010 + }, + { + "epoch": 0.14608619812486373, + "grad_norm": 6.7229485511779785, + "learning_rate": 1.4586815902318481e-05, + "loss": 0.2657, + "step": 4020 + }, + { + "epoch": 0.14644959662766188, + "grad_norm": 17.447879791259766, + "learning_rate": 1.46231557525983e-05, + "loss": 0.2199, + "step": 4030 + }, + { + "epoch": 0.14681299513046006, + "grad_norm": 2.020756721496582, + "learning_rate": 1.4659495602878118e-05, + "loss": 0.31, + "step": 4040 + }, + { + "epoch": 0.14717639363325824, + "grad_norm": 38.28268814086914, + "learning_rate": 1.4695835453157932e-05, + "loss": 0.3861, + "step": 4050 + }, + { + "epoch": 0.1475397921360564, + "grad_norm": 2.085073232650757, + "learning_rate": 1.473217530343775e-05, + "loss": 0.2078, + "step": 4060 + }, + { + "epoch": 0.14790319063885457, + "grad_norm": 3.453597068786621, + "learning_rate": 1.476851515371757e-05, + "loss": 0.296, + "step": 4070 + }, + { + "epoch": 0.14826658914165275, + "grad_norm": 2.3039424419403076, + "learning_rate": 1.4804855003997384e-05, + "loss": 0.2346, + "step": 4080 + }, + { + "epoch": 0.1486299876444509, + "grad_norm": 3.217890977859497, + "learning_rate": 1.4841194854277201e-05, + "loss": 0.2243, + "step": 4090 + }, + { + "epoch": 0.14899338614724908, + "grad_norm": 12.48748779296875, + "learning_rate": 1.4877534704557017e-05, + "loss": 0.3378, + "step": 4100 + }, + { + "epoch": 0.14935678465004723, + "grad_norm": 2.781388282775879, + "learning_rate": 1.4913874554836835e-05, + "loss": 0.2167, + "step": 4110 + }, + { + "epoch": 0.1497201831528454, + "grad_norm": 2.564457893371582, + "learning_rate": 1.4950214405116653e-05, + "loss": 0.2187, + "step": 4120 + }, + { + "epoch": 0.1500835816556436, + "grad_norm": 9.590895652770996, + "learning_rate": 1.4986554255396469e-05, + "loss": 0.2444, + "step": 4130 + }, + { + "epoch": 0.15044698015844174, + "grad_norm": 2.8055028915405273, + "learning_rate": 1.5022894105676286e-05, + "loss": 0.2499, + "step": 4140 + }, + { + "epoch": 0.15081037866123992, + "grad_norm": 7.157045364379883, + "learning_rate": 1.5059233955956104e-05, + "loss": 0.361, + "step": 4150 + }, + { + "epoch": 0.1511737771640381, + "grad_norm": 3.369006633758545, + "learning_rate": 1.5095573806235918e-05, + "loss": 0.259, + "step": 4160 + }, + { + "epoch": 0.15153717566683625, + "grad_norm": 5.334355354309082, + "learning_rate": 1.5131913656515736e-05, + "loss": 0.2797, + "step": 4170 + }, + { + "epoch": 0.15190057416963443, + "grad_norm": 6.667120456695557, + "learning_rate": 1.5168253506795552e-05, + "loss": 0.2678, + "step": 4180 + }, + { + "epoch": 0.15226397267243258, + "grad_norm": 1.7419887781143188, + "learning_rate": 1.520459335707537e-05, + "loss": 0.2432, + "step": 4190 + }, + { + "epoch": 0.15262737117523076, + "grad_norm": 7.022573947906494, + "learning_rate": 1.5240933207355187e-05, + "loss": 0.2955, + "step": 4200 + }, + { + "epoch": 0.15262737117523076, + "eval_loss": 0.4361402690410614, + "eval_runtime": 180.5933, + "eval_samples_per_second": 41.054, + "eval_steps_per_second": 5.133, + "eval_wer": 0.2589540181894095, + "step": 4200 + }, + { + "epoch": 0.15299076967802894, + "grad_norm": 3.108078718185425, + "learning_rate": 1.5277273057635e-05, + "loss": 3.6147, + "step": 4210 + }, + { + "epoch": 0.1533541681808271, + "grad_norm": 2.6063787937164307, + "learning_rate": 1.531361290791482e-05, + "loss": 0.232, + "step": 4220 + }, + { + "epoch": 0.15371756668362527, + "grad_norm": 3.581697463989258, + "learning_rate": 1.5349952758194637e-05, + "loss": 0.2451, + "step": 4230 + }, + { + "epoch": 0.15408096518642342, + "grad_norm": 2.5910837650299072, + "learning_rate": 1.5386292608474453e-05, + "loss": 0.2283, + "step": 4240 + }, + { + "epoch": 0.1544443636892216, + "grad_norm": 70.38739013671875, + "learning_rate": 1.5422632458754272e-05, + "loss": 0.396, + "step": 4250 + }, + { + "epoch": 0.15480776219201978, + "grad_norm": 3.5658187866210938, + "learning_rate": 1.5458972309034088e-05, + "loss": 0.2116, + "step": 4260 + }, + { + "epoch": 0.15517116069481793, + "grad_norm": 5.393126487731934, + "learning_rate": 1.5495312159313904e-05, + "loss": 0.2382, + "step": 4270 + }, + { + "epoch": 0.1555345591976161, + "grad_norm": 10.135586738586426, + "learning_rate": 1.5531652009593723e-05, + "loss": 0.2485, + "step": 4280 + }, + { + "epoch": 0.15589795770041429, + "grad_norm": 2.1143031120300293, + "learning_rate": 1.5567991859873536e-05, + "loss": 0.1936, + "step": 4290 + }, + { + "epoch": 0.15626135620321244, + "grad_norm": 20.077383041381836, + "learning_rate": 1.5604331710153355e-05, + "loss": 0.3818, + "step": 4300 + }, + { + "epoch": 0.15662475470601062, + "grad_norm": 3.793126344680786, + "learning_rate": 1.564067156043317e-05, + "loss": 0.2245, + "step": 4310 + }, + { + "epoch": 0.15698815320880877, + "grad_norm": 3.2057955265045166, + "learning_rate": 1.5677011410712987e-05, + "loss": 0.2551, + "step": 4320 + }, + { + "epoch": 0.15735155171160695, + "grad_norm": 5.002716064453125, + "learning_rate": 1.5713351260992806e-05, + "loss": 0.2951, + "step": 4330 + }, + { + "epoch": 0.15771495021440513, + "grad_norm": 2.2240726947784424, + "learning_rate": 1.5749691111272622e-05, + "loss": 0.1993, + "step": 4340 + }, + { + "epoch": 0.15807834871720328, + "grad_norm": 55.30891036987305, + "learning_rate": 1.578603096155244e-05, + "loss": 0.2803, + "step": 4350 + }, + { + "epoch": 0.15844174722000146, + "grad_norm": 1.9186596870422363, + "learning_rate": 1.5822370811832258e-05, + "loss": 0.2234, + "step": 4360 + }, + { + "epoch": 0.15880514572279963, + "grad_norm": 1.7817661762237549, + "learning_rate": 1.5858710662112074e-05, + "loss": 0.2038, + "step": 4370 + }, + { + "epoch": 0.15916854422559779, + "grad_norm": 3.046330690383911, + "learning_rate": 1.589505051239189e-05, + "loss": 0.2809, + "step": 4380 + }, + { + "epoch": 0.15953194272839596, + "grad_norm": 5.43302583694458, + "learning_rate": 1.5931390362671706e-05, + "loss": 0.1896, + "step": 4390 + }, + { + "epoch": 0.15989534123119412, + "grad_norm": 12.185855865478516, + "learning_rate": 1.596773021295152e-05, + "loss": 0.2984, + "step": 4400 + }, + { + "epoch": 0.1602587397339923, + "grad_norm": 1.9507842063903809, + "learning_rate": 1.600407006323134e-05, + "loss": 0.2064, + "step": 4410 + }, + { + "epoch": 0.16062213823679047, + "grad_norm": 4.536543846130371, + "learning_rate": 1.6040409913511157e-05, + "loss": 0.2433, + "step": 4420 + }, + { + "epoch": 0.16098553673958862, + "grad_norm": 3.101174831390381, + "learning_rate": 1.6076749763790973e-05, + "loss": 0.2746, + "step": 4430 + }, + { + "epoch": 0.1613489352423868, + "grad_norm": 2.2098021507263184, + "learning_rate": 1.6113089614070792e-05, + "loss": 0.17, + "step": 4440 + }, + { + "epoch": 0.16171233374518498, + "grad_norm": 59.360809326171875, + "learning_rate": 1.6149429464350608e-05, + "loss": 0.379, + "step": 4450 + }, + { + "epoch": 0.16207573224798313, + "grad_norm": 6.364736557006836, + "learning_rate": 1.6185769314630424e-05, + "loss": 0.2224, + "step": 4460 + }, + { + "epoch": 0.1624391307507813, + "grad_norm": 3.2455356121063232, + "learning_rate": 1.622210916491024e-05, + "loss": 0.2195, + "step": 4470 + }, + { + "epoch": 0.16280252925357946, + "grad_norm": 6.399629592895508, + "learning_rate": 1.625844901519006e-05, + "loss": 0.266, + "step": 4480 + }, + { + "epoch": 0.16316592775637764, + "grad_norm": 16.19785499572754, + "learning_rate": 1.6294788865469875e-05, + "loss": 0.1836, + "step": 4490 + }, + { + "epoch": 0.16352932625917582, + "grad_norm": 7.909778594970703, + "learning_rate": 1.633112871574969e-05, + "loss": 0.6016, + "step": 4500 + }, + { + "epoch": 0.16389272476197397, + "grad_norm": 2.8134663105010986, + "learning_rate": 1.636746856602951e-05, + "loss": 0.2148, + "step": 4510 + }, + { + "epoch": 0.16425612326477215, + "grad_norm": 2.667999505996704, + "learning_rate": 1.6403808416309327e-05, + "loss": 0.2294, + "step": 4520 + }, + { + "epoch": 0.1646195217675703, + "grad_norm": 3.355242967605591, + "learning_rate": 1.6440148266589143e-05, + "loss": 0.2097, + "step": 4530 + }, + { + "epoch": 0.16498292027036848, + "grad_norm": 2.6241908073425293, + "learning_rate": 1.647648811686896e-05, + "loss": 0.2337, + "step": 4540 + }, + { + "epoch": 0.16534631877316666, + "grad_norm": 16.759428024291992, + "learning_rate": 1.6512827967148775e-05, + "loss": 0.2944, + "step": 4550 + }, + { + "epoch": 0.1657097172759648, + "grad_norm": 3.098898410797119, + "learning_rate": 1.6549167817428594e-05, + "loss": 0.1895, + "step": 4560 + }, + { + "epoch": 0.166073115778763, + "grad_norm": 4.042644023895264, + "learning_rate": 1.658550766770841e-05, + "loss": 0.2369, + "step": 4570 + }, + { + "epoch": 0.16643651428156117, + "grad_norm": 7.174807548522949, + "learning_rate": 1.6621847517988226e-05, + "loss": 0.2331, + "step": 4580 + }, + { + "epoch": 0.16679991278435932, + "grad_norm": 2.1805012226104736, + "learning_rate": 1.6658187368268045e-05, + "loss": 0.2422, + "step": 4590 + }, + { + "epoch": 0.1671633112871575, + "grad_norm": 18.097871780395508, + "learning_rate": 1.6694527218547858e-05, + "loss": 0.347, + "step": 4600 + }, + { + "epoch": 0.16752670978995565, + "grad_norm": 3.48561429977417, + "learning_rate": 1.6730867068827677e-05, + "loss": 0.2985, + "step": 4610 + }, + { + "epoch": 0.16789010829275383, + "grad_norm": 1.7519229650497437, + "learning_rate": 1.6767206919107496e-05, + "loss": 0.2204, + "step": 4620 + }, + { + "epoch": 0.168253506795552, + "grad_norm": 3.7641661167144775, + "learning_rate": 1.680354676938731e-05, + "loss": 0.2348, + "step": 4630 + }, + { + "epoch": 0.16861690529835016, + "grad_norm": 3.0688085556030273, + "learning_rate": 1.683988661966713e-05, + "loss": 0.2147, + "step": 4640 + }, + { + "epoch": 0.16898030380114834, + "grad_norm": 25.845094680786133, + "learning_rate": 1.6876226469946944e-05, + "loss": 0.3671, + "step": 4650 + }, + { + "epoch": 0.16934370230394652, + "grad_norm": 2.841994524002075, + "learning_rate": 1.691256632022676e-05, + "loss": 0.2182, + "step": 4660 + }, + { + "epoch": 0.16970710080674467, + "grad_norm": 1.0501997470855713, + "learning_rate": 1.694890617050658e-05, + "loss": 0.1791, + "step": 4670 + }, + { + "epoch": 0.17007049930954285, + "grad_norm": 3.3973441123962402, + "learning_rate": 1.6985246020786392e-05, + "loss": 0.3338, + "step": 4680 + }, + { + "epoch": 0.170433897812341, + "grad_norm": 1.8442267179489136, + "learning_rate": 1.702158587106621e-05, + "loss": 0.2528, + "step": 4690 + }, + { + "epoch": 0.17079729631513918, + "grad_norm": 42.373409271240234, + "learning_rate": 1.705792572134603e-05, + "loss": 0.2892, + "step": 4700 + }, + { + "epoch": 0.17116069481793736, + "grad_norm": 6.344671726226807, + "learning_rate": 1.7094265571625844e-05, + "loss": 0.2474, + "step": 4710 + }, + { + "epoch": 0.1715240933207355, + "grad_norm": 1.6177664995193481, + "learning_rate": 1.7130605421905663e-05, + "loss": 0.2364, + "step": 4720 + }, + { + "epoch": 0.1718874918235337, + "grad_norm": 4.98591423034668, + "learning_rate": 1.7166945272185482e-05, + "loss": 0.2046, + "step": 4730 + }, + { + "epoch": 0.17225089032633187, + "grad_norm": 7.943169116973877, + "learning_rate": 1.7203285122465295e-05, + "loss": 0.293, + "step": 4740 + }, + { + "epoch": 0.17261428882913002, + "grad_norm": 7.402034759521484, + "learning_rate": 1.7239624972745114e-05, + "loss": 0.2722, + "step": 4750 + }, + { + "epoch": 0.1729776873319282, + "grad_norm": 13.290019035339355, + "learning_rate": 1.727596482302493e-05, + "loss": 0.347, + "step": 4760 + }, + { + "epoch": 0.17334108583472635, + "grad_norm": 1.8591586351394653, + "learning_rate": 1.7312304673304746e-05, + "loss": 0.2291, + "step": 4770 + }, + { + "epoch": 0.17370448433752453, + "grad_norm": 2.5220861434936523, + "learning_rate": 1.7348644523584565e-05, + "loss": 0.2436, + "step": 4780 + }, + { + "epoch": 0.1740678828403227, + "grad_norm": 1.8692690134048462, + "learning_rate": 1.738498437386438e-05, + "loss": 0.1782, + "step": 4790 + }, + { + "epoch": 0.17443128134312086, + "grad_norm": 12.558557510375977, + "learning_rate": 1.7421324224144197e-05, + "loss": 0.3347, + "step": 4800 + }, + { + "epoch": 0.17443128134312086, + "eval_loss": 0.4148472547531128, + "eval_runtime": 180.0999, + "eval_samples_per_second": 41.166, + "eval_steps_per_second": 5.147, + "eval_wer": 0.23564543358687168, + "step": 4800 + }, + { + "epoch": 0.17479467984591904, + "grad_norm": 6.168694972991943, + "learning_rate": 1.7457664074424017e-05, + "loss": 0.2183, + "step": 4810 + }, + { + "epoch": 0.17515807834871722, + "grad_norm": 5.153416633605957, + "learning_rate": 1.749400392470383e-05, + "loss": 0.2689, + "step": 4820 + }, + { + "epoch": 0.17552147685151537, + "grad_norm": 2.8500893115997314, + "learning_rate": 1.753034377498365e-05, + "loss": 0.2848, + "step": 4830 + }, + { + "epoch": 0.17588487535431355, + "grad_norm": 17.89117431640625, + "learning_rate": 1.7566683625263465e-05, + "loss": 0.2539, + "step": 4840 + }, + { + "epoch": 0.1762482738571117, + "grad_norm": 19.455005645751953, + "learning_rate": 1.760302347554328e-05, + "loss": 0.3166, + "step": 4850 + }, + { + "epoch": 0.17661167235990988, + "grad_norm": 1.7975777387619019, + "learning_rate": 1.76393633258231e-05, + "loss": 0.1927, + "step": 4860 + }, + { + "epoch": 0.17697507086270806, + "grad_norm": 4.6790690422058105, + "learning_rate": 1.7675703176102916e-05, + "loss": 0.2248, + "step": 4870 + }, + { + "epoch": 0.1773384693655062, + "grad_norm": 3.2644243240356445, + "learning_rate": 1.7712043026382732e-05, + "loss": 0.2239, + "step": 4880 + }, + { + "epoch": 0.1777018678683044, + "grad_norm": 1.9375410079956055, + "learning_rate": 1.7748382876662548e-05, + "loss": 0.2053, + "step": 4890 + }, + { + "epoch": 0.17806526637110254, + "grad_norm": 15.435178756713867, + "learning_rate": 1.7784722726942367e-05, + "loss": 0.2903, + "step": 4900 + }, + { + "epoch": 0.17842866487390072, + "grad_norm": 2.486330270767212, + "learning_rate": 1.7821062577222183e-05, + "loss": 0.2598, + "step": 4910 + }, + { + "epoch": 0.1787920633766989, + "grad_norm": 2.5542314052581787, + "learning_rate": 1.7857402427502e-05, + "loss": 0.2305, + "step": 4920 + }, + { + "epoch": 0.17915546187949705, + "grad_norm": 3.6416103839874268, + "learning_rate": 1.7893742277781815e-05, + "loss": 1.046, + "step": 4930 + }, + { + "epoch": 0.17951886038229523, + "grad_norm": 1.9395058155059814, + "learning_rate": 1.7930082128061634e-05, + "loss": 0.2466, + "step": 4940 + }, + { + "epoch": 0.1798822588850934, + "grad_norm": 7.664824962615967, + "learning_rate": 1.796642197834145e-05, + "loss": 0.2871, + "step": 4950 + }, + { + "epoch": 0.18024565738789156, + "grad_norm": 2.0301320552825928, + "learning_rate": 1.8002761828621266e-05, + "loss": 0.1996, + "step": 4960 + }, + { + "epoch": 0.18060905589068973, + "grad_norm": 8.371182441711426, + "learning_rate": 1.8039101678901082e-05, + "loss": 0.1947, + "step": 4970 + }, + { + "epoch": 0.18097245439348789, + "grad_norm": 2.6746129989624023, + "learning_rate": 1.80754415291809e-05, + "loss": 0.2679, + "step": 4980 + }, + { + "epoch": 0.18133585289628606, + "grad_norm": 3.448202133178711, + "learning_rate": 1.8111781379460718e-05, + "loss": 0.1859, + "step": 4990 + }, + { + "epoch": 0.18169925139908424, + "grad_norm": 28.57021141052246, + "learning_rate": 1.8148121229740534e-05, + "loss": 0.3318, + "step": 5000 + }, + { + "epoch": 0.1820626499018824, + "grad_norm": 4.731750965118408, + "learning_rate": 1.8184461080020353e-05, + "loss": 0.2354, + "step": 5010 + }, + { + "epoch": 0.18242604840468057, + "grad_norm": 1.6815394163131714, + "learning_rate": 1.822080093030017e-05, + "loss": 0.2075, + "step": 5020 + }, + { + "epoch": 0.18278944690747875, + "grad_norm": 3.868263006210327, + "learning_rate": 1.8257140780579985e-05, + "loss": 0.2345, + "step": 5030 + }, + { + "epoch": 0.1831528454102769, + "grad_norm": 1.964240550994873, + "learning_rate": 1.82934806308598e-05, + "loss": 0.222, + "step": 5040 + }, + { + "epoch": 0.18351624391307508, + "grad_norm": 11.881858825683594, + "learning_rate": 1.8329820481139617e-05, + "loss": 0.3251, + "step": 5050 + }, + { + "epoch": 0.18387964241587323, + "grad_norm": 1.8463056087493896, + "learning_rate": 1.8366160331419436e-05, + "loss": 0.2255, + "step": 5060 + }, + { + "epoch": 0.1842430409186714, + "grad_norm": 2.592672348022461, + "learning_rate": 1.8402500181699252e-05, + "loss": 0.1904, + "step": 5070 + }, + { + "epoch": 0.1846064394214696, + "grad_norm": 4.0694074630737305, + "learning_rate": 1.8438840031979068e-05, + "loss": 0.2, + "step": 5080 + }, + { + "epoch": 0.18496983792426774, + "grad_norm": 2.101837396621704, + "learning_rate": 1.8475179882258887e-05, + "loss": 0.1927, + "step": 5090 + }, + { + "epoch": 0.18533323642706592, + "grad_norm": 22.162702560424805, + "learning_rate": 1.8511519732538703e-05, + "loss": 0.3481, + "step": 5100 + }, + { + "epoch": 0.1856966349298641, + "grad_norm": 2.7928340435028076, + "learning_rate": 1.854785958281852e-05, + "loss": 0.2344, + "step": 5110 + }, + { + "epoch": 0.18606003343266225, + "grad_norm": 1.8618485927581787, + "learning_rate": 1.858419943309834e-05, + "loss": 0.2139, + "step": 5120 + }, + { + "epoch": 0.18642343193546043, + "grad_norm": 2.9611120223999023, + "learning_rate": 1.862053928337815e-05, + "loss": 0.2194, + "step": 5130 + }, + { + "epoch": 0.18678683043825858, + "grad_norm": 5.181276321411133, + "learning_rate": 1.865687913365797e-05, + "loss": 0.2596, + "step": 5140 + }, + { + "epoch": 0.18715022894105676, + "grad_norm": 10.01041030883789, + "learning_rate": 1.8693218983937787e-05, + "loss": 0.3122, + "step": 5150 + }, + { + "epoch": 0.18751362744385494, + "grad_norm": 4.952126979827881, + "learning_rate": 1.8729558834217603e-05, + "loss": 0.2183, + "step": 5160 + }, + { + "epoch": 0.1878770259466531, + "grad_norm": 2.19279146194458, + "learning_rate": 1.8765898684497422e-05, + "loss": 0.2439, + "step": 5170 + }, + { + "epoch": 0.18824042444945127, + "grad_norm": 3.5189321041107178, + "learning_rate": 1.8802238534777238e-05, + "loss": 0.2343, + "step": 5180 + }, + { + "epoch": 0.18860382295224945, + "grad_norm": 2.0936787128448486, + "learning_rate": 1.8838578385057054e-05, + "loss": 0.1831, + "step": 5190 + }, + { + "epoch": 0.1889672214550476, + "grad_norm": 12.835061073303223, + "learning_rate": 1.8874918235336873e-05, + "loss": 0.2561, + "step": 5200 + }, + { + "epoch": 0.18933061995784578, + "grad_norm": 1.6738308668136597, + "learning_rate": 1.8911258085616686e-05, + "loss": 1.0257, + "step": 5210 + }, + { + "epoch": 0.18969401846064393, + "grad_norm": 2.7661142349243164, + "learning_rate": 1.8947597935896505e-05, + "loss": 0.2398, + "step": 5220 + }, + { + "epoch": 0.1900574169634421, + "grad_norm": 4.173921585083008, + "learning_rate": 1.8983937786176324e-05, + "loss": 0.2157, + "step": 5230 + }, + { + "epoch": 0.1904208154662403, + "grad_norm": 3.7037158012390137, + "learning_rate": 1.9020277636456137e-05, + "loss": 0.2182, + "step": 5240 + }, + { + "epoch": 0.19078421396903844, + "grad_norm": 16.288227081298828, + "learning_rate": 1.9056617486735956e-05, + "loss": 0.2829, + "step": 5250 + }, + { + "epoch": 0.19114761247183662, + "grad_norm": 2.0504090785980225, + "learning_rate": 1.9092957337015772e-05, + "loss": 0.201, + "step": 5260 + }, + { + "epoch": 0.19151101097463477, + "grad_norm": 1.2266415357589722, + "learning_rate": 1.9129297187295588e-05, + "loss": 0.2072, + "step": 5270 + }, + { + "epoch": 0.19187440947743295, + "grad_norm": 4.910546779632568, + "learning_rate": 1.9165637037575408e-05, + "loss": 0.1824, + "step": 5280 + }, + { + "epoch": 0.19223780798023113, + "grad_norm": 3.093318223953247, + "learning_rate": 1.9201976887855224e-05, + "loss": 0.2471, + "step": 5290 + }, + { + "epoch": 0.19260120648302928, + "grad_norm": 6.74167013168335, + "learning_rate": 1.923831673813504e-05, + "loss": 0.2912, + "step": 5300 + }, + { + "epoch": 0.19296460498582746, + "grad_norm": 2.0540058612823486, + "learning_rate": 1.927465658841486e-05, + "loss": 0.2599, + "step": 5310 + }, + { + "epoch": 0.19332800348862564, + "grad_norm": 2.407750129699707, + "learning_rate": 1.931099643869467e-05, + "loss": 0.2478, + "step": 5320 + }, + { + "epoch": 0.1936914019914238, + "grad_norm": 5.479567527770996, + "learning_rate": 1.934733628897449e-05, + "loss": 0.5936, + "step": 5330 + }, + { + "epoch": 0.19405480049422197, + "grad_norm": 1.912705659866333, + "learning_rate": 1.9383676139254307e-05, + "loss": 0.215, + "step": 5340 + }, + { + "epoch": 0.19441819899702012, + "grad_norm": 38.24689865112305, + "learning_rate": 1.9420015989534123e-05, + "loss": 0.286, + "step": 5350 + }, + { + "epoch": 0.1947815974998183, + "grad_norm": 3.4196550846099854, + "learning_rate": 1.9456355839813942e-05, + "loss": 0.4764, + "step": 5360 + }, + { + "epoch": 0.19514499600261648, + "grad_norm": 1.705702781677246, + "learning_rate": 1.9492695690093758e-05, + "loss": 0.195, + "step": 5370 + }, + { + "epoch": 0.19550839450541463, + "grad_norm": 2.7188572883605957, + "learning_rate": 1.9529035540373574e-05, + "loss": 0.2318, + "step": 5380 + }, + { + "epoch": 0.1958717930082128, + "grad_norm": 5.217918872833252, + "learning_rate": 1.9565375390653393e-05, + "loss": 0.2288, + "step": 5390 + }, + { + "epoch": 0.196235191511011, + "grad_norm": 7.094780921936035, + "learning_rate": 1.960171524093321e-05, + "loss": 0.3607, + "step": 5400 + }, + { + "epoch": 0.196235191511011, + "eval_loss": 0.3953820765018463, + "eval_runtime": 180.5214, + "eval_samples_per_second": 41.07, + "eval_steps_per_second": 5.135, + "eval_wer": 0.23525514186650207, + "step": 5400 + }, + { + "epoch": 0.19659859001380914, + "grad_norm": 2.379298448562622, + "learning_rate": 1.9638055091213025e-05, + "loss": 0.1981, + "step": 5410 + }, + { + "epoch": 0.19696198851660732, + "grad_norm": 1.2755372524261475, + "learning_rate": 1.967439494149284e-05, + "loss": 0.3185, + "step": 5420 + }, + { + "epoch": 0.19732538701940547, + "grad_norm": 2.6385338306427, + "learning_rate": 1.9710734791772657e-05, + "loss": 0.2231, + "step": 5430 + }, + { + "epoch": 0.19768878552220365, + "grad_norm": 4.030337810516357, + "learning_rate": 1.9747074642052477e-05, + "loss": 0.2417, + "step": 5440 + }, + { + "epoch": 0.19805218402500183, + "grad_norm": 10.988908767700195, + "learning_rate": 1.9783414492332293e-05, + "loss": 0.3163, + "step": 5450 + }, + { + "epoch": 0.19841558252779998, + "grad_norm": 2.8273231983184814, + "learning_rate": 1.981975434261211e-05, + "loss": 0.2062, + "step": 5460 + }, + { + "epoch": 0.19877898103059816, + "grad_norm": 1.880952000617981, + "learning_rate": 1.9856094192891928e-05, + "loss": 0.2103, + "step": 5470 + }, + { + "epoch": 0.19914237953339634, + "grad_norm": 12.882647514343262, + "learning_rate": 1.9892434043171744e-05, + "loss": 0.2513, + "step": 5480 + }, + { + "epoch": 0.1995057780361945, + "grad_norm": 2.8202428817749023, + "learning_rate": 1.992877389345156e-05, + "loss": 0.2002, + "step": 5490 + }, + { + "epoch": 0.19986917653899267, + "grad_norm": 11.30123519897461, + "learning_rate": 1.9965113743731376e-05, + "loss": 0.3399, + "step": 5500 + }, + { + "epoch": 0.20023257504179082, + "grad_norm": 3.016954183578491, + "learning_rate": 2.0001453594011195e-05, + "loss": 0.2016, + "step": 5510 + }, + { + "epoch": 0.200595973544589, + "grad_norm": 1.3506131172180176, + "learning_rate": 2.003779344429101e-05, + "loss": 0.6008, + "step": 5520 + }, + { + "epoch": 0.20095937204738717, + "grad_norm": 3.711284637451172, + "learning_rate": 2.0074133294570827e-05, + "loss": 0.2297, + "step": 5530 + }, + { + "epoch": 0.20132277055018533, + "grad_norm": 2.8310322761535645, + "learning_rate": 2.0110473144850643e-05, + "loss": 0.19, + "step": 5540 + }, + { + "epoch": 0.2016861690529835, + "grad_norm": 14.37038516998291, + "learning_rate": 2.0146812995130462e-05, + "loss": 0.3418, + "step": 5550 + }, + { + "epoch": 0.20204956755578168, + "grad_norm": 2.037245988845825, + "learning_rate": 2.0183152845410278e-05, + "loss": 0.2054, + "step": 5560 + }, + { + "epoch": 0.20241296605857984, + "grad_norm": 2.47495698928833, + "learning_rate": 2.0219492695690094e-05, + "loss": 0.2102, + "step": 5570 + }, + { + "epoch": 0.20277636456137801, + "grad_norm": 5.948564529418945, + "learning_rate": 2.025583254596991e-05, + "loss": 0.2299, + "step": 5580 + }, + { + "epoch": 0.20313976306417617, + "grad_norm": 2.010765552520752, + "learning_rate": 2.029217239624973e-05, + "loss": 0.2214, + "step": 5590 + }, + { + "epoch": 0.20350316156697434, + "grad_norm": 109.07927703857422, + "learning_rate": 2.0328512246529546e-05, + "loss": 0.327, + "step": 5600 + }, + { + "epoch": 0.20386656006977252, + "grad_norm": 2.708141565322876, + "learning_rate": 2.036485209680936e-05, + "loss": 0.2128, + "step": 5610 + }, + { + "epoch": 0.20422995857257067, + "grad_norm": 4.145051002502441, + "learning_rate": 2.040119194708918e-05, + "loss": 1.5499, + "step": 5620 + }, + { + "epoch": 0.20459335707536885, + "grad_norm": 5.204433917999268, + "learning_rate": 2.0437531797368993e-05, + "loss": 0.2238, + "step": 5630 + }, + { + "epoch": 0.204956755578167, + "grad_norm": 3.625671625137329, + "learning_rate": 2.0473871647648813e-05, + "loss": 0.2009, + "step": 5640 + }, + { + "epoch": 0.20532015408096518, + "grad_norm": 7.134413719177246, + "learning_rate": 2.051021149792863e-05, + "loss": 0.3236, + "step": 5650 + }, + { + "epoch": 0.20568355258376336, + "grad_norm": 3.090585708618164, + "learning_rate": 2.0546551348208445e-05, + "loss": 0.2245, + "step": 5660 + }, + { + "epoch": 0.20604695108656151, + "grad_norm": 1.5290725231170654, + "learning_rate": 2.0582891198488264e-05, + "loss": 0.9725, + "step": 5670 + }, + { + "epoch": 0.2064103495893597, + "grad_norm": 12.433088302612305, + "learning_rate": 2.061923104876808e-05, + "loss": 0.2755, + "step": 5680 + }, + { + "epoch": 0.20677374809215787, + "grad_norm": 4.399518013000488, + "learning_rate": 2.0655570899047896e-05, + "loss": 0.2136, + "step": 5690 + }, + { + "epoch": 0.20713714659495602, + "grad_norm": 12.662751197814941, + "learning_rate": 2.0691910749327715e-05, + "loss": 0.3022, + "step": 5700 + }, + { + "epoch": 0.2075005450977542, + "grad_norm": 1.8056265115737915, + "learning_rate": 2.0728250599607528e-05, + "loss": 0.3538, + "step": 5710 + }, + { + "epoch": 0.20786394360055235, + "grad_norm": 1.3133045434951782, + "learning_rate": 2.0764590449887347e-05, + "loss": 0.1829, + "step": 5720 + }, + { + "epoch": 0.20822734210335053, + "grad_norm": 6.10534143447876, + "learning_rate": 2.0800930300167167e-05, + "loss": 0.2819, + "step": 5730 + }, + { + "epoch": 0.2085907406061487, + "grad_norm": 4.327618598937988, + "learning_rate": 2.083727015044698e-05, + "loss": 0.2029, + "step": 5740 + }, + { + "epoch": 0.20895413910894686, + "grad_norm": 6.878536224365234, + "learning_rate": 2.08736100007268e-05, + "loss": 0.3301, + "step": 5750 + }, + { + "epoch": 0.20931753761174504, + "grad_norm": 2.8301913738250732, + "learning_rate": 2.0909949851006614e-05, + "loss": 0.2144, + "step": 5760 + }, + { + "epoch": 0.20968093611454322, + "grad_norm": 2.248054265975952, + "learning_rate": 2.094628970128643e-05, + "loss": 0.2046, + "step": 5770 + }, + { + "epoch": 0.21004433461734137, + "grad_norm": 4.619300842285156, + "learning_rate": 2.098262955156625e-05, + "loss": 0.2487, + "step": 5780 + }, + { + "epoch": 0.21040773312013955, + "grad_norm": 2.6446404457092285, + "learning_rate": 2.1018969401846066e-05, + "loss": 0.2222, + "step": 5790 + }, + { + "epoch": 0.2107711316229377, + "grad_norm": 7.827177047729492, + "learning_rate": 2.1055309252125882e-05, + "loss": 0.2684, + "step": 5800 + }, + { + "epoch": 0.21113453012573588, + "grad_norm": 5.37054967880249, + "learning_rate": 2.10916491024057e-05, + "loss": 0.216, + "step": 5810 + }, + { + "epoch": 0.21149792862853406, + "grad_norm": 1.5430680513381958, + "learning_rate": 2.1127988952685514e-05, + "loss": 0.1723, + "step": 5820 + }, + { + "epoch": 0.2118613271313322, + "grad_norm": 4.355040550231934, + "learning_rate": 2.1164328802965333e-05, + "loss": 0.3078, + "step": 5830 + }, + { + "epoch": 0.2122247256341304, + "grad_norm": 2.70613169670105, + "learning_rate": 2.1200668653245152e-05, + "loss": 0.1857, + "step": 5840 + }, + { + "epoch": 0.21258812413692857, + "grad_norm": 17.876861572265625, + "learning_rate": 2.1237008503524965e-05, + "loss": 0.335, + "step": 5850 + }, + { + "epoch": 0.21295152263972672, + "grad_norm": 2.048499822616577, + "learning_rate": 2.1273348353804784e-05, + "loss": 0.2588, + "step": 5860 + }, + { + "epoch": 0.2133149211425249, + "grad_norm": 2.2033607959747314, + "learning_rate": 2.13096882040846e-05, + "loss": 0.1973, + "step": 5870 + }, + { + "epoch": 0.21367831964532305, + "grad_norm": 5.563814640045166, + "learning_rate": 2.1346028054364416e-05, + "loss": 0.2632, + "step": 5880 + }, + { + "epoch": 0.21404171814812123, + "grad_norm": 1.4629203081130981, + "learning_rate": 2.1382367904644236e-05, + "loss": 0.1714, + "step": 5890 + }, + { + "epoch": 0.2144051166509194, + "grad_norm": 9.641836166381836, + "learning_rate": 2.141870775492405e-05, + "loss": 0.3329, + "step": 5900 + }, + { + "epoch": 0.21476851515371756, + "grad_norm": 3.0128610134124756, + "learning_rate": 2.1455047605203867e-05, + "loss": 0.205, + "step": 5910 + }, + { + "epoch": 0.21513191365651574, + "grad_norm": 6.38659143447876, + "learning_rate": 2.1491387455483687e-05, + "loss": 0.2066, + "step": 5920 + }, + { + "epoch": 0.2154953121593139, + "grad_norm": 3.397566080093384, + "learning_rate": 2.15277273057635e-05, + "loss": 0.2682, + "step": 5930 + }, + { + "epoch": 0.21585871066211207, + "grad_norm": 1.8110759258270264, + "learning_rate": 2.156406715604332e-05, + "loss": 0.2357, + "step": 5940 + }, + { + "epoch": 0.21622210916491025, + "grad_norm": 12.391556739807129, + "learning_rate": 2.1600407006323135e-05, + "loss": 0.3043, + "step": 5950 + }, + { + "epoch": 0.2165855076677084, + "grad_norm": 1.8203914165496826, + "learning_rate": 2.163674685660295e-05, + "loss": 0.2979, + "step": 5960 + }, + { + "epoch": 0.21694890617050658, + "grad_norm": 3.362252950668335, + "learning_rate": 2.167308670688277e-05, + "loss": 0.1667, + "step": 5970 + }, + { + "epoch": 0.21731230467330476, + "grad_norm": 4.1468000411987305, + "learning_rate": 2.1709426557162586e-05, + "loss": 0.3419, + "step": 5980 + }, + { + "epoch": 0.2176757031761029, + "grad_norm": 2.479288339614868, + "learning_rate": 2.1745766407442402e-05, + "loss": 0.1938, + "step": 5990 + }, + { + "epoch": 0.2180391016789011, + "grad_norm": 26.185468673706055, + "learning_rate": 2.1782106257722218e-05, + "loss": 0.2818, + "step": 6000 + }, + { + "epoch": 0.2180391016789011, + "eval_loss": 0.4106527864933014, + "eval_runtime": 179.9044, + "eval_samples_per_second": 41.211, + "eval_steps_per_second": 5.153, + "eval_wer": 0.2305625646704304, + "step": 6000 + }, + { + "epoch": 0.21840250018169924, + "grad_norm": 2.2452592849731445, + "learning_rate": 2.1818446108002037e-05, + "loss": 0.2208, + "step": 6010 + }, + { + "epoch": 0.21876589868449742, + "grad_norm": 2.273920774459839, + "learning_rate": 2.1854785958281853e-05, + "loss": 0.2268, + "step": 6020 + }, + { + "epoch": 0.2191292971872956, + "grad_norm": 1.9621226787567139, + "learning_rate": 2.189112580856167e-05, + "loss": 0.1965, + "step": 6030 + }, + { + "epoch": 0.21949269569009375, + "grad_norm": 2.866110324859619, + "learning_rate": 2.1927465658841485e-05, + "loss": 0.223, + "step": 6040 + }, + { + "epoch": 0.21985609419289193, + "grad_norm": 15.169930458068848, + "learning_rate": 2.1963805509121305e-05, + "loss": 0.254, + "step": 6050 + }, + { + "epoch": 0.2202194926956901, + "grad_norm": 2.174626350402832, + "learning_rate": 2.200014535940112e-05, + "loss": 0.2056, + "step": 6060 + }, + { + "epoch": 0.22058289119848826, + "grad_norm": 1.9627354145050049, + "learning_rate": 2.2036485209680936e-05, + "loss": 0.2211, + "step": 6070 + }, + { + "epoch": 0.22094628970128644, + "grad_norm": 5.444493770599365, + "learning_rate": 2.2072825059960752e-05, + "loss": 0.2819, + "step": 6080 + }, + { + "epoch": 0.2213096882040846, + "grad_norm": 2.5131990909576416, + "learning_rate": 2.2109164910240572e-05, + "loss": 0.2262, + "step": 6090 + }, + { + "epoch": 0.22167308670688277, + "grad_norm": 15.716779708862305, + "learning_rate": 2.2145504760520388e-05, + "loss": 0.2833, + "step": 6100 + }, + { + "epoch": 0.22203648520968094, + "grad_norm": 1.7514111995697021, + "learning_rate": 2.2181844610800204e-05, + "loss": 0.2238, + "step": 6110 + }, + { + "epoch": 0.2223998837124791, + "grad_norm": 1.8236886262893677, + "learning_rate": 2.2218184461080023e-05, + "loss": 0.1872, + "step": 6120 + }, + { + "epoch": 0.22276328221527728, + "grad_norm": 4.081092834472656, + "learning_rate": 2.225452431135984e-05, + "loss": 0.4926, + "step": 6130 + }, + { + "epoch": 0.22312668071807545, + "grad_norm": 3.3254685401916504, + "learning_rate": 2.2290864161639655e-05, + "loss": 0.2179, + "step": 6140 + }, + { + "epoch": 0.2234900792208736, + "grad_norm": 9.953665733337402, + "learning_rate": 2.232720401191947e-05, + "loss": 0.3221, + "step": 6150 + }, + { + "epoch": 0.22385347772367178, + "grad_norm": 3.531538724899292, + "learning_rate": 2.2363543862199287e-05, + "loss": 0.3639, + "step": 6160 + }, + { + "epoch": 0.22421687622646994, + "grad_norm": 1.6166915893554688, + "learning_rate": 2.2399883712479106e-05, + "loss": 0.1906, + "step": 6170 + }, + { + "epoch": 0.22458027472926811, + "grad_norm": 3.0561792850494385, + "learning_rate": 2.2436223562758922e-05, + "loss": 0.222, + "step": 6180 + }, + { + "epoch": 0.2249436732320663, + "grad_norm": 7.607283115386963, + "learning_rate": 2.2472563413038738e-05, + "loss": 0.1769, + "step": 6190 + }, + { + "epoch": 0.22530707173486444, + "grad_norm": 38.86745834350586, + "learning_rate": 2.2508903263318557e-05, + "loss": 0.3523, + "step": 6200 + }, + { + "epoch": 0.22567047023766262, + "grad_norm": 1.2490432262420654, + "learning_rate": 2.2545243113598373e-05, + "loss": 0.2241, + "step": 6210 + }, + { + "epoch": 0.2260338687404608, + "grad_norm": 3.8632936477661133, + "learning_rate": 2.258158296387819e-05, + "loss": 0.2761, + "step": 6220 + }, + { + "epoch": 0.22639726724325895, + "grad_norm": 6.057976722717285, + "learning_rate": 2.261792281415801e-05, + "loss": 0.2534, + "step": 6230 + }, + { + "epoch": 0.22676066574605713, + "grad_norm": 5.2983551025390625, + "learning_rate": 2.265426266443782e-05, + "loss": 0.1972, + "step": 6240 + }, + { + "epoch": 0.22712406424885528, + "grad_norm": 7.395950794219971, + "learning_rate": 2.269060251471764e-05, + "loss": 0.3446, + "step": 6250 + }, + { + "epoch": 0.22748746275165346, + "grad_norm": 2.7409260272979736, + "learning_rate": 2.2726942364997457e-05, + "loss": 0.1894, + "step": 6260 + }, + { + "epoch": 0.22785086125445164, + "grad_norm": 1.7545270919799805, + "learning_rate": 2.2763282215277273e-05, + "loss": 0.2376, + "step": 6270 + }, + { + "epoch": 0.2282142597572498, + "grad_norm": 112.10614013671875, + "learning_rate": 2.2799622065557092e-05, + "loss": 2.0322, + "step": 6280 + }, + { + "epoch": 0.22857765826004797, + "grad_norm": 3.6547396183013916, + "learning_rate": 2.2835961915836908e-05, + "loss": 0.2942, + "step": 6290 + }, + { + "epoch": 0.22894105676284612, + "grad_norm": 50.726261138916016, + "learning_rate": 2.2872301766116724e-05, + "loss": 0.3279, + "step": 6300 + }, + { + "epoch": 0.2293044552656443, + "grad_norm": 1.2374241352081299, + "learning_rate": 2.2908641616396543e-05, + "loss": 0.1912, + "step": 6310 + }, + { + "epoch": 0.22966785376844248, + "grad_norm": 1.6278152465820312, + "learning_rate": 2.2944981466676356e-05, + "loss": 0.1913, + "step": 6320 + }, + { + "epoch": 0.23003125227124063, + "grad_norm": 7.58544397354126, + "learning_rate": 2.2981321316956175e-05, + "loss": 0.2393, + "step": 6330 + }, + { + "epoch": 0.2303946507740388, + "grad_norm": 1.7094483375549316, + "learning_rate": 2.3017661167235995e-05, + "loss": 0.2333, + "step": 6340 + }, + { + "epoch": 0.230758049276837, + "grad_norm": 24.214885711669922, + "learning_rate": 2.3054001017515807e-05, + "loss": 0.3019, + "step": 6350 + }, + { + "epoch": 0.23112144777963514, + "grad_norm": 1.962106704711914, + "learning_rate": 2.3090340867795626e-05, + "loss": 0.8948, + "step": 6360 + }, + { + "epoch": 0.23148484628243332, + "grad_norm": 1.3703123331069946, + "learning_rate": 2.3126680718075442e-05, + "loss": 0.1936, + "step": 6370 + }, + { + "epoch": 0.23184824478523147, + "grad_norm": 7.507201194763184, + "learning_rate": 2.316302056835526e-05, + "loss": 0.2185, + "step": 6380 + }, + { + "epoch": 0.23221164328802965, + "grad_norm": 2.6310977935791016, + "learning_rate": 2.3199360418635078e-05, + "loss": 0.1961, + "step": 6390 + }, + { + "epoch": 0.23257504179082783, + "grad_norm": 4.186092376708984, + "learning_rate": 2.3235700268914894e-05, + "loss": 0.2734, + "step": 6400 + }, + { + "epoch": 0.23293844029362598, + "grad_norm": 1.817269206047058, + "learning_rate": 2.327204011919471e-05, + "loss": 0.1966, + "step": 6410 + }, + { + "epoch": 0.23330183879642416, + "grad_norm": 1.9503989219665527, + "learning_rate": 2.330837996947453e-05, + "loss": 2.7438, + "step": 6420 + }, + { + "epoch": 0.23366523729922234, + "grad_norm": 3.1107656955718994, + "learning_rate": 2.334471981975434e-05, + "loss": 0.2534, + "step": 6430 + }, + { + "epoch": 0.2340286358020205, + "grad_norm": 5.268273830413818, + "learning_rate": 2.338105967003416e-05, + "loss": 0.1963, + "step": 6440 + }, + { + "epoch": 0.23439203430481867, + "grad_norm": 9.586852073669434, + "learning_rate": 2.3417399520313977e-05, + "loss": 0.2342, + "step": 6450 + }, + { + "epoch": 0.23475543280761682, + "grad_norm": 3.0218632221221924, + "learning_rate": 2.3453739370593793e-05, + "loss": 0.231, + "step": 6460 + }, + { + "epoch": 0.235118831310415, + "grad_norm": 1.9708057641983032, + "learning_rate": 2.3490079220873612e-05, + "loss": 0.2156, + "step": 6470 + }, + { + "epoch": 0.23548222981321318, + "grad_norm": 3.6212944984436035, + "learning_rate": 2.3526419071153428e-05, + "loss": 0.2172, + "step": 6480 + }, + { + "epoch": 0.23584562831601133, + "grad_norm": 2.5205702781677246, + "learning_rate": 2.3562758921433244e-05, + "loss": 0.4643, + "step": 6490 + }, + { + "epoch": 0.2362090268188095, + "grad_norm": 4.1570305824279785, + "learning_rate": 2.3599098771713063e-05, + "loss": 0.2722, + "step": 6500 + }, + { + "epoch": 0.2365724253216077, + "grad_norm": 1.8376798629760742, + "learning_rate": 2.363543862199288e-05, + "loss": 0.2027, + "step": 6510 + }, + { + "epoch": 0.23693582382440584, + "grad_norm": 2.0464930534362793, + "learning_rate": 2.3671778472272695e-05, + "loss": 0.183, + "step": 6520 + }, + { + "epoch": 0.23729922232720402, + "grad_norm": 4.8776469230651855, + "learning_rate": 2.370811832255251e-05, + "loss": 0.2169, + "step": 6530 + }, + { + "epoch": 0.23766262083000217, + "grad_norm": 1.5764952898025513, + "learning_rate": 2.3744458172832327e-05, + "loss": 0.1917, + "step": 6540 + }, + { + "epoch": 0.23802601933280035, + "grad_norm": 16.132232666015625, + "learning_rate": 2.3780798023112147e-05, + "loss": 0.2732, + "step": 6550 + }, + { + "epoch": 0.23838941783559853, + "grad_norm": 8.105748176574707, + "learning_rate": 2.3817137873391963e-05, + "loss": 0.2055, + "step": 6560 + }, + { + "epoch": 0.23875281633839668, + "grad_norm": 2.087362051010132, + "learning_rate": 2.385347772367178e-05, + "loss": 0.186, + "step": 6570 + }, + { + "epoch": 0.23911621484119486, + "grad_norm": 2.8280205726623535, + "learning_rate": 2.3889817573951598e-05, + "loss": 0.201, + "step": 6580 + }, + { + "epoch": 0.23947961334399304, + "grad_norm": 1.2525794506072998, + "learning_rate": 2.3926157424231414e-05, + "loss": 0.1893, + "step": 6590 + }, + { + "epoch": 0.2398430118467912, + "grad_norm": 23.419832229614258, + "learning_rate": 2.396249727451123e-05, + "loss": 0.2554, + "step": 6600 + }, + { + "epoch": 0.2398430118467912, + "eval_loss": 0.4065987765789032, + "eval_runtime": 179.638, + "eval_samples_per_second": 41.272, + "eval_steps_per_second": 5.16, + "eval_wer": 0.24529380797647357, + "step": 6600 + }, + { + "epoch": 0.24020641034958937, + "grad_norm": 1.3757339715957642, + "learning_rate": 2.3998837124791046e-05, + "loss": 0.1962, + "step": 6610 + }, + { + "epoch": 0.24056980885238752, + "grad_norm": 4.00860071182251, + "learning_rate": 2.4035176975070865e-05, + "loss": 0.1848, + "step": 6620 + }, + { + "epoch": 0.2409332073551857, + "grad_norm": 5.544015407562256, + "learning_rate": 2.407151682535068e-05, + "loss": 0.245, + "step": 6630 + }, + { + "epoch": 0.24129660585798388, + "grad_norm": 1.0618844032287598, + "learning_rate": 2.4107856675630497e-05, + "loss": 0.191, + "step": 6640 + }, + { + "epoch": 0.24166000436078203, + "grad_norm": 125.15505981445312, + "learning_rate": 2.4144196525910313e-05, + "loss": 0.3055, + "step": 6650 + }, + { + "epoch": 0.2420234028635802, + "grad_norm": 5.015167713165283, + "learning_rate": 2.418053637619013e-05, + "loss": 0.2701, + "step": 6660 + }, + { + "epoch": 0.24238680136637836, + "grad_norm": 3.944514274597168, + "learning_rate": 2.421687622646995e-05, + "loss": 0.2107, + "step": 6670 + }, + { + "epoch": 0.24275019986917654, + "grad_norm": 3.1539418697357178, + "learning_rate": 2.4253216076749764e-05, + "loss": 0.232, + "step": 6680 + }, + { + "epoch": 0.24311359837197472, + "grad_norm": 2.980459213256836, + "learning_rate": 2.428955592702958e-05, + "loss": 0.2391, + "step": 6690 + }, + { + "epoch": 0.24347699687477287, + "grad_norm": 35.02157211303711, + "learning_rate": 2.43258957773094e-05, + "loss": 0.3172, + "step": 6700 + }, + { + "epoch": 0.24384039537757105, + "grad_norm": 1.606570839881897, + "learning_rate": 2.4362235627589216e-05, + "loss": 1.5707, + "step": 6710 + }, + { + "epoch": 0.24420379388036922, + "grad_norm": 3.940394401550293, + "learning_rate": 2.439857547786903e-05, + "loss": 0.1969, + "step": 6720 + }, + { + "epoch": 0.24456719238316738, + "grad_norm": 3.8990156650543213, + "learning_rate": 2.443491532814885e-05, + "loss": 0.2475, + "step": 6730 + }, + { + "epoch": 0.24493059088596555, + "grad_norm": 2.523500442504883, + "learning_rate": 2.4471255178428664e-05, + "loss": 0.194, + "step": 6740 + }, + { + "epoch": 0.2452939893887637, + "grad_norm": 4.920846939086914, + "learning_rate": 2.4507595028708483e-05, + "loss": 0.2417, + "step": 6750 + }, + { + "epoch": 0.24565738789156188, + "grad_norm": 2.2269723415374756, + "learning_rate": 2.4543934878988302e-05, + "loss": 0.2148, + "step": 6760 + }, + { + "epoch": 0.24602078639436006, + "grad_norm": 1.669722557067871, + "learning_rate": 2.4580274729268115e-05, + "loss": 0.1979, + "step": 6770 + }, + { + "epoch": 0.24638418489715821, + "grad_norm": 4.581501007080078, + "learning_rate": 2.4616614579547934e-05, + "loss": 0.2412, + "step": 6780 + }, + { + "epoch": 0.2467475833999564, + "grad_norm": 2.6605944633483887, + "learning_rate": 2.465295442982775e-05, + "loss": 0.1992, + "step": 6790 + }, + { + "epoch": 0.24711098190275457, + "grad_norm": 7.089646816253662, + "learning_rate": 2.4689294280107566e-05, + "loss": 0.2789, + "step": 6800 + }, + { + "epoch": 0.24747438040555272, + "grad_norm": 1.9901385307312012, + "learning_rate": 2.4725634130387385e-05, + "loss": 1.907, + "step": 6810 + }, + { + "epoch": 0.2478377789083509, + "grad_norm": 2.5120224952697754, + "learning_rate": 2.4761973980667198e-05, + "loss": 0.1908, + "step": 6820 + }, + { + "epoch": 0.24820117741114905, + "grad_norm": 1.553806185722351, + "learning_rate": 2.4794679845919035e-05, + "loss": 1.6707, + "step": 6830 + }, + { + "epoch": 0.24856457591394723, + "grad_norm": 2.130095958709717, + "learning_rate": 2.4831019696198855e-05, + "loss": 0.222, + "step": 6840 + }, + { + "epoch": 0.2489279744167454, + "grad_norm": 15.832701683044434, + "learning_rate": 2.486735954647867e-05, + "loss": 0.4634, + "step": 6850 + }, + { + "epoch": 0.24929137291954356, + "grad_norm": 1.87086820602417, + "learning_rate": 2.4903699396758487e-05, + "loss": 0.1887, + "step": 6860 + }, + { + "epoch": 0.24965477142234174, + "grad_norm": 2.32084584236145, + "learning_rate": 2.4940039247038303e-05, + "loss": 0.1881, + "step": 6870 + }, + { + "epoch": 0.2500181699251399, + "grad_norm": 3.3228461742401123, + "learning_rate": 2.497637909731812e-05, + "loss": 0.264, + "step": 6880 + }, + { + "epoch": 0.2503815684279381, + "grad_norm": 1.8676607608795166, + "learning_rate": 2.5012718947597935e-05, + "loss": 0.2102, + "step": 6890 + }, + { + "epoch": 0.2507449669307362, + "grad_norm": 17.540319442749023, + "learning_rate": 2.5049058797877757e-05, + "loss": 0.2567, + "step": 6900 + }, + { + "epoch": 0.25110836543353443, + "grad_norm": 1.6276856660842896, + "learning_rate": 2.508539864815757e-05, + "loss": 0.1917, + "step": 6910 + }, + { + "epoch": 0.2514717639363326, + "grad_norm": 2.347691059112549, + "learning_rate": 2.5121738498437386e-05, + "loss": 0.1998, + "step": 6920 + }, + { + "epoch": 0.25183516243913073, + "grad_norm": 3.5337650775909424, + "learning_rate": 2.5158078348717205e-05, + "loss": 0.2418, + "step": 6930 + }, + { + "epoch": 0.25219856094192894, + "grad_norm": 3.7415404319763184, + "learning_rate": 2.519441819899702e-05, + "loss": 0.2074, + "step": 6940 + }, + { + "epoch": 0.2525619594447271, + "grad_norm": 16.603042602539062, + "learning_rate": 2.5230758049276837e-05, + "loss": 0.3104, + "step": 6950 + }, + { + "epoch": 0.25292535794752524, + "grad_norm": 1.4864579439163208, + "learning_rate": 2.5267097899556656e-05, + "loss": 0.1771, + "step": 6960 + }, + { + "epoch": 0.25328875645032345, + "grad_norm": 1.7935876846313477, + "learning_rate": 2.5303437749836472e-05, + "loss": 0.1984, + "step": 6970 + }, + { + "epoch": 0.2536521549531216, + "grad_norm": 3.187351942062378, + "learning_rate": 2.533977760011629e-05, + "loss": 0.1828, + "step": 6980 + }, + { + "epoch": 0.25401555345591975, + "grad_norm": 1.7930549383163452, + "learning_rate": 2.5376117450396104e-05, + "loss": 0.2132, + "step": 6990 + }, + { + "epoch": 0.2543789519587179, + "grad_norm": 4.86196231842041, + "learning_rate": 2.5412457300675924e-05, + "loss": 0.2426, + "step": 7000 + }, + { + "epoch": 0.2547423504615161, + "grad_norm": 2.784335136413574, + "learning_rate": 2.544879715095574e-05, + "loss": 1.6557, + "step": 7010 + }, + { + "epoch": 0.25510574896431426, + "grad_norm": 1.460509181022644, + "learning_rate": 2.5485137001235552e-05, + "loss": 0.1812, + "step": 7020 + }, + { + "epoch": 0.2554691474671124, + "grad_norm": 2.5204946994781494, + "learning_rate": 2.5521476851515375e-05, + "loss": 0.3731, + "step": 7030 + }, + { + "epoch": 0.2558325459699106, + "grad_norm": 1.6122281551361084, + "learning_rate": 2.555781670179519e-05, + "loss": 0.2256, + "step": 7040 + }, + { + "epoch": 0.25619594447270877, + "grad_norm": 8.13974666595459, + "learning_rate": 2.5594156552075004e-05, + "loss": 0.2756, + "step": 7050 + }, + { + "epoch": 0.2565593429755069, + "grad_norm": 2.1560494899749756, + "learning_rate": 2.5630496402354826e-05, + "loss": 0.1869, + "step": 7060 + }, + { + "epoch": 0.25692274147830513, + "grad_norm": 2.938570737838745, + "learning_rate": 2.5666836252634642e-05, + "loss": 0.187, + "step": 7070 + }, + { + "epoch": 0.2572861399811033, + "grad_norm": 1.6697754859924316, + "learning_rate": 2.5703176102914455e-05, + "loss": 0.1841, + "step": 7080 + }, + { + "epoch": 0.25764953848390143, + "grad_norm": 2.500377655029297, + "learning_rate": 2.5739515953194278e-05, + "loss": 0.4097, + "step": 7090 + }, + { + "epoch": 0.25801293698669964, + "grad_norm": 6.614553928375244, + "learning_rate": 2.577585580347409e-05, + "loss": 0.2779, + "step": 7100 + }, + { + "epoch": 0.2583763354894978, + "grad_norm": 2.1538803577423096, + "learning_rate": 2.5812195653753906e-05, + "loss": 0.2035, + "step": 7110 + }, + { + "epoch": 0.25873973399229594, + "grad_norm": 2.64719820022583, + "learning_rate": 2.584853550403373e-05, + "loss": 0.1815, + "step": 7120 + }, + { + "epoch": 0.25910313249509415, + "grad_norm": 4.064308166503906, + "learning_rate": 2.588487535431354e-05, + "loss": 0.2115, + "step": 7130 + }, + { + "epoch": 0.2594665309978923, + "grad_norm": 4.535513877868652, + "learning_rate": 2.5921215204593357e-05, + "loss": 0.1733, + "step": 7140 + }, + { + "epoch": 0.25982992950069045, + "grad_norm": 14.761083602905273, + "learning_rate": 2.5957555054873173e-05, + "loss": 0.3061, + "step": 7150 + }, + { + "epoch": 0.2601933280034886, + "grad_norm": 2.902010202407837, + "learning_rate": 2.5993894905152993e-05, + "loss": 0.2539, + "step": 7160 + }, + { + "epoch": 0.2605567265062868, + "grad_norm": 2.6499462127685547, + "learning_rate": 2.603023475543281e-05, + "loss": 0.209, + "step": 7170 + }, + { + "epoch": 0.26092012500908496, + "grad_norm": 2.0298879146575928, + "learning_rate": 2.6066574605712625e-05, + "loss": 0.1966, + "step": 7180 + }, + { + "epoch": 0.2612835235118831, + "grad_norm": 5.285839080810547, + "learning_rate": 2.6102914455992444e-05, + "loss": 0.2416, + "step": 7190 + }, + { + "epoch": 0.2616469220146813, + "grad_norm": 14.89932918548584, + "learning_rate": 2.613925430627226e-05, + "loss": 0.2649, + "step": 7200 + }, + { + "epoch": 0.2616469220146813, + "eval_loss": 0.43822312355041504, + "eval_runtime": 180.398, + "eval_samples_per_second": 41.098, + "eval_steps_per_second": 5.139, + "eval_wer": 0.23023580881151634, + "step": 7200 + }, + { + "epoch": 0.26201032051747947, + "grad_norm": 2.9772818088531494, + "learning_rate": 2.6175594156552076e-05, + "loss": 0.2158, + "step": 7210 + }, + { + "epoch": 0.2623737190202776, + "grad_norm": 1.4703949689865112, + "learning_rate": 2.6211934006831895e-05, + "loss": 0.1925, + "step": 7220 + }, + { + "epoch": 0.2627371175230758, + "grad_norm": 2.6034176349639893, + "learning_rate": 2.624827385711171e-05, + "loss": 0.2065, + "step": 7230 + }, + { + "epoch": 0.263100516025874, + "grad_norm": 2.8392562866210938, + "learning_rate": 2.6284613707391527e-05, + "loss": 0.2097, + "step": 7240 + }, + { + "epoch": 0.2634639145286721, + "grad_norm": 8.892645835876465, + "learning_rate": 2.6320953557671347e-05, + "loss": 0.2835, + "step": 7250 + }, + { + "epoch": 0.26382731303147033, + "grad_norm": 1.616268277168274, + "learning_rate": 2.6357293407951162e-05, + "loss": 0.1875, + "step": 7260 + }, + { + "epoch": 0.2641907115342685, + "grad_norm": 2.1791138648986816, + "learning_rate": 2.6393633258230975e-05, + "loss": 0.1722, + "step": 7270 + }, + { + "epoch": 0.26455411003706664, + "grad_norm": 2.8691608905792236, + "learning_rate": 2.642997310851079e-05, + "loss": 0.2377, + "step": 7280 + }, + { + "epoch": 0.26491750853986484, + "grad_norm": 1.5673551559448242, + "learning_rate": 2.6466312958790614e-05, + "loss": 0.4404, + "step": 7290 + }, + { + "epoch": 0.265280907042663, + "grad_norm": 7.296738147735596, + "learning_rate": 2.6502652809070426e-05, + "loss": 0.3198, + "step": 7300 + }, + { + "epoch": 0.26564430554546115, + "grad_norm": 6.389322757720947, + "learning_rate": 2.6538992659350242e-05, + "loss": 0.2041, + "step": 7310 + }, + { + "epoch": 0.2660077040482593, + "grad_norm": 11.64201831817627, + "learning_rate": 2.657533250963006e-05, + "loss": 0.2014, + "step": 7320 + }, + { + "epoch": 0.2663711025510575, + "grad_norm": 4.454049587249756, + "learning_rate": 2.6611672359909878e-05, + "loss": 0.2295, + "step": 7330 + }, + { + "epoch": 0.26673450105385565, + "grad_norm": 2.091968297958374, + "learning_rate": 2.6648012210189694e-05, + "loss": 0.1784, + "step": 7340 + }, + { + "epoch": 0.2670978995566538, + "grad_norm": 6.904966354370117, + "learning_rate": 2.6684352060469513e-05, + "loss": 0.3303, + "step": 7350 + }, + { + "epoch": 0.267461298059452, + "grad_norm": 1.6893994808197021, + "learning_rate": 2.672069191074933e-05, + "loss": 0.2534, + "step": 7360 + }, + { + "epoch": 0.26782469656225016, + "grad_norm": 1.3456122875213623, + "learning_rate": 2.6757031761029145e-05, + "loss": 0.1829, + "step": 7370 + }, + { + "epoch": 0.2681880950650483, + "grad_norm": 7.959611892700195, + "learning_rate": 2.6793371611308964e-05, + "loss": 0.2425, + "step": 7380 + }, + { + "epoch": 0.2685514935678465, + "grad_norm": 1.5833840370178223, + "learning_rate": 2.682971146158878e-05, + "loss": 0.1988, + "step": 7390 + }, + { + "epoch": 0.2689148920706447, + "grad_norm": 19.886600494384766, + "learning_rate": 2.6866051311868596e-05, + "loss": 0.3563, + "step": 7400 + }, + { + "epoch": 0.2692782905734428, + "grad_norm": 2.55553936958313, + "learning_rate": 2.6902391162148415e-05, + "loss": 0.1857, + "step": 7410 + }, + { + "epoch": 0.26964168907624103, + "grad_norm": 2.125661849975586, + "learning_rate": 2.693873101242823e-05, + "loss": 0.7398, + "step": 7420 + }, + { + "epoch": 0.2700050875790392, + "grad_norm": 2.577770233154297, + "learning_rate": 2.6975070862708047e-05, + "loss": 0.5703, + "step": 7430 + }, + { + "epoch": 0.27036848608183733, + "grad_norm": 2.3848683834075928, + "learning_rate": 2.701141071298786e-05, + "loss": 0.173, + "step": 7440 + }, + { + "epoch": 0.2707318845846355, + "grad_norm": 22.96078109741211, + "learning_rate": 2.7047750563267683e-05, + "loss": 0.293, + "step": 7450 + }, + { + "epoch": 0.2710952830874337, + "grad_norm": 3.206329822540283, + "learning_rate": 2.70840904135475e-05, + "loss": 0.4585, + "step": 7460 + }, + { + "epoch": 0.27145868159023184, + "grad_norm": 2.251904010772705, + "learning_rate": 2.712043026382731e-05, + "loss": 0.2196, + "step": 7470 + }, + { + "epoch": 0.27182208009303, + "grad_norm": 3.7445387840270996, + "learning_rate": 2.7156770114107134e-05, + "loss": 0.2195, + "step": 7480 + }, + { + "epoch": 0.2721854785958282, + "grad_norm": 1.5370314121246338, + "learning_rate": 2.7193109964386947e-05, + "loss": 0.2007, + "step": 7490 + }, + { + "epoch": 0.27254887709862635, + "grad_norm": 18.44324493408203, + "learning_rate": 2.7229449814666763e-05, + "loss": 0.3091, + "step": 7500 + }, + { + "epoch": 0.2729122756014245, + "grad_norm": 1.5792795419692993, + "learning_rate": 2.7265789664946585e-05, + "loss": 0.1601, + "step": 7510 + }, + { + "epoch": 0.2732756741042227, + "grad_norm": 9.128384590148926, + "learning_rate": 2.7302129515226398e-05, + "loss": 0.178, + "step": 7520 + }, + { + "epoch": 0.27363907260702086, + "grad_norm": 2.2285592555999756, + "learning_rate": 2.7338469365506214e-05, + "loss": 2.4074, + "step": 7530 + }, + { + "epoch": 0.274002471109819, + "grad_norm": 2.2741541862487793, + "learning_rate": 2.7374809215786033e-05, + "loss": 0.246, + "step": 7540 + }, + { + "epoch": 0.2743658696126172, + "grad_norm": 17.185470581054688, + "learning_rate": 2.741114906606585e-05, + "loss": 0.2577, + "step": 7550 + }, + { + "epoch": 0.27472926811541537, + "grad_norm": 1.1907752752304077, + "learning_rate": 2.7447488916345665e-05, + "loss": 0.2073, + "step": 7560 + }, + { + "epoch": 0.2750926666182135, + "grad_norm": 3.535682201385498, + "learning_rate": 2.748382876662548e-05, + "loss": 0.2012, + "step": 7570 + }, + { + "epoch": 0.27545606512101173, + "grad_norm": 3.585460662841797, + "learning_rate": 2.75201686169053e-05, + "loss": 0.2147, + "step": 7580 + }, + { + "epoch": 0.2758194636238099, + "grad_norm": 1.9034504890441895, + "learning_rate": 2.7556508467185116e-05, + "loss": 0.1626, + "step": 7590 + }, + { + "epoch": 0.27618286212660803, + "grad_norm": 39.66155242919922, + "learning_rate": 2.7592848317464932e-05, + "loss": 0.2617, + "step": 7600 + }, + { + "epoch": 0.2765462606294062, + "grad_norm": 1.5698285102844238, + "learning_rate": 2.762918816774475e-05, + "loss": 0.3136, + "step": 7610 + }, + { + "epoch": 0.2769096591322044, + "grad_norm": 2.4866106510162354, + "learning_rate": 2.7665528018024568e-05, + "loss": 0.1971, + "step": 7620 + }, + { + "epoch": 0.27727305763500254, + "grad_norm": 9.244050025939941, + "learning_rate": 2.7701867868304384e-05, + "loss": 0.2025, + "step": 7630 + }, + { + "epoch": 0.2776364561378007, + "grad_norm": 2.1344380378723145, + "learning_rate": 2.7738207718584203e-05, + "loss": 0.2055, + "step": 7640 + }, + { + "epoch": 0.2779998546405989, + "grad_norm": 13.503227233886719, + "learning_rate": 2.777454756886402e-05, + "loss": 0.2671, + "step": 7650 + }, + { + "epoch": 0.27836325314339705, + "grad_norm": 2.238834857940674, + "learning_rate": 2.781088741914383e-05, + "loss": 0.1714, + "step": 7660 + }, + { + "epoch": 0.2787266516461952, + "grad_norm": 0.897280216217041, + "learning_rate": 2.7847227269423654e-05, + "loss": 0.1615, + "step": 7670 + }, + { + "epoch": 0.2790900501489934, + "grad_norm": 5.808285713195801, + "learning_rate": 2.788356711970347e-05, + "loss": 0.2052, + "step": 7680 + }, + { + "epoch": 0.27945344865179156, + "grad_norm": 1.8924663066864014, + "learning_rate": 2.7919906969983283e-05, + "loss": 0.1769, + "step": 7690 + }, + { + "epoch": 0.2798168471545897, + "grad_norm": 11.939653396606445, + "learning_rate": 2.7956246820263105e-05, + "loss": 0.2859, + "step": 7700 + }, + { + "epoch": 0.2801802456573879, + "grad_norm": 2.5077621936798096, + "learning_rate": 2.7992586670542918e-05, + "loss": 0.1767, + "step": 7710 + }, + { + "epoch": 0.28054364416018607, + "grad_norm": 2.0336718559265137, + "learning_rate": 2.8028926520822734e-05, + "loss": 0.6757, + "step": 7720 + }, + { + "epoch": 0.2809070426629842, + "grad_norm": 3.9547739028930664, + "learning_rate": 2.806526637110255e-05, + "loss": 0.2322, + "step": 7730 + }, + { + "epoch": 0.28127044116578237, + "grad_norm": 1.8082466125488281, + "learning_rate": 2.810160622138237e-05, + "loss": 0.1758, + "step": 7740 + }, + { + "epoch": 0.2816338396685806, + "grad_norm": 16.173986434936523, + "learning_rate": 2.8137946071662185e-05, + "loss": 0.2642, + "step": 7750 + }, + { + "epoch": 0.28199723817137873, + "grad_norm": 3.341475486755371, + "learning_rate": 2.8174285921942e-05, + "loss": 3.4407, + "step": 7760 + }, + { + "epoch": 0.2823606366741769, + "grad_norm": 1.7220288515090942, + "learning_rate": 2.821062577222182e-05, + "loss": 0.1965, + "step": 7770 + }, + { + "epoch": 0.2827240351769751, + "grad_norm": 3.8534610271453857, + "learning_rate": 2.8246965622501637e-05, + "loss": 0.1966, + "step": 7780 + }, + { + "epoch": 0.28308743367977324, + "grad_norm": 1.962780475616455, + "learning_rate": 2.8283305472781453e-05, + "loss": 0.1859, + "step": 7790 + }, + { + "epoch": 0.2834508321825714, + "grad_norm": 40.28166961669922, + "learning_rate": 2.8319645323061272e-05, + "loss": 0.6588, + "step": 7800 + }, + { + "epoch": 0.2834508321825714, + "eval_loss": 0.42970865964889526, + "eval_runtime": 180.6321, + "eval_samples_per_second": 41.045, + "eval_steps_per_second": 5.132, + "eval_wer": 0.2413455080145951, + "step": 7800 + }, + { + "epoch": 0.2838142306853696, + "grad_norm": 1.748349666595459, + "learning_rate": 2.8355985173341088e-05, + "loss": 0.1786, + "step": 7810 + }, + { + "epoch": 0.28417762918816775, + "grad_norm": 2.1137237548828125, + "learning_rate": 2.8392325023620904e-05, + "loss": 0.1803, + "step": 7820 + }, + { + "epoch": 0.2845410276909659, + "grad_norm": 1.59931218624115, + "learning_rate": 2.8428664873900723e-05, + "loss": 0.2107, + "step": 7830 + }, + { + "epoch": 0.2849044261937641, + "grad_norm": 2.263493061065674, + "learning_rate": 2.846500472418054e-05, + "loss": 0.1967, + "step": 7840 + }, + { + "epoch": 0.28526782469656226, + "grad_norm": 20.798656463623047, + "learning_rate": 2.8501344574460355e-05, + "loss": 0.268, + "step": 7850 + }, + { + "epoch": 0.2856312231993604, + "grad_norm": 3.0182480812072754, + "learning_rate": 2.8537684424740168e-05, + "loss": 0.1901, + "step": 7860 + }, + { + "epoch": 0.2859946217021586, + "grad_norm": 6.6378493309021, + "learning_rate": 2.857402427501999e-05, + "loss": 0.1804, + "step": 7870 + }, + { + "epoch": 0.28635802020495676, + "grad_norm": 2.5524067878723145, + "learning_rate": 2.8610364125299803e-05, + "loss": 0.233, + "step": 7880 + }, + { + "epoch": 0.2867214187077549, + "grad_norm": 2.6409335136413574, + "learning_rate": 2.864670397557962e-05, + "loss": 0.1717, + "step": 7890 + }, + { + "epoch": 0.28708481721055307, + "grad_norm": 6.834221363067627, + "learning_rate": 2.868304382585944e-05, + "loss": 0.2956, + "step": 7900 + }, + { + "epoch": 0.2874482157133513, + "grad_norm": 2.760669708251953, + "learning_rate": 2.8719383676139254e-05, + "loss": 0.1789, + "step": 7910 + }, + { + "epoch": 0.2878116142161494, + "grad_norm": 1.7543925046920776, + "learning_rate": 2.875572352641907e-05, + "loss": 0.2041, + "step": 7920 + }, + { + "epoch": 0.2881750127189476, + "grad_norm": 4.784151077270508, + "learning_rate": 2.879206337669889e-05, + "loss": 0.2259, + "step": 7930 + }, + { + "epoch": 0.2885384112217458, + "grad_norm": 2.1769356727600098, + "learning_rate": 2.8828403226978706e-05, + "loss": 0.2023, + "step": 7940 + }, + { + "epoch": 0.28890180972454393, + "grad_norm": 9.373051643371582, + "learning_rate": 2.886474307725852e-05, + "loss": 0.3511, + "step": 7950 + }, + { + "epoch": 0.2892652082273421, + "grad_norm": 1.895190715789795, + "learning_rate": 2.890108292753834e-05, + "loss": 0.1976, + "step": 7960 + }, + { + "epoch": 0.2896286067301403, + "grad_norm": 3.4400076866149902, + "learning_rate": 2.8937422777818157e-05, + "loss": 0.1902, + "step": 7970 + }, + { + "epoch": 0.28999200523293844, + "grad_norm": 9.663911819458008, + "learning_rate": 2.8973762628097973e-05, + "loss": 0.2551, + "step": 7980 + }, + { + "epoch": 0.2903554037357366, + "grad_norm": 5.1054463386535645, + "learning_rate": 2.9010102478377792e-05, + "loss": 0.2001, + "step": 7990 + }, + { + "epoch": 0.2907188022385348, + "grad_norm": 9.06143569946289, + "learning_rate": 2.9046442328657608e-05, + "loss": 0.2266, + "step": 8000 + }, + { + "epoch": 0.29108220074133295, + "grad_norm": 1.604077696800232, + "learning_rate": 2.9082782178937424e-05, + "loss": 0.1883, + "step": 8010 + }, + { + "epoch": 0.2914455992441311, + "grad_norm": 2.245687246322632, + "learning_rate": 2.911912202921724e-05, + "loss": 0.2093, + "step": 8020 + }, + { + "epoch": 0.29180899774692926, + "grad_norm": 3.8099372386932373, + "learning_rate": 2.915546187949706e-05, + "loss": 0.2283, + "step": 8030 + }, + { + "epoch": 0.29217239624972746, + "grad_norm": 2.135115623474121, + "learning_rate": 2.9191801729776875e-05, + "loss": 0.2369, + "step": 8040 + }, + { + "epoch": 0.2925357947525256, + "grad_norm": 5.596993446350098, + "learning_rate": 2.9228141580056688e-05, + "loss": 0.2709, + "step": 8050 + }, + { + "epoch": 0.29289919325532376, + "grad_norm": 1.3212496042251587, + "learning_rate": 2.926448143033651e-05, + "loss": 0.1968, + "step": 8060 + }, + { + "epoch": 0.29326259175812197, + "grad_norm": 1.9241231679916382, + "learning_rate": 2.9300821280616327e-05, + "loss": 0.3883, + "step": 8070 + }, + { + "epoch": 0.2936259902609201, + "grad_norm": 4.008016109466553, + "learning_rate": 2.933716113089614e-05, + "loss": 0.2074, + "step": 8080 + }, + { + "epoch": 0.2939893887637183, + "grad_norm": 1.5871399641036987, + "learning_rate": 2.9373500981175962e-05, + "loss": 0.1698, + "step": 8090 + }, + { + "epoch": 0.2943527872665165, + "grad_norm": 19.480670928955078, + "learning_rate": 2.9409840831455774e-05, + "loss": 0.4023, + "step": 8100 + }, + { + "epoch": 0.29471618576931463, + "grad_norm": 3.8420443534851074, + "learning_rate": 2.944618068173559e-05, + "loss": 0.181, + "step": 8110 + }, + { + "epoch": 0.2950795842721128, + "grad_norm": 1.9951499700546265, + "learning_rate": 2.9482520532015413e-05, + "loss": 0.2872, + "step": 8120 + }, + { + "epoch": 0.295442982774911, + "grad_norm": 4.958978176116943, + "learning_rate": 2.9518860382295226e-05, + "loss": 0.2359, + "step": 8130 + }, + { + "epoch": 0.29580638127770914, + "grad_norm": 1.5531708002090454, + "learning_rate": 2.9555200232575042e-05, + "loss": 0.2138, + "step": 8140 + }, + { + "epoch": 0.2961697797805073, + "grad_norm": 5.297884941101074, + "learning_rate": 2.9591540082854864e-05, + "loss": 0.2694, + "step": 8150 + }, + { + "epoch": 0.2965331782833055, + "grad_norm": 1.5989892482757568, + "learning_rate": 2.9627879933134677e-05, + "loss": 0.1686, + "step": 8160 + }, + { + "epoch": 0.29689657678610365, + "grad_norm": 3.347722291946411, + "learning_rate": 2.9664219783414493e-05, + "loss": 0.2206, + "step": 8170 + }, + { + "epoch": 0.2972599752889018, + "grad_norm": 2.9551491737365723, + "learning_rate": 2.970055963369431e-05, + "loss": 0.2274, + "step": 8180 + }, + { + "epoch": 0.29762337379169995, + "grad_norm": 2.527963638305664, + "learning_rate": 2.973689948397413e-05, + "loss": 0.1731, + "step": 8190 + }, + { + "epoch": 0.29798677229449816, + "grad_norm": 5.818012714385986, + "learning_rate": 2.9773239334253944e-05, + "loss": 0.265, + "step": 8200 + }, + { + "epoch": 0.2983501707972963, + "grad_norm": 1.5580624341964722, + "learning_rate": 2.980594519950578e-05, + "loss": 2.627, + "step": 8210 + }, + { + "epoch": 0.29871356930009446, + "grad_norm": 1.6011282205581665, + "learning_rate": 2.9842285049785594e-05, + "loss": 0.1811, + "step": 8220 + }, + { + "epoch": 0.29907696780289267, + "grad_norm": 44.825157165527344, + "learning_rate": 2.987862490006541e-05, + "loss": 0.4799, + "step": 8230 + }, + { + "epoch": 0.2994403663056908, + "grad_norm": 1.520982027053833, + "learning_rate": 2.991496475034523e-05, + "loss": 0.1935, + "step": 8240 + }, + { + "epoch": 0.29980376480848897, + "grad_norm": 6.3379058837890625, + "learning_rate": 2.9951304600625046e-05, + "loss": 0.2435, + "step": 8250 + }, + { + "epoch": 0.3001671633112872, + "grad_norm": 2.2493958473205566, + "learning_rate": 2.998764445090486e-05, + "loss": 0.1984, + "step": 8260 + }, + { + "epoch": 0.30053056181408533, + "grad_norm": 3.234196186065674, + "learning_rate": 3.002398430118468e-05, + "loss": 0.1785, + "step": 8270 + }, + { + "epoch": 0.3008939603168835, + "grad_norm": 4.99449348449707, + "learning_rate": 3.0060324151464497e-05, + "loss": 0.1888, + "step": 8280 + }, + { + "epoch": 0.3012573588196817, + "grad_norm": 1.8624048233032227, + "learning_rate": 3.0096664001744313e-05, + "loss": 1.6561, + "step": 8290 + }, + { + "epoch": 0.30162075732247984, + "grad_norm": 7.615640640258789, + "learning_rate": 3.0133003852024132e-05, + "loss": 0.2918, + "step": 8300 + }, + { + "epoch": 0.301984155825278, + "grad_norm": 1.6900697946548462, + "learning_rate": 3.0169343702303948e-05, + "loss": 0.2255, + "step": 8310 + }, + { + "epoch": 0.3023475543280762, + "grad_norm": 2.2034566402435303, + "learning_rate": 3.0205683552583764e-05, + "loss": 0.198, + "step": 8320 + }, + { + "epoch": 0.30271095283087435, + "grad_norm": 2.044597625732422, + "learning_rate": 3.0242023402863583e-05, + "loss": 0.1946, + "step": 8330 + }, + { + "epoch": 0.3030743513336725, + "grad_norm": 1.6171079874038696, + "learning_rate": 3.02783632531434e-05, + "loss": 0.1935, + "step": 8340 + }, + { + "epoch": 0.30343774983647065, + "grad_norm": 2.8435897827148438, + "learning_rate": 3.0314703103423215e-05, + "loss": 0.3876, + "step": 8350 + }, + { + "epoch": 0.30380114833926886, + "grad_norm": 2.023019552230835, + "learning_rate": 3.0351042953703035e-05, + "loss": 0.1879, + "step": 8360 + }, + { + "epoch": 0.304164546842067, + "grad_norm": 1.7610963582992554, + "learning_rate": 3.038738280398285e-05, + "loss": 0.1901, + "step": 8370 + }, + { + "epoch": 0.30452794534486516, + "grad_norm": 1.9482131004333496, + "learning_rate": 3.0423722654262667e-05, + "loss": 0.2119, + "step": 8380 + }, + { + "epoch": 0.30489134384766337, + "grad_norm": 1.6463958024978638, + "learning_rate": 3.046006250454248e-05, + "loss": 0.2067, + "step": 8390 + }, + { + "epoch": 0.3052547423504615, + "grad_norm": 10.607688903808594, + "learning_rate": 3.0496402354822302e-05, + "loss": 0.2709, + "step": 8400 + }, + { + "epoch": 0.3052547423504615, + "eval_loss": 0.3912598192691803, + "eval_runtime": 179.9461, + "eval_samples_per_second": 41.201, + "eval_steps_per_second": 5.152, + "eval_wer": 0.22865648882676493, + "step": 8400 + }, + { + "epoch": 0.30561814085325967, + "grad_norm": 5.675121307373047, + "learning_rate": 3.053274220510212e-05, + "loss": 0.1937, + "step": 8410 + }, + { + "epoch": 0.3059815393560579, + "grad_norm": 1.9001195430755615, + "learning_rate": 3.056908205538193e-05, + "loss": 0.1668, + "step": 8420 + }, + { + "epoch": 0.306344937858856, + "grad_norm": 6.807525157928467, + "learning_rate": 3.060542190566175e-05, + "loss": 0.2077, + "step": 8430 + }, + { + "epoch": 0.3067083363616542, + "grad_norm": 2.067265272140503, + "learning_rate": 3.064176175594157e-05, + "loss": 0.1596, + "step": 8440 + }, + { + "epoch": 0.3070717348644524, + "grad_norm": 15.267791748046875, + "learning_rate": 3.067810160622138e-05, + "loss": 0.2667, + "step": 8450 + }, + { + "epoch": 0.30743513336725053, + "grad_norm": 1.367903709411621, + "learning_rate": 3.07144414565012e-05, + "loss": 0.1819, + "step": 8460 + }, + { + "epoch": 0.3077985318700487, + "grad_norm": 1.531816840171814, + "learning_rate": 3.075078130678102e-05, + "loss": 0.1681, + "step": 8470 + }, + { + "epoch": 0.30816193037284684, + "grad_norm": 3.668304204940796, + "learning_rate": 3.078712115706083e-05, + "loss": 0.2488, + "step": 8480 + }, + { + "epoch": 0.30852532887564504, + "grad_norm": 2.2622220516204834, + "learning_rate": 3.082346100734065e-05, + "loss": 0.1866, + "step": 8490 + }, + { + "epoch": 0.3088887273784432, + "grad_norm": 6.450117111206055, + "learning_rate": 3.085980085762047e-05, + "loss": 0.2676, + "step": 8500 + }, + { + "epoch": 0.30925212588124135, + "grad_norm": 2.096731424331665, + "learning_rate": 3.0896140707900284e-05, + "loss": 0.1952, + "step": 8510 + }, + { + "epoch": 0.30961552438403955, + "grad_norm": 1.3809120655059814, + "learning_rate": 3.09324805581801e-05, + "loss": 0.3478, + "step": 8520 + }, + { + "epoch": 0.3099789228868377, + "grad_norm": 4.2257585525512695, + "learning_rate": 3.096882040845992e-05, + "loss": 0.2126, + "step": 8530 + }, + { + "epoch": 0.31034232138963586, + "grad_norm": 2.8543758392333984, + "learning_rate": 3.1005160258739736e-05, + "loss": 0.8169, + "step": 8540 + }, + { + "epoch": 0.31070571989243406, + "grad_norm": 5.897162437438965, + "learning_rate": 3.104150010901955e-05, + "loss": 0.2421, + "step": 8550 + }, + { + "epoch": 0.3110691183952322, + "grad_norm": 1.8980865478515625, + "learning_rate": 3.107783995929937e-05, + "loss": 0.193, + "step": 8560 + }, + { + "epoch": 0.31143251689803036, + "grad_norm": 2.113833427429199, + "learning_rate": 3.111417980957919e-05, + "loss": 0.1553, + "step": 8570 + }, + { + "epoch": 0.31179591540082857, + "grad_norm": 2.7569572925567627, + "learning_rate": 3.1150519659859e-05, + "loss": 0.2003, + "step": 8580 + }, + { + "epoch": 0.3121593139036267, + "grad_norm": 2.480473756790161, + "learning_rate": 3.118685951013882e-05, + "loss": 0.2173, + "step": 8590 + }, + { + "epoch": 0.3125227124064249, + "grad_norm": 12.174234390258789, + "learning_rate": 3.122319936041864e-05, + "loss": 0.3081, + "step": 8600 + }, + { + "epoch": 0.3128861109092231, + "grad_norm": 2.8075544834136963, + "learning_rate": 3.125953921069845e-05, + "loss": 0.263, + "step": 8610 + }, + { + "epoch": 0.31324950941202123, + "grad_norm": 16.535009384155273, + "learning_rate": 3.129587906097827e-05, + "loss": 0.1968, + "step": 8620 + }, + { + "epoch": 0.3136129079148194, + "grad_norm": 6.4783711433410645, + "learning_rate": 3.133221891125809e-05, + "loss": 0.2396, + "step": 8630 + }, + { + "epoch": 0.31397630641761753, + "grad_norm": 0.945353090763092, + "learning_rate": 3.13685587615379e-05, + "loss": 0.1623, + "step": 8640 + }, + { + "epoch": 0.31433970492041574, + "grad_norm": 7.135663032531738, + "learning_rate": 3.140489861181772e-05, + "loss": 0.3006, + "step": 8650 + }, + { + "epoch": 0.3147031034232139, + "grad_norm": 1.275896430015564, + "learning_rate": 3.144123846209754e-05, + "loss": 0.1845, + "step": 8660 + }, + { + "epoch": 0.31506650192601204, + "grad_norm": 2.1660525798797607, + "learning_rate": 3.147757831237735e-05, + "loss": 0.1614, + "step": 8670 + }, + { + "epoch": 0.31542990042881025, + "grad_norm": 3.878882646560669, + "learning_rate": 3.1513918162657166e-05, + "loss": 0.2124, + "step": 8680 + }, + { + "epoch": 0.3157932989316084, + "grad_norm": 3.452864170074463, + "learning_rate": 3.155025801293699e-05, + "loss": 0.1659, + "step": 8690 + }, + { + "epoch": 0.31615669743440655, + "grad_norm": 4.0493292808532715, + "learning_rate": 3.1586597863216805e-05, + "loss": 0.2653, + "step": 8700 + }, + { + "epoch": 0.31652009593720476, + "grad_norm": 1.9184757471084595, + "learning_rate": 3.162293771349662e-05, + "loss": 0.2043, + "step": 8710 + }, + { + "epoch": 0.3168834944400029, + "grad_norm": 4.22302770614624, + "learning_rate": 3.165927756377644e-05, + "loss": 0.2005, + "step": 8720 + }, + { + "epoch": 0.31724689294280106, + "grad_norm": 8.557464599609375, + "learning_rate": 3.1695617414056256e-05, + "loss": 0.2135, + "step": 8730 + }, + { + "epoch": 0.31761029144559927, + "grad_norm": 1.6090949773788452, + "learning_rate": 3.173195726433607e-05, + "loss": 0.1565, + "step": 8740 + }, + { + "epoch": 0.3179736899483974, + "grad_norm": 35.859737396240234, + "learning_rate": 3.1768297114615894e-05, + "loss": 0.3239, + "step": 8750 + }, + { + "epoch": 0.31833708845119557, + "grad_norm": 2.837944507598877, + "learning_rate": 3.180463696489571e-05, + "loss": 0.1902, + "step": 8760 + }, + { + "epoch": 0.3187004869539937, + "grad_norm": 1.6548888683319092, + "learning_rate": 3.184097681517552e-05, + "loss": 0.1732, + "step": 8770 + }, + { + "epoch": 0.31906388545679193, + "grad_norm": 3.840034246444702, + "learning_rate": 3.187731666545534e-05, + "loss": 0.2318, + "step": 8780 + }, + { + "epoch": 0.3194272839595901, + "grad_norm": 3.3684277534484863, + "learning_rate": 3.191365651573516e-05, + "loss": 0.1794, + "step": 8790 + }, + { + "epoch": 0.31979068246238823, + "grad_norm": 8.668655395507812, + "learning_rate": 3.194999636601497e-05, + "loss": 0.2745, + "step": 8800 + }, + { + "epoch": 0.32015408096518644, + "grad_norm": 1.412441611289978, + "learning_rate": 3.198633621629479e-05, + "loss": 0.1913, + "step": 8810 + }, + { + "epoch": 0.3205174794679846, + "grad_norm": 1.6273925304412842, + "learning_rate": 3.202267606657461e-05, + "loss": 0.1905, + "step": 8820 + }, + { + "epoch": 0.32088087797078274, + "grad_norm": 5.704558372497559, + "learning_rate": 3.205901591685442e-05, + "loss": 0.2217, + "step": 8830 + }, + { + "epoch": 0.32124427647358095, + "grad_norm": 2.248072385787964, + "learning_rate": 3.209535576713424e-05, + "loss": 0.1752, + "step": 8840 + }, + { + "epoch": 0.3216076749763791, + "grad_norm": 8.330979347229004, + "learning_rate": 3.213169561741406e-05, + "loss": 0.2693, + "step": 8850 + }, + { + "epoch": 0.32197107347917725, + "grad_norm": 6.713444709777832, + "learning_rate": 3.2168035467693873e-05, + "loss": 0.1821, + "step": 8860 + }, + { + "epoch": 0.32233447198197546, + "grad_norm": 1.7717983722686768, + "learning_rate": 3.220437531797369e-05, + "loss": 0.1572, + "step": 8870 + }, + { + "epoch": 0.3226978704847736, + "grad_norm": 3.8419570922851562, + "learning_rate": 3.224071516825351e-05, + "loss": 0.2168, + "step": 8880 + }, + { + "epoch": 0.32306126898757176, + "grad_norm": 1.8515948057174683, + "learning_rate": 3.2277055018533325e-05, + "loss": 0.1474, + "step": 8890 + }, + { + "epoch": 0.32342466749036997, + "grad_norm": 12.963587760925293, + "learning_rate": 3.231339486881314e-05, + "loss": 0.2349, + "step": 8900 + }, + { + "epoch": 0.3237880659931681, + "grad_norm": 1.078845500946045, + "learning_rate": 3.2349734719092963e-05, + "loss": 0.1968, + "step": 8910 + }, + { + "epoch": 0.32415146449596627, + "grad_norm": 1.5369044542312622, + "learning_rate": 3.2386074569372776e-05, + "loss": 0.1681, + "step": 8920 + }, + { + "epoch": 0.3245148629987644, + "grad_norm": 3.8013484477996826, + "learning_rate": 3.242241441965259e-05, + "loss": 0.2214, + "step": 8930 + }, + { + "epoch": 0.3248782615015626, + "grad_norm": 2.0259406566619873, + "learning_rate": 3.2458754269932415e-05, + "loss": 0.4227, + "step": 8940 + }, + { + "epoch": 0.3252416600043608, + "grad_norm": 6.423609256744385, + "learning_rate": 3.249509412021223e-05, + "loss": 0.2835, + "step": 8950 + }, + { + "epoch": 0.32560505850715893, + "grad_norm": 2.363159656524658, + "learning_rate": 3.253143397049204e-05, + "loss": 0.2038, + "step": 8960 + }, + { + "epoch": 0.32596845700995714, + "grad_norm": 2.4034435749053955, + "learning_rate": 3.256777382077186e-05, + "loss": 0.1907, + "step": 8970 + }, + { + "epoch": 0.3263318555127553, + "grad_norm": 4.032980442047119, + "learning_rate": 3.260411367105168e-05, + "loss": 0.1973, + "step": 8980 + }, + { + "epoch": 0.32669525401555344, + "grad_norm": 6.102022647857666, + "learning_rate": 3.264045352133149e-05, + "loss": 0.197, + "step": 8990 + }, + { + "epoch": 0.32705865251835164, + "grad_norm": 35.67893981933594, + "learning_rate": 3.267679337161131e-05, + "loss": 0.2682, + "step": 9000 + }, + { + "epoch": 0.32705865251835164, + "eval_loss": 0.40712428092956543, + "eval_runtime": 179.2194, + "eval_samples_per_second": 41.368, + "eval_steps_per_second": 5.172, + "eval_wer": 0.226941020567466, + "step": 9000 + }, + { + "epoch": 0.3274220510211498, + "grad_norm": 1.8014717102050781, + "learning_rate": 3.271313322189113e-05, + "loss": 0.1591, + "step": 9010 + }, + { + "epoch": 0.32778544952394795, + "grad_norm": 1.7404965162277222, + "learning_rate": 3.274947307217094e-05, + "loss": 0.17, + "step": 9020 + }, + { + "epoch": 0.32814884802674615, + "grad_norm": 3.7020771503448486, + "learning_rate": 3.278581292245076e-05, + "loss": 0.2225, + "step": 9030 + }, + { + "epoch": 0.3285122465295443, + "grad_norm": 1.045998454093933, + "learning_rate": 3.282215277273058e-05, + "loss": 0.1681, + "step": 9040 + }, + { + "epoch": 0.32887564503234246, + "grad_norm": 5.282716751098633, + "learning_rate": 3.2858492623010394e-05, + "loss": 0.2856, + "step": 9050 + }, + { + "epoch": 0.3292390435351406, + "grad_norm": 3.3956387042999268, + "learning_rate": 3.289483247329021e-05, + "loss": 0.1782, + "step": 9060 + }, + { + "epoch": 0.3296024420379388, + "grad_norm": 1.855603575706482, + "learning_rate": 3.293117232357003e-05, + "loss": 0.1582, + "step": 9070 + }, + { + "epoch": 0.32996584054073697, + "grad_norm": 7.214013576507568, + "learning_rate": 3.2967512173849845e-05, + "loss": 0.1691, + "step": 9080 + }, + { + "epoch": 0.3303292390435351, + "grad_norm": 3.140125036239624, + "learning_rate": 3.3003852024129664e-05, + "loss": 0.1872, + "step": 9090 + }, + { + "epoch": 0.3306926375463333, + "grad_norm": 17.094255447387695, + "learning_rate": 3.304019187440948e-05, + "loss": 0.2848, + "step": 9100 + }, + { + "epoch": 0.3310560360491315, + "grad_norm": 1.9439010620117188, + "learning_rate": 3.3076531724689296e-05, + "loss": 0.1625, + "step": 9110 + }, + { + "epoch": 0.3314194345519296, + "grad_norm": 1.609747290611267, + "learning_rate": 3.311287157496911e-05, + "loss": 0.1915, + "step": 9120 + }, + { + "epoch": 0.33178283305472783, + "grad_norm": 4.03629207611084, + "learning_rate": 3.314921142524893e-05, + "loss": 0.2291, + "step": 9130 + }, + { + "epoch": 0.332146231557526, + "grad_norm": 1.9643129110336304, + "learning_rate": 3.318555127552875e-05, + "loss": 0.1747, + "step": 9140 + }, + { + "epoch": 0.33250963006032414, + "grad_norm": 9.304847717285156, + "learning_rate": 3.322189112580856e-05, + "loss": 0.2539, + "step": 9150 + }, + { + "epoch": 0.33287302856312234, + "grad_norm": 1.991467833518982, + "learning_rate": 3.325823097608838e-05, + "loss": 3.61, + "step": 9160 + }, + { + "epoch": 0.3332364270659205, + "grad_norm": 2.7127187252044678, + "learning_rate": 3.32945708263682e-05, + "loss": 0.1985, + "step": 9170 + }, + { + "epoch": 0.33359982556871864, + "grad_norm": 2.831299304962158, + "learning_rate": 3.333091067664801e-05, + "loss": 1.7334, + "step": 9180 + }, + { + "epoch": 0.33396322407151685, + "grad_norm": 1.5434614419937134, + "learning_rate": 3.336725052692783e-05, + "loss": 0.1718, + "step": 9190 + }, + { + "epoch": 0.334326622574315, + "grad_norm": 10.254124641418457, + "learning_rate": 3.340359037720765e-05, + "loss": 0.3246, + "step": 9200 + }, + { + "epoch": 0.33469002107711315, + "grad_norm": 1.169886589050293, + "learning_rate": 3.343993022748746e-05, + "loss": 0.1936, + "step": 9210 + }, + { + "epoch": 0.3350534195799113, + "grad_norm": 3.697627544403076, + "learning_rate": 3.347627007776728e-05, + "loss": 0.205, + "step": 9220 + }, + { + "epoch": 0.3354168180827095, + "grad_norm": 3.15781307220459, + "learning_rate": 3.35126099280471e-05, + "loss": 0.2222, + "step": 9230 + }, + { + "epoch": 0.33578021658550766, + "grad_norm": 1.903701663017273, + "learning_rate": 3.3548949778326914e-05, + "loss": 0.1611, + "step": 9240 + }, + { + "epoch": 0.3361436150883058, + "grad_norm": 26.77275848388672, + "learning_rate": 3.358528962860673e-05, + "loss": 0.2872, + "step": 9250 + }, + { + "epoch": 0.336507013591104, + "grad_norm": 1.588224172592163, + "learning_rate": 3.3621629478886546e-05, + "loss": 3.404, + "step": 9260 + }, + { + "epoch": 0.33687041209390217, + "grad_norm": 1.8802090883255005, + "learning_rate": 3.3657969329166365e-05, + "loss": 0.1715, + "step": 9270 + }, + { + "epoch": 0.3372338105967003, + "grad_norm": 5.38352632522583, + "learning_rate": 3.3694309179446185e-05, + "loss": 0.1906, + "step": 9280 + }, + { + "epoch": 0.33759720909949853, + "grad_norm": 1.736177921295166, + "learning_rate": 3.3730649029726e-05, + "loss": 0.1881, + "step": 9290 + }, + { + "epoch": 0.3379606076022967, + "grad_norm": 17.865558624267578, + "learning_rate": 3.3766988880005816e-05, + "loss": 0.3003, + "step": 9300 + }, + { + "epoch": 0.33832400610509483, + "grad_norm": 1.532173991203308, + "learning_rate": 3.3803328730285636e-05, + "loss": 0.188, + "step": 9310 + }, + { + "epoch": 0.33868740460789304, + "grad_norm": 3.8595352172851562, + "learning_rate": 3.383966858056545e-05, + "loss": 0.1869, + "step": 9320 + }, + { + "epoch": 0.3390508031106912, + "grad_norm": 2.5906641483306885, + "learning_rate": 3.387600843084527e-05, + "loss": 0.1993, + "step": 9330 + }, + { + "epoch": 0.33941420161348934, + "grad_norm": 2.5224273204803467, + "learning_rate": 3.391234828112508e-05, + "loss": 0.1935, + "step": 9340 + }, + { + "epoch": 0.33977760011628755, + "grad_norm": 11.555095672607422, + "learning_rate": 3.39486881314049e-05, + "loss": 0.2891, + "step": 9350 + }, + { + "epoch": 0.3401409986190857, + "grad_norm": 1.3724703788757324, + "learning_rate": 3.398502798168472e-05, + "loss": 0.1656, + "step": 9360 + }, + { + "epoch": 0.34050439712188385, + "grad_norm": 2.1549072265625, + "learning_rate": 3.402136783196453e-05, + "loss": 0.1769, + "step": 9370 + }, + { + "epoch": 0.340867795624682, + "grad_norm": 1.793492317199707, + "learning_rate": 3.405770768224435e-05, + "loss": 0.2661, + "step": 9380 + }, + { + "epoch": 0.3412311941274802, + "grad_norm": 4.038620948791504, + "learning_rate": 3.409404753252417e-05, + "loss": 0.1871, + "step": 9390 + }, + { + "epoch": 0.34159459263027836, + "grad_norm": 31.7847900390625, + "learning_rate": 3.413038738280398e-05, + "loss": 0.2967, + "step": 9400 + }, + { + "epoch": 0.3419579911330765, + "grad_norm": 2.398646354675293, + "learning_rate": 3.41667272330838e-05, + "loss": 0.2086, + "step": 9410 + }, + { + "epoch": 0.3423213896358747, + "grad_norm": 2.2226221561431885, + "learning_rate": 3.4203067083363615e-05, + "loss": 0.1665, + "step": 9420 + }, + { + "epoch": 0.34268478813867287, + "grad_norm": 39.96380615234375, + "learning_rate": 3.4239406933643434e-05, + "loss": 0.9468, + "step": 9430 + }, + { + "epoch": 0.343048186641471, + "grad_norm": 1.5465339422225952, + "learning_rate": 3.4275746783923254e-05, + "loss": 0.1827, + "step": 9440 + }, + { + "epoch": 0.3434115851442692, + "grad_norm": 7.941345691680908, + "learning_rate": 3.4312086634203066e-05, + "loss": 0.2786, + "step": 9450 + }, + { + "epoch": 0.3437749836470674, + "grad_norm": 1.2575476169586182, + "learning_rate": 3.4348426484482885e-05, + "loss": 0.1764, + "step": 9460 + }, + { + "epoch": 0.34413838214986553, + "grad_norm": 1.3529596328735352, + "learning_rate": 3.4384766334762705e-05, + "loss": 0.207, + "step": 9470 + }, + { + "epoch": 0.34450178065266374, + "grad_norm": 3.2839174270629883, + "learning_rate": 3.442110618504252e-05, + "loss": 0.2672, + "step": 9480 + }, + { + "epoch": 0.3448651791554619, + "grad_norm": 3.246384859085083, + "learning_rate": 3.445744603532234e-05, + "loss": 0.1906, + "step": 9490 + }, + { + "epoch": 0.34522857765826004, + "grad_norm": 2.595038652420044, + "learning_rate": 3.4493785885602156e-05, + "loss": 0.2441, + "step": 9500 + }, + { + "epoch": 0.3455919761610582, + "grad_norm": 1.3803220987319946, + "learning_rate": 3.453012573588197e-05, + "loss": 0.1745, + "step": 9510 + }, + { + "epoch": 0.3459553746638564, + "grad_norm": 1.2091724872589111, + "learning_rate": 3.456646558616179e-05, + "loss": 0.1441, + "step": 9520 + }, + { + "epoch": 0.34631877316665455, + "grad_norm": 6.582603931427002, + "learning_rate": 3.460280543644161e-05, + "loss": 0.1835, + "step": 9530 + }, + { + "epoch": 0.3466821716694527, + "grad_norm": 2.6845383644104004, + "learning_rate": 3.463914528672142e-05, + "loss": 0.2048, + "step": 9540 + }, + { + "epoch": 0.3470455701722509, + "grad_norm": 11.775678634643555, + "learning_rate": 3.467548513700123e-05, + "loss": 0.2841, + "step": 9550 + }, + { + "epoch": 0.34740896867504906, + "grad_norm": 2.256279706954956, + "learning_rate": 3.471182498728106e-05, + "loss": 0.6472, + "step": 9560 + }, + { + "epoch": 0.3477723671778472, + "grad_norm": 1.4487576484680176, + "learning_rate": 3.474816483756087e-05, + "loss": 0.2722, + "step": 9570 + }, + { + "epoch": 0.3481357656806454, + "grad_norm": 3.843964099884033, + "learning_rate": 3.4784504687840684e-05, + "loss": 0.1855, + "step": 9580 + }, + { + "epoch": 0.34849916418344357, + "grad_norm": 1.5561772584915161, + "learning_rate": 3.48208445381205e-05, + "loss": 0.1908, + "step": 9590 + }, + { + "epoch": 0.3488625626862417, + "grad_norm": 3.757232666015625, + "learning_rate": 3.485718438840032e-05, + "loss": 0.2198, + "step": 9600 + }, + { + "epoch": 0.3488625626862417, + "eval_loss": 0.3895765244960785, + "eval_runtime": 179.7435, + "eval_samples_per_second": 41.248, + "eval_steps_per_second": 5.157, + "eval_wer": 0.21512335033674007, + "step": 9600 + }, + { + "epoch": 0.3492259611890399, + "grad_norm": 1.3912307024002075, + "learning_rate": 3.4893524238680135e-05, + "loss": 0.1616, + "step": 9610 + }, + { + "epoch": 0.3495893596918381, + "grad_norm": 2.4036080837249756, + "learning_rate": 3.4929864088959954e-05, + "loss": 0.1579, + "step": 9620 + }, + { + "epoch": 0.3499527581946362, + "grad_norm": 2.611175537109375, + "learning_rate": 3.4966203939239774e-05, + "loss": 0.1746, + "step": 9630 + }, + { + "epoch": 0.35031615669743443, + "grad_norm": 1.4045140743255615, + "learning_rate": 3.5002543789519586e-05, + "loss": 0.1594, + "step": 9640 + }, + { + "epoch": 0.3506795552002326, + "grad_norm": 12.708057403564453, + "learning_rate": 3.5038883639799406e-05, + "loss": 0.3118, + "step": 9650 + }, + { + "epoch": 0.35104295370303074, + "grad_norm": 3.0364696979522705, + "learning_rate": 3.5075223490079225e-05, + "loss": 0.3062, + "step": 9660 + }, + { + "epoch": 0.3514063522058289, + "grad_norm": 1.4527848958969116, + "learning_rate": 3.511156334035904e-05, + "loss": 0.1603, + "step": 9670 + }, + { + "epoch": 0.3517697507086271, + "grad_norm": 5.697939395904541, + "learning_rate": 3.514790319063886e-05, + "loss": 0.2069, + "step": 9680 + }, + { + "epoch": 0.35213314921142524, + "grad_norm": 2.1645712852478027, + "learning_rate": 3.5184243040918676e-05, + "loss": 0.162, + "step": 9690 + }, + { + "epoch": 0.3524965477142234, + "grad_norm": 8.024601936340332, + "learning_rate": 3.522058289119849e-05, + "loss": 0.898, + "step": 9700 + }, + { + "epoch": 0.3528599462170216, + "grad_norm": 1.4516103267669678, + "learning_rate": 3.52569227414783e-05, + "loss": 0.189, + "step": 9710 + }, + { + "epoch": 0.35322334471981975, + "grad_norm": 1.0467925071716309, + "learning_rate": 3.529326259175813e-05, + "loss": 0.1547, + "step": 9720 + }, + { + "epoch": 0.3535867432226179, + "grad_norm": 3.9237303733825684, + "learning_rate": 3.532960244203794e-05, + "loss": 0.1968, + "step": 9730 + }, + { + "epoch": 0.3539501417254161, + "grad_norm": 2.502257823944092, + "learning_rate": 3.536594229231775e-05, + "loss": 0.1645, + "step": 9740 + }, + { + "epoch": 0.35431354022821426, + "grad_norm": 30.662227630615234, + "learning_rate": 3.540228214259758e-05, + "loss": 0.2847, + "step": 9750 + }, + { + "epoch": 0.3546769387310124, + "grad_norm": 1.7106624841690063, + "learning_rate": 3.543862199287739e-05, + "loss": 0.1951, + "step": 9760 + }, + { + "epoch": 0.3550403372338106, + "grad_norm": 2.169036865234375, + "learning_rate": 3.5474961843157204e-05, + "loss": 0.172, + "step": 9770 + }, + { + "epoch": 0.3554037357366088, + "grad_norm": 6.116454124450684, + "learning_rate": 3.551130169343703e-05, + "loss": 0.1934, + "step": 9780 + }, + { + "epoch": 0.3557671342394069, + "grad_norm": 1.8530545234680176, + "learning_rate": 3.554764154371684e-05, + "loss": 0.217, + "step": 9790 + }, + { + "epoch": 0.3561305327422051, + "grad_norm": 11.060449600219727, + "learning_rate": 3.5583981393996655e-05, + "loss": 0.2145, + "step": 9800 + }, + { + "epoch": 0.3564939312450033, + "grad_norm": 7.748067378997803, + "learning_rate": 3.5620321244276475e-05, + "loss": 0.2114, + "step": 9810 + }, + { + "epoch": 0.35685732974780143, + "grad_norm": 3.562528610229492, + "learning_rate": 3.5656661094556294e-05, + "loss": 0.221, + "step": 9820 + }, + { + "epoch": 0.3572207282505996, + "grad_norm": 2.798417091369629, + "learning_rate": 3.5693000944836107e-05, + "loss": 0.2071, + "step": 9830 + }, + { + "epoch": 0.3575841267533978, + "grad_norm": 2.3908724784851074, + "learning_rate": 3.5729340795115926e-05, + "loss": 0.1678, + "step": 9840 + }, + { + "epoch": 0.35794752525619594, + "grad_norm": 7.205004692077637, + "learning_rate": 3.5765680645395745e-05, + "loss": 0.2953, + "step": 9850 + }, + { + "epoch": 0.3583109237589941, + "grad_norm": 2.5064749717712402, + "learning_rate": 3.580202049567556e-05, + "loss": 0.197, + "step": 9860 + }, + { + "epoch": 0.3586743222617923, + "grad_norm": 2.0985934734344482, + "learning_rate": 3.583836034595538e-05, + "loss": 0.1441, + "step": 9870 + }, + { + "epoch": 0.35903772076459045, + "grad_norm": 5.256442070007324, + "learning_rate": 3.5874700196235197e-05, + "loss": 0.203, + "step": 9880 + }, + { + "epoch": 0.3594011192673886, + "grad_norm": 2.3590219020843506, + "learning_rate": 3.591104004651501e-05, + "loss": 0.1811, + "step": 9890 + }, + { + "epoch": 0.3597645177701868, + "grad_norm": 24.96747398376465, + "learning_rate": 3.594737989679482e-05, + "loss": 0.293, + "step": 9900 + }, + { + "epoch": 0.36012791627298496, + "grad_norm": 1.727751612663269, + "learning_rate": 3.598371974707465e-05, + "loss": 0.1896, + "step": 9910 + }, + { + "epoch": 0.3604913147757831, + "grad_norm": 2.349269151687622, + "learning_rate": 3.602005959735446e-05, + "loss": 0.1649, + "step": 9920 + }, + { + "epoch": 0.3608547132785813, + "grad_norm": 3.139385223388672, + "learning_rate": 3.605639944763427e-05, + "loss": 0.2181, + "step": 9930 + }, + { + "epoch": 0.36121811178137947, + "grad_norm": 2.1249756813049316, + "learning_rate": 3.60927392979141e-05, + "loss": 0.1751, + "step": 9940 + }, + { + "epoch": 0.3615815102841776, + "grad_norm": 3.6616756916046143, + "learning_rate": 3.612907914819391e-05, + "loss": 0.2729, + "step": 9950 + }, + { + "epoch": 0.36194490878697577, + "grad_norm": 1.367600440979004, + "learning_rate": 3.6165418998473724e-05, + "loss": 0.1592, + "step": 9960 + }, + { + "epoch": 0.362308307289774, + "grad_norm": 1.8141239881515503, + "learning_rate": 3.620175884875355e-05, + "loss": 0.2867, + "step": 9970 + }, + { + "epoch": 0.36267170579257213, + "grad_norm": 7.0058794021606445, + "learning_rate": 3.623809869903336e-05, + "loss": 0.207, + "step": 9980 + }, + { + "epoch": 0.3630351042953703, + "grad_norm": 1.923048734664917, + "learning_rate": 3.6274438549313176e-05, + "loss": 0.346, + "step": 9990 + }, + { + "epoch": 0.3633985027981685, + "grad_norm": 16.30779457092285, + "learning_rate": 3.6310778399592995e-05, + "loss": 0.3107, + "step": 10000 + }, + { + "epoch": 0.36376190130096664, + "grad_norm": 1.979866862297058, + "learning_rate": 3.6347118249872814e-05, + "loss": 0.7999, + "step": 10010 + }, + { + "epoch": 0.3641252998037648, + "grad_norm": 2.7377023696899414, + "learning_rate": 3.638345810015263e-05, + "loss": 0.2005, + "step": 10020 + }, + { + "epoch": 0.364488698306563, + "grad_norm": 5.546159744262695, + "learning_rate": 3.6419797950432446e-05, + "loss": 0.1964, + "step": 10030 + }, + { + "epoch": 0.36485209680936115, + "grad_norm": 2.2417142391204834, + "learning_rate": 3.6456137800712265e-05, + "loss": 0.2078, + "step": 10040 + }, + { + "epoch": 0.3652154953121593, + "grad_norm": 7.2175092697143555, + "learning_rate": 3.649247765099208e-05, + "loss": 0.291, + "step": 10050 + }, + { + "epoch": 0.3655788938149575, + "grad_norm": 2.6172754764556885, + "learning_rate": 3.65288175012719e-05, + "loss": 0.2037, + "step": 10060 + }, + { + "epoch": 0.36594229231775566, + "grad_norm": 2.0634214878082275, + "learning_rate": 3.656515735155172e-05, + "loss": 0.1668, + "step": 10070 + }, + { + "epoch": 0.3663056908205538, + "grad_norm": 3.5431976318359375, + "learning_rate": 3.660149720183153e-05, + "loss": 0.475, + "step": 10080 + }, + { + "epoch": 0.366669089323352, + "grad_norm": 2.147472381591797, + "learning_rate": 3.663783705211135e-05, + "loss": 0.1869, + "step": 10090 + }, + { + "epoch": 0.36703248782615017, + "grad_norm": 18.726482391357422, + "learning_rate": 3.667417690239117e-05, + "loss": 0.2773, + "step": 10100 + }, + { + "epoch": 0.3673958863289483, + "grad_norm": 1.6554090976715088, + "learning_rate": 3.671051675267098e-05, + "loss": 0.1707, + "step": 10110 + }, + { + "epoch": 0.36775928483174647, + "grad_norm": 1.8967760801315308, + "learning_rate": 3.674685660295079e-05, + "loss": 0.2159, + "step": 10120 + }, + { + "epoch": 0.3681226833345447, + "grad_norm": 2.3765788078308105, + "learning_rate": 3.678319645323061e-05, + "loss": 0.2229, + "step": 10130 + }, + { + "epoch": 0.3684860818373428, + "grad_norm": 5.890452861785889, + "learning_rate": 3.681953630351043e-05, + "loss": 0.195, + "step": 10140 + }, + { + "epoch": 0.368849480340141, + "grad_norm": 5.045167446136475, + "learning_rate": 3.6855876153790244e-05, + "loss": 0.3111, + "step": 10150 + }, + { + "epoch": 0.3692128788429392, + "grad_norm": 2.37107253074646, + "learning_rate": 3.6892216004070064e-05, + "loss": 0.1942, + "step": 10160 + }, + { + "epoch": 0.36957627734573734, + "grad_norm": 1.9943170547485352, + "learning_rate": 3.692855585434988e-05, + "loss": 0.1906, + "step": 10170 + }, + { + "epoch": 0.3699396758485355, + "grad_norm": 3.16873836517334, + "learning_rate": 3.6964895704629696e-05, + "loss": 0.1791, + "step": 10180 + }, + { + "epoch": 0.3703030743513337, + "grad_norm": 15.252134323120117, + "learning_rate": 3.7001235554909515e-05, + "loss": 0.3702, + "step": 10190 + }, + { + "epoch": 0.37066647285413185, + "grad_norm": 8.845834732055664, + "learning_rate": 3.7037575405189334e-05, + "loss": 0.2765, + "step": 10200 + }, + { + "epoch": 0.37066647285413185, + "eval_loss": 0.4178149104118347, + "eval_runtime": 179.6523, + "eval_samples_per_second": 41.269, + "eval_steps_per_second": 5.16, + "eval_wer": 0.2237551509430537, + "step": 10200 + }, + { + "epoch": 0.37102987135693, + "grad_norm": 6.2689313888549805, + "learning_rate": 3.707391525546915e-05, + "loss": 0.1922, + "step": 10210 + }, + { + "epoch": 0.3713932698597282, + "grad_norm": 1.00067138671875, + "learning_rate": 3.7110255105748966e-05, + "loss": 0.1535, + "step": 10220 + }, + { + "epoch": 0.37175666836252635, + "grad_norm": 2.6602060794830322, + "learning_rate": 3.7146594956028786e-05, + "loss": 0.1959, + "step": 10230 + }, + { + "epoch": 0.3721200668653245, + "grad_norm": 4.743015766143799, + "learning_rate": 3.71829348063086e-05, + "loss": 0.2058, + "step": 10240 + }, + { + "epoch": 0.37248346536812266, + "grad_norm": 8.304347038269043, + "learning_rate": 3.721927465658842e-05, + "loss": 0.3027, + "step": 10250 + }, + { + "epoch": 0.37284686387092086, + "grad_norm": 1.8180521726608276, + "learning_rate": 3.725561450686824e-05, + "loss": 0.1708, + "step": 10260 + }, + { + "epoch": 0.373210262373719, + "grad_norm": 2.05625057220459, + "learning_rate": 3.729195435714805e-05, + "loss": 0.1824, + "step": 10270 + }, + { + "epoch": 0.37357366087651717, + "grad_norm": 2.426814317703247, + "learning_rate": 3.732829420742787e-05, + "loss": 0.197, + "step": 10280 + }, + { + "epoch": 0.3739370593793154, + "grad_norm": 1.658158540725708, + "learning_rate": 3.736463405770768e-05, + "loss": 0.1578, + "step": 10290 + }, + { + "epoch": 0.3743004578821135, + "grad_norm": 10.913407325744629, + "learning_rate": 3.74009739079875e-05, + "loss": 0.2728, + "step": 10300 + }, + { + "epoch": 0.3746638563849117, + "grad_norm": 1.6443781852722168, + "learning_rate": 3.743731375826732e-05, + "loss": 0.1656, + "step": 10310 + }, + { + "epoch": 0.3750272548877099, + "grad_norm": 1.0702744722366333, + "learning_rate": 3.747365360854713e-05, + "loss": 0.7132, + "step": 10320 + }, + { + "epoch": 0.37539065339050803, + "grad_norm": 5.8824052810668945, + "learning_rate": 3.750999345882695e-05, + "loss": 0.2701, + "step": 10330 + }, + { + "epoch": 0.3757540518933062, + "grad_norm": 4.373916149139404, + "learning_rate": 3.754633330910677e-05, + "loss": 0.2053, + "step": 10340 + }, + { + "epoch": 0.3761174503961044, + "grad_norm": 22.25397300720215, + "learning_rate": 3.7582673159386584e-05, + "loss": 0.2781, + "step": 10350 + }, + { + "epoch": 0.37648084889890254, + "grad_norm": 1.8272254467010498, + "learning_rate": 3.7619013009666403e-05, + "loss": 0.1833, + "step": 10360 + }, + { + "epoch": 0.3768442474017007, + "grad_norm": 3.286931037902832, + "learning_rate": 3.7655352859946216e-05, + "loss": 0.1576, + "step": 10370 + }, + { + "epoch": 0.3772076459044989, + "grad_norm": 5.283690929412842, + "learning_rate": 3.7691692710226035e-05, + "loss": 0.21, + "step": 10380 + }, + { + "epoch": 0.37757104440729705, + "grad_norm": 1.184476375579834, + "learning_rate": 3.7728032560505855e-05, + "loss": 0.2597, + "step": 10390 + }, + { + "epoch": 0.3779344429100952, + "grad_norm": 5.685116767883301, + "learning_rate": 3.776437241078567e-05, + "loss": 0.2476, + "step": 10400 + }, + { + "epoch": 0.37829784141289335, + "grad_norm": 1.1873399019241333, + "learning_rate": 3.7800712261065487e-05, + "loss": 0.1597, + "step": 10410 + }, + { + "epoch": 0.37866123991569156, + "grad_norm": 1.6136255264282227, + "learning_rate": 3.7837052111345306e-05, + "loss": 0.188, + "step": 10420 + }, + { + "epoch": 0.3790246384184897, + "grad_norm": 4.743179798126221, + "learning_rate": 3.787339196162512e-05, + "loss": 0.1962, + "step": 10430 + }, + { + "epoch": 0.37938803692128786, + "grad_norm": 2.603379011154175, + "learning_rate": 3.790973181190494e-05, + "loss": 0.1854, + "step": 10440 + }, + { + "epoch": 0.37975143542408607, + "grad_norm": 6.267378807067871, + "learning_rate": 3.794607166218475e-05, + "loss": 0.2569, + "step": 10450 + }, + { + "epoch": 0.3801148339268842, + "grad_norm": 5.370235919952393, + "learning_rate": 3.798241151246457e-05, + "loss": 0.1796, + "step": 10460 + }, + { + "epoch": 0.3804782324296824, + "grad_norm": 2.170964002609253, + "learning_rate": 3.801875136274439e-05, + "loss": 0.1713, + "step": 10470 + }, + { + "epoch": 0.3808416309324806, + "grad_norm": 4.134753704071045, + "learning_rate": 3.80550912130242e-05, + "loss": 0.2269, + "step": 10480 + }, + { + "epoch": 0.38120502943527873, + "grad_norm": 2.7026259899139404, + "learning_rate": 3.809143106330402e-05, + "loss": 0.1938, + "step": 10490 + }, + { + "epoch": 0.3815684279380769, + "grad_norm": 7.368224143981934, + "learning_rate": 3.812777091358384e-05, + "loss": 0.2617, + "step": 10500 + }, + { + "epoch": 0.3819318264408751, + "grad_norm": 1.3194938898086548, + "learning_rate": 3.816411076386365e-05, + "loss": 0.2066, + "step": 10510 + }, + { + "epoch": 0.38229522494367324, + "grad_norm": 1.901505470275879, + "learning_rate": 3.820045061414347e-05, + "loss": 0.1716, + "step": 10520 + }, + { + "epoch": 0.3826586234464714, + "grad_norm": 3.4045536518096924, + "learning_rate": 3.823679046442329e-05, + "loss": 0.1625, + "step": 10530 + }, + { + "epoch": 0.38302202194926954, + "grad_norm": 2.1540184020996094, + "learning_rate": 3.8273130314703104e-05, + "loss": 0.1829, + "step": 10540 + }, + { + "epoch": 0.38338542045206775, + "grad_norm": 14.377511024475098, + "learning_rate": 3.8309470164982924e-05, + "loss": 0.2747, + "step": 10550 + }, + { + "epoch": 0.3837488189548659, + "grad_norm": 1.9092762470245361, + "learning_rate": 3.834581001526274e-05, + "loss": 0.1728, + "step": 10560 + }, + { + "epoch": 0.38411221745766405, + "grad_norm": 1.867458462715149, + "learning_rate": 3.8382149865542556e-05, + "loss": 0.1752, + "step": 10570 + }, + { + "epoch": 0.38447561596046226, + "grad_norm": 5.246692657470703, + "learning_rate": 3.841848971582237e-05, + "loss": 0.1823, + "step": 10580 + }, + { + "epoch": 0.3848390144632604, + "grad_norm": 2.9294533729553223, + "learning_rate": 3.845482956610219e-05, + "loss": 0.2052, + "step": 10590 + }, + { + "epoch": 0.38520241296605856, + "grad_norm": 11.946113586425781, + "learning_rate": 3.849116941638201e-05, + "loss": 0.309, + "step": 10600 + }, + { + "epoch": 0.38556581146885677, + "grad_norm": 1.7155182361602783, + "learning_rate": 3.852750926666182e-05, + "loss": 0.175, + "step": 10610 + }, + { + "epoch": 0.3859292099716549, + "grad_norm": 1.1520076990127563, + "learning_rate": 3.856384911694164e-05, + "loss": 0.2129, + "step": 10620 + }, + { + "epoch": 0.38629260847445307, + "grad_norm": 1.9750351905822754, + "learning_rate": 3.860018896722146e-05, + "loss": 0.1725, + "step": 10630 + }, + { + "epoch": 0.3866560069772513, + "grad_norm": 4.309560298919678, + "learning_rate": 3.863652881750127e-05, + "loss": 0.1516, + "step": 10640 + }, + { + "epoch": 0.3870194054800494, + "grad_norm": 7.554156303405762, + "learning_rate": 3.867286866778109e-05, + "loss": 0.3069, + "step": 10650 + }, + { + "epoch": 0.3873828039828476, + "grad_norm": 3.7965683937072754, + "learning_rate": 3.870920851806091e-05, + "loss": 0.2014, + "step": 10660 + }, + { + "epoch": 0.3877462024856458, + "grad_norm": 3.8691935539245605, + "learning_rate": 3.874554836834072e-05, + "loss": 0.1678, + "step": 10670 + }, + { + "epoch": 0.38810960098844394, + "grad_norm": 4.144315719604492, + "learning_rate": 3.878188821862054e-05, + "loss": 0.2936, + "step": 10680 + }, + { + "epoch": 0.3884729994912421, + "grad_norm": 1.5667825937271118, + "learning_rate": 3.881822806890036e-05, + "loss": 0.1871, + "step": 10690 + }, + { + "epoch": 0.38883639799404024, + "grad_norm": 7.6076788902282715, + "learning_rate": 3.885456791918017e-05, + "loss": 0.2661, + "step": 10700 + }, + { + "epoch": 0.38919979649683845, + "grad_norm": 1.7828059196472168, + "learning_rate": 3.889090776945999e-05, + "loss": 0.1808, + "step": 10710 + }, + { + "epoch": 0.3895631949996366, + "grad_norm": 7.039370059967041, + "learning_rate": 3.892724761973981e-05, + "loss": 0.2484, + "step": 10720 + }, + { + "epoch": 0.38992659350243475, + "grad_norm": 2.1001148223876953, + "learning_rate": 3.8963587470019625e-05, + "loss": 0.1644, + "step": 10730 + }, + { + "epoch": 0.39028999200523296, + "grad_norm": 0.9235002398490906, + "learning_rate": 3.899992732029944e-05, + "loss": 0.172, + "step": 10740 + }, + { + "epoch": 0.3906533905080311, + "grad_norm": 10.066643714904785, + "learning_rate": 3.903626717057926e-05, + "loss": 0.2999, + "step": 10750 + }, + { + "epoch": 0.39101678901082926, + "grad_norm": 2.256965160369873, + "learning_rate": 3.9072607020859076e-05, + "loss": 0.2116, + "step": 10760 + }, + { + "epoch": 0.39138018751362746, + "grad_norm": 1.742125153541565, + "learning_rate": 3.910894687113889e-05, + "loss": 0.1838, + "step": 10770 + }, + { + "epoch": 0.3917435860164256, + "grad_norm": 5.397392749786377, + "learning_rate": 3.9145286721418714e-05, + "loss": 0.2213, + "step": 10780 + }, + { + "epoch": 0.39210698451922377, + "grad_norm": 2.439197540283203, + "learning_rate": 3.918162657169853e-05, + "loss": 0.1984, + "step": 10790 + }, + { + "epoch": 0.392470383022022, + "grad_norm": 6.7387895584106445, + "learning_rate": 3.921796642197834e-05, + "loss": 0.2842, + "step": 10800 + }, + { + "epoch": 0.392470383022022, + "eval_loss": 0.39516785740852356, + "eval_runtime": 180.1522, + "eval_samples_per_second": 41.154, + "eval_steps_per_second": 5.146, + "eval_wer": 0.21758309583023216, + "step": 10800 + }, + { + "epoch": 0.3928337815248201, + "grad_norm": 1.5229130983352661, + "learning_rate": 3.925430627225816e-05, + "loss": 0.1809, + "step": 10810 + }, + { + "epoch": 0.3931971800276183, + "grad_norm": 1.6385318040847778, + "learning_rate": 3.929064612253798e-05, + "loss": 0.155, + "step": 10820 + }, + { + "epoch": 0.3935605785304164, + "grad_norm": 2.403878927230835, + "learning_rate": 3.932698597281779e-05, + "loss": 0.2837, + "step": 10830 + }, + { + "epoch": 0.39392397703321463, + "grad_norm": 2.818368434906006, + "learning_rate": 3.936332582309761e-05, + "loss": 0.2298, + "step": 10840 + }, + { + "epoch": 0.3942873755360128, + "grad_norm": 6.08942174911499, + "learning_rate": 3.939966567337743e-05, + "loss": 0.2262, + "step": 10850 + }, + { + "epoch": 0.39465077403881094, + "grad_norm": 1.2632570266723633, + "learning_rate": 3.943600552365724e-05, + "loss": 0.2087, + "step": 10860 + }, + { + "epoch": 0.39501417254160914, + "grad_norm": 2.2119662761688232, + "learning_rate": 3.947234537393706e-05, + "loss": 0.1974, + "step": 10870 + }, + { + "epoch": 0.3953775710444073, + "grad_norm": 2.936021089553833, + "learning_rate": 3.950868522421688e-05, + "loss": 0.1909, + "step": 10880 + }, + { + "epoch": 0.39574096954720545, + "grad_norm": 1.3898749351501465, + "learning_rate": 3.9545025074496693e-05, + "loss": 0.184, + "step": 10890 + }, + { + "epoch": 0.39610436805000365, + "grad_norm": 9.063791275024414, + "learning_rate": 3.958136492477651e-05, + "loss": 0.338, + "step": 10900 + }, + { + "epoch": 0.3964677665528018, + "grad_norm": 1.3791584968566895, + "learning_rate": 3.961770477505633e-05, + "loss": 0.2256, + "step": 10910 + }, + { + "epoch": 0.39683116505559995, + "grad_norm": 0.9377845525741577, + "learning_rate": 3.9654044625336145e-05, + "loss": 0.9822, + "step": 10920 + }, + { + "epoch": 0.39719456355839816, + "grad_norm": 3.9755465984344482, + "learning_rate": 3.969038447561596e-05, + "loss": 0.2257, + "step": 10930 + }, + { + "epoch": 0.3975579620611963, + "grad_norm": 1.559699535369873, + "learning_rate": 3.9726724325895783e-05, + "loss": 0.2116, + "step": 10940 + }, + { + "epoch": 0.39792136056399446, + "grad_norm": 7.545668601989746, + "learning_rate": 3.9763064176175596e-05, + "loss": 0.2515, + "step": 10950 + }, + { + "epoch": 0.39828475906679267, + "grad_norm": 1.980197548866272, + "learning_rate": 3.979940402645541e-05, + "loss": 0.1721, + "step": 10960 + }, + { + "epoch": 0.3986481575695908, + "grad_norm": 2.5450973510742188, + "learning_rate": 3.9835743876735235e-05, + "loss": 1.7152, + "step": 10970 + }, + { + "epoch": 0.399011556072389, + "grad_norm": 3.518233060836792, + "learning_rate": 3.987208372701505e-05, + "loss": 0.2521, + "step": 10980 + }, + { + "epoch": 0.3993749545751871, + "grad_norm": 2.678774356842041, + "learning_rate": 3.990842357729486e-05, + "loss": 0.2025, + "step": 10990 + }, + { + "epoch": 0.39973835307798533, + "grad_norm": 11.46552848815918, + "learning_rate": 3.9944763427574686e-05, + "loss": 0.2683, + "step": 11000 + }, + { + "epoch": 0.4001017515807835, + "grad_norm": 2.3148844242095947, + "learning_rate": 3.99811032778545e-05, + "loss": 1.5331, + "step": 11010 + }, + { + "epoch": 0.40046515008358163, + "grad_norm": 1.2145686149597168, + "learning_rate": 4.001744312813431e-05, + "loss": 0.1931, + "step": 11020 + }, + { + "epoch": 0.40082854858637984, + "grad_norm": 3.581883192062378, + "learning_rate": 4.005378297841413e-05, + "loss": 0.18, + "step": 11030 + }, + { + "epoch": 0.401191947089178, + "grad_norm": 2.4645683765411377, + "learning_rate": 4.009012282869395e-05, + "loss": 0.2303, + "step": 11040 + }, + { + "epoch": 0.40155534559197614, + "grad_norm": 13.845566749572754, + "learning_rate": 4.012646267897376e-05, + "loss": 0.2515, + "step": 11050 + }, + { + "epoch": 0.40191874409477435, + "grad_norm": 1.6929864883422852, + "learning_rate": 4.016280252925358e-05, + "loss": 2.9232, + "step": 11060 + }, + { + "epoch": 0.4022821425975725, + "grad_norm": 1.5453213453292847, + "learning_rate": 4.01991423795334e-05, + "loss": 0.1703, + "step": 11070 + }, + { + "epoch": 0.40264554110037065, + "grad_norm": 1.5723987817764282, + "learning_rate": 4.0235482229813214e-05, + "loss": 0.1694, + "step": 11080 + }, + { + "epoch": 0.40300893960316886, + "grad_norm": 1.4501444101333618, + "learning_rate": 4.027182208009303e-05, + "loss": 0.2477, + "step": 11090 + }, + { + "epoch": 0.403372338105967, + "grad_norm": 20.50950813293457, + "learning_rate": 4.030816193037285e-05, + "loss": 0.2641, + "step": 11100 + }, + { + "epoch": 0.40373573660876516, + "grad_norm": 1.9846757650375366, + "learning_rate": 4.0344501780652665e-05, + "loss": 0.1807, + "step": 11110 + }, + { + "epoch": 0.40409913511156337, + "grad_norm": 1.3933240175247192, + "learning_rate": 4.0380841630932484e-05, + "loss": 0.1683, + "step": 11120 + }, + { + "epoch": 0.4044625336143615, + "grad_norm": 2.370534658432007, + "learning_rate": 4.0417181481212304e-05, + "loss": 0.2476, + "step": 11130 + }, + { + "epoch": 0.40482593211715967, + "grad_norm": 2.6382100582122803, + "learning_rate": 4.0453521331492116e-05, + "loss": 0.1723, + "step": 11140 + }, + { + "epoch": 0.4051893306199578, + "grad_norm": 27.381826400756836, + "learning_rate": 4.048986118177193e-05, + "loss": 0.2058, + "step": 11150 + }, + { + "epoch": 0.40555272912275603, + "grad_norm": 1.3622616529464722, + "learning_rate": 4.052620103205175e-05, + "loss": 0.1744, + "step": 11160 + }, + { + "epoch": 0.4059161276255542, + "grad_norm": 1.4734828472137451, + "learning_rate": 4.056254088233157e-05, + "loss": 0.1685, + "step": 11170 + }, + { + "epoch": 0.40627952612835233, + "grad_norm": 5.694312572479248, + "learning_rate": 4.059888073261138e-05, + "loss": 0.3549, + "step": 11180 + }, + { + "epoch": 0.40664292463115054, + "grad_norm": 1.9976438283920288, + "learning_rate": 4.06352205828912e-05, + "loss": 0.1525, + "step": 11190 + }, + { + "epoch": 0.4070063231339487, + "grad_norm": 5.735686779022217, + "learning_rate": 4.067156043317102e-05, + "loss": 0.2642, + "step": 11200 + }, + { + "epoch": 0.40736972163674684, + "grad_norm": 5.192315101623535, + "learning_rate": 4.070790028345083e-05, + "loss": 0.1636, + "step": 11210 + }, + { + "epoch": 0.40773312013954505, + "grad_norm": 2.6324477195739746, + "learning_rate": 4.074424013373065e-05, + "loss": 0.3451, + "step": 11220 + }, + { + "epoch": 0.4080965186423432, + "grad_norm": 2.496997356414795, + "learning_rate": 4.078057998401047e-05, + "loss": 0.1792, + "step": 11230 + }, + { + "epoch": 0.40845991714514135, + "grad_norm": 3.928255558013916, + "learning_rate": 4.081691983429028e-05, + "loss": 0.2203, + "step": 11240 + }, + { + "epoch": 0.40882331564793956, + "grad_norm": 14.433273315429688, + "learning_rate": 4.08532596845701e-05, + "loss": 0.3283, + "step": 11250 + }, + { + "epoch": 0.4091867141507377, + "grad_norm": 1.9282217025756836, + "learning_rate": 4.088959953484992e-05, + "loss": 0.2191, + "step": 11260 + }, + { + "epoch": 0.40955011265353586, + "grad_norm": 1.8360569477081299, + "learning_rate": 4.0925939385129734e-05, + "loss": 0.1623, + "step": 11270 + }, + { + "epoch": 0.409913511156334, + "grad_norm": 4.518060207366943, + "learning_rate": 4.096227923540955e-05, + "loss": 0.2036, + "step": 11280 + }, + { + "epoch": 0.4102769096591322, + "grad_norm": 1.4292632341384888, + "learning_rate": 4.099861908568937e-05, + "loss": 0.1515, + "step": 11290 + }, + { + "epoch": 0.41064030816193037, + "grad_norm": 23.795089721679688, + "learning_rate": 4.1034958935969185e-05, + "loss": 0.3228, + "step": 11300 + }, + { + "epoch": 0.4110037066647285, + "grad_norm": 1.7721456289291382, + "learning_rate": 4.1071298786249005e-05, + "loss": 0.15, + "step": 11310 + }, + { + "epoch": 0.4113671051675267, + "grad_norm": 3.544579029083252, + "learning_rate": 4.110763863652882e-05, + "loss": 0.2349, + "step": 11320 + }, + { + "epoch": 0.4117305036703249, + "grad_norm": 4.25554895401001, + "learning_rate": 4.1143978486808636e-05, + "loss": 0.5458, + "step": 11330 + }, + { + "epoch": 0.41209390217312303, + "grad_norm": 3.069894313812256, + "learning_rate": 4.1180318337088456e-05, + "loss": 0.2131, + "step": 11340 + }, + { + "epoch": 0.41245730067592123, + "grad_norm": 5.389547348022461, + "learning_rate": 4.121665818736827e-05, + "loss": 0.2895, + "step": 11350 + }, + { + "epoch": 0.4128206991787194, + "grad_norm": 2.308717727661133, + "learning_rate": 4.125299803764809e-05, + "loss": 0.209, + "step": 11360 + }, + { + "epoch": 0.41318409768151754, + "grad_norm": 2.071504831314087, + "learning_rate": 4.12893378879279e-05, + "loss": 0.225, + "step": 11370 + }, + { + "epoch": 0.41354749618431574, + "grad_norm": 10.397724151611328, + "learning_rate": 4.132567773820772e-05, + "loss": 0.5041, + "step": 11380 + }, + { + "epoch": 0.4139108946871139, + "grad_norm": 3.3916842937469482, + "learning_rate": 4.136201758848754e-05, + "loss": 0.2055, + "step": 11390 + }, + { + "epoch": 0.41427429318991205, + "grad_norm": 27.703519821166992, + "learning_rate": 4.139835743876735e-05, + "loss": 0.3002, + "step": 11400 + }, + { + "epoch": 0.41427429318991205, + "eval_loss": 0.40216270089149475, + "eval_runtime": 180.385, + "eval_samples_per_second": 41.101, + "eval_steps_per_second": 5.139, + "eval_wer": 0.22132263510447112, + "step": 11400 + }, + { + "epoch": 0.41463769169271025, + "grad_norm": 3.024658203125, + "learning_rate": 4.143469728904717e-05, + "loss": 0.168, + "step": 11410 + }, + { + "epoch": 0.4150010901955084, + "grad_norm": 2.899369478225708, + "learning_rate": 4.147103713932699e-05, + "loss": 3.0252, + "step": 11420 + }, + { + "epoch": 0.41536448869830656, + "grad_norm": 3.960700511932373, + "learning_rate": 4.15073769896068e-05, + "loss": 0.1972, + "step": 11430 + }, + { + "epoch": 0.4157278872011047, + "grad_norm": 1.542468786239624, + "learning_rate": 4.154371683988662e-05, + "loss": 0.1971, + "step": 11440 + }, + { + "epoch": 0.4160912857039029, + "grad_norm": 16.871423721313477, + "learning_rate": 4.158005669016644e-05, + "loss": 0.2768, + "step": 11450 + }, + { + "epoch": 0.41645468420670106, + "grad_norm": 3.142385721206665, + "learning_rate": 4.1616396540446254e-05, + "loss": 0.2173, + "step": 11460 + }, + { + "epoch": 0.4168180827094992, + "grad_norm": 0.9852932095527649, + "learning_rate": 4.1652736390726074e-05, + "loss": 0.2529, + "step": 11470 + }, + { + "epoch": 0.4171814812122974, + "grad_norm": 2.4834413528442383, + "learning_rate": 4.1689076241005886e-05, + "loss": 0.9175, + "step": 11480 + }, + { + "epoch": 0.4175448797150956, + "grad_norm": 2.7286272048950195, + "learning_rate": 4.1725416091285705e-05, + "loss": 0.1864, + "step": 11490 + }, + { + "epoch": 0.4179082782178937, + "grad_norm": 5.711360454559326, + "learning_rate": 4.1761755941565525e-05, + "loss": 0.2285, + "step": 11500 + }, + { + "epoch": 0.41827167672069193, + "grad_norm": 1.160866379737854, + "learning_rate": 4.179809579184534e-05, + "loss": 0.1959, + "step": 11510 + }, + { + "epoch": 0.4186350752234901, + "grad_norm": 2.5051305294036865, + "learning_rate": 4.183443564212516e-05, + "loss": 0.185, + "step": 11520 + }, + { + "epoch": 0.41899847372628823, + "grad_norm": 3.641874313354492, + "learning_rate": 4.1870775492404976e-05, + "loss": 0.199, + "step": 11530 + }, + { + "epoch": 0.41936187222908644, + "grad_norm": 1.676038146018982, + "learning_rate": 4.190711534268479e-05, + "loss": 0.1895, + "step": 11540 + }, + { + "epoch": 0.4197252707318846, + "grad_norm": 11.47658634185791, + "learning_rate": 4.194345519296461e-05, + "loss": 0.246, + "step": 11550 + }, + { + "epoch": 0.42008866923468274, + "grad_norm": 1.7632570266723633, + "learning_rate": 4.197979504324443e-05, + "loss": 0.1761, + "step": 11560 + }, + { + "epoch": 0.4204520677374809, + "grad_norm": 2.2994728088378906, + "learning_rate": 4.201613489352424e-05, + "loss": 0.1799, + "step": 11570 + }, + { + "epoch": 0.4208154662402791, + "grad_norm": 3.964228391647339, + "learning_rate": 4.205247474380406e-05, + "loss": 0.7376, + "step": 11580 + }, + { + "epoch": 0.42117886474307725, + "grad_norm": 1.866466760635376, + "learning_rate": 4.208881459408387e-05, + "loss": 0.2293, + "step": 11590 + }, + { + "epoch": 0.4215422632458754, + "grad_norm": 4.722428798675537, + "learning_rate": 4.212515444436369e-05, + "loss": 0.2303, + "step": 11600 + }, + { + "epoch": 0.4219056617486736, + "grad_norm": 2.8812968730926514, + "learning_rate": 4.2161494294643504e-05, + "loss": 0.1628, + "step": 11610 + }, + { + "epoch": 0.42226906025147176, + "grad_norm": 8.05451488494873, + "learning_rate": 4.219783414492332e-05, + "loss": 0.1978, + "step": 11620 + }, + { + "epoch": 0.4226324587542699, + "grad_norm": 3.4176700115203857, + "learning_rate": 4.223417399520314e-05, + "loss": 0.1986, + "step": 11630 + }, + { + "epoch": 0.4229958572570681, + "grad_norm": 5.204764366149902, + "learning_rate": 4.2270513845482955e-05, + "loss": 0.1959, + "step": 11640 + }, + { + "epoch": 0.42335925575986627, + "grad_norm": 6.184700965881348, + "learning_rate": 4.2306853695762774e-05, + "loss": 0.2822, + "step": 11650 + }, + { + "epoch": 0.4237226542626644, + "grad_norm": 2.288935422897339, + "learning_rate": 4.2343193546042594e-05, + "loss": 0.2073, + "step": 11660 + }, + { + "epoch": 0.42408605276546263, + "grad_norm": 3.8856844902038574, + "learning_rate": 4.2379533396322406e-05, + "loss": 0.2134, + "step": 11670 + }, + { + "epoch": 0.4244494512682608, + "grad_norm": 4.048069953918457, + "learning_rate": 4.2415873246602226e-05, + "loss": 0.1922, + "step": 11680 + }, + { + "epoch": 0.42481284977105893, + "grad_norm": 1.466927409172058, + "learning_rate": 4.2452213096882045e-05, + "loss": 0.1653, + "step": 11690 + }, + { + "epoch": 0.42517624827385714, + "grad_norm": 35.94015121459961, + "learning_rate": 4.248855294716186e-05, + "loss": 0.2398, + "step": 11700 + }, + { + "epoch": 0.4255396467766553, + "grad_norm": 2.575195789337158, + "learning_rate": 4.252489279744168e-05, + "loss": 0.2241, + "step": 11710 + }, + { + "epoch": 0.42590304527945344, + "grad_norm": 1.4232568740844727, + "learning_rate": 4.2561232647721496e-05, + "loss": 0.1817, + "step": 11720 + }, + { + "epoch": 0.4262664437822516, + "grad_norm": 2.8543412685394287, + "learning_rate": 4.259757249800131e-05, + "loss": 0.2094, + "step": 11730 + }, + { + "epoch": 0.4266298422850498, + "grad_norm": 0.85033118724823, + "learning_rate": 4.263391234828113e-05, + "loss": 0.1578, + "step": 11740 + }, + { + "epoch": 0.42699324078784795, + "grad_norm": 7.382369041442871, + "learning_rate": 4.267025219856095e-05, + "loss": 0.2763, + "step": 11750 + }, + { + "epoch": 0.4273566392906461, + "grad_norm": 1.3994635343551636, + "learning_rate": 4.270659204884076e-05, + "loss": 0.199, + "step": 11760 + }, + { + "epoch": 0.4277200377934443, + "grad_norm": 1.4978888034820557, + "learning_rate": 4.274293189912057e-05, + "loss": 0.1804, + "step": 11770 + }, + { + "epoch": 0.42808343629624246, + "grad_norm": 5.206210136413574, + "learning_rate": 4.27792717494004e-05, + "loss": 0.2483, + "step": 11780 + }, + { + "epoch": 0.4284468347990406, + "grad_norm": 1.4130820035934448, + "learning_rate": 4.281561159968021e-05, + "loss": 0.1792, + "step": 11790 + }, + { + "epoch": 0.4288102333018388, + "grad_norm": 2.60227370262146, + "learning_rate": 4.2851951449960024e-05, + "loss": 0.214, + "step": 11800 + }, + { + "epoch": 0.42917363180463697, + "grad_norm": 1.8874465227127075, + "learning_rate": 4.288829130023985e-05, + "loss": 0.1894, + "step": 11810 + }, + { + "epoch": 0.4295370303074351, + "grad_norm": 2.921766519546509, + "learning_rate": 4.292463115051966e-05, + "loss": 0.1608, + "step": 11820 + }, + { + "epoch": 0.4299004288102333, + "grad_norm": 2.812821626663208, + "learning_rate": 4.2960971000799475e-05, + "loss": 0.2381, + "step": 11830 + }, + { + "epoch": 0.4302638273130315, + "grad_norm": 1.8063637018203735, + "learning_rate": 4.2997310851079295e-05, + "loss": 0.198, + "step": 11840 + }, + { + "epoch": 0.43062722581582963, + "grad_norm": 16.433927536010742, + "learning_rate": 4.3033650701359114e-05, + "loss": 0.3015, + "step": 11850 + }, + { + "epoch": 0.4309906243186278, + "grad_norm": 1.295142650604248, + "learning_rate": 4.3069990551638927e-05, + "loss": 0.1678, + "step": 11860 + }, + { + "epoch": 0.431354022821426, + "grad_norm": 135.4871063232422, + "learning_rate": 4.3106330401918746e-05, + "loss": 1.8542, + "step": 11870 + }, + { + "epoch": 0.43171742132422414, + "grad_norm": 2.3314764499664307, + "learning_rate": 4.3142670252198565e-05, + "loss": 0.1983, + "step": 11880 + }, + { + "epoch": 0.4320808198270223, + "grad_norm": 1.6635117530822754, + "learning_rate": 4.317901010247838e-05, + "loss": 0.1737, + "step": 11890 + }, + { + "epoch": 0.4324442183298205, + "grad_norm": 32.102664947509766, + "learning_rate": 4.32153499527582e-05, + "loss": 0.3092, + "step": 11900 + }, + { + "epoch": 0.43280761683261865, + "grad_norm": 2.3491451740264893, + "learning_rate": 4.3251689803038017e-05, + "loss": 0.1849, + "step": 11910 + }, + { + "epoch": 0.4331710153354168, + "grad_norm": 3.8088629245758057, + "learning_rate": 4.328802965331783e-05, + "loss": 0.2023, + "step": 11920 + }, + { + "epoch": 0.433534413838215, + "grad_norm": 2.7132246494293213, + "learning_rate": 4.332436950359764e-05, + "loss": 0.1935, + "step": 11930 + }, + { + "epoch": 0.43389781234101316, + "grad_norm": 1.2917368412017822, + "learning_rate": 4.336070935387747e-05, + "loss": 0.1918, + "step": 11940 + }, + { + "epoch": 0.4342612108438113, + "grad_norm": 9.690601348876953, + "learning_rate": 4.339704920415728e-05, + "loss": 0.3059, + "step": 11950 + }, + { + "epoch": 0.4346246093466095, + "grad_norm": 1.2652380466461182, + "learning_rate": 4.343338905443709e-05, + "loss": 0.1587, + "step": 11960 + }, + { + "epoch": 0.43498800784940767, + "grad_norm": 0.9622058272361755, + "learning_rate": 4.346972890471692e-05, + "loss": 0.1755, + "step": 11970 + }, + { + "epoch": 0.4353514063522058, + "grad_norm": 5.316989898681641, + "learning_rate": 4.350606875499673e-05, + "loss": 0.1794, + "step": 11980 + }, + { + "epoch": 0.435714804855004, + "grad_norm": 3.428891181945801, + "learning_rate": 4.3542408605276544e-05, + "loss": 0.2105, + "step": 11990 + }, + { + "epoch": 0.4360782033578022, + "grad_norm": 12.879768371582031, + "learning_rate": 4.357874845555637e-05, + "loss": 0.2904, + "step": 12000 + }, + { + "epoch": 0.4360782033578022, + "eval_loss": 0.3918191194534302, + "eval_runtime": 180.0676, + "eval_samples_per_second": 41.173, + "eval_steps_per_second": 5.148, + "eval_wer": 0.22659611160527893, + "step": 12000 + }, + { + "epoch": 0.4364416018606003, + "grad_norm": 2.0471973419189453, + "learning_rate": 4.361508830583618e-05, + "loss": 0.2544, + "step": 12010 + }, + { + "epoch": 0.4368050003633985, + "grad_norm": 1.3883107900619507, + "learning_rate": 4.3651428156115995e-05, + "loss": 0.1957, + "step": 12020 + }, + { + "epoch": 0.4371683988661967, + "grad_norm": 1.786475419998169, + "learning_rate": 4.368776800639582e-05, + "loss": 0.1732, + "step": 12030 + }, + { + "epoch": 0.43753179736899483, + "grad_norm": 3.3099594116210938, + "learning_rate": 4.3724107856675634e-05, + "loss": 0.1871, + "step": 12040 + }, + { + "epoch": 0.437895195871793, + "grad_norm": 9.09699535369873, + "learning_rate": 4.376044770695545e-05, + "loss": 0.2745, + "step": 12050 + }, + { + "epoch": 0.4382585943745912, + "grad_norm": 2.0993807315826416, + "learning_rate": 4.3796787557235266e-05, + "loss": 0.2076, + "step": 12060 + }, + { + "epoch": 0.43862199287738934, + "grad_norm": 27.799428939819336, + "learning_rate": 4.3833127407515085e-05, + "loss": 0.548, + "step": 12070 + }, + { + "epoch": 0.4389853913801875, + "grad_norm": 3.8897557258605957, + "learning_rate": 4.38694672577949e-05, + "loss": 0.1799, + "step": 12080 + }, + { + "epoch": 0.4393487898829857, + "grad_norm": 3.4620189666748047, + "learning_rate": 4.390580710807472e-05, + "loss": 0.1735, + "step": 12090 + }, + { + "epoch": 0.43971218838578385, + "grad_norm": 9.587783813476562, + "learning_rate": 4.394214695835454e-05, + "loss": 0.3344, + "step": 12100 + }, + { + "epoch": 0.440075586888582, + "grad_norm": 1.2581641674041748, + "learning_rate": 4.397848680863435e-05, + "loss": 0.1863, + "step": 12110 + }, + { + "epoch": 0.4404389853913802, + "grad_norm": 1.3624401092529297, + "learning_rate": 4.401482665891417e-05, + "loss": 3.7692, + "step": 12120 + }, + { + "epoch": 0.44080238389417836, + "grad_norm": 2.0099213123321533, + "learning_rate": 4.405116650919399e-05, + "loss": 0.1999, + "step": 12130 + }, + { + "epoch": 0.4411657823969765, + "grad_norm": 2.7499871253967285, + "learning_rate": 4.40875063594738e-05, + "loss": 0.1854, + "step": 12140 + }, + { + "epoch": 0.4415291808997747, + "grad_norm": 6.473042964935303, + "learning_rate": 4.412384620975361e-05, + "loss": 0.2843, + "step": 12150 + }, + { + "epoch": 0.44189257940257287, + "grad_norm": 3.845900535583496, + "learning_rate": 4.416018606003344e-05, + "loss": 0.1747, + "step": 12160 + }, + { + "epoch": 0.442255977905371, + "grad_norm": 1.4052759408950806, + "learning_rate": 4.419652591031325e-05, + "loss": 0.16, + "step": 12170 + }, + { + "epoch": 0.4426193764081692, + "grad_norm": 3.5824673175811768, + "learning_rate": 4.4232865760593064e-05, + "loss": 0.7205, + "step": 12180 + }, + { + "epoch": 0.4429827749109674, + "grad_norm": 1.237358570098877, + "learning_rate": 4.426920561087289e-05, + "loss": 0.2043, + "step": 12190 + }, + { + "epoch": 0.44334617341376553, + "grad_norm": 11.106649398803711, + "learning_rate": 4.43055454611527e-05, + "loss": 0.2537, + "step": 12200 + }, + { + "epoch": 0.4437095719165637, + "grad_norm": 1.4566165208816528, + "learning_rate": 4.4341885311432516e-05, + "loss": 0.174, + "step": 12210 + }, + { + "epoch": 0.4440729704193619, + "grad_norm": 1.4067914485931396, + "learning_rate": 4.4378225161712335e-05, + "loss": 0.1672, + "step": 12220 + }, + { + "epoch": 0.44443636892216004, + "grad_norm": 3.1289005279541016, + "learning_rate": 4.4414565011992154e-05, + "loss": 0.2459, + "step": 12230 + }, + { + "epoch": 0.4447997674249582, + "grad_norm": 1.2487775087356567, + "learning_rate": 4.445090486227197e-05, + "loss": 0.1911, + "step": 12240 + }, + { + "epoch": 0.4451631659277564, + "grad_norm": 4.373108863830566, + "learning_rate": 4.4487244712551786e-05, + "loss": 0.265, + "step": 12250 + }, + { + "epoch": 0.44552656443055455, + "grad_norm": 3.0927655696868896, + "learning_rate": 4.4523584562831606e-05, + "loss": 0.166, + "step": 12260 + }, + { + "epoch": 0.4458899629333527, + "grad_norm": 1.4012075662612915, + "learning_rate": 4.455992441311142e-05, + "loss": 0.1631, + "step": 12270 + }, + { + "epoch": 0.4462533614361509, + "grad_norm": 3.9944920539855957, + "learning_rate": 4.459626426339124e-05, + "loss": 0.2616, + "step": 12280 + }, + { + "epoch": 0.44661675993894906, + "grad_norm": 2.412261962890625, + "learning_rate": 4.463260411367106e-05, + "loss": 0.1963, + "step": 12290 + }, + { + "epoch": 0.4469801584417472, + "grad_norm": 8.601739883422852, + "learning_rate": 4.466894396395087e-05, + "loss": 0.3057, + "step": 12300 + }, + { + "epoch": 0.44734355694454536, + "grad_norm": 2.1279587745666504, + "learning_rate": 4.470528381423069e-05, + "loss": 0.1931, + "step": 12310 + }, + { + "epoch": 0.44770695544734357, + "grad_norm": 2.465534210205078, + "learning_rate": 4.474162366451051e-05, + "loss": 0.1701, + "step": 12320 + }, + { + "epoch": 0.4480703539501417, + "grad_norm": 6.147269248962402, + "learning_rate": 4.477796351479032e-05, + "loss": 0.7176, + "step": 12330 + }, + { + "epoch": 0.44843375245293987, + "grad_norm": 1.6242046356201172, + "learning_rate": 4.481430336507014e-05, + "loss": 0.1769, + "step": 12340 + }, + { + "epoch": 0.4487971509557381, + "grad_norm": 7.065566539764404, + "learning_rate": 4.485064321534995e-05, + "loss": 0.2967, + "step": 12350 + }, + { + "epoch": 0.44916054945853623, + "grad_norm": 1.9389359951019287, + "learning_rate": 4.488698306562977e-05, + "loss": 0.1853, + "step": 12360 + }, + { + "epoch": 0.4495239479613344, + "grad_norm": 1.011250376701355, + "learning_rate": 4.492332291590959e-05, + "loss": 0.2036, + "step": 12370 + }, + { + "epoch": 0.4498873464641326, + "grad_norm": 2.459062099456787, + "learning_rate": 4.4959662766189404e-05, + "loss": 0.1865, + "step": 12380 + }, + { + "epoch": 0.45025074496693074, + "grad_norm": 1.8472875356674194, + "learning_rate": 4.499600261646922e-05, + "loss": 0.2178, + "step": 12390 + }, + { + "epoch": 0.4506141434697289, + "grad_norm": 40.6389045715332, + "learning_rate": 4.5032342466749036e-05, + "loss": 0.2506, + "step": 12400 + }, + { + "epoch": 0.4509775419725271, + "grad_norm": 3.9729344844818115, + "learning_rate": 4.5068682317028855e-05, + "loss": 0.1917, + "step": 12410 + }, + { + "epoch": 0.45134094047532525, + "grad_norm": 1.0262936353683472, + "learning_rate": 4.5105022167308675e-05, + "loss": 0.2115, + "step": 12420 + }, + { + "epoch": 0.4517043389781234, + "grad_norm": 1.5356003046035767, + "learning_rate": 4.514136201758849e-05, + "loss": 0.1907, + "step": 12430 + }, + { + "epoch": 0.4520677374809216, + "grad_norm": 1.3107296228408813, + "learning_rate": 4.5177701867868307e-05, + "loss": 0.195, + "step": 12440 + }, + { + "epoch": 0.45243113598371976, + "grad_norm": 11.025674819946289, + "learning_rate": 4.5214041718148126e-05, + "loss": 0.2794, + "step": 12450 + }, + { + "epoch": 0.4527945344865179, + "grad_norm": 1.8793771266937256, + "learning_rate": 4.525038156842794e-05, + "loss": 0.2143, + "step": 12460 + }, + { + "epoch": 0.45315793298931606, + "grad_norm": 1.6508142948150635, + "learning_rate": 4.528672141870776e-05, + "loss": 0.1863, + "step": 12470 + }, + { + "epoch": 0.45352133149211427, + "grad_norm": 4.942420959472656, + "learning_rate": 4.532306126898758e-05, + "loss": 0.1997, + "step": 12480 + }, + { + "epoch": 0.4538847299949124, + "grad_norm": 3.1977925300598145, + "learning_rate": 4.535940111926739e-05, + "loss": 1.9163, + "step": 12490 + }, + { + "epoch": 0.45424812849771057, + "grad_norm": 8.74572467803955, + "learning_rate": 4.539574096954721e-05, + "loss": 0.3186, + "step": 12500 + }, + { + "epoch": 0.4546115270005088, + "grad_norm": 1.5346311330795288, + "learning_rate": 4.543208081982702e-05, + "loss": 0.1958, + "step": 12510 + }, + { + "epoch": 0.4549749255033069, + "grad_norm": 1.622859239578247, + "learning_rate": 4.546842067010684e-05, + "loss": 0.1828, + "step": 12520 + }, + { + "epoch": 0.4553383240061051, + "grad_norm": 1.9394720792770386, + "learning_rate": 4.550476052038666e-05, + "loss": 0.2198, + "step": 12530 + }, + { + "epoch": 0.4557017225089033, + "grad_norm": 1.8405578136444092, + "learning_rate": 4.554110037066647e-05, + "loss": 0.1789, + "step": 12540 + }, + { + "epoch": 0.45606512101170144, + "grad_norm": 6.24867582321167, + "learning_rate": 4.557744022094629e-05, + "loss": 0.2593, + "step": 12550 + }, + { + "epoch": 0.4564285195144996, + "grad_norm": 1.6062959432601929, + "learning_rate": 4.561378007122611e-05, + "loss": 0.1665, + "step": 12560 + }, + { + "epoch": 0.4567919180172978, + "grad_norm": 1.1478540897369385, + "learning_rate": 4.5650119921505924e-05, + "loss": 0.1942, + "step": 12570 + }, + { + "epoch": 0.45715531652009594, + "grad_norm": 2.0299808979034424, + "learning_rate": 4.5686459771785744e-05, + "loss": 0.2092, + "step": 12580 + }, + { + "epoch": 0.4575187150228941, + "grad_norm": 1.6643180847167969, + "learning_rate": 4.572279962206556e-05, + "loss": 0.1714, + "step": 12590 + }, + { + "epoch": 0.45788211352569225, + "grad_norm": 10.169012069702148, + "learning_rate": 4.5759139472345376e-05, + "loss": 0.3101, + "step": 12600 + }, + { + "epoch": 0.45788211352569225, + "eval_loss": 0.408176064491272, + "eval_runtime": 179.7843, + "eval_samples_per_second": 41.238, + "eval_steps_per_second": 5.156, + "eval_wer": 0.24004756113057527, + "step": 12600 + }, + { + "epoch": 0.45824551202849045, + "grad_norm": 1.8151092529296875, + "learning_rate": 4.5795479322625195e-05, + "loss": 0.1739, + "step": 12610 + }, + { + "epoch": 0.4586089105312886, + "grad_norm": 1.1606543064117432, + "learning_rate": 4.583181917290501e-05, + "loss": 0.1781, + "step": 12620 + }, + { + "epoch": 0.45897230903408676, + "grad_norm": 2.5139431953430176, + "learning_rate": 4.586815902318483e-05, + "loss": 0.2101, + "step": 12630 + }, + { + "epoch": 0.45933570753688496, + "grad_norm": 3.1557183265686035, + "learning_rate": 4.590449887346464e-05, + "loss": 0.1925, + "step": 12640 + }, + { + "epoch": 0.4596991060396831, + "grad_norm": 13.978137016296387, + "learning_rate": 4.594083872374446e-05, + "loss": 0.3085, + "step": 12650 + }, + { + "epoch": 0.46006250454248127, + "grad_norm": 1.5187938213348389, + "learning_rate": 4.597717857402428e-05, + "loss": 0.1909, + "step": 12660 + }, + { + "epoch": 0.46042590304527947, + "grad_norm": 1.661890983581543, + "learning_rate": 4.601351842430409e-05, + "loss": 0.1729, + "step": 12670 + }, + { + "epoch": 0.4607893015480776, + "grad_norm": 5.693175792694092, + "learning_rate": 4.604985827458391e-05, + "loss": 0.2069, + "step": 12680 + }, + { + "epoch": 0.4611527000508758, + "grad_norm": 2.5228755474090576, + "learning_rate": 4.608619812486373e-05, + "loss": 0.1899, + "step": 12690 + }, + { + "epoch": 0.461516098553674, + "grad_norm": 12.629317283630371, + "learning_rate": 4.612253797514354e-05, + "loss": 0.2441, + "step": 12700 + }, + { + "epoch": 0.46187949705647213, + "grad_norm": 1.5003726482391357, + "learning_rate": 4.615887782542336e-05, + "loss": 0.1845, + "step": 12710 + }, + { + "epoch": 0.4622428955592703, + "grad_norm": 1.596705675125122, + "learning_rate": 4.619521767570318e-05, + "loss": 0.1942, + "step": 12720 + }, + { + "epoch": 0.4626062940620685, + "grad_norm": 4.299325466156006, + "learning_rate": 4.623155752598299e-05, + "loss": 0.1881, + "step": 12730 + }, + { + "epoch": 0.46296969256486664, + "grad_norm": 2.242932081222534, + "learning_rate": 4.626789737626281e-05, + "loss": 0.1655, + "step": 12740 + }, + { + "epoch": 0.4633330910676648, + "grad_norm": 17.353313446044922, + "learning_rate": 4.630423722654263e-05, + "loss": 0.3002, + "step": 12750 + }, + { + "epoch": 0.46369648957046294, + "grad_norm": 1.8967528343200684, + "learning_rate": 4.6340577076822444e-05, + "loss": 0.1967, + "step": 12760 + }, + { + "epoch": 0.46405988807326115, + "grad_norm": 1.9839125871658325, + "learning_rate": 4.6376916927102264e-05, + "loss": 0.1582, + "step": 12770 + }, + { + "epoch": 0.4644232865760593, + "grad_norm": 1.8139293193817139, + "learning_rate": 4.641325677738208e-05, + "loss": 0.2527, + "step": 12780 + }, + { + "epoch": 0.46478668507885745, + "grad_norm": 1.6944659948349, + "learning_rate": 4.6449596627661896e-05, + "loss": 0.1656, + "step": 12790 + }, + { + "epoch": 0.46515008358165566, + "grad_norm": 3.7842020988464355, + "learning_rate": 4.648593647794171e-05, + "loss": 0.2375, + "step": 12800 + }, + { + "epoch": 0.4655134820844538, + "grad_norm": 1.8103773593902588, + "learning_rate": 4.6522276328221534e-05, + "loss": 0.1932, + "step": 12810 + }, + { + "epoch": 0.46587688058725196, + "grad_norm": 1.4419440031051636, + "learning_rate": 4.655861617850135e-05, + "loss": 0.1808, + "step": 12820 + }, + { + "epoch": 0.46624027909005017, + "grad_norm": 6.361825466156006, + "learning_rate": 4.659495602878116e-05, + "loss": 0.2105, + "step": 12830 + }, + { + "epoch": 0.4666036775928483, + "grad_norm": 1.4687098264694214, + "learning_rate": 4.663129587906098e-05, + "loss": 0.18, + "step": 12840 + }, + { + "epoch": 0.46696707609564647, + "grad_norm": 14.758776664733887, + "learning_rate": 4.66676357293408e-05, + "loss": 0.3001, + "step": 12850 + }, + { + "epoch": 0.4673304745984447, + "grad_norm": 1.4836699962615967, + "learning_rate": 4.670397557962061e-05, + "loss": 0.1713, + "step": 12860 + }, + { + "epoch": 0.46769387310124283, + "grad_norm": 4.860133171081543, + "learning_rate": 4.674031542990043e-05, + "loss": 0.1791, + "step": 12870 + }, + { + "epoch": 0.468057271604041, + "grad_norm": 1.9861228466033936, + "learning_rate": 4.677665528018025e-05, + "loss": 0.2029, + "step": 12880 + }, + { + "epoch": 0.46842067010683913, + "grad_norm": 1.9190025329589844, + "learning_rate": 4.681299513046006e-05, + "loss": 0.1611, + "step": 12890 + }, + { + "epoch": 0.46878406860963734, + "grad_norm": 4.6381516456604, + "learning_rate": 4.684933498073988e-05, + "loss": 0.2646, + "step": 12900 + }, + { + "epoch": 0.4691474671124355, + "grad_norm": 1.2092620134353638, + "learning_rate": 4.68856748310197e-05, + "loss": 0.1865, + "step": 12910 + }, + { + "epoch": 0.46951086561523364, + "grad_norm": 2.7816121578216553, + "learning_rate": 4.6922014681299513e-05, + "loss": 0.2047, + "step": 12920 + }, + { + "epoch": 0.46987426411803185, + "grad_norm": 0.629324734210968, + "learning_rate": 4.6958354531579326e-05, + "loss": 0.2404, + "step": 12930 + }, + { + "epoch": 0.47023766262083, + "grad_norm": 4.156667232513428, + "learning_rate": 4.699469438185915e-05, + "loss": 0.1604, + "step": 12940 + }, + { + "epoch": 0.47060106112362815, + "grad_norm": 1.8534492254257202, + "learning_rate": 4.7031034232138965e-05, + "loss": 0.2364, + "step": 12950 + }, + { + "epoch": 0.47096445962642636, + "grad_norm": 1.382408857345581, + "learning_rate": 4.706737408241878e-05, + "loss": 0.2078, + "step": 12960 + }, + { + "epoch": 0.4713278581292245, + "grad_norm": 2.499023914337158, + "learning_rate": 4.7103713932698603e-05, + "loss": 0.1935, + "step": 12970 + }, + { + "epoch": 0.47169125663202266, + "grad_norm": 2.726032257080078, + "learning_rate": 4.7140053782978416e-05, + "loss": 0.2143, + "step": 12980 + }, + { + "epoch": 0.47205465513482087, + "grad_norm": 2.1388118267059326, + "learning_rate": 4.717639363325823e-05, + "loss": 0.1704, + "step": 12990 + }, + { + "epoch": 0.472418053637619, + "grad_norm": 5.408501148223877, + "learning_rate": 4.7212733483538055e-05, + "loss": 0.2492, + "step": 13000 + }, + { + "epoch": 0.47278145214041717, + "grad_norm": 1.8640841245651245, + "learning_rate": 4.724907333381787e-05, + "loss": 0.1958, + "step": 13010 + }, + { + "epoch": 0.4731448506432154, + "grad_norm": 1.4251651763916016, + "learning_rate": 4.728541318409768e-05, + "loss": 0.1969, + "step": 13020 + }, + { + "epoch": 0.4735082491460135, + "grad_norm": 2.2603137493133545, + "learning_rate": 4.7321753034377506e-05, + "loss": 0.1879, + "step": 13030 + }, + { + "epoch": 0.4738716476488117, + "grad_norm": 1.7813081741333008, + "learning_rate": 4.735809288465732e-05, + "loss": 0.1627, + "step": 13040 + }, + { + "epoch": 0.47423504615160983, + "grad_norm": 16.746126174926758, + "learning_rate": 4.739443273493713e-05, + "loss": 0.3058, + "step": 13050 + }, + { + "epoch": 0.47459844465440804, + "grad_norm": 2.56193470954895, + "learning_rate": 4.743077258521695e-05, + "loss": 0.1729, + "step": 13060 + }, + { + "epoch": 0.4749618431572062, + "grad_norm": 2.1787185668945312, + "learning_rate": 4.746711243549677e-05, + "loss": 0.1804, + "step": 13070 + }, + { + "epoch": 0.47532524166000434, + "grad_norm": 3.385338544845581, + "learning_rate": 4.750345228577658e-05, + "loss": 0.1884, + "step": 13080 + }, + { + "epoch": 0.47568864016280255, + "grad_norm": 2.48083233833313, + "learning_rate": 4.75397921360564e-05, + "loss": 0.1728, + "step": 13090 + }, + { + "epoch": 0.4760520386656007, + "grad_norm": 47.18072509765625, + "learning_rate": 4.757613198633622e-05, + "loss": 0.2427, + "step": 13100 + }, + { + "epoch": 0.47641543716839885, + "grad_norm": 1.3267533779144287, + "learning_rate": 4.7612471836616034e-05, + "loss": 0.1847, + "step": 13110 + }, + { + "epoch": 0.47677883567119705, + "grad_norm": 2.098389148712158, + "learning_rate": 4.764881168689585e-05, + "loss": 0.1682, + "step": 13120 + }, + { + "epoch": 0.4771422341739952, + "grad_norm": 1.1197071075439453, + "learning_rate": 4.768515153717567e-05, + "loss": 0.166, + "step": 13130 + }, + { + "epoch": 0.47750563267679336, + "grad_norm": 1.431281328201294, + "learning_rate": 4.7721491387455485e-05, + "loss": 0.3262, + "step": 13140 + }, + { + "epoch": 0.47786903117959156, + "grad_norm": 15.357772827148438, + "learning_rate": 4.7757831237735304e-05, + "loss": 0.2906, + "step": 13150 + }, + { + "epoch": 0.4782324296823897, + "grad_norm": 3.03275465965271, + "learning_rate": 4.7794171088015124e-05, + "loss": 0.207, + "step": 13160 + }, + { + "epoch": 0.47859582818518787, + "grad_norm": 1.0988962650299072, + "learning_rate": 4.7830510938294936e-05, + "loss": 0.1788, + "step": 13170 + }, + { + "epoch": 0.4789592266879861, + "grad_norm": 1.9456548690795898, + "learning_rate": 4.786685078857475e-05, + "loss": 0.2397, + "step": 13180 + }, + { + "epoch": 0.4793226251907842, + "grad_norm": 1.7383311986923218, + "learning_rate": 4.7903190638854575e-05, + "loss": 0.1841, + "step": 13190 + }, + { + "epoch": 0.4796860236935824, + "grad_norm": 5.512730121612549, + "learning_rate": 4.793953048913439e-05, + "loss": 0.2708, + "step": 13200 + }, + { + "epoch": 0.4796860236935824, + "eval_loss": 0.3998795747756958, + "eval_runtime": 180.9114, + "eval_samples_per_second": 40.981, + "eval_steps_per_second": 5.124, + "eval_wer": 0.2369433804708915, + "step": 13200 + }, + { + "epoch": 0.4800494221963805, + "grad_norm": 1.5843122005462646, + "learning_rate": 4.79758703394142e-05, + "loss": 1.0933, + "step": 13210 + }, + { + "epoch": 0.48041282069917873, + "grad_norm": 1.4696934223175049, + "learning_rate": 4.8012210189694026e-05, + "loss": 0.1771, + "step": 13220 + }, + { + "epoch": 0.4807762192019769, + "grad_norm": 2.5620357990264893, + "learning_rate": 4.804855003997384e-05, + "loss": 0.2202, + "step": 13230 + }, + { + "epoch": 0.48113961770477504, + "grad_norm": 73.08427429199219, + "learning_rate": 4.808488989025365e-05, + "loss": 0.2471, + "step": 13240 + }, + { + "epoch": 0.48150301620757324, + "grad_norm": 7.291989803314209, + "learning_rate": 4.812122974053347e-05, + "loss": 0.2542, + "step": 13250 + }, + { + "epoch": 0.4818664147103714, + "grad_norm": 1.7582112550735474, + "learning_rate": 4.815756959081329e-05, + "loss": 0.1884, + "step": 13260 + }, + { + "epoch": 0.48222981321316954, + "grad_norm": 0.9253680109977722, + "learning_rate": 4.81939094410931e-05, + "loss": 0.1797, + "step": 13270 + }, + { + "epoch": 0.48259321171596775, + "grad_norm": 8.042390823364258, + "learning_rate": 4.823024929137292e-05, + "loss": 0.192, + "step": 13280 + }, + { + "epoch": 0.4829566102187659, + "grad_norm": 3.2288219928741455, + "learning_rate": 4.826658914165274e-05, + "loss": 0.2041, + "step": 13290 + }, + { + "epoch": 0.48332000872156405, + "grad_norm": 7.657989978790283, + "learning_rate": 4.8302928991932554e-05, + "loss": 0.3034, + "step": 13300 + }, + { + "epoch": 0.48368340722436226, + "grad_norm": 2.9273271560668945, + "learning_rate": 4.833926884221237e-05, + "loss": 0.2028, + "step": 13310 + }, + { + "epoch": 0.4840468057271604, + "grad_norm": 4.2344865798950195, + "learning_rate": 4.837560869249219e-05, + "loss": 0.1817, + "step": 13320 + }, + { + "epoch": 0.48441020422995856, + "grad_norm": 4.074464797973633, + "learning_rate": 4.8411948542772005e-05, + "loss": 0.2197, + "step": 13330 + }, + { + "epoch": 0.4847736027327567, + "grad_norm": 1.7070029973983765, + "learning_rate": 4.8448288393051825e-05, + "loss": 0.2374, + "step": 13340 + }, + { + "epoch": 0.4851370012355549, + "grad_norm": 2.5278494358062744, + "learning_rate": 4.8484628243331644e-05, + "loss": 0.265, + "step": 13350 + }, + { + "epoch": 0.4855003997383531, + "grad_norm": 1.4800697565078735, + "learning_rate": 4.8520968093611456e-05, + "loss": 0.1597, + "step": 13360 + }, + { + "epoch": 0.4858637982411512, + "grad_norm": 1.238171935081482, + "learning_rate": 4.8557307943891276e-05, + "loss": 0.1862, + "step": 13370 + }, + { + "epoch": 0.48622719674394943, + "grad_norm": 2.7711944580078125, + "learning_rate": 4.859364779417109e-05, + "loss": 0.1572, + "step": 13380 + }, + { + "epoch": 0.4865905952467476, + "grad_norm": 2.386011838912964, + "learning_rate": 4.862998764445091e-05, + "loss": 0.1624, + "step": 13390 + }, + { + "epoch": 0.48695399374954573, + "grad_norm": 10.38249397277832, + "learning_rate": 4.866632749473072e-05, + "loss": 0.2182, + "step": 13400 + }, + { + "epoch": 0.48731739225234394, + "grad_norm": 1.1541043519973755, + "learning_rate": 4.870266734501054e-05, + "loss": 0.1867, + "step": 13410 + }, + { + "epoch": 0.4876807907551421, + "grad_norm": 0.7680534720420837, + "learning_rate": 4.873900719529036e-05, + "loss": 0.1619, + "step": 13420 + }, + { + "epoch": 0.48804418925794024, + "grad_norm": 2.6120142936706543, + "learning_rate": 4.877534704557017e-05, + "loss": 1.0657, + "step": 13430 + }, + { + "epoch": 0.48840758776073845, + "grad_norm": 2.1559348106384277, + "learning_rate": 4.881168689584999e-05, + "loss": 0.1576, + "step": 13440 + }, + { + "epoch": 0.4887709862635366, + "grad_norm": 8.222488403320312, + "learning_rate": 4.884802674612981e-05, + "loss": 0.2596, + "step": 13450 + }, + { + "epoch": 0.48913438476633475, + "grad_norm": 1.7630010843276978, + "learning_rate": 4.888436659640962e-05, + "loss": 0.1755, + "step": 13460 + }, + { + "epoch": 0.48949778326913296, + "grad_norm": 1.489050269126892, + "learning_rate": 4.892070644668944e-05, + "loss": 0.1844, + "step": 13470 + }, + { + "epoch": 0.4898611817719311, + "grad_norm": 4.412111759185791, + "learning_rate": 4.895704629696926e-05, + "loss": 0.2114, + "step": 13480 + }, + { + "epoch": 0.49022458027472926, + "grad_norm": 2.060366630554199, + "learning_rate": 4.8993386147249074e-05, + "loss": 0.1932, + "step": 13490 + }, + { + "epoch": 0.4905879787775274, + "grad_norm": 9.488603591918945, + "learning_rate": 4.9029725997528893e-05, + "loss": 0.303, + "step": 13500 + }, + { + "epoch": 0.4909513772803256, + "grad_norm": 2.295671224594116, + "learning_rate": 4.906606584780871e-05, + "loss": 0.1583, + "step": 13510 + }, + { + "epoch": 0.49131477578312377, + "grad_norm": 4.13812255859375, + "learning_rate": 4.9102405698088525e-05, + "loss": 1.9041, + "step": 13520 + }, + { + "epoch": 0.4916781742859219, + "grad_norm": 3.7411348819732666, + "learning_rate": 4.9138745548368345e-05, + "loss": 0.1927, + "step": 13530 + }, + { + "epoch": 0.4920415727887201, + "grad_norm": 1.523505449295044, + "learning_rate": 4.917508539864816e-05, + "loss": 0.1721, + "step": 13540 + }, + { + "epoch": 0.4924049712915183, + "grad_norm": 8.239662170410156, + "learning_rate": 4.921142524892798e-05, + "loss": 0.3205, + "step": 13550 + }, + { + "epoch": 0.49276836979431643, + "grad_norm": 1.8316904306411743, + "learning_rate": 4.9247765099207796e-05, + "loss": 0.172, + "step": 13560 + }, + { + "epoch": 0.49313176829711464, + "grad_norm": 4.627805233001709, + "learning_rate": 4.928410494948761e-05, + "loss": 0.1731, + "step": 13570 + }, + { + "epoch": 0.4934951667999128, + "grad_norm": 4.277485370635986, + "learning_rate": 4.932044479976743e-05, + "loss": 0.2522, + "step": 13580 + }, + { + "epoch": 0.49385856530271094, + "grad_norm": 2.131641149520874, + "learning_rate": 4.935678465004725e-05, + "loss": 0.1766, + "step": 13590 + }, + { + "epoch": 0.49422196380550915, + "grad_norm": 2.9195988178253174, + "learning_rate": 4.939312450032706e-05, + "loss": 0.3745, + "step": 13600 + }, + { + "epoch": 0.4945853623083073, + "grad_norm": 1.5876374244689941, + "learning_rate": 4.942946435060688e-05, + "loss": 0.1716, + "step": 13610 + }, + { + "epoch": 0.49494876081110545, + "grad_norm": 4.506389617919922, + "learning_rate": 4.946580420088669e-05, + "loss": 0.5847, + "step": 13620 + }, + { + "epoch": 0.4953121593139036, + "grad_norm": 3.497152090072632, + "learning_rate": 4.950214405116651e-05, + "loss": 0.2179, + "step": 13630 + }, + { + "epoch": 0.4956755578167018, + "grad_norm": 1.7728289365768433, + "learning_rate": 4.953848390144633e-05, + "loss": 0.165, + "step": 13640 + }, + { + "epoch": 0.49603895631949996, + "grad_norm": 12.01921558380127, + "learning_rate": 4.957482375172614e-05, + "loss": 0.2447, + "step": 13650 + }, + { + "epoch": 0.4964023548222981, + "grad_norm": 2.5448553562164307, + "learning_rate": 4.961116360200596e-05, + "loss": 0.2089, + "step": 13660 + }, + { + "epoch": 0.4967657533250963, + "grad_norm": 2.3643887042999268, + "learning_rate": 4.9647503452285775e-05, + "loss": 0.1724, + "step": 13670 + }, + { + "epoch": 0.49712915182789447, + "grad_norm": 2.096191644668579, + "learning_rate": 4.9683843302565594e-05, + "loss": 0.1759, + "step": 13680 + }, + { + "epoch": 0.4974925503306926, + "grad_norm": 0.9760168790817261, + "learning_rate": 4.9720183152845414e-05, + "loss": 0.1817, + "step": 13690 + }, + { + "epoch": 0.4978559488334908, + "grad_norm": 3.019702434539795, + "learning_rate": 4.9756523003125226e-05, + "loss": 0.2275, + "step": 13700 + }, + { + "epoch": 0.498219347336289, + "grad_norm": 1.0820231437683105, + "learning_rate": 4.9789228868377064e-05, + "loss": 2.5822, + "step": 13710 + }, + { + "epoch": 0.4985827458390871, + "grad_norm": 3.2908883094787598, + "learning_rate": 4.982556871865688e-05, + "loss": 0.1898, + "step": 13720 + }, + { + "epoch": 0.49894614434188533, + "grad_norm": 3.4303886890411377, + "learning_rate": 4.98619085689367e-05, + "loss": 0.2295, + "step": 13730 + }, + { + "epoch": 0.4993095428446835, + "grad_norm": 1.8785525560379028, + "learning_rate": 4.9898248419216515e-05, + "loss": 0.1699, + "step": 13740 + }, + { + "epoch": 0.49967294134748164, + "grad_norm": 7.539544105529785, + "learning_rate": 4.993458826949633e-05, + "loss": 0.2955, + "step": 13750 + }, + { + "epoch": 0.5000363398502798, + "grad_norm": 1.6091630458831787, + "learning_rate": 4.997092811977615e-05, + "loss": 0.1696, + "step": 13760 + }, + { + "epoch": 0.5003997383530779, + "grad_norm": 1.023695945739746, + "learning_rate": 4.9999999978456776e-05, + "loss": 0.1872, + "step": 13770 + }, + { + "epoch": 0.5007631368558761, + "grad_norm": 8.364274978637695, + "learning_rate": 4.999999922444405e-05, + "loss": 0.1844, + "step": 13780 + }, + { + "epoch": 0.5011265353586744, + "grad_norm": 1.7257829904556274, + "learning_rate": 4.99999973932703e-05, + "loss": 0.241, + "step": 13790 + }, + { + "epoch": 0.5014899338614724, + "grad_norm": 7.256163597106934, + "learning_rate": 4.999999448493561e-05, + "loss": 0.2714, + "step": 13800 + }, + { + "epoch": 0.5014899338614724, + "eval_loss": 0.4298999607563019, + "eval_runtime": 179.7223, + "eval_samples_per_second": 41.253, + "eval_steps_per_second": 5.158, + "eval_wer": 0.22517109299834806, + "step": 13800 + }, + { + "epoch": 0.5018533323642707, + "grad_norm": 3.087979316711426, + "learning_rate": 4.999999049944011e-05, + "loss": 0.3094, + "step": 13810 + }, + { + "epoch": 0.5022167308670689, + "grad_norm": 1.7626384496688843, + "learning_rate": 4.999998543678397e-05, + "loss": 0.2521, + "step": 13820 + }, + { + "epoch": 0.502580129369867, + "grad_norm": 2.257432699203491, + "learning_rate": 4.999997929696741e-05, + "loss": 0.1913, + "step": 13830 + }, + { + "epoch": 0.5029435278726652, + "grad_norm": 1.7763293981552124, + "learning_rate": 4.999997207999069e-05, + "loss": 0.1812, + "step": 13840 + }, + { + "epoch": 0.5033069263754634, + "grad_norm": 8.228759765625, + "learning_rate": 4.9999963785854124e-05, + "loss": 0.2953, + "step": 13850 + }, + { + "epoch": 0.5036703248782615, + "grad_norm": 1.200305461883545, + "learning_rate": 4.999995441455807e-05, + "loss": 0.3246, + "step": 13860 + }, + { + "epoch": 0.5040337233810597, + "grad_norm": 1.9264732599258423, + "learning_rate": 4.999994396610292e-05, + "loss": 0.1749, + "step": 13870 + }, + { + "epoch": 0.5043971218838579, + "grad_norm": 2.547212839126587, + "learning_rate": 4.999993244048915e-05, + "loss": 0.2714, + "step": 13880 + }, + { + "epoch": 0.504760520386656, + "grad_norm": 2.7918379306793213, + "learning_rate": 4.999991983771723e-05, + "loss": 0.1984, + "step": 13890 + }, + { + "epoch": 0.5051239188894542, + "grad_norm": 16.789764404296875, + "learning_rate": 4.999990615778772e-05, + "loss": 0.239, + "step": 13900 + }, + { + "epoch": 0.5054873173922524, + "grad_norm": 1.1825790405273438, + "learning_rate": 4.9999891400701205e-05, + "loss": 0.1774, + "step": 13910 + }, + { + "epoch": 0.5058507158950505, + "grad_norm": 2.1524746417999268, + "learning_rate": 4.999987556645832e-05, + "loss": 0.1956, + "step": 13920 + }, + { + "epoch": 0.5062141143978487, + "grad_norm": 2.8159048557281494, + "learning_rate": 4.999985865505974e-05, + "loss": 0.2315, + "step": 13930 + }, + { + "epoch": 0.5065775129006469, + "grad_norm": 1.7412035465240479, + "learning_rate": 4.99998406665062e-05, + "loss": 0.2106, + "step": 13940 + }, + { + "epoch": 0.506940911403445, + "grad_norm": 61.967708587646484, + "learning_rate": 4.999982160079848e-05, + "loss": 0.3067, + "step": 13950 + }, + { + "epoch": 0.5073043099062432, + "grad_norm": 2.378682851791382, + "learning_rate": 4.9999801457937404e-05, + "loss": 0.21, + "step": 13960 + }, + { + "epoch": 0.5076677084090414, + "grad_norm": 1.3668854236602783, + "learning_rate": 4.9999780237923824e-05, + "loss": 0.1529, + "step": 13970 + }, + { + "epoch": 0.5080311069118395, + "grad_norm": 2.2655959129333496, + "learning_rate": 4.9999757940758665e-05, + "loss": 0.1747, + "step": 13980 + }, + { + "epoch": 0.5083945054146377, + "grad_norm": 1.5975615978240967, + "learning_rate": 4.9999734566442877e-05, + "loss": 0.1728, + "step": 13990 + }, + { + "epoch": 0.5087579039174358, + "grad_norm": 9.869553565979004, + "learning_rate": 4.999971011497748e-05, + "loss": 0.3207, + "step": 14000 + }, + { + "epoch": 0.509121302420234, + "grad_norm": 2.3095829486846924, + "learning_rate": 4.999968458636353e-05, + "loss": 0.1872, + "step": 14010 + }, + { + "epoch": 0.5094847009230322, + "grad_norm": 2.059575080871582, + "learning_rate": 4.999965798060212e-05, + "loss": 0.3059, + "step": 14020 + }, + { + "epoch": 0.5098480994258303, + "grad_norm": 1.7138803005218506, + "learning_rate": 4.9999630297694395e-05, + "loss": 0.2025, + "step": 14030 + }, + { + "epoch": 0.5102114979286285, + "grad_norm": 2.831191062927246, + "learning_rate": 4.999960153764155e-05, + "loss": 0.1685, + "step": 14040 + }, + { + "epoch": 0.5105748964314267, + "grad_norm": 15.457362174987793, + "learning_rate": 4.999957170044482e-05, + "loss": 0.2165, + "step": 14050 + }, + { + "epoch": 0.5109382949342248, + "grad_norm": 3.923633337020874, + "learning_rate": 4.999954078610549e-05, + "loss": 0.1888, + "step": 14060 + }, + { + "epoch": 0.511301693437023, + "grad_norm": 0.8243936896324158, + "learning_rate": 4.999950879462491e-05, + "loss": 0.295, + "step": 14070 + }, + { + "epoch": 0.5116650919398212, + "grad_norm": 2.921447277069092, + "learning_rate": 4.9999475726004434e-05, + "loss": 0.3208, + "step": 14080 + }, + { + "epoch": 0.5120284904426193, + "grad_norm": 0.9395463466644287, + "learning_rate": 4.99994415802455e-05, + "loss": 0.1936, + "step": 14090 + }, + { + "epoch": 0.5123918889454175, + "grad_norm": 11.025691986083984, + "learning_rate": 4.999940635734958e-05, + "loss": 0.2581, + "step": 14100 + }, + { + "epoch": 0.5127552874482157, + "grad_norm": 2.2102460861206055, + "learning_rate": 4.999937005731818e-05, + "loss": 0.1888, + "step": 14110 + }, + { + "epoch": 0.5131186859510138, + "grad_norm": 1.6075447797775269, + "learning_rate": 4.9999332680152876e-05, + "loss": 0.1557, + "step": 14120 + }, + { + "epoch": 0.513482084453812, + "grad_norm": 3.0174403190612793, + "learning_rate": 4.999929422585528e-05, + "loss": 0.2137, + "step": 14130 + }, + { + "epoch": 0.5138454829566103, + "grad_norm": 3.2911272048950195, + "learning_rate": 4.999925469442705e-05, + "loss": 0.2249, + "step": 14140 + }, + { + "epoch": 0.5142088814594084, + "grad_norm": 4.0001444816589355, + "learning_rate": 4.999921408586986e-05, + "loss": 0.2548, + "step": 14150 + }, + { + "epoch": 0.5145722799622066, + "grad_norm": 2.7695538997650146, + "learning_rate": 4.9999172400185504e-05, + "loss": 0.2107, + "step": 14160 + }, + { + "epoch": 0.5149356784650048, + "grad_norm": 1.420189380645752, + "learning_rate": 4.999912963737574e-05, + "loss": 0.1887, + "step": 14170 + }, + { + "epoch": 0.5152990769678029, + "grad_norm": 1.4330711364746094, + "learning_rate": 4.9999085797442434e-05, + "loss": 0.2295, + "step": 14180 + }, + { + "epoch": 0.5156624754706011, + "grad_norm": 1.9518648386001587, + "learning_rate": 4.999904088038747e-05, + "loss": 0.181, + "step": 14190 + }, + { + "epoch": 0.5160258739733993, + "grad_norm": 9.763446807861328, + "learning_rate": 4.999899488621278e-05, + "loss": 0.2163, + "step": 14200 + }, + { + "epoch": 0.5163892724761974, + "grad_norm": 1.63487708568573, + "learning_rate": 4.999894781492035e-05, + "loss": 0.1675, + "step": 14210 + }, + { + "epoch": 0.5167526709789956, + "grad_norm": 1.3337619304656982, + "learning_rate": 4.99988996665122e-05, + "loss": 1.8258, + "step": 14220 + }, + { + "epoch": 0.5171160694817938, + "grad_norm": 4.741299152374268, + "learning_rate": 4.9998850440990414e-05, + "loss": 0.199, + "step": 14230 + }, + { + "epoch": 0.5174794679845919, + "grad_norm": 2.203994035720825, + "learning_rate": 4.9998800138357106e-05, + "loss": 0.1666, + "step": 14240 + }, + { + "epoch": 0.5178428664873901, + "grad_norm": 9.144301414489746, + "learning_rate": 4.999874875861444e-05, + "loss": 0.2567, + "step": 14250 + }, + { + "epoch": 0.5182062649901883, + "grad_norm": 1.432627558708191, + "learning_rate": 4.9998696301764644e-05, + "loss": 0.1842, + "step": 14260 + }, + { + "epoch": 0.5185696634929864, + "grad_norm": 1.5303106307983398, + "learning_rate": 4.999864276780998e-05, + "loss": 0.1726, + "step": 14270 + }, + { + "epoch": 0.5189330619957846, + "grad_norm": 13.468036651611328, + "learning_rate": 4.999858815675273e-05, + "loss": 0.1927, + "step": 14280 + }, + { + "epoch": 0.5192964604985827, + "grad_norm": 3.7133965492248535, + "learning_rate": 4.999853246859526e-05, + "loss": 0.1822, + "step": 14290 + }, + { + "epoch": 0.5196598590013809, + "grad_norm": 10.077652931213379, + "learning_rate": 4.999847570333998e-05, + "loss": 0.2847, + "step": 14300 + }, + { + "epoch": 0.5200232575041791, + "grad_norm": 2.3906922340393066, + "learning_rate": 4.9998417860989325e-05, + "loss": 0.1962, + "step": 14310 + }, + { + "epoch": 0.5203866560069772, + "grad_norm": 0.8041434288024902, + "learning_rate": 4.999835894154579e-05, + "loss": 0.1661, + "step": 14320 + }, + { + "epoch": 0.5207500545097754, + "grad_norm": 4.1071953773498535, + "learning_rate": 4.99982989450119e-05, + "loss": 0.2012, + "step": 14330 + }, + { + "epoch": 0.5211134530125736, + "grad_norm": 0.9645094871520996, + "learning_rate": 4.999823787139026e-05, + "loss": 0.209, + "step": 14340 + }, + { + "epoch": 0.5214768515153717, + "grad_norm": 19.18789291381836, + "learning_rate": 4.9998175720683506e-05, + "loss": 0.3019, + "step": 14350 + }, + { + "epoch": 0.5218402500181699, + "grad_norm": 1.6560392379760742, + "learning_rate": 4.999811249289429e-05, + "loss": 0.1696, + "step": 14360 + }, + { + "epoch": 0.5222036485209681, + "grad_norm": 1.993741512298584, + "learning_rate": 4.999804818802535e-05, + "loss": 0.1895, + "step": 14370 + }, + { + "epoch": 0.5225670470237662, + "grad_norm": 3.4508492946624756, + "learning_rate": 4.999798280607947e-05, + "loss": 0.2111, + "step": 14380 + }, + { + "epoch": 0.5229304455265644, + "grad_norm": 8.431037902832031, + "learning_rate": 4.999791634705944e-05, + "loss": 0.1898, + "step": 14390 + }, + { + "epoch": 0.5232938440293626, + "grad_norm": 10.659805297851562, + "learning_rate": 4.9997848810968137e-05, + "loss": 0.4744, + "step": 14400 + }, + { + "epoch": 0.5232938440293626, + "eval_loss": 0.40915772318840027, + "eval_runtime": 179.6286, + "eval_samples_per_second": 41.274, + "eval_steps_per_second": 5.161, + "eval_wer": 0.2273222357361991, + "step": 14400 + }, + { + "epoch": 0.5236572425321607, + "grad_norm": 1.676483392715454, + "learning_rate": 4.999778019780849e-05, + "loss": 0.1856, + "step": 14410 + }, + { + "epoch": 0.5240206410349589, + "grad_norm": 3.4859771728515625, + "learning_rate": 4.9997710507583414e-05, + "loss": 0.1641, + "step": 14420 + }, + { + "epoch": 0.5243840395377571, + "grad_norm": 2.583261251449585, + "learning_rate": 4.999763974029595e-05, + "loss": 0.2545, + "step": 14430 + }, + { + "epoch": 0.5247474380405552, + "grad_norm": 2.0467324256896973, + "learning_rate": 4.999756789594913e-05, + "loss": 0.1974, + "step": 14440 + }, + { + "epoch": 0.5251108365433534, + "grad_norm": 4.777310848236084, + "learning_rate": 4.999749497454605e-05, + "loss": 0.2653, + "step": 14450 + }, + { + "epoch": 0.5254742350461516, + "grad_norm": 1.6312458515167236, + "learning_rate": 4.999742097608984e-05, + "loss": 0.1503, + "step": 14460 + }, + { + "epoch": 0.5258376335489497, + "grad_norm": 1.1725629568099976, + "learning_rate": 4.999734590058371e-05, + "loss": 0.1636, + "step": 14470 + }, + { + "epoch": 0.526201032051748, + "grad_norm": 3.2061386108398438, + "learning_rate": 4.999726974803089e-05, + "loss": 0.1988, + "step": 14480 + }, + { + "epoch": 0.5265644305545462, + "grad_norm": 1.7078185081481934, + "learning_rate": 4.9997192518434655e-05, + "loss": 0.1763, + "step": 14490 + }, + { + "epoch": 0.5269278290573443, + "grad_norm": 3.5756313800811768, + "learning_rate": 4.999711421179833e-05, + "loss": 0.2651, + "step": 14500 + }, + { + "epoch": 0.5272912275601425, + "grad_norm": 1.8054040670394897, + "learning_rate": 4.99970348281253e-05, + "loss": 0.1923, + "step": 14510 + }, + { + "epoch": 0.5276546260629407, + "grad_norm": 2.8949921131134033, + "learning_rate": 4.9996954367418976e-05, + "loss": 0.1897, + "step": 14520 + }, + { + "epoch": 0.5280180245657388, + "grad_norm": 2.0020744800567627, + "learning_rate": 4.9996872829682825e-05, + "loss": 0.2469, + "step": 14530 + }, + { + "epoch": 0.528381423068537, + "grad_norm": 1.1650570631027222, + "learning_rate": 4.999679021492037e-05, + "loss": 0.2088, + "step": 14540 + }, + { + "epoch": 0.5287448215713352, + "grad_norm": 14.624237060546875, + "learning_rate": 4.999670652313516e-05, + "loss": 0.2918, + "step": 14550 + }, + { + "epoch": 0.5291082200741333, + "grad_norm": 1.6658445596694946, + "learning_rate": 4.99966217543308e-05, + "loss": 0.1936, + "step": 14560 + }, + { + "epoch": 0.5294716185769315, + "grad_norm": 2.0761842727661133, + "learning_rate": 4.9996535908510955e-05, + "loss": 0.2318, + "step": 14570 + }, + { + "epoch": 0.5298350170797297, + "grad_norm": 2.475193977355957, + "learning_rate": 4.999644898567931e-05, + "loss": 0.1682, + "step": 14580 + }, + { + "epoch": 0.5301984155825278, + "grad_norm": 1.7537975311279297, + "learning_rate": 4.9996360985839616e-05, + "loss": 0.1528, + "step": 14590 + }, + { + "epoch": 0.530561814085326, + "grad_norm": 6.486474990844727, + "learning_rate": 4.9996271908995666e-05, + "loss": 0.2571, + "step": 14600 + }, + { + "epoch": 0.5309252125881241, + "grad_norm": 2.308250665664673, + "learning_rate": 4.9996181755151294e-05, + "loss": 0.1764, + "step": 14610 + }, + { + "epoch": 0.5312886110909223, + "grad_norm": 4.871829032897949, + "learning_rate": 4.999609052431039e-05, + "loss": 0.3045, + "step": 14620 + }, + { + "epoch": 0.5316520095937205, + "grad_norm": 2.796844959259033, + "learning_rate": 4.999599821647688e-05, + "loss": 0.2102, + "step": 14630 + }, + { + "epoch": 0.5320154080965186, + "grad_norm": 1.9681658744812012, + "learning_rate": 4.999590483165475e-05, + "loss": 0.1882, + "step": 14640 + }, + { + "epoch": 0.5323788065993168, + "grad_norm": 5.858233451843262, + "learning_rate": 4.9995810369848006e-05, + "loss": 0.314, + "step": 14650 + }, + { + "epoch": 0.532742205102115, + "grad_norm": 6.469663143157959, + "learning_rate": 4.9995714831060736e-05, + "loss": 0.2103, + "step": 14660 + }, + { + "epoch": 0.5331056036049131, + "grad_norm": 1.8543453216552734, + "learning_rate": 4.999561821529705e-05, + "loss": 0.219, + "step": 14670 + }, + { + "epoch": 0.5334690021077113, + "grad_norm": 2.222320318222046, + "learning_rate": 4.99955205225611e-05, + "loss": 0.1879, + "step": 14680 + }, + { + "epoch": 0.5338324006105095, + "grad_norm": 5.018227577209473, + "learning_rate": 4.999542175285711e-05, + "loss": 0.1437, + "step": 14690 + }, + { + "epoch": 0.5341957991133076, + "grad_norm": 6.225541114807129, + "learning_rate": 4.999532190618933e-05, + "loss": 0.268, + "step": 14700 + }, + { + "epoch": 0.5345591976161058, + "grad_norm": 1.8122676610946655, + "learning_rate": 4.999522098256206e-05, + "loss": 0.1644, + "step": 14710 + }, + { + "epoch": 0.534922596118904, + "grad_norm": 2.4057557582855225, + "learning_rate": 4.999511898197966e-05, + "loss": 0.1663, + "step": 14720 + }, + { + "epoch": 0.5352859946217021, + "grad_norm": 1.756697416305542, + "learning_rate": 4.9995015904446513e-05, + "loss": 0.1771, + "step": 14730 + }, + { + "epoch": 0.5356493931245003, + "grad_norm": 1.5457457304000854, + "learning_rate": 4.999491174996706e-05, + "loss": 0.1889, + "step": 14740 + }, + { + "epoch": 0.5360127916272985, + "grad_norm": 3.108682155609131, + "learning_rate": 4.999480651854579e-05, + "loss": 0.2063, + "step": 14750 + }, + { + "epoch": 0.5363761901300966, + "grad_norm": 2.2037875652313232, + "learning_rate": 4.9994700210187246e-05, + "loss": 0.1579, + "step": 14760 + }, + { + "epoch": 0.5367395886328948, + "grad_norm": 1.2102454900741577, + "learning_rate": 4.9994592824895994e-05, + "loss": 0.2361, + "step": 14770 + }, + { + "epoch": 0.537102987135693, + "grad_norm": 6.5722455978393555, + "learning_rate": 4.999448436267667e-05, + "loss": 0.2165, + "step": 14780 + }, + { + "epoch": 0.5374663856384911, + "grad_norm": 1.606378197669983, + "learning_rate": 4.999437482353395e-05, + "loss": 0.1642, + "step": 14790 + }, + { + "epoch": 0.5378297841412893, + "grad_norm": 24.709177017211914, + "learning_rate": 4.999426420747255e-05, + "loss": 0.2628, + "step": 14800 + }, + { + "epoch": 0.5381931826440876, + "grad_norm": 2.543760299682617, + "learning_rate": 4.999415251449723e-05, + "loss": 0.1883, + "step": 14810 + }, + { + "epoch": 0.5385565811468856, + "grad_norm": 2.0813279151916504, + "learning_rate": 4.999403974461281e-05, + "loss": 0.1842, + "step": 14820 + }, + { + "epoch": 0.5389199796496839, + "grad_norm": 4.744104385375977, + "learning_rate": 4.9993925897824144e-05, + "loss": 0.1981, + "step": 14830 + }, + { + "epoch": 0.5392833781524821, + "grad_norm": 3.2407493591308594, + "learning_rate": 4.9993810974136146e-05, + "loss": 0.2169, + "step": 14840 + }, + { + "epoch": 0.5396467766552802, + "grad_norm": 13.33681869506836, + "learning_rate": 4.999369497355375e-05, + "loss": 0.2775, + "step": 14850 + }, + { + "epoch": 0.5400101751580784, + "grad_norm": 2.3192784786224365, + "learning_rate": 4.9993577896081975e-05, + "loss": 0.1987, + "step": 14860 + }, + { + "epoch": 0.5403735736608766, + "grad_norm": 1.6611911058425903, + "learning_rate": 4.999345974172586e-05, + "loss": 0.188, + "step": 14870 + }, + { + "epoch": 0.5407369721636747, + "grad_norm": 4.368532180786133, + "learning_rate": 4.9993340510490485e-05, + "loss": 0.2201, + "step": 14880 + }, + { + "epoch": 0.5411003706664729, + "grad_norm": 1.4825586080551147, + "learning_rate": 4.999322020238099e-05, + "loss": 0.185, + "step": 14890 + }, + { + "epoch": 0.541463769169271, + "grad_norm": 4.346343994140625, + "learning_rate": 4.9993098817402564e-05, + "loss": 0.2415, + "step": 14900 + }, + { + "epoch": 0.5418271676720692, + "grad_norm": 1.0175251960754395, + "learning_rate": 4.999297635556044e-05, + "loss": 0.1991, + "step": 14910 + }, + { + "epoch": 0.5421905661748674, + "grad_norm": 1.600205421447754, + "learning_rate": 4.999285281685989e-05, + "loss": 0.1706, + "step": 14920 + }, + { + "epoch": 0.5425539646776655, + "grad_norm": 4.332497596740723, + "learning_rate": 4.999272820130623e-05, + "loss": 0.1964, + "step": 14930 + }, + { + "epoch": 0.5429173631804637, + "grad_norm": 2.0384531021118164, + "learning_rate": 4.999260250890484e-05, + "loss": 0.1571, + "step": 14940 + }, + { + "epoch": 0.5432807616832619, + "grad_norm": 11.780756950378418, + "learning_rate": 4.999247573966114e-05, + "loss": 0.319, + "step": 14950 + }, + { + "epoch": 0.54364416018606, + "grad_norm": 2.7058663368225098, + "learning_rate": 4.999234789358057e-05, + "loss": 0.2009, + "step": 14960 + }, + { + "epoch": 0.5440075586888582, + "grad_norm": 1.966780662536621, + "learning_rate": 4.999221897066866e-05, + "loss": 0.177, + "step": 14970 + }, + { + "epoch": 0.5443709571916564, + "grad_norm": 2.2129642963409424, + "learning_rate": 4.999208897093096e-05, + "loss": 0.2472, + "step": 14980 + }, + { + "epoch": 0.5447343556944545, + "grad_norm": 2.726358652114868, + "learning_rate": 4.9991957894373064e-05, + "loss": 0.2239, + "step": 14990 + }, + { + "epoch": 0.5450977541972527, + "grad_norm": 28.577600479125977, + "learning_rate": 4.999182574100063e-05, + "loss": 0.2524, + "step": 15000 + }, + { + "epoch": 0.5450977541972527, + "eval_loss": 0.3972287178039551, + "eval_runtime": 180.8086, + "eval_samples_per_second": 41.005, + "eval_steps_per_second": 5.127, + "eval_wer": 0.2289560150307695, + "step": 15000 + }, + { + "epoch": 0.5454611527000509, + "grad_norm": 1.9243866205215454, + "learning_rate": 4.9991692510819335e-05, + "loss": 0.1679, + "step": 15010 + }, + { + "epoch": 0.545824551202849, + "grad_norm": 1.3926585912704468, + "learning_rate": 4.9991558203834944e-05, + "loss": 0.1933, + "step": 15020 + }, + { + "epoch": 0.5461879497056472, + "grad_norm": 5.275027751922607, + "learning_rate": 4.999142282005322e-05, + "loss": 0.2838, + "step": 15030 + }, + { + "epoch": 0.5465513482084454, + "grad_norm": 2.142784357070923, + "learning_rate": 4.999128635948e-05, + "loss": 0.1754, + "step": 15040 + }, + { + "epoch": 0.5469147467112435, + "grad_norm": 40.32966995239258, + "learning_rate": 4.999114882212119e-05, + "loss": 0.323, + "step": 15050 + }, + { + "epoch": 0.5472781452140417, + "grad_norm": 1.056662678718567, + "learning_rate": 4.999101020798268e-05, + "loss": 0.1462, + "step": 15060 + }, + { + "epoch": 0.5476415437168399, + "grad_norm": 3.7527568340301514, + "learning_rate": 4.9990870517070464e-05, + "loss": 0.2106, + "step": 15070 + }, + { + "epoch": 0.548004942219638, + "grad_norm": 3.396487236022949, + "learning_rate": 4.9990729749390555e-05, + "loss": 0.1995, + "step": 15080 + }, + { + "epoch": 0.5483683407224362, + "grad_norm": 1.650519609451294, + "learning_rate": 4.999058790494902e-05, + "loss": 0.195, + "step": 15090 + }, + { + "epoch": 0.5487317392252344, + "grad_norm": 16.096418380737305, + "learning_rate": 4.9990444983751975e-05, + "loss": 0.2705, + "step": 15100 + }, + { + "epoch": 0.5490951377280325, + "grad_norm": 1.273149847984314, + "learning_rate": 4.999030098580556e-05, + "loss": 0.2216, + "step": 15110 + }, + { + "epoch": 0.5494585362308307, + "grad_norm": 1.5414496660232544, + "learning_rate": 4.9990155911115995e-05, + "loss": 0.1876, + "step": 15120 + }, + { + "epoch": 0.549821934733629, + "grad_norm": 4.707805633544922, + "learning_rate": 4.9990009759689524e-05, + "loss": 0.1895, + "step": 15130 + }, + { + "epoch": 0.550185333236427, + "grad_norm": 2.033162832260132, + "learning_rate": 4.9989862531532456e-05, + "loss": 0.1705, + "step": 15140 + }, + { + "epoch": 0.5505487317392252, + "grad_norm": 7.349232196807861, + "learning_rate": 4.998971422665112e-05, + "loss": 0.2815, + "step": 15150 + }, + { + "epoch": 0.5509121302420235, + "grad_norm": 1.293078064918518, + "learning_rate": 4.9989564845051915e-05, + "loss": 0.1789, + "step": 15160 + }, + { + "epoch": 0.5512755287448216, + "grad_norm": 1.7343147993087769, + "learning_rate": 4.998941438674127e-05, + "loss": 0.1781, + "step": 15170 + }, + { + "epoch": 0.5516389272476198, + "grad_norm": 2.440030574798584, + "learning_rate": 4.9989262851725674e-05, + "loss": 0.1927, + "step": 15180 + }, + { + "epoch": 0.5520023257504179, + "grad_norm": 2.276111364364624, + "learning_rate": 4.998911024001165e-05, + "loss": 0.1774, + "step": 15190 + }, + { + "epoch": 0.5523657242532161, + "grad_norm": 9.360533714294434, + "learning_rate": 4.9988956551605783e-05, + "loss": 0.2761, + "step": 15200 + }, + { + "epoch": 0.5527291227560143, + "grad_norm": 3.8025522232055664, + "learning_rate": 4.998880178651468e-05, + "loss": 0.2855, + "step": 15210 + }, + { + "epoch": 0.5530925212588124, + "grad_norm": 3.816631555557251, + "learning_rate": 4.998864594474503e-05, + "loss": 0.1559, + "step": 15220 + }, + { + "epoch": 0.5534559197616106, + "grad_norm": 3.2255067825317383, + "learning_rate": 4.998848902630353e-05, + "loss": 0.1632, + "step": 15230 + }, + { + "epoch": 0.5538193182644088, + "grad_norm": 1.077268123626709, + "learning_rate": 4.9988331031196944e-05, + "loss": 0.1969, + "step": 15240 + }, + { + "epoch": 0.5541827167672069, + "grad_norm": 5.657801151275635, + "learning_rate": 4.998817195943209e-05, + "loss": 0.2361, + "step": 15250 + }, + { + "epoch": 0.5545461152700051, + "grad_norm": 1.180039882659912, + "learning_rate": 4.998801181101581e-05, + "loss": 0.1779, + "step": 15260 + }, + { + "epoch": 0.5549095137728033, + "grad_norm": 2.12725830078125, + "learning_rate": 4.998785058595501e-05, + "loss": 0.1505, + "step": 15270 + }, + { + "epoch": 0.5552729122756014, + "grad_norm": 2.0784361362457275, + "learning_rate": 4.998768828425664e-05, + "loss": 0.2221, + "step": 15280 + }, + { + "epoch": 0.5556363107783996, + "grad_norm": 2.0133538246154785, + "learning_rate": 4.998752490592768e-05, + "loss": 0.1759, + "step": 15290 + }, + { + "epoch": 0.5559997092811978, + "grad_norm": 3.3181140422821045, + "learning_rate": 4.998736045097518e-05, + "loss": 0.229, + "step": 15300 + }, + { + "epoch": 0.5563631077839959, + "grad_norm": 1.2881536483764648, + "learning_rate": 4.998719491940622e-05, + "loss": 0.1928, + "step": 15310 + }, + { + "epoch": 0.5567265062867941, + "grad_norm": 1.0155376195907593, + "learning_rate": 4.998702831122794e-05, + "loss": 0.1986, + "step": 15320 + }, + { + "epoch": 0.5570899047895923, + "grad_norm": 7.5557661056518555, + "learning_rate": 4.998686062644752e-05, + "loss": 0.2317, + "step": 15330 + }, + { + "epoch": 0.5574533032923904, + "grad_norm": 2.3196377754211426, + "learning_rate": 4.9986691865072176e-05, + "loss": 0.1827, + "step": 15340 + }, + { + "epoch": 0.5578167017951886, + "grad_norm": 25.910188674926758, + "learning_rate": 4.998652202710918e-05, + "loss": 0.2824, + "step": 15350 + }, + { + "epoch": 0.5581801002979868, + "grad_norm": 1.0091907978057861, + "learning_rate": 4.9986351112565846e-05, + "loss": 0.1946, + "step": 15360 + }, + { + "epoch": 0.5585434988007849, + "grad_norm": 3.0022408962249756, + "learning_rate": 4.998617912144956e-05, + "loss": 0.2028, + "step": 15370 + }, + { + "epoch": 0.5589068973035831, + "grad_norm": 2.9837419986724854, + "learning_rate": 4.99860060537677e-05, + "loss": 0.203, + "step": 15380 + }, + { + "epoch": 0.5592702958063813, + "grad_norm": 2.238867998123169, + "learning_rate": 4.9985831909527746e-05, + "loss": 0.1392, + "step": 15390 + }, + { + "epoch": 0.5596336943091794, + "grad_norm": 3.8585119247436523, + "learning_rate": 4.9985656688737205e-05, + "loss": 0.2289, + "step": 15400 + }, + { + "epoch": 0.5599970928119776, + "grad_norm": 2.4951331615448, + "learning_rate": 4.998548039140361e-05, + "loss": 0.1852, + "step": 15410 + }, + { + "epoch": 0.5603604913147758, + "grad_norm": 1.8404667377471924, + "learning_rate": 4.998530301753455e-05, + "loss": 0.1813, + "step": 15420 + }, + { + "epoch": 0.5607238898175739, + "grad_norm": 2.615247964859009, + "learning_rate": 4.9985124567137695e-05, + "loss": 0.228, + "step": 15430 + }, + { + "epoch": 0.5610872883203721, + "grad_norm": 1.2074272632598877, + "learning_rate": 4.9984945040220715e-05, + "loss": 0.1879, + "step": 15440 + }, + { + "epoch": 0.5614506868231703, + "grad_norm": 38.466712951660156, + "learning_rate": 4.9984764436791355e-05, + "loss": 0.3965, + "step": 15450 + }, + { + "epoch": 0.5618140853259684, + "grad_norm": 1.4196547269821167, + "learning_rate": 4.998458275685739e-05, + "loss": 0.2061, + "step": 15460 + }, + { + "epoch": 0.5621774838287666, + "grad_norm": 1.2451281547546387, + "learning_rate": 4.998440000042664e-05, + "loss": 0.2118, + "step": 15470 + }, + { + "epoch": 0.5625408823315647, + "grad_norm": 3.7021896839141846, + "learning_rate": 4.9984216167507005e-05, + "loss": 0.2294, + "step": 15480 + }, + { + "epoch": 0.562904280834363, + "grad_norm": 2.8826780319213867, + "learning_rate": 4.998403125810638e-05, + "loss": 0.1654, + "step": 15490 + }, + { + "epoch": 0.5632676793371612, + "grad_norm": 8.366926193237305, + "learning_rate": 4.998384527223274e-05, + "loss": 0.2467, + "step": 15500 + }, + { + "epoch": 0.5636310778399592, + "grad_norm": 2.2532148361206055, + "learning_rate": 4.99836582098941e-05, + "loss": 0.2569, + "step": 15510 + }, + { + "epoch": 0.5639944763427575, + "grad_norm": 2.164987325668335, + "learning_rate": 4.998347007109853e-05, + "loss": 0.2167, + "step": 15520 + }, + { + "epoch": 0.5643578748455557, + "grad_norm": 4.651108264923096, + "learning_rate": 4.998328085585411e-05, + "loss": 0.2138, + "step": 15530 + }, + { + "epoch": 0.5647212733483538, + "grad_norm": 1.5128902196884155, + "learning_rate": 4.9983090564169024e-05, + "loss": 0.1821, + "step": 15540 + }, + { + "epoch": 0.565084671851152, + "grad_norm": 8.516124725341797, + "learning_rate": 4.998289919605145e-05, + "loss": 0.2546, + "step": 15550 + }, + { + "epoch": 0.5654480703539502, + "grad_norm": 1.6480666399002075, + "learning_rate": 4.9982706751509635e-05, + "loss": 0.2069, + "step": 15560 + }, + { + "epoch": 0.5658114688567483, + "grad_norm": 1.3768938779830933, + "learning_rate": 4.998251323055187e-05, + "loss": 0.1775, + "step": 15570 + }, + { + "epoch": 0.5661748673595465, + "grad_norm": 1.8793795108795166, + "learning_rate": 4.998231863318651e-05, + "loss": 0.14, + "step": 15580 + }, + { + "epoch": 0.5665382658623447, + "grad_norm": 1.2361701726913452, + "learning_rate": 4.9982122959421924e-05, + "loss": 0.1797, + "step": 15590 + }, + { + "epoch": 0.5669016643651428, + "grad_norm": 14.16727352142334, + "learning_rate": 4.998192620926655e-05, + "loss": 0.3523, + "step": 15600 + }, + { + "epoch": 0.5669016643651428, + "eval_loss": 0.40661031007766724, + "eval_runtime": 180.2598, + "eval_samples_per_second": 41.13, + "eval_steps_per_second": 5.143, + "eval_wer": 0.21753771307204967, + "step": 15600 + }, + { + "epoch": 0.567265062867941, + "grad_norm": 2.460245370864868, + "learning_rate": 4.9981728382728855e-05, + "loss": 0.1824, + "step": 15610 + }, + { + "epoch": 0.5676284613707392, + "grad_norm": 1.603381633758545, + "learning_rate": 4.9981529479817366e-05, + "loss": 0.1506, + "step": 15620 + }, + { + "epoch": 0.5679918598735373, + "grad_norm": 3.650087356567383, + "learning_rate": 4.9981329500540664e-05, + "loss": 0.2351, + "step": 15630 + }, + { + "epoch": 0.5683552583763355, + "grad_norm": 2.2338075637817383, + "learning_rate": 4.9981128444907354e-05, + "loss": 0.1785, + "step": 15640 + }, + { + "epoch": 0.5687186568791337, + "grad_norm": 7.641642093658447, + "learning_rate": 4.998092631292611e-05, + "loss": 0.2816, + "step": 15650 + }, + { + "epoch": 0.5690820553819318, + "grad_norm": 1.5877048969268799, + "learning_rate": 4.998072310460562e-05, + "loss": 0.1784, + "step": 15660 + }, + { + "epoch": 0.56944545388473, + "grad_norm": 3.5917787551879883, + "learning_rate": 4.998051881995466e-05, + "loss": 0.1685, + "step": 15670 + }, + { + "epoch": 0.5698088523875282, + "grad_norm": 6.459184169769287, + "learning_rate": 4.998031345898203e-05, + "loss": 0.2031, + "step": 15680 + }, + { + "epoch": 0.5701722508903263, + "grad_norm": 2.7518184185028076, + "learning_rate": 4.9980107021696565e-05, + "loss": 0.168, + "step": 15690 + }, + { + "epoch": 0.5705356493931245, + "grad_norm": 9.814598083496094, + "learning_rate": 4.997989950810718e-05, + "loss": 0.2778, + "step": 15700 + }, + { + "epoch": 0.5708990478959227, + "grad_norm": 2.0985398292541504, + "learning_rate": 4.9979690918222785e-05, + "loss": 0.1864, + "step": 15710 + }, + { + "epoch": 0.5712624463987208, + "grad_norm": 1.9264591932296753, + "learning_rate": 4.997948125205241e-05, + "loss": 0.1682, + "step": 15720 + }, + { + "epoch": 0.571625844901519, + "grad_norm": 4.2961955070495605, + "learning_rate": 4.997927050960505e-05, + "loss": 0.198, + "step": 15730 + }, + { + "epoch": 0.5719892434043172, + "grad_norm": 4.524483680725098, + "learning_rate": 4.99790586908898e-05, + "loss": 0.3235, + "step": 15740 + }, + { + "epoch": 0.5723526419071153, + "grad_norm": 5.259559154510498, + "learning_rate": 4.997884579591578e-05, + "loss": 0.335, + "step": 15750 + }, + { + "epoch": 0.5727160404099135, + "grad_norm": 1.7875639200210571, + "learning_rate": 4.997863182469219e-05, + "loss": 0.1674, + "step": 15760 + }, + { + "epoch": 0.5730794389127116, + "grad_norm": 1.1852960586547852, + "learning_rate": 4.9978416777228216e-05, + "loss": 0.1968, + "step": 15770 + }, + { + "epoch": 0.5734428374155098, + "grad_norm": 1.253061294555664, + "learning_rate": 4.997820065353314e-05, + "loss": 0.2177, + "step": 15780 + }, + { + "epoch": 0.573806235918308, + "grad_norm": 2.0577871799468994, + "learning_rate": 4.9977983453616266e-05, + "loss": 0.1498, + "step": 15790 + }, + { + "epoch": 0.5741696344211061, + "grad_norm": 7.4168901443481445, + "learning_rate": 4.997776517748696e-05, + "loss": 0.3137, + "step": 15800 + }, + { + "epoch": 0.5745330329239043, + "grad_norm": 2.9957845211029053, + "learning_rate": 4.9977545825154625e-05, + "loss": 0.1819, + "step": 15810 + }, + { + "epoch": 0.5748964314267025, + "grad_norm": 1.251610517501831, + "learning_rate": 4.997732539662871e-05, + "loss": 0.1633, + "step": 15820 + }, + { + "epoch": 0.5752598299295006, + "grad_norm": 3.229581594467163, + "learning_rate": 4.997710389191871e-05, + "loss": 0.1888, + "step": 15830 + }, + { + "epoch": 0.5756232284322989, + "grad_norm": 1.2718089818954468, + "learning_rate": 4.997688131103417e-05, + "loss": 0.1938, + "step": 15840 + }, + { + "epoch": 0.5759866269350971, + "grad_norm": 4.77078104019165, + "learning_rate": 4.9976657653984694e-05, + "loss": 0.2311, + "step": 15850 + }, + { + "epoch": 0.5763500254378952, + "grad_norm": 1.9487907886505127, + "learning_rate": 4.9976432920779904e-05, + "loss": 0.7679, + "step": 15860 + }, + { + "epoch": 0.5767134239406934, + "grad_norm": 2.1322100162506104, + "learning_rate": 4.997620711142948e-05, + "loss": 0.204, + "step": 15870 + }, + { + "epoch": 0.5770768224434916, + "grad_norm": 3.0756008625030518, + "learning_rate": 4.997598022594316e-05, + "loss": 0.205, + "step": 15880 + }, + { + "epoch": 0.5774402209462897, + "grad_norm": 2.2399511337280273, + "learning_rate": 4.997575226433071e-05, + "loss": 0.183, + "step": 15890 + }, + { + "epoch": 0.5778036194490879, + "grad_norm": 4.17095947265625, + "learning_rate": 4.997552322660197e-05, + "loss": 0.2209, + "step": 15900 + }, + { + "epoch": 0.5781670179518861, + "grad_norm": 1.7085528373718262, + "learning_rate": 4.9975293112766794e-05, + "loss": 0.4196, + "step": 15910 + }, + { + "epoch": 0.5785304164546842, + "grad_norm": 1.6818984746932983, + "learning_rate": 4.99750619228351e-05, + "loss": 0.1592, + "step": 15920 + }, + { + "epoch": 0.5788938149574824, + "grad_norm": 2.526503324508667, + "learning_rate": 4.9974829656816846e-05, + "loss": 0.7523, + "step": 15930 + }, + { + "epoch": 0.5792572134602806, + "grad_norm": 4.456855297088623, + "learning_rate": 4.997459631472205e-05, + "loss": 0.1664, + "step": 15940 + }, + { + "epoch": 0.5796206119630787, + "grad_norm": 28.427839279174805, + "learning_rate": 4.9974361896560746e-05, + "loss": 0.2891, + "step": 15950 + }, + { + "epoch": 0.5799840104658769, + "grad_norm": 1.6720882654190063, + "learning_rate": 4.997412640234306e-05, + "loss": 0.1522, + "step": 15960 + }, + { + "epoch": 0.5803474089686751, + "grad_norm": 1.6327390670776367, + "learning_rate": 4.997388983207911e-05, + "loss": 0.1957, + "step": 15970 + }, + { + "epoch": 0.5807108074714732, + "grad_norm": 1.5792416334152222, + "learning_rate": 4.997365218577912e-05, + "loss": 0.2325, + "step": 15980 + }, + { + "epoch": 0.5810742059742714, + "grad_norm": 1.7585738897323608, + "learning_rate": 4.9973413463453305e-05, + "loss": 0.2023, + "step": 15990 + }, + { + "epoch": 0.5814376044770696, + "grad_norm": 8.14810562133789, + "learning_rate": 4.997317366511196e-05, + "loss": 0.2093, + "step": 16000 + }, + { + "epoch": 0.5818010029798677, + "grad_norm": 3.1430416107177734, + "learning_rate": 4.997293279076543e-05, + "loss": 0.1742, + "step": 16010 + }, + { + "epoch": 0.5821644014826659, + "grad_norm": 1.9447312355041504, + "learning_rate": 4.997269084042406e-05, + "loss": 0.1852, + "step": 16020 + }, + { + "epoch": 0.5825277999854641, + "grad_norm": 2.1479732990264893, + "learning_rate": 4.997244781409831e-05, + "loss": 0.2197, + "step": 16030 + }, + { + "epoch": 0.5828911984882622, + "grad_norm": 3.7066800594329834, + "learning_rate": 4.9972203711798625e-05, + "loss": 0.1899, + "step": 16040 + }, + { + "epoch": 0.5832545969910604, + "grad_norm": 4.3598432540893555, + "learning_rate": 4.9971958533535544e-05, + "loss": 0.237, + "step": 16050 + }, + { + "epoch": 0.5836179954938585, + "grad_norm": 3.410356283187866, + "learning_rate": 4.997171227931962e-05, + "loss": 0.1985, + "step": 16060 + }, + { + "epoch": 0.5839813939966567, + "grad_norm": 1.6299129724502563, + "learning_rate": 4.9971464949161454e-05, + "loss": 0.1758, + "step": 16070 + }, + { + "epoch": 0.5843447924994549, + "grad_norm": 1.993067979812622, + "learning_rate": 4.9971216543071716e-05, + "loss": 0.1822, + "step": 16080 + }, + { + "epoch": 0.584708191002253, + "grad_norm": 1.2057979106903076, + "learning_rate": 4.9970967061061104e-05, + "loss": 0.1953, + "step": 16090 + }, + { + "epoch": 0.5850715895050512, + "grad_norm": 34.54500961303711, + "learning_rate": 4.997071650314037e-05, + "loss": 0.277, + "step": 16100 + }, + { + "epoch": 0.5854349880078494, + "grad_norm": 1.243656039237976, + "learning_rate": 4.997046486932031e-05, + "loss": 0.172, + "step": 16110 + }, + { + "epoch": 0.5857983865106475, + "grad_norm": 0.6155187487602234, + "learning_rate": 4.997021215961176e-05, + "loss": 0.1523, + "step": 16120 + }, + { + "epoch": 0.5861617850134457, + "grad_norm": 2.0203208923339844, + "learning_rate": 4.9969958374025615e-05, + "loss": 0.1857, + "step": 16130 + }, + { + "epoch": 0.5865251835162439, + "grad_norm": 1.1912654638290405, + "learning_rate": 4.9969703512572805e-05, + "loss": 0.2169, + "step": 16140 + }, + { + "epoch": 0.586888582019042, + "grad_norm": 3.528538227081299, + "learning_rate": 4.9969447575264315e-05, + "loss": 0.2428, + "step": 16150 + }, + { + "epoch": 0.5872519805218402, + "grad_norm": 0.9166990518569946, + "learning_rate": 4.996919056211117e-05, + "loss": 0.2206, + "step": 16160 + }, + { + "epoch": 0.5876153790246385, + "grad_norm": 1.4956426620483398, + "learning_rate": 4.996893247312444e-05, + "loss": 0.168, + "step": 16170 + }, + { + "epoch": 0.5879787775274365, + "grad_norm": 1.4502993822097778, + "learning_rate": 4.996867330831526e-05, + "loss": 0.1767, + "step": 16180 + }, + { + "epoch": 0.5883421760302348, + "grad_norm": 0.9337482452392578, + "learning_rate": 4.9968413067694775e-05, + "loss": 0.2035, + "step": 16190 + }, + { + "epoch": 0.588705574533033, + "grad_norm": 13.500269889831543, + "learning_rate": 4.996815175127422e-05, + "loss": 0.2335, + "step": 16200 + }, + { + "epoch": 0.588705574533033, + "eval_loss": 0.4428017735481262, + "eval_runtime": 180.2159, + "eval_samples_per_second": 41.14, + "eval_steps_per_second": 5.144, + "eval_wer": 0.22281118957285748, + "step": 16200 + }, + { + "epoch": 0.5890689730358311, + "grad_norm": 5.091770648956299, + "learning_rate": 4.996788935906483e-05, + "loss": 0.1884, + "step": 16210 + }, + { + "epoch": 0.5894323715386293, + "grad_norm": 2.379033327102661, + "learning_rate": 4.996762589107793e-05, + "loss": 0.196, + "step": 16220 + }, + { + "epoch": 0.5897957700414275, + "grad_norm": 2.576484203338623, + "learning_rate": 4.996736134732487e-05, + "loss": 0.2069, + "step": 16230 + }, + { + "epoch": 0.5901591685442256, + "grad_norm": 1.4235923290252686, + "learning_rate": 4.9967095727817035e-05, + "loss": 0.1905, + "step": 16240 + }, + { + "epoch": 0.5905225670470238, + "grad_norm": 7.119918346405029, + "learning_rate": 4.9966829032565886e-05, + "loss": 0.2803, + "step": 16250 + }, + { + "epoch": 0.590885965549822, + "grad_norm": 1.1050286293029785, + "learning_rate": 4.99665612615829e-05, + "loss": 0.1914, + "step": 16260 + }, + { + "epoch": 0.5912493640526201, + "grad_norm": 1.403601884841919, + "learning_rate": 4.9966292414879625e-05, + "loss": 0.1649, + "step": 16270 + }, + { + "epoch": 0.5916127625554183, + "grad_norm": 5.439052104949951, + "learning_rate": 4.9966022492467635e-05, + "loss": 0.1897, + "step": 16280 + }, + { + "epoch": 0.5919761610582165, + "grad_norm": 1.0014379024505615, + "learning_rate": 4.996575149435857e-05, + "loss": 0.1472, + "step": 16290 + }, + { + "epoch": 0.5923395595610146, + "grad_norm": 9.480517387390137, + "learning_rate": 4.99654794205641e-05, + "loss": 0.2351, + "step": 16300 + }, + { + "epoch": 0.5927029580638128, + "grad_norm": 0.8406987190246582, + "learning_rate": 4.9965206271095955e-05, + "loss": 0.1795, + "step": 16310 + }, + { + "epoch": 0.593066356566611, + "grad_norm": 1.378169298171997, + "learning_rate": 4.996493204596589e-05, + "loss": 0.1597, + "step": 16320 + }, + { + "epoch": 0.5934297550694091, + "grad_norm": 3.9748549461364746, + "learning_rate": 4.996465674518573e-05, + "loss": 0.2264, + "step": 16330 + }, + { + "epoch": 0.5937931535722073, + "grad_norm": 2.2626171112060547, + "learning_rate": 4.996438036876734e-05, + "loss": 0.1647, + "step": 16340 + }, + { + "epoch": 0.5941565520750055, + "grad_norm": 3.8039205074310303, + "learning_rate": 4.996410291672262e-05, + "loss": 0.2204, + "step": 16350 + }, + { + "epoch": 0.5945199505778036, + "grad_norm": 1.5219416618347168, + "learning_rate": 4.996382438906353e-05, + "loss": 0.1518, + "step": 16360 + }, + { + "epoch": 0.5948833490806018, + "grad_norm": 1.4811570644378662, + "learning_rate": 4.9963544785802064e-05, + "loss": 0.2006, + "step": 16370 + }, + { + "epoch": 0.5952467475833999, + "grad_norm": 4.7030558586120605, + "learning_rate": 4.996326410695028e-05, + "loss": 0.2524, + "step": 16380 + }, + { + "epoch": 0.5956101460861981, + "grad_norm": 1.103624939918518, + "learning_rate": 4.996298235252026e-05, + "loss": 0.1558, + "step": 16390 + }, + { + "epoch": 0.5959735445889963, + "grad_norm": 4.654818534851074, + "learning_rate": 4.996269952252415e-05, + "loss": 0.2746, + "step": 16400 + }, + { + "epoch": 0.5963369430917944, + "grad_norm": 1.6746747493743896, + "learning_rate": 4.996241561697413e-05, + "loss": 0.1838, + "step": 16410 + }, + { + "epoch": 0.5967003415945926, + "grad_norm": 3.1955924034118652, + "learning_rate": 4.996213063588245e-05, + "loss": 0.1773, + "step": 16420 + }, + { + "epoch": 0.5970637400973908, + "grad_norm": 1.782669186592102, + "learning_rate": 4.996184457926137e-05, + "loss": 0.1939, + "step": 16430 + }, + { + "epoch": 0.5974271386001889, + "grad_norm": 1.2277849912643433, + "learning_rate": 4.996155744712322e-05, + "loss": 0.1724, + "step": 16440 + }, + { + "epoch": 0.5977905371029871, + "grad_norm": 25.578798294067383, + "learning_rate": 4.996126923948038e-05, + "loss": 0.2612, + "step": 16450 + }, + { + "epoch": 0.5981539356057853, + "grad_norm": 0.984426736831665, + "learning_rate": 4.9960979956345254e-05, + "loss": 0.1621, + "step": 16460 + }, + { + "epoch": 0.5985173341085834, + "grad_norm": 2.1299145221710205, + "learning_rate": 4.9960689597730315e-05, + "loss": 0.161, + "step": 16470 + }, + { + "epoch": 0.5988807326113816, + "grad_norm": 2.6153085231781006, + "learning_rate": 4.996039816364807e-05, + "loss": 0.2122, + "step": 16480 + }, + { + "epoch": 0.5992441311141798, + "grad_norm": 4.464552879333496, + "learning_rate": 4.996010565411108e-05, + "loss": 0.2417, + "step": 16490 + }, + { + "epoch": 0.5996075296169779, + "grad_norm": 26.441349029541016, + "learning_rate": 4.995981206913194e-05, + "loss": 0.3103, + "step": 16500 + }, + { + "epoch": 0.5999709281197761, + "grad_norm": 2.353302478790283, + "learning_rate": 4.995951740872331e-05, + "loss": 1.0256, + "step": 16510 + }, + { + "epoch": 0.6003343266225744, + "grad_norm": 0.8436356782913208, + "learning_rate": 4.995922167289788e-05, + "loss": 0.1563, + "step": 16520 + }, + { + "epoch": 0.6006977251253725, + "grad_norm": 3.3516342639923096, + "learning_rate": 4.99589248616684e-05, + "loss": 0.2441, + "step": 16530 + }, + { + "epoch": 0.6010611236281707, + "grad_norm": 2.0286059379577637, + "learning_rate": 4.995862697504764e-05, + "loss": 0.1767, + "step": 16540 + }, + { + "epoch": 0.6014245221309689, + "grad_norm": 18.248151779174805, + "learning_rate": 4.9958328013048464e-05, + "loss": 0.3522, + "step": 16550 + }, + { + "epoch": 0.601787920633767, + "grad_norm": 2.1514463424682617, + "learning_rate": 4.995802797568372e-05, + "loss": 0.1771, + "step": 16560 + }, + { + "epoch": 0.6021513191365652, + "grad_norm": 5.868020534515381, + "learning_rate": 4.995772686296635e-05, + "loss": 0.1776, + "step": 16570 + }, + { + "epoch": 0.6025147176393634, + "grad_norm": 4.539637565612793, + "learning_rate": 4.9957424674909336e-05, + "loss": 0.2002, + "step": 16580 + }, + { + "epoch": 0.6028781161421615, + "grad_norm": 1.7226190567016602, + "learning_rate": 4.99571214115257e-05, + "loss": 0.1927, + "step": 16590 + }, + { + "epoch": 0.6032415146449597, + "grad_norm": 22.087247848510742, + "learning_rate": 4.9956817072828485e-05, + "loss": 0.249, + "step": 16600 + }, + { + "epoch": 0.6036049131477579, + "grad_norm": 2.4267120361328125, + "learning_rate": 4.995651165883083e-05, + "loss": 0.1935, + "step": 16610 + }, + { + "epoch": 0.603968311650556, + "grad_norm": 2.5284249782562256, + "learning_rate": 4.995620516954588e-05, + "loss": 0.1495, + "step": 16620 + }, + { + "epoch": 0.6043317101533542, + "grad_norm": 1.5988596677780151, + "learning_rate": 4.995589760498684e-05, + "loss": 0.2329, + "step": 16630 + }, + { + "epoch": 0.6046951086561524, + "grad_norm": 1.0771689414978027, + "learning_rate": 4.9955588965166966e-05, + "loss": 0.1634, + "step": 16640 + }, + { + "epoch": 0.6050585071589505, + "grad_norm": 8.72423267364502, + "learning_rate": 4.995527925009956e-05, + "loss": 0.27, + "step": 16650 + }, + { + "epoch": 0.6054219056617487, + "grad_norm": 1.3176789283752441, + "learning_rate": 4.9954968459797955e-05, + "loss": 1.1913, + "step": 16660 + }, + { + "epoch": 0.6057853041645468, + "grad_norm": 1.8307547569274902, + "learning_rate": 4.9954656594275555e-05, + "loss": 0.188, + "step": 16670 + }, + { + "epoch": 0.606148702667345, + "grad_norm": 2.783604621887207, + "learning_rate": 4.9954343653545795e-05, + "loss": 0.1791, + "step": 16680 + }, + { + "epoch": 0.6065121011701432, + "grad_norm": 1.6639970541000366, + "learning_rate": 4.9954029637622146e-05, + "loss": 0.1829, + "step": 16690 + }, + { + "epoch": 0.6068754996729413, + "grad_norm": 11.055110931396484, + "learning_rate": 4.995371454651815e-05, + "loss": 0.2229, + "step": 16700 + }, + { + "epoch": 0.6072388981757395, + "grad_norm": 1.8166972398757935, + "learning_rate": 4.9953398380247384e-05, + "loss": 0.1734, + "step": 16710 + }, + { + "epoch": 0.6076022966785377, + "grad_norm": 4.851889610290527, + "learning_rate": 4.995308113882346e-05, + "loss": 0.1716, + "step": 16720 + }, + { + "epoch": 0.6079656951813358, + "grad_norm": 3.0047857761383057, + "learning_rate": 4.9952762822260056e-05, + "loss": 0.2125, + "step": 16730 + }, + { + "epoch": 0.608329093684134, + "grad_norm": 1.1506407260894775, + "learning_rate": 4.9952443430570887e-05, + "loss": 0.171, + "step": 16740 + }, + { + "epoch": 0.6086924921869322, + "grad_norm": 4.324979782104492, + "learning_rate": 4.995212296376971e-05, + "loss": 0.2365, + "step": 16750 + }, + { + "epoch": 0.6090558906897303, + "grad_norm": 1.2295490503311157, + "learning_rate": 4.995180142187033e-05, + "loss": 0.2002, + "step": 16760 + }, + { + "epoch": 0.6094192891925285, + "grad_norm": 1.454434871673584, + "learning_rate": 4.995147880488661e-05, + "loss": 0.1602, + "step": 16770 + }, + { + "epoch": 0.6097826876953267, + "grad_norm": 2.6185641288757324, + "learning_rate": 4.995115511283244e-05, + "loss": 0.1904, + "step": 16780 + }, + { + "epoch": 0.6101460861981248, + "grad_norm": 1.2603826522827148, + "learning_rate": 4.9950830345721774e-05, + "loss": 0.1892, + "step": 16790 + }, + { + "epoch": 0.610509484700923, + "grad_norm": 14.189190864562988, + "learning_rate": 4.9950504503568615e-05, + "loss": 0.281, + "step": 16800 + }, + { + "epoch": 0.610509484700923, + "eval_loss": 0.4123116433620453, + "eval_runtime": 179.5081, + "eval_samples_per_second": 41.302, + "eval_steps_per_second": 5.164, + "eval_wer": 0.22986367019441972, + "step": 16800 + }, + { + "epoch": 0.6108728832037212, + "grad_norm": 1.7670204639434814, + "learning_rate": 4.995017758638698e-05, + "loss": 0.2581, + "step": 16810 + }, + { + "epoch": 0.6112362817065193, + "grad_norm": 1.2099360227584839, + "learning_rate": 4.9949849594190964e-05, + "loss": 0.1762, + "step": 16820 + }, + { + "epoch": 0.6115996802093175, + "grad_norm": 2.7719335556030273, + "learning_rate": 4.9949520526994716e-05, + "loss": 0.1867, + "step": 16830 + }, + { + "epoch": 0.6119630787121157, + "grad_norm": 1.5935924053192139, + "learning_rate": 4.9949190384812386e-05, + "loss": 0.191, + "step": 16840 + }, + { + "epoch": 0.6123264772149138, + "grad_norm": 3.511439085006714, + "learning_rate": 4.994885916765821e-05, + "loss": 0.2132, + "step": 16850 + }, + { + "epoch": 0.612689875717712, + "grad_norm": 1.693789005279541, + "learning_rate": 4.994852687554647e-05, + "loss": 0.167, + "step": 16860 + }, + { + "epoch": 0.6130532742205103, + "grad_norm": 2.1199066638946533, + "learning_rate": 4.994819350849147e-05, + "loss": 0.172, + "step": 16870 + }, + { + "epoch": 0.6134166727233084, + "grad_norm": 2.724487543106079, + "learning_rate": 4.9947859066507575e-05, + "loss": 0.2083, + "step": 16880 + }, + { + "epoch": 0.6137800712261066, + "grad_norm": 0.926547110080719, + "learning_rate": 4.99475235496092e-05, + "loss": 0.1517, + "step": 16890 + }, + { + "epoch": 0.6141434697289048, + "grad_norm": 14.503059387207031, + "learning_rate": 4.99471869578108e-05, + "loss": 0.1945, + "step": 16900 + }, + { + "epoch": 0.6145068682317029, + "grad_norm": 3.2206919193267822, + "learning_rate": 4.994684929112687e-05, + "loss": 0.1882, + "step": 16910 + }, + { + "epoch": 0.6148702667345011, + "grad_norm": 2.004995107650757, + "learning_rate": 4.994651054957198e-05, + "loss": 0.1876, + "step": 16920 + }, + { + "epoch": 0.6152336652372993, + "grad_norm": 2.0580127239227295, + "learning_rate": 4.99461707331607e-05, + "loss": 0.2104, + "step": 16930 + }, + { + "epoch": 0.6155970637400974, + "grad_norm": 3.3028602600097656, + "learning_rate": 4.9945829841907684e-05, + "loss": 0.1494, + "step": 16940 + }, + { + "epoch": 0.6159604622428956, + "grad_norm": 7.572249412536621, + "learning_rate": 4.994548787582761e-05, + "loss": 0.2381, + "step": 16950 + }, + { + "epoch": 0.6163238607456937, + "grad_norm": 1.4220709800720215, + "learning_rate": 4.9945144834935234e-05, + "loss": 0.1916, + "step": 16960 + }, + { + "epoch": 0.6166872592484919, + "grad_norm": 1.2397724390029907, + "learning_rate": 4.994480071924531e-05, + "loss": 0.1593, + "step": 16970 + }, + { + "epoch": 0.6170506577512901, + "grad_norm": 2.2569403648376465, + "learning_rate": 4.9944455528772684e-05, + "loss": 0.1984, + "step": 16980 + }, + { + "epoch": 0.6174140562540882, + "grad_norm": 1.811727523803711, + "learning_rate": 4.994410926353221e-05, + "loss": 0.1838, + "step": 16990 + }, + { + "epoch": 0.6177774547568864, + "grad_norm": 2.783061981201172, + "learning_rate": 4.9943761923538834e-05, + "loss": 0.2217, + "step": 17000 + }, + { + "epoch": 0.6181408532596846, + "grad_norm": 2.816331148147583, + "learning_rate": 4.99434135088075e-05, + "loss": 0.1911, + "step": 17010 + }, + { + "epoch": 0.6185042517624827, + "grad_norm": 1.238916039466858, + "learning_rate": 4.9943064019353234e-05, + "loss": 0.1854, + "step": 17020 + }, + { + "epoch": 0.6188676502652809, + "grad_norm": 5.16685152053833, + "learning_rate": 4.9942713455191075e-05, + "loss": 0.1797, + "step": 17030 + }, + { + "epoch": 0.6192310487680791, + "grad_norm": 2.486461639404297, + "learning_rate": 4.9942361816336146e-05, + "loss": 0.1926, + "step": 17040 + }, + { + "epoch": 0.6195944472708772, + "grad_norm": 9.018515586853027, + "learning_rate": 4.994200910280359e-05, + "loss": 0.2193, + "step": 17050 + }, + { + "epoch": 0.6199578457736754, + "grad_norm": 1.804166555404663, + "learning_rate": 4.994165531460861e-05, + "loss": 0.1977, + "step": 17060 + }, + { + "epoch": 0.6203212442764736, + "grad_norm": 1.2862845659255981, + "learning_rate": 4.994130045176644e-05, + "loss": 0.1493, + "step": 17070 + }, + { + "epoch": 0.6206846427792717, + "grad_norm": 4.164750576019287, + "learning_rate": 4.994094451429237e-05, + "loss": 0.2548, + "step": 17080 + }, + { + "epoch": 0.6210480412820699, + "grad_norm": 1.577255368232727, + "learning_rate": 4.994058750220176e-05, + "loss": 0.1703, + "step": 17090 + }, + { + "epoch": 0.6214114397848681, + "grad_norm": 5.805021286010742, + "learning_rate": 4.994022941550996e-05, + "loss": 0.2976, + "step": 17100 + }, + { + "epoch": 0.6217748382876662, + "grad_norm": 0.9706230163574219, + "learning_rate": 4.993987025423241e-05, + "loss": 0.1454, + "step": 17110 + }, + { + "epoch": 0.6221382367904644, + "grad_norm": 1.4393014907836914, + "learning_rate": 4.993951001838459e-05, + "loss": 0.1496, + "step": 17120 + }, + { + "epoch": 0.6225016352932626, + "grad_norm": 1.839086651802063, + "learning_rate": 4.993914870798202e-05, + "loss": 0.2256, + "step": 17130 + }, + { + "epoch": 0.6228650337960607, + "grad_norm": 1.8924603462219238, + "learning_rate": 4.993878632304027e-05, + "loss": 0.1415, + "step": 17140 + }, + { + "epoch": 0.6232284322988589, + "grad_norm": 12.03149700164795, + "learning_rate": 4.993842286357494e-05, + "loss": 0.7236, + "step": 17150 + }, + { + "epoch": 0.6235918308016571, + "grad_norm": 2.0251877307891846, + "learning_rate": 4.993805832960171e-05, + "loss": 0.1913, + "step": 17160 + }, + { + "epoch": 0.6239552293044552, + "grad_norm": 2.341251850128174, + "learning_rate": 4.993769272113628e-05, + "loss": 0.1734, + "step": 17170 + }, + { + "epoch": 0.6243186278072534, + "grad_norm": 2.517820358276367, + "learning_rate": 4.993732603819438e-05, + "loss": 0.18, + "step": 17180 + }, + { + "epoch": 0.6246820263100517, + "grad_norm": 1.6384356021881104, + "learning_rate": 4.993695828079184e-05, + "loss": 0.1513, + "step": 17190 + }, + { + "epoch": 0.6250454248128497, + "grad_norm": 10.794693946838379, + "learning_rate": 4.993658944894449e-05, + "loss": 0.2282, + "step": 17200 + }, + { + "epoch": 0.625408823315648, + "grad_norm": 1.2552087306976318, + "learning_rate": 4.9936219542668236e-05, + "loss": 0.1938, + "step": 17210 + }, + { + "epoch": 0.6257722218184462, + "grad_norm": 2.423431634902954, + "learning_rate": 4.993584856197899e-05, + "loss": 0.1487, + "step": 17220 + }, + { + "epoch": 0.6261356203212443, + "grad_norm": 1.7924834489822388, + "learning_rate": 4.9935476506892763e-05, + "loss": 0.195, + "step": 17230 + }, + { + "epoch": 0.6264990188240425, + "grad_norm": 1.6521999835968018, + "learning_rate": 4.9935103377425566e-05, + "loss": 0.1652, + "step": 17240 + }, + { + "epoch": 0.6268624173268406, + "grad_norm": 6.472127437591553, + "learning_rate": 4.9934729173593494e-05, + "loss": 0.2481, + "step": 17250 + }, + { + "epoch": 0.6272258158296388, + "grad_norm": 1.8962410688400269, + "learning_rate": 4.993435389541265e-05, + "loss": 0.1487, + "step": 17260 + }, + { + "epoch": 0.627589214332437, + "grad_norm": 1.2054486274719238, + "learning_rate": 4.993397754289922e-05, + "loss": 0.1496, + "step": 17270 + }, + { + "epoch": 0.6279526128352351, + "grad_norm": 3.9840786457061768, + "learning_rate": 4.993360011606941e-05, + "loss": 0.1776, + "step": 17280 + }, + { + "epoch": 0.6283160113380333, + "grad_norm": 0.9625970125198364, + "learning_rate": 4.9933221614939485e-05, + "loss": 0.1652, + "step": 17290 + }, + { + "epoch": 0.6286794098408315, + "grad_norm": 11.166252136230469, + "learning_rate": 4.993284203952575e-05, + "loss": 0.233, + "step": 17300 + }, + { + "epoch": 0.6290428083436296, + "grad_norm": 2.356268882751465, + "learning_rate": 4.9932461389844566e-05, + "loss": 0.1498, + "step": 17310 + }, + { + "epoch": 0.6294062068464278, + "grad_norm": 0.9366337656974792, + "learning_rate": 4.993207966591234e-05, + "loss": 0.1483, + "step": 17320 + }, + { + "epoch": 0.629769605349226, + "grad_norm": 5.854847431182861, + "learning_rate": 4.9931696867745495e-05, + "loss": 0.1603, + "step": 17330 + }, + { + "epoch": 0.6301330038520241, + "grad_norm": 1.0090773105621338, + "learning_rate": 4.9931312995360546e-05, + "loss": 0.1475, + "step": 17340 + }, + { + "epoch": 0.6304964023548223, + "grad_norm": 3.896676540374756, + "learning_rate": 4.9930928048774024e-05, + "loss": 0.244, + "step": 17350 + }, + { + "epoch": 0.6308598008576205, + "grad_norm": 1.1872800588607788, + "learning_rate": 4.993054202800252e-05, + "loss": 0.1618, + "step": 17360 + }, + { + "epoch": 0.6312231993604186, + "grad_norm": 1.8078994750976562, + "learning_rate": 4.9930154933062654e-05, + "loss": 0.1554, + "step": 17370 + }, + { + "epoch": 0.6315865978632168, + "grad_norm": 1.8264563083648682, + "learning_rate": 4.9929766763971126e-05, + "loss": 0.162, + "step": 17380 + }, + { + "epoch": 0.631949996366015, + "grad_norm": 0.6304519176483154, + "learning_rate": 4.992937752074465e-05, + "loss": 0.209, + "step": 17390 + }, + { + "epoch": 0.6323133948688131, + "grad_norm": 4.7621917724609375, + "learning_rate": 4.992898720339998e-05, + "loss": 0.2393, + "step": 17400 + }, + { + "epoch": 0.6323133948688131, + "eval_loss": 0.3943130671977997, + "eval_runtime": 180.0553, + "eval_samples_per_second": 41.176, + "eval_steps_per_second": 5.148, + "eval_wer": 0.21144734692395664, + "step": 17400 + }, + { + "epoch": 0.6326767933716113, + "grad_norm": 1.1110138893127441, + "learning_rate": 4.992859581195396e-05, + "loss": 0.143, + "step": 17410 + }, + { + "epoch": 0.6330401918744095, + "grad_norm": 1.2453794479370117, + "learning_rate": 4.992820334642344e-05, + "loss": 0.1454, + "step": 17420 + }, + { + "epoch": 0.6334035903772076, + "grad_norm": 3.669144630432129, + "learning_rate": 4.9927809806825335e-05, + "loss": 0.2496, + "step": 17430 + }, + { + "epoch": 0.6337669888800058, + "grad_norm": 2.7898483276367188, + "learning_rate": 4.99274151931766e-05, + "loss": 0.1614, + "step": 17440 + }, + { + "epoch": 0.634130387382804, + "grad_norm": 6.725431442260742, + "learning_rate": 4.992701950549423e-05, + "loss": 0.2622, + "step": 17450 + }, + { + "epoch": 0.6344937858856021, + "grad_norm": 1.6481575965881348, + "learning_rate": 4.992662274379528e-05, + "loss": 0.1713, + "step": 17460 + }, + { + "epoch": 0.6348571843884003, + "grad_norm": 1.3567384481430054, + "learning_rate": 4.9926224908096856e-05, + "loss": 0.1725, + "step": 17470 + }, + { + "epoch": 0.6352205828911985, + "grad_norm": 1.8207722902297974, + "learning_rate": 4.9925825998416076e-05, + "loss": 0.1973, + "step": 17480 + }, + { + "epoch": 0.6355839813939966, + "grad_norm": 2.2345893383026123, + "learning_rate": 4.9925426014770146e-05, + "loss": 0.1847, + "step": 17490 + }, + { + "epoch": 0.6359473798967948, + "grad_norm": 7.193591594696045, + "learning_rate": 4.992502495717629e-05, + "loss": 0.2605, + "step": 17500 + }, + { + "epoch": 0.636310778399593, + "grad_norm": 1.346073865890503, + "learning_rate": 4.99246228256518e-05, + "loss": 0.1518, + "step": 17510 + }, + { + "epoch": 0.6366741769023911, + "grad_norm": 1.5637879371643066, + "learning_rate": 4.9924219620213995e-05, + "loss": 0.1648, + "step": 17520 + }, + { + "epoch": 0.6370375754051893, + "grad_norm": 3.2450170516967773, + "learning_rate": 4.9923815340880236e-05, + "loss": 0.1974, + "step": 17530 + }, + { + "epoch": 0.6374009739079874, + "grad_norm": 0.9553948640823364, + "learning_rate": 4.992340998766796e-05, + "loss": 0.1694, + "step": 17540 + }, + { + "epoch": 0.6377643724107857, + "grad_norm": 8.901055335998535, + "learning_rate": 4.9923003560594625e-05, + "loss": 0.2625, + "step": 17550 + }, + { + "epoch": 0.6381277709135839, + "grad_norm": 1.7500522136688232, + "learning_rate": 4.992259605967774e-05, + "loss": 0.1799, + "step": 17560 + }, + { + "epoch": 0.638491169416382, + "grad_norm": 1.4673160314559937, + "learning_rate": 4.9922187484934865e-05, + "loss": 0.1698, + "step": 17570 + }, + { + "epoch": 0.6388545679191802, + "grad_norm": 2.5377135276794434, + "learning_rate": 4.992177783638361e-05, + "loss": 0.1822, + "step": 17580 + }, + { + "epoch": 0.6392179664219784, + "grad_norm": 1.660311222076416, + "learning_rate": 4.9921367114041625e-05, + "loss": 0.1659, + "step": 17590 + }, + { + "epoch": 0.6395813649247765, + "grad_norm": 8.248649597167969, + "learning_rate": 4.9920955317926595e-05, + "loss": 0.2384, + "step": 17600 + }, + { + "epoch": 0.6399447634275747, + "grad_norm": 1.5581409931182861, + "learning_rate": 4.992054244805627e-05, + "loss": 0.1665, + "step": 17610 + }, + { + "epoch": 0.6403081619303729, + "grad_norm": 0.9654737710952759, + "learning_rate": 4.992012850444844e-05, + "loss": 0.3493, + "step": 17620 + }, + { + "epoch": 0.640671560433171, + "grad_norm": 3.4477317333221436, + "learning_rate": 4.9919713487120935e-05, + "loss": 0.2097, + "step": 17630 + }, + { + "epoch": 0.6410349589359692, + "grad_norm": 1.3745356798171997, + "learning_rate": 4.9919297396091634e-05, + "loss": 0.1459, + "step": 17640 + }, + { + "epoch": 0.6413983574387674, + "grad_norm": 4.813534259796143, + "learning_rate": 4.991888023137849e-05, + "loss": 0.1905, + "step": 17650 + }, + { + "epoch": 0.6417617559415655, + "grad_norm": 3.118452310562134, + "learning_rate": 4.9918461992999445e-05, + "loss": 0.1527, + "step": 17660 + }, + { + "epoch": 0.6421251544443637, + "grad_norm": 1.8424941301345825, + "learning_rate": 4.991804268097253e-05, + "loss": 0.1759, + "step": 17670 + }, + { + "epoch": 0.6424885529471619, + "grad_norm": 7.301458835601807, + "learning_rate": 4.9917622295315826e-05, + "loss": 0.1662, + "step": 17680 + }, + { + "epoch": 0.64285195144996, + "grad_norm": 3.133114814758301, + "learning_rate": 4.991720083604743e-05, + "loss": 0.1692, + "step": 17690 + }, + { + "epoch": 0.6432153499527582, + "grad_norm": 11.538620948791504, + "learning_rate": 4.99167783031855e-05, + "loss": 0.2443, + "step": 17700 + }, + { + "epoch": 0.6435787484555564, + "grad_norm": 1.3739595413208008, + "learning_rate": 4.991635469674825e-05, + "loss": 0.1465, + "step": 17710 + }, + { + "epoch": 0.6439421469583545, + "grad_norm": 1.6855549812316895, + "learning_rate": 4.991593001675393e-05, + "loss": 0.1819, + "step": 17720 + }, + { + "epoch": 0.6443055454611527, + "grad_norm": 1.692335844039917, + "learning_rate": 4.991550426322083e-05, + "loss": 0.1654, + "step": 17730 + }, + { + "epoch": 0.6446689439639509, + "grad_norm": 1.1132971048355103, + "learning_rate": 4.9915077436167313e-05, + "loss": 0.1688, + "step": 17740 + }, + { + "epoch": 0.645032342466749, + "grad_norm": 5.6813201904296875, + "learning_rate": 4.9914649535611756e-05, + "loss": 0.2235, + "step": 17750 + }, + { + "epoch": 0.6453957409695472, + "grad_norm": 1.5107471942901611, + "learning_rate": 4.99142205615726e-05, + "loss": 0.1747, + "step": 17760 + }, + { + "epoch": 0.6457591394723454, + "grad_norm": 2.4552764892578125, + "learning_rate": 4.9913790514068316e-05, + "loss": 0.1739, + "step": 17770 + }, + { + "epoch": 0.6461225379751435, + "grad_norm": 1.5664808750152588, + "learning_rate": 4.991335939311744e-05, + "loss": 0.1766, + "step": 17780 + }, + { + "epoch": 0.6464859364779417, + "grad_norm": 2.935850143432617, + "learning_rate": 4.9912927198738556e-05, + "loss": 0.2148, + "step": 17790 + }, + { + "epoch": 0.6468493349807399, + "grad_norm": 10.267364501953125, + "learning_rate": 4.991249393095028e-05, + "loss": 0.2521, + "step": 17800 + }, + { + "epoch": 0.647212733483538, + "grad_norm": 1.3392564058303833, + "learning_rate": 4.9912059589771274e-05, + "loss": 0.172, + "step": 17810 + }, + { + "epoch": 0.6475761319863362, + "grad_norm": 0.895491361618042, + "learning_rate": 4.991162417522026e-05, + "loss": 0.1379, + "step": 17820 + }, + { + "epoch": 0.6479395304891343, + "grad_norm": 2.536397695541382, + "learning_rate": 4.9911187687315997e-05, + "loss": 0.1477, + "step": 17830 + }, + { + "epoch": 0.6483029289919325, + "grad_norm": 1.7795464992523193, + "learning_rate": 4.9910750126077296e-05, + "loss": 0.1786, + "step": 17840 + }, + { + "epoch": 0.6486663274947307, + "grad_norm": 62.683929443359375, + "learning_rate": 4.9910311491523e-05, + "loss": 0.266, + "step": 17850 + }, + { + "epoch": 0.6490297259975288, + "grad_norm": 2.0866358280181885, + "learning_rate": 4.990987178367201e-05, + "loss": 0.1428, + "step": 17860 + }, + { + "epoch": 0.649393124500327, + "grad_norm": 1.5636661052703857, + "learning_rate": 4.990943100254328e-05, + "loss": 0.1845, + "step": 17870 + }, + { + "epoch": 0.6497565230031253, + "grad_norm": 3.540689468383789, + "learning_rate": 4.9908989148155796e-05, + "loss": 0.2348, + "step": 17880 + }, + { + "epoch": 0.6501199215059233, + "grad_norm": 1.720421314239502, + "learning_rate": 4.990854622052859e-05, + "loss": 0.1742, + "step": 17890 + }, + { + "epoch": 0.6504833200087216, + "grad_norm": 7.7201056480407715, + "learning_rate": 4.9908102219680756e-05, + "loss": 0.2573, + "step": 17900 + }, + { + "epoch": 0.6508467185115198, + "grad_norm": 3.826190948486328, + "learning_rate": 4.9907701701329876e-05, + "loss": 3.6024, + "step": 17910 + }, + { + "epoch": 0.6512101170143179, + "grad_norm": 2.047307252883911, + "learning_rate": 4.990725566141558e-05, + "loss": 0.1551, + "step": 17920 + }, + { + "epoch": 0.6515735155171161, + "grad_norm": 6.462743282318115, + "learning_rate": 4.990680854833626e-05, + "loss": 0.2109, + "step": 17930 + }, + { + "epoch": 0.6519369140199143, + "grad_norm": 1.7611109018325806, + "learning_rate": 4.9906360362111184e-05, + "loss": 0.1959, + "step": 17940 + }, + { + "epoch": 0.6523003125227124, + "grad_norm": 5.253514766693115, + "learning_rate": 4.9905911102759655e-05, + "loss": 0.2436, + "step": 17950 + }, + { + "epoch": 0.6526637110255106, + "grad_norm": 0.9357771873474121, + "learning_rate": 4.9905460770301035e-05, + "loss": 0.1664, + "step": 17960 + }, + { + "epoch": 0.6530271095283088, + "grad_norm": 1.219488263130188, + "learning_rate": 4.990500936475472e-05, + "loss": 0.2286, + "step": 17970 + }, + { + "epoch": 0.6533905080311069, + "grad_norm": 2.8499608039855957, + "learning_rate": 4.990455688614016e-05, + "loss": 0.2664, + "step": 17980 + }, + { + "epoch": 0.6537539065339051, + "grad_norm": 1.5652077198028564, + "learning_rate": 4.990410333447686e-05, + "loss": 0.1341, + "step": 17990 + }, + { + "epoch": 0.6541173050367033, + "grad_norm": 5.98219633102417, + "learning_rate": 4.9903648709784356e-05, + "loss": 0.2338, + "step": 18000 + }, + { + "epoch": 0.6541173050367033, + "eval_loss": 0.37892404198646545, + "eval_runtime": 180.0524, + "eval_samples_per_second": 41.177, + "eval_steps_per_second": 5.149, + "eval_wer": 0.200864087715795, + "step": 18000 + }, + { + "epoch": 0.6544807035395014, + "grad_norm": 2.105100154876709, + "learning_rate": 4.990319301208223e-05, + "loss": 0.1764, + "step": 18010 + }, + { + "epoch": 0.6548441020422996, + "grad_norm": 1.0867921113967896, + "learning_rate": 4.990273624139013e-05, + "loss": 0.1507, + "step": 18020 + }, + { + "epoch": 0.6552075005450978, + "grad_norm": 2.9895503520965576, + "learning_rate": 4.9902278397727734e-05, + "loss": 0.1479, + "step": 18030 + }, + { + "epoch": 0.6555708990478959, + "grad_norm": 0.9947407841682434, + "learning_rate": 4.990181948111475e-05, + "loss": 0.1558, + "step": 18040 + }, + { + "epoch": 0.6559342975506941, + "grad_norm": 7.774895191192627, + "learning_rate": 4.9901359491570974e-05, + "loss": 0.2202, + "step": 18050 + }, + { + "epoch": 0.6562976960534923, + "grad_norm": 1.8466017246246338, + "learning_rate": 4.990089842911622e-05, + "loss": 0.1929, + "step": 18060 + }, + { + "epoch": 0.6566610945562904, + "grad_norm": 0.8435410261154175, + "learning_rate": 4.9900436293770345e-05, + "loss": 0.1377, + "step": 18070 + }, + { + "epoch": 0.6570244930590886, + "grad_norm": 3.10648512840271, + "learning_rate": 4.989997308555326e-05, + "loss": 0.202, + "step": 18080 + }, + { + "epoch": 0.6573878915618868, + "grad_norm": 1.112806797027588, + "learning_rate": 4.989950880448494e-05, + "loss": 0.1486, + "step": 18090 + }, + { + "epoch": 0.6577512900646849, + "grad_norm": 18.821117401123047, + "learning_rate": 4.989904345058538e-05, + "loss": 0.2677, + "step": 18100 + }, + { + "epoch": 0.6581146885674831, + "grad_norm": 1.254798412322998, + "learning_rate": 4.989857702387463e-05, + "loss": 3.5769, + "step": 18110 + }, + { + "epoch": 0.6584780870702812, + "grad_norm": 0.9956761002540588, + "learning_rate": 4.989810952437277e-05, + "loss": 0.1958, + "step": 18120 + }, + { + "epoch": 0.6588414855730794, + "grad_norm": 2.9471828937530518, + "learning_rate": 4.9897640952099975e-05, + "loss": 0.1988, + "step": 18130 + }, + { + "epoch": 0.6592048840758776, + "grad_norm": 1.3806344270706177, + "learning_rate": 4.989717130707641e-05, + "loss": 0.1552, + "step": 18140 + }, + { + "epoch": 0.6595682825786757, + "grad_norm": 3.0857722759246826, + "learning_rate": 4.989670058932231e-05, + "loss": 0.2168, + "step": 18150 + }, + { + "epoch": 0.6599316810814739, + "grad_norm": 1.8781664371490479, + "learning_rate": 4.989622879885798e-05, + "loss": 0.1571, + "step": 18160 + }, + { + "epoch": 0.6602950795842721, + "grad_norm": 1.1139156818389893, + "learning_rate": 4.9895755935703725e-05, + "loss": 0.1365, + "step": 18170 + }, + { + "epoch": 0.6606584780870702, + "grad_norm": 2.3965742588043213, + "learning_rate": 4.9895281999879925e-05, + "loss": 0.1879, + "step": 18180 + }, + { + "epoch": 0.6610218765898684, + "grad_norm": 1.2575726509094238, + "learning_rate": 4.9894806991407e-05, + "loss": 0.2197, + "step": 18190 + }, + { + "epoch": 0.6613852750926666, + "grad_norm": 10.392169952392578, + "learning_rate": 4.989433091030542e-05, + "loss": 0.2318, + "step": 18200 + }, + { + "epoch": 0.6617486735954647, + "grad_norm": 0.8268498182296753, + "learning_rate": 4.98938537565957e-05, + "loss": 0.1416, + "step": 18210 + }, + { + "epoch": 0.662112072098263, + "grad_norm": 0.9257369637489319, + "learning_rate": 4.9893375530298384e-05, + "loss": 0.1855, + "step": 18220 + }, + { + "epoch": 0.6624754706010612, + "grad_norm": 1.7720370292663574, + "learning_rate": 4.9892896231434094e-05, + "loss": 1.0276, + "step": 18230 + }, + { + "epoch": 0.6628388691038593, + "grad_norm": 2.2012548446655273, + "learning_rate": 4.9892415860023476e-05, + "loss": 1.1909, + "step": 18240 + }, + { + "epoch": 0.6632022676066575, + "grad_norm": 9.690247535705566, + "learning_rate": 4.9891934416087224e-05, + "loss": 0.2603, + "step": 18250 + }, + { + "epoch": 0.6635656661094557, + "grad_norm": 2.528682231903076, + "learning_rate": 4.989145189964608e-05, + "loss": 0.1912, + "step": 18260 + }, + { + "epoch": 0.6639290646122538, + "grad_norm": 1.4666227102279663, + "learning_rate": 4.989096831072084e-05, + "loss": 0.2316, + "step": 18270 + }, + { + "epoch": 0.664292463115052, + "grad_norm": 1.463526725769043, + "learning_rate": 4.989048364933234e-05, + "loss": 0.1388, + "step": 18280 + }, + { + "epoch": 0.6646558616178502, + "grad_norm": 1.2156569957733154, + "learning_rate": 4.988999791550146e-05, + "loss": 0.4086, + "step": 18290 + }, + { + "epoch": 0.6650192601206483, + "grad_norm": 4.909139156341553, + "learning_rate": 4.988951110924913e-05, + "loss": 0.2631, + "step": 18300 + }, + { + "epoch": 0.6653826586234465, + "grad_norm": 1.3692512512207031, + "learning_rate": 4.988902323059632e-05, + "loss": 0.1525, + "step": 18310 + }, + { + "epoch": 0.6657460571262447, + "grad_norm": 1.153344988822937, + "learning_rate": 4.988853427956406e-05, + "loss": 0.1904, + "step": 18320 + }, + { + "epoch": 0.6661094556290428, + "grad_norm": 2.052828073501587, + "learning_rate": 4.988804425617341e-05, + "loss": 0.1979, + "step": 18330 + }, + { + "epoch": 0.666472854131841, + "grad_norm": 1.373213768005371, + "learning_rate": 4.988755316044548e-05, + "loss": 0.1836, + "step": 18340 + }, + { + "epoch": 0.6668362526346392, + "grad_norm": 24.185970306396484, + "learning_rate": 4.9887060992401436e-05, + "loss": 0.2546, + "step": 18350 + }, + { + "epoch": 0.6671996511374373, + "grad_norm": 1.702205777168274, + "learning_rate": 4.988656775206248e-05, + "loss": 0.1433, + "step": 18360 + }, + { + "epoch": 0.6675630496402355, + "grad_norm": 2.279100179672241, + "learning_rate": 4.9886073439449864e-05, + "loss": 0.1671, + "step": 18370 + }, + { + "epoch": 0.6679264481430337, + "grad_norm": 3.928740978240967, + "learning_rate": 4.98855780545849e-05, + "loss": 0.1506, + "step": 18380 + }, + { + "epoch": 0.6682898466458318, + "grad_norm": 2.2895402908325195, + "learning_rate": 4.988508159748891e-05, + "loss": 0.1523, + "step": 18390 + }, + { + "epoch": 0.66865324514863, + "grad_norm": 10.151689529418945, + "learning_rate": 4.98845840681833e-05, + "loss": 0.2284, + "step": 18400 + }, + { + "epoch": 0.6690166436514282, + "grad_norm": 1.268561840057373, + "learning_rate": 4.9884085466689504e-05, + "loss": 0.171, + "step": 18410 + }, + { + "epoch": 0.6693800421542263, + "grad_norm": 1.0731265544891357, + "learning_rate": 4.9883585793029e-05, + "loss": 0.7778, + "step": 18420 + }, + { + "epoch": 0.6697434406570245, + "grad_norm": 1.0762509107589722, + "learning_rate": 4.988308504722332e-05, + "loss": 0.1966, + "step": 18430 + }, + { + "epoch": 0.6701068391598226, + "grad_norm": 6.763409614562988, + "learning_rate": 4.9882583229294044e-05, + "loss": 0.156, + "step": 18440 + }, + { + "epoch": 0.6704702376626208, + "grad_norm": 8.312501907348633, + "learning_rate": 4.988208033926279e-05, + "loss": 0.2573, + "step": 18450 + }, + { + "epoch": 0.670833636165419, + "grad_norm": 1.7566003799438477, + "learning_rate": 4.988157637715122e-05, + "loss": 0.1639, + "step": 18460 + }, + { + "epoch": 0.6711970346682171, + "grad_norm": 2.336911916732788, + "learning_rate": 4.988107134298105e-05, + "loss": 0.1536, + "step": 18470 + }, + { + "epoch": 0.6715604331710153, + "grad_norm": 2.2477078437805176, + "learning_rate": 4.988056523677405e-05, + "loss": 0.2734, + "step": 18480 + }, + { + "epoch": 0.6719238316738135, + "grad_norm": 1.62912917137146, + "learning_rate": 4.9880058058552015e-05, + "loss": 0.1501, + "step": 18490 + }, + { + "epoch": 0.6722872301766116, + "grad_norm": 8.896906852722168, + "learning_rate": 4.98795498083368e-05, + "loss": 0.213, + "step": 18500 + }, + { + "epoch": 0.6726506286794098, + "grad_norm": 1.804291009902954, + "learning_rate": 4.987904048615031e-05, + "loss": 0.2175, + "step": 18510 + }, + { + "epoch": 0.673014027182208, + "grad_norm": 0.9261330366134644, + "learning_rate": 4.9878530092014486e-05, + "loss": 0.1553, + "step": 18520 + }, + { + "epoch": 0.6733774256850061, + "grad_norm": 4.854642868041992, + "learning_rate": 4.987801862595132e-05, + "loss": 0.2065, + "step": 18530 + }, + { + "epoch": 0.6737408241878043, + "grad_norm": 0.9362125992774963, + "learning_rate": 4.987750608798284e-05, + "loss": 0.1611, + "step": 18540 + }, + { + "epoch": 0.6741042226906026, + "grad_norm": 13.348092079162598, + "learning_rate": 4.987699247813114e-05, + "loss": 0.2834, + "step": 18550 + }, + { + "epoch": 0.6744676211934006, + "grad_norm": 1.3235937356948853, + "learning_rate": 4.987647779641835e-05, + "loss": 0.166, + "step": 18560 + }, + { + "epoch": 0.6748310196961989, + "grad_norm": 1.7941697835922241, + "learning_rate": 4.987596204286664e-05, + "loss": 0.186, + "step": 18570 + }, + { + "epoch": 0.6751944181989971, + "grad_norm": 6.945876121520996, + "learning_rate": 4.987544521749824e-05, + "loss": 0.1859, + "step": 18580 + }, + { + "epoch": 0.6755578167017952, + "grad_norm": 1.1671024560928345, + "learning_rate": 4.98749273203354e-05, + "loss": 0.2007, + "step": 18590 + }, + { + "epoch": 0.6759212152045934, + "grad_norm": 46.817718505859375, + "learning_rate": 4.987440835140046e-05, + "loss": 0.275, + "step": 18600 + }, + { + "epoch": 0.6759212152045934, + "eval_loss": 0.4186328053474426, + "eval_runtime": 180.6066, + "eval_samples_per_second": 41.051, + "eval_steps_per_second": 5.133, + "eval_wer": 0.21444260896400238, + "step": 18600 + }, + { + "epoch": 0.6762846137073916, + "grad_norm": 0.9619908928871155, + "learning_rate": 4.987388831071575e-05, + "loss": 0.2147, + "step": 18610 + }, + { + "epoch": 0.6766480122101897, + "grad_norm": 1.139666199684143, + "learning_rate": 4.9873367198303714e-05, + "loss": 0.2591, + "step": 18620 + }, + { + "epoch": 0.6770114107129879, + "grad_norm": 2.6673026084899902, + "learning_rate": 4.9872845014186776e-05, + "loss": 0.2013, + "step": 18630 + }, + { + "epoch": 0.6773748092157861, + "grad_norm": 1.0486637353897095, + "learning_rate": 4.987232175838745e-05, + "loss": 0.2326, + "step": 18640 + }, + { + "epoch": 0.6777382077185842, + "grad_norm": 6.457462787628174, + "learning_rate": 4.987179743092827e-05, + "loss": 0.2395, + "step": 18650 + }, + { + "epoch": 0.6781016062213824, + "grad_norm": 3.296480178833008, + "learning_rate": 4.987127203183183e-05, + "loss": 0.1857, + "step": 18660 + }, + { + "epoch": 0.6784650047241806, + "grad_norm": 2.828460454940796, + "learning_rate": 4.987074556112078e-05, + "loss": 0.1391, + "step": 18670 + }, + { + "epoch": 0.6788284032269787, + "grad_norm": 10.424219131469727, + "learning_rate": 4.987021801881779e-05, + "loss": 0.1583, + "step": 18680 + }, + { + "epoch": 0.6791918017297769, + "grad_norm": 5.248502254486084, + "learning_rate": 4.986968940494559e-05, + "loss": 0.1676, + "step": 18690 + }, + { + "epoch": 0.6795552002325751, + "grad_norm": 8.20375919342041, + "learning_rate": 4.986915971952696e-05, + "loss": 0.2844, + "step": 18700 + }, + { + "epoch": 0.6799185987353732, + "grad_norm": 2.415562152862549, + "learning_rate": 4.986862896258473e-05, + "loss": 0.1634, + "step": 18710 + }, + { + "epoch": 0.6802819972381714, + "grad_norm": 1.635680079460144, + "learning_rate": 4.986809713414176e-05, + "loss": 0.509, + "step": 18720 + }, + { + "epoch": 0.6806453957409695, + "grad_norm": 10.641048431396484, + "learning_rate": 4.986756423422095e-05, + "loss": 0.2015, + "step": 18730 + }, + { + "epoch": 0.6810087942437677, + "grad_norm": 1.3304156064987183, + "learning_rate": 4.986703026284529e-05, + "loss": 0.1598, + "step": 18740 + }, + { + "epoch": 0.6813721927465659, + "grad_norm": 4.707154750823975, + "learning_rate": 4.986649522003778e-05, + "loss": 0.2486, + "step": 18750 + }, + { + "epoch": 0.681735591249364, + "grad_norm": 1.671863317489624, + "learning_rate": 4.9865959105821454e-05, + "loss": 0.1628, + "step": 18760 + }, + { + "epoch": 0.6820989897521622, + "grad_norm": 2.4183709621429443, + "learning_rate": 4.986542192021942e-05, + "loss": 0.1636, + "step": 18770 + }, + { + "epoch": 0.6824623882549604, + "grad_norm": 120.8931884765625, + "learning_rate": 4.9864883663254836e-05, + "loss": 2.0172, + "step": 18780 + }, + { + "epoch": 0.6828257867577585, + "grad_norm": 2.785879135131836, + "learning_rate": 4.986434433495089e-05, + "loss": 0.1669, + "step": 18790 + }, + { + "epoch": 0.6831891852605567, + "grad_norm": 3.662753105163574, + "learning_rate": 4.98638039353308e-05, + "loss": 0.3095, + "step": 18800 + }, + { + "epoch": 0.6835525837633549, + "grad_norm": 1.1632777452468872, + "learning_rate": 4.986326246441787e-05, + "loss": 0.1632, + "step": 18810 + }, + { + "epoch": 0.683915982266153, + "grad_norm": 0.9660913348197937, + "learning_rate": 4.986271992223543e-05, + "loss": 0.1509, + "step": 18820 + }, + { + "epoch": 0.6842793807689512, + "grad_norm": 2.810391426086426, + "learning_rate": 4.986217630880684e-05, + "loss": 0.1507, + "step": 18830 + }, + { + "epoch": 0.6846427792717494, + "grad_norm": 2.008641242980957, + "learning_rate": 4.986163162415554e-05, + "loss": 0.1858, + "step": 18840 + }, + { + "epoch": 0.6850061777745475, + "grad_norm": 3.4007887840270996, + "learning_rate": 4.986108586830499e-05, + "loss": 0.2389, + "step": 18850 + }, + { + "epoch": 0.6853695762773457, + "grad_norm": 0.8250002861022949, + "learning_rate": 4.986053904127871e-05, + "loss": 0.1618, + "step": 18860 + }, + { + "epoch": 0.685732974780144, + "grad_norm": 0.792607307434082, + "learning_rate": 4.986004598111927e-05, + "loss": 1.5835, + "step": 18870 + }, + { + "epoch": 0.686096373282942, + "grad_norm": 2.740478038787842, + "learning_rate": 4.985949711892404e-05, + "loss": 0.2021, + "step": 18880 + }, + { + "epoch": 0.6864597717857402, + "grad_norm": 1.1361775398254395, + "learning_rate": 4.985894718562153e-05, + "loss": 0.2244, + "step": 18890 + }, + { + "epoch": 0.6868231702885385, + "grad_norm": 2.692542314529419, + "learning_rate": 4.985839618123543e-05, + "loss": 0.2095, + "step": 18900 + }, + { + "epoch": 0.6871865687913365, + "grad_norm": 1.2691428661346436, + "learning_rate": 4.9857844105789485e-05, + "loss": 0.1533, + "step": 18910 + }, + { + "epoch": 0.6875499672941348, + "grad_norm": 2.087209939956665, + "learning_rate": 4.9857290959307483e-05, + "loss": 0.1469, + "step": 18920 + }, + { + "epoch": 0.687913365796933, + "grad_norm": 1.5252209901809692, + "learning_rate": 4.985673674181326e-05, + "loss": 0.2099, + "step": 18930 + }, + { + "epoch": 0.6882767642997311, + "grad_norm": 1.81588876247406, + "learning_rate": 4.9856181453330685e-05, + "loss": 0.174, + "step": 18940 + }, + { + "epoch": 0.6886401628025293, + "grad_norm": 21.244775772094727, + "learning_rate": 4.9855625093883695e-05, + "loss": 0.2455, + "step": 18950 + }, + { + "epoch": 0.6890035613053275, + "grad_norm": 1.53201425075531, + "learning_rate": 4.9855067663496255e-05, + "loss": 0.1731, + "step": 18960 + }, + { + "epoch": 0.6893669598081256, + "grad_norm": 0.9922922849655151, + "learning_rate": 4.985450916219239e-05, + "loss": 0.1569, + "step": 18970 + }, + { + "epoch": 0.6897303583109238, + "grad_norm": 1.6983296871185303, + "learning_rate": 4.985394958999615e-05, + "loss": 0.1784, + "step": 18980 + }, + { + "epoch": 0.690093756813722, + "grad_norm": 2.5069353580474854, + "learning_rate": 4.9853388946931654e-05, + "loss": 0.1484, + "step": 18990 + }, + { + "epoch": 0.6904571553165201, + "grad_norm": 52.345367431640625, + "learning_rate": 4.985282723302306e-05, + "loss": 0.2431, + "step": 19000 + }, + { + "epoch": 0.6908205538193183, + "grad_norm": 1.5318138599395752, + "learning_rate": 4.9852264448294564e-05, + "loss": 0.1662, + "step": 19010 + }, + { + "epoch": 0.6911839523221164, + "grad_norm": 1.5980876684188843, + "learning_rate": 4.985170059277041e-05, + "loss": 1.3532, + "step": 19020 + }, + { + "epoch": 0.6915473508249146, + "grad_norm": 2.355023145675659, + "learning_rate": 4.9851135666474915e-05, + "loss": 0.1688, + "step": 19030 + }, + { + "epoch": 0.6919107493277128, + "grad_norm": 3.2141480445861816, + "learning_rate": 4.98505696694324e-05, + "loss": 0.1303, + "step": 19040 + }, + { + "epoch": 0.6922741478305109, + "grad_norm": 19.482290267944336, + "learning_rate": 4.985000260166725e-05, + "loss": 0.2337, + "step": 19050 + }, + { + "epoch": 0.6926375463333091, + "grad_norm": 0.8456101417541504, + "learning_rate": 4.9849434463203915e-05, + "loss": 0.1732, + "step": 19060 + }, + { + "epoch": 0.6930009448361073, + "grad_norm": 2.2158889770507812, + "learning_rate": 4.9848865254066856e-05, + "loss": 0.1524, + "step": 19070 + }, + { + "epoch": 0.6933643433389054, + "grad_norm": 2.0843331813812256, + "learning_rate": 4.9848294974280605e-05, + "loss": 0.1943, + "step": 19080 + }, + { + "epoch": 0.6937277418417036, + "grad_norm": 2.6970462799072266, + "learning_rate": 4.9847723623869734e-05, + "loss": 0.1697, + "step": 19090 + }, + { + "epoch": 0.6940911403445018, + "grad_norm": 9.394730567932129, + "learning_rate": 4.984715120285887e-05, + "loss": 0.2151, + "step": 19100 + }, + { + "epoch": 0.6944545388472999, + "grad_norm": 1.922090768814087, + "learning_rate": 4.9846577711272656e-05, + "loss": 0.1737, + "step": 19110 + }, + { + "epoch": 0.6948179373500981, + "grad_norm": 1.3870245218276978, + "learning_rate": 4.9846003149135815e-05, + "loss": 0.1694, + "step": 19120 + }, + { + "epoch": 0.6951813358528963, + "grad_norm": 1.6474970579147339, + "learning_rate": 4.9845427516473104e-05, + "loss": 0.219, + "step": 19130 + }, + { + "epoch": 0.6955447343556944, + "grad_norm": 1.4302411079406738, + "learning_rate": 4.984485081330932e-05, + "loss": 0.1489, + "step": 19140 + }, + { + "epoch": 0.6959081328584926, + "grad_norm": 3.888967990875244, + "learning_rate": 4.984427303966932e-05, + "loss": 0.2425, + "step": 19150 + }, + { + "epoch": 0.6962715313612908, + "grad_norm": 1.2002874612808228, + "learning_rate": 4.984369419557798e-05, + "loss": 0.1575, + "step": 19160 + }, + { + "epoch": 0.6966349298640889, + "grad_norm": 1.9064863920211792, + "learning_rate": 4.984311428106025e-05, + "loss": 0.1526, + "step": 19170 + }, + { + "epoch": 0.6969983283668871, + "grad_norm": 1.3838772773742676, + "learning_rate": 4.984253329614112e-05, + "loss": 0.1601, + "step": 19180 + }, + { + "epoch": 0.6973617268696853, + "grad_norm": 3.6261801719665527, + "learning_rate": 4.984195124084563e-05, + "loss": 0.1668, + "step": 19190 + }, + { + "epoch": 0.6977251253724834, + "grad_norm": 7.647263526916504, + "learning_rate": 4.984136811519884e-05, + "loss": 0.1879, + "step": 19200 + }, + { + "epoch": 0.6977251253724834, + "eval_loss": 0.3865276575088501, + "eval_runtime": 179.6651, + "eval_samples_per_second": 41.266, + "eval_steps_per_second": 5.16, + "eval_wer": 0.20815255867990634, + "step": 19200 + }, + { + "epoch": 0.6980885238752816, + "grad_norm": 1.7563225030899048, + "learning_rate": 4.984078391922589e-05, + "loss": 0.1481, + "step": 19210 + }, + { + "epoch": 0.6984519223780798, + "grad_norm": 1.8016029596328735, + "learning_rate": 4.984019865295194e-05, + "loss": 0.1713, + "step": 19220 + }, + { + "epoch": 0.6988153208808779, + "grad_norm": 2.0969181060791016, + "learning_rate": 4.983961231640221e-05, + "loss": 0.1959, + "step": 19230 + }, + { + "epoch": 0.6991787193836762, + "grad_norm": 1.6823608875274658, + "learning_rate": 4.9839024909601964e-05, + "loss": 0.1729, + "step": 19240 + }, + { + "epoch": 0.6995421178864744, + "grad_norm": 11.533753395080566, + "learning_rate": 4.983843643257652e-05, + "loss": 0.2264, + "step": 19250 + }, + { + "epoch": 0.6999055163892725, + "grad_norm": 4.1039204597473145, + "learning_rate": 4.983784688535122e-05, + "loss": 0.1738, + "step": 19260 + }, + { + "epoch": 0.7002689148920707, + "grad_norm": 1.1051629781723022, + "learning_rate": 4.983725626795147e-05, + "loss": 0.155, + "step": 19270 + }, + { + "epoch": 0.7006323133948689, + "grad_norm": 4.303994178771973, + "learning_rate": 4.983666458040273e-05, + "loss": 0.1593, + "step": 19280 + }, + { + "epoch": 0.700995711897667, + "grad_norm": 1.2324292659759521, + "learning_rate": 4.983607182273047e-05, + "loss": 0.1642, + "step": 19290 + }, + { + "epoch": 0.7013591104004652, + "grad_norm": 6.101926326751709, + "learning_rate": 4.983547799496024e-05, + "loss": 0.2338, + "step": 19300 + }, + { + "epoch": 0.7017225089032633, + "grad_norm": 1.1532049179077148, + "learning_rate": 4.983488309711763e-05, + "loss": 0.1591, + "step": 19310 + }, + { + "epoch": 0.7020859074060615, + "grad_norm": 0.8216233253479004, + "learning_rate": 4.983428712922828e-05, + "loss": 0.1489, + "step": 19320 + }, + { + "epoch": 0.7024493059088597, + "grad_norm": 1.489461064338684, + "learning_rate": 4.983369009131785e-05, + "loss": 0.2048, + "step": 19330 + }, + { + "epoch": 0.7028127044116578, + "grad_norm": 1.0493615865707397, + "learning_rate": 4.983309198341207e-05, + "loss": 0.1525, + "step": 19340 + }, + { + "epoch": 0.703176102914456, + "grad_norm": 10.2578706741333, + "learning_rate": 4.983249280553672e-05, + "loss": 0.2297, + "step": 19350 + }, + { + "epoch": 0.7035395014172542, + "grad_norm": 1.5366660356521606, + "learning_rate": 4.983189255771761e-05, + "loss": 0.1644, + "step": 19360 + }, + { + "epoch": 0.7039028999200523, + "grad_norm": 1.4915844202041626, + "learning_rate": 4.9831291239980596e-05, + "loss": 0.1599, + "step": 19370 + }, + { + "epoch": 0.7042662984228505, + "grad_norm": 1.3012590408325195, + "learning_rate": 4.98306888523516e-05, + "loss": 0.1907, + "step": 19380 + }, + { + "epoch": 0.7046296969256487, + "grad_norm": 1.5029476881027222, + "learning_rate": 4.983008539485656e-05, + "loss": 0.1391, + "step": 19390 + }, + { + "epoch": 0.7049930954284468, + "grad_norm": 3.0202033519744873, + "learning_rate": 4.9829480867521495e-05, + "loss": 0.2218, + "step": 19400 + }, + { + "epoch": 0.705356493931245, + "grad_norm": 1.7761317491531372, + "learning_rate": 4.9828875270372434e-05, + "loss": 0.1605, + "step": 19410 + }, + { + "epoch": 0.7057198924340432, + "grad_norm": 1.420793890953064, + "learning_rate": 4.9828268603435485e-05, + "loss": 1.5838, + "step": 19420 + }, + { + "epoch": 0.7060832909368413, + "grad_norm": 2.079665422439575, + "learning_rate": 4.982766086673678e-05, + "loss": 0.2146, + "step": 19430 + }, + { + "epoch": 0.7064466894396395, + "grad_norm": 2.440471887588501, + "learning_rate": 4.98270520603025e-05, + "loss": 0.1733, + "step": 19440 + }, + { + "epoch": 0.7068100879424377, + "grad_norm": 7.773731708526611, + "learning_rate": 4.982644218415889e-05, + "loss": 0.2126, + "step": 19450 + }, + { + "epoch": 0.7071734864452358, + "grad_norm": 0.9480405449867249, + "learning_rate": 4.982583123833221e-05, + "loss": 0.1575, + "step": 19460 + }, + { + "epoch": 0.707536884948034, + "grad_norm": 12.79196548461914, + "learning_rate": 4.982521922284881e-05, + "loss": 0.2745, + "step": 19470 + }, + { + "epoch": 0.7079002834508322, + "grad_norm": 4.492150783538818, + "learning_rate": 4.982460613773502e-05, + "loss": 0.1663, + "step": 19480 + }, + { + "epoch": 0.7082636819536303, + "grad_norm": 1.2373683452606201, + "learning_rate": 4.9823991983017295e-05, + "loss": 0.1699, + "step": 19490 + }, + { + "epoch": 0.7086270804564285, + "grad_norm": 5.8804402351379395, + "learning_rate": 4.982337675872207e-05, + "loss": 0.242, + "step": 19500 + }, + { + "epoch": 0.7089904789592267, + "grad_norm": 0.9465837478637695, + "learning_rate": 4.982276046487586e-05, + "loss": 0.1471, + "step": 19510 + }, + { + "epoch": 0.7093538774620248, + "grad_norm": 1.6178842782974243, + "learning_rate": 4.9822143101505226e-05, + "loss": 0.1619, + "step": 19520 + }, + { + "epoch": 0.709717275964823, + "grad_norm": 2.4963414669036865, + "learning_rate": 4.9821524668636766e-05, + "loss": 0.1426, + "step": 19530 + }, + { + "epoch": 0.7100806744676212, + "grad_norm": 1.1380610466003418, + "learning_rate": 4.982090516629712e-05, + "loss": 0.2364, + "step": 19540 + }, + { + "epoch": 0.7104440729704193, + "grad_norm": 5.2998046875, + "learning_rate": 4.982028459451298e-05, + "loss": 0.2661, + "step": 19550 + }, + { + "epoch": 0.7108074714732175, + "grad_norm": 1.1476637125015259, + "learning_rate": 4.9819662953311096e-05, + "loss": 0.1306, + "step": 19560 + }, + { + "epoch": 0.7111708699760158, + "grad_norm": 0.7960777878761292, + "learning_rate": 4.981904024271824e-05, + "loss": 0.1604, + "step": 19570 + }, + { + "epoch": 0.7115342684788138, + "grad_norm": 1.9035999774932861, + "learning_rate": 4.981841646276124e-05, + "loss": 0.1728, + "step": 19580 + }, + { + "epoch": 0.711897666981612, + "grad_norm": 0.9725393056869507, + "learning_rate": 4.981779161346699e-05, + "loss": 0.2529, + "step": 19590 + }, + { + "epoch": 0.7122610654844101, + "grad_norm": 5.759589672088623, + "learning_rate": 4.98171656948624e-05, + "loss": 0.25, + "step": 19600 + }, + { + "epoch": 0.7126244639872084, + "grad_norm": 1.3716357946395874, + "learning_rate": 4.9816538706974434e-05, + "loss": 0.1603, + "step": 19610 + }, + { + "epoch": 0.7129878624900066, + "grad_norm": 1.4253743886947632, + "learning_rate": 4.981591064983011e-05, + "loss": 0.1496, + "step": 19620 + }, + { + "epoch": 0.7133512609928047, + "grad_norm": 2.4253408908843994, + "learning_rate": 4.98152815234565e-05, + "loss": 0.1694, + "step": 19630 + }, + { + "epoch": 0.7137146594956029, + "grad_norm": 1.212689757347107, + "learning_rate": 4.9814651327880696e-05, + "loss": 0.1869, + "step": 19640 + }, + { + "epoch": 0.7140780579984011, + "grad_norm": 7.003270626068115, + "learning_rate": 4.981402006312986e-05, + "loss": 0.2709, + "step": 19650 + }, + { + "epoch": 0.7144414565011992, + "grad_norm": 1.6173512935638428, + "learning_rate": 4.981338772923119e-05, + "loss": 0.1651, + "step": 19660 + }, + { + "epoch": 0.7148048550039974, + "grad_norm": 2.2197723388671875, + "learning_rate": 4.981275432621192e-05, + "loss": 0.1657, + "step": 19670 + }, + { + "epoch": 0.7151682535067956, + "grad_norm": 1.8906898498535156, + "learning_rate": 4.981211985409936e-05, + "loss": 2.3111, + "step": 19680 + }, + { + "epoch": 0.7155316520095937, + "grad_norm": 3.50747013092041, + "learning_rate": 4.981148431292084e-05, + "loss": 0.1498, + "step": 19690 + }, + { + "epoch": 0.7158950505123919, + "grad_norm": 4.080805778503418, + "learning_rate": 4.981084770270373e-05, + "loss": 0.2094, + "step": 19700 + }, + { + "epoch": 0.7162584490151901, + "grad_norm": 2.1056652069091797, + "learning_rate": 4.981021002347547e-05, + "loss": 0.157, + "step": 19710 + }, + { + "epoch": 0.7166218475179882, + "grad_norm": 1.07776939868927, + "learning_rate": 4.980957127526354e-05, + "loss": 0.2049, + "step": 19720 + }, + { + "epoch": 0.7169852460207864, + "grad_norm": 3.5387072563171387, + "learning_rate": 4.980893145809546e-05, + "loss": 0.1706, + "step": 19730 + }, + { + "epoch": 0.7173486445235846, + "grad_norm": 1.5516027212142944, + "learning_rate": 4.980829057199879e-05, + "loss": 0.1371, + "step": 19740 + }, + { + "epoch": 0.7177120430263827, + "grad_norm": 6.618633270263672, + "learning_rate": 4.9807648617001145e-05, + "loss": 0.1833, + "step": 19750 + }, + { + "epoch": 0.7180754415291809, + "grad_norm": 1.7093079090118408, + "learning_rate": 4.980700559313019e-05, + "loss": 0.1592, + "step": 19760 + }, + { + "epoch": 0.7184388400319791, + "grad_norm": 1.1217936277389526, + "learning_rate": 4.9806361500413626e-05, + "loss": 0.145, + "step": 19770 + }, + { + "epoch": 0.7188022385347772, + "grad_norm": 1.869722604751587, + "learning_rate": 4.980571633887921e-05, + "loss": 0.1605, + "step": 19780 + }, + { + "epoch": 0.7191656370375754, + "grad_norm": 1.1555829048156738, + "learning_rate": 4.980507010855473e-05, + "loss": 0.1539, + "step": 19790 + }, + { + "epoch": 0.7195290355403736, + "grad_norm": 5.0145111083984375, + "learning_rate": 4.9804422809468046e-05, + "loss": 0.2334, + "step": 19800 + }, + { + "epoch": 0.7195290355403736, + "eval_loss": 0.394449919462204, + "eval_runtime": 180.0311, + "eval_samples_per_second": 41.182, + "eval_steps_per_second": 5.149, + "eval_wer": 0.2100677110752083, + "step": 19800 + }, + { + "epoch": 0.7198924340431717, + "grad_norm": 1.0865716934204102, + "learning_rate": 4.980377444164702e-05, + "loss": 0.1569, + "step": 19810 + }, + { + "epoch": 0.7202558325459699, + "grad_norm": 1.5475140810012817, + "learning_rate": 4.980312500511962e-05, + "loss": 0.1268, + "step": 19820 + }, + { + "epoch": 0.7206192310487681, + "grad_norm": 1.9507659673690796, + "learning_rate": 4.980247449991381e-05, + "loss": 0.2092, + "step": 19830 + }, + { + "epoch": 0.7209826295515662, + "grad_norm": 1.185339093208313, + "learning_rate": 4.980182292605762e-05, + "loss": 0.1432, + "step": 19840 + }, + { + "epoch": 0.7213460280543644, + "grad_norm": 5.294797420501709, + "learning_rate": 4.980117028357912e-05, + "loss": 0.2459, + "step": 19850 + }, + { + "epoch": 0.7217094265571626, + "grad_norm": 2.691941976547241, + "learning_rate": 4.980051657250645e-05, + "loss": 0.1747, + "step": 19860 + }, + { + "epoch": 0.7220728250599607, + "grad_norm": 1.3377537727355957, + "learning_rate": 4.9799861792867756e-05, + "loss": 0.1541, + "step": 19870 + }, + { + "epoch": 0.7224362235627589, + "grad_norm": 3.39907169342041, + "learning_rate": 4.979920594469124e-05, + "loss": 0.166, + "step": 19880 + }, + { + "epoch": 0.722799622065557, + "grad_norm": 1.738271951675415, + "learning_rate": 4.9798549028005195e-05, + "loss": 0.1591, + "step": 19890 + }, + { + "epoch": 0.7231630205683552, + "grad_norm": 4.062039852142334, + "learning_rate": 4.9797891042837893e-05, + "loss": 0.2372, + "step": 19900 + }, + { + "epoch": 0.7235264190711534, + "grad_norm": 2.46109676361084, + "learning_rate": 4.979723198921771e-05, + "loss": 0.1606, + "step": 19910 + }, + { + "epoch": 0.7238898175739515, + "grad_norm": 1.3511689901351929, + "learning_rate": 4.9796571867173017e-05, + "loss": 0.148, + "step": 19920 + }, + { + "epoch": 0.7242532160767498, + "grad_norm": 4.831977844238281, + "learning_rate": 4.979591067673227e-05, + "loss": 0.1832, + "step": 19930 + }, + { + "epoch": 0.724616614579548, + "grad_norm": 0.9530340433120728, + "learning_rate": 4.979524841792397e-05, + "loss": 0.1776, + "step": 19940 + }, + { + "epoch": 0.724980013082346, + "grad_norm": 2.886121988296509, + "learning_rate": 4.979458509077663e-05, + "loss": 0.217, + "step": 19950 + }, + { + "epoch": 0.7253434115851443, + "grad_norm": 2.6050822734832764, + "learning_rate": 4.979392069531883e-05, + "loss": 0.1709, + "step": 19960 + }, + { + "epoch": 0.7257068100879425, + "grad_norm": 1.1615772247314453, + "learning_rate": 4.979325523157921e-05, + "loss": 0.1891, + "step": 19970 + }, + { + "epoch": 0.7260702085907406, + "grad_norm": 5.947473526000977, + "learning_rate": 4.979258869958643e-05, + "loss": 0.1685, + "step": 19980 + }, + { + "epoch": 0.7264336070935388, + "grad_norm": 2.2721457481384277, + "learning_rate": 4.979192109936922e-05, + "loss": 0.1733, + "step": 19990 + }, + { + "epoch": 0.726797005596337, + "grad_norm": 2.83907413482666, + "learning_rate": 4.979125243095635e-05, + "loss": 0.2067, + "step": 20000 + }, + { + "epoch": 0.7271604040991351, + "grad_norm": 1.84774649143219, + "learning_rate": 4.9790582694376605e-05, + "loss": 0.1634, + "step": 20010 + }, + { + "epoch": 0.7275238026019333, + "grad_norm": 3.5162901878356934, + "learning_rate": 4.978991188965887e-05, + "loss": 0.1546, + "step": 20020 + }, + { + "epoch": 0.7278872011047315, + "grad_norm": 1.3396214246749878, + "learning_rate": 4.9789240016832026e-05, + "loss": 0.1549, + "step": 20030 + }, + { + "epoch": 0.7282505996075296, + "grad_norm": 0.8957159519195557, + "learning_rate": 4.978856707592503e-05, + "loss": 0.4856, + "step": 20040 + }, + { + "epoch": 0.7286139981103278, + "grad_norm": 3.291719913482666, + "learning_rate": 4.978789306696688e-05, + "loss": 0.1672, + "step": 20050 + }, + { + "epoch": 0.728977396613126, + "grad_norm": 1.2237446308135986, + "learning_rate": 4.978721798998661e-05, + "loss": 0.1547, + "step": 20060 + }, + { + "epoch": 0.7293407951159241, + "grad_norm": 1.5760120153427124, + "learning_rate": 4.978654184501331e-05, + "loss": 0.1491, + "step": 20070 + }, + { + "epoch": 0.7297041936187223, + "grad_norm": 2.661914587020874, + "learning_rate": 4.978586463207612e-05, + "loss": 0.2399, + "step": 20080 + }, + { + "epoch": 0.7300675921215205, + "grad_norm": 1.4015228748321533, + "learning_rate": 4.978518635120421e-05, + "loss": 0.1592, + "step": 20090 + }, + { + "epoch": 0.7304309906243186, + "grad_norm": 11.479881286621094, + "learning_rate": 4.9784507002426793e-05, + "loss": 0.2478, + "step": 20100 + }, + { + "epoch": 0.7307943891271168, + "grad_norm": 2.3282432556152344, + "learning_rate": 4.9783826585773164e-05, + "loss": 0.1565, + "step": 20110 + }, + { + "epoch": 0.731157787629915, + "grad_norm": 1.0281476974487305, + "learning_rate": 4.9783145101272625e-05, + "loss": 2.6872, + "step": 20120 + }, + { + "epoch": 0.7315211861327131, + "grad_norm": 1.4759191274642944, + "learning_rate": 4.978246254895455e-05, + "loss": 0.1755, + "step": 20130 + }, + { + "epoch": 0.7318845846355113, + "grad_norm": 1.1100878715515137, + "learning_rate": 4.978177892884833e-05, + "loss": 0.1519, + "step": 20140 + }, + { + "epoch": 0.7322479831383095, + "grad_norm": 5.326310157775879, + "learning_rate": 4.9781094240983435e-05, + "loss": 0.257, + "step": 20150 + }, + { + "epoch": 0.7326113816411076, + "grad_norm": 8.199230194091797, + "learning_rate": 4.978040848538936e-05, + "loss": 0.192, + "step": 20160 + }, + { + "epoch": 0.7329747801439058, + "grad_norm": 1.579663872718811, + "learning_rate": 4.9779721662095654e-05, + "loss": 0.1738, + "step": 20170 + }, + { + "epoch": 0.733338178646704, + "grad_norm": 3.319883346557617, + "learning_rate": 4.97790337711319e-05, + "loss": 0.1809, + "step": 20180 + }, + { + "epoch": 0.7337015771495021, + "grad_norm": 1.4813331365585327, + "learning_rate": 4.977834481252776e-05, + "loss": 0.1645, + "step": 20190 + }, + { + "epoch": 0.7340649756523003, + "grad_norm": 4.392731666564941, + "learning_rate": 4.9777654786312886e-05, + "loss": 0.1897, + "step": 20200 + }, + { + "epoch": 0.7344283741550984, + "grad_norm": 1.7336299419403076, + "learning_rate": 4.9776963692517034e-05, + "loss": 0.1751, + "step": 20210 + }, + { + "epoch": 0.7347917726578966, + "grad_norm": 1.6261765956878662, + "learning_rate": 4.977627153116998e-05, + "loss": 0.156, + "step": 20220 + }, + { + "epoch": 0.7351551711606948, + "grad_norm": 1.9801748991012573, + "learning_rate": 4.977557830230153e-05, + "loss": 0.2069, + "step": 20230 + }, + { + "epoch": 0.7355185696634929, + "grad_norm": 1.4615390300750732, + "learning_rate": 4.977488400594157e-05, + "loss": 0.1458, + "step": 20240 + }, + { + "epoch": 0.7358819681662911, + "grad_norm": 3.78981876373291, + "learning_rate": 4.977418864212e-05, + "loss": 0.1765, + "step": 20250 + }, + { + "epoch": 0.7362453666690894, + "grad_norm": 0.813947319984436, + "learning_rate": 4.97734922108668e-05, + "loss": 0.1482, + "step": 20260 + }, + { + "epoch": 0.7366087651718874, + "grad_norm": 1.1082271337509155, + "learning_rate": 4.977279471221195e-05, + "loss": 0.149, + "step": 20270 + }, + { + "epoch": 0.7369721636746857, + "grad_norm": 4.023866176605225, + "learning_rate": 4.9772096146185527e-05, + "loss": 0.1797, + "step": 20280 + }, + { + "epoch": 0.7373355621774839, + "grad_norm": 1.3649333715438843, + "learning_rate": 4.977139651281762e-05, + "loss": 0.182, + "step": 20290 + }, + { + "epoch": 0.737698960680282, + "grad_norm": 8.213293075561523, + "learning_rate": 4.977069581213837e-05, + "loss": 0.2117, + "step": 20300 + }, + { + "epoch": 0.7380623591830802, + "grad_norm": 1.0769990682601929, + "learning_rate": 4.9769994044177976e-05, + "loss": 0.1689, + "step": 20310 + }, + { + "epoch": 0.7384257576858784, + "grad_norm": 1.712949275970459, + "learning_rate": 4.9769291208966674e-05, + "loss": 0.1402, + "step": 20320 + }, + { + "epoch": 0.7387891561886765, + "grad_norm": 2.213164806365967, + "learning_rate": 4.976858730653473e-05, + "loss": 0.193, + "step": 20330 + }, + { + "epoch": 0.7391525546914747, + "grad_norm": 1.9228605031967163, + "learning_rate": 4.97678823369125e-05, + "loss": 0.1517, + "step": 20340 + }, + { + "epoch": 0.7395159531942729, + "grad_norm": 8.813825607299805, + "learning_rate": 4.976717630013034e-05, + "loss": 0.2682, + "step": 20350 + }, + { + "epoch": 0.739879351697071, + "grad_norm": 1.9778189659118652, + "learning_rate": 4.976646919621867e-05, + "loss": 0.1701, + "step": 20360 + }, + { + "epoch": 0.7402427501998692, + "grad_norm": 1.8553961515426636, + "learning_rate": 4.976576102520797e-05, + "loss": 0.1455, + "step": 20370 + }, + { + "epoch": 0.7406061487026674, + "grad_norm": 3.1159512996673584, + "learning_rate": 4.976505178712874e-05, + "loss": 0.2252, + "step": 20380 + }, + { + "epoch": 0.7409695472054655, + "grad_norm": 1.9035766124725342, + "learning_rate": 4.9764341482011545e-05, + "loss": 0.1815, + "step": 20390 + }, + { + "epoch": 0.7413329457082637, + "grad_norm": 2.228940725326538, + "learning_rate": 4.976363010988698e-05, + "loss": 0.1995, + "step": 20400 + }, + { + "epoch": 0.7413329457082637, + "eval_loss": 0.35944151878356934, + "eval_runtime": 179.8589, + "eval_samples_per_second": 41.221, + "eval_steps_per_second": 5.154, + "eval_wer": 0.200864087715795, + "step": 20400 + }, + { + "epoch": 0.7416963442110619, + "grad_norm": 1.5204256772994995, + "learning_rate": 4.976291767078571e-05, + "loss": 1.6497, + "step": 20410 + }, + { + "epoch": 0.74205974271386, + "grad_norm": 1.3520594835281372, + "learning_rate": 4.976220416473842e-05, + "loss": 0.1503, + "step": 20420 + }, + { + "epoch": 0.7424231412166582, + "grad_norm": 2.7322440147399902, + "learning_rate": 4.976148959177586e-05, + "loss": 0.1784, + "step": 20430 + }, + { + "epoch": 0.7427865397194564, + "grad_norm": 1.3193668127059937, + "learning_rate": 4.9760773951928815e-05, + "loss": 0.1685, + "step": 20440 + }, + { + "epoch": 0.7431499382222545, + "grad_norm": 11.000434875488281, + "learning_rate": 4.976005724522812e-05, + "loss": 0.2147, + "step": 20450 + }, + { + "epoch": 0.7435133367250527, + "grad_norm": 1.1825796365737915, + "learning_rate": 4.9759339471704656e-05, + "loss": 0.2116, + "step": 20460 + }, + { + "epoch": 0.7438767352278509, + "grad_norm": 1.1518877744674683, + "learning_rate": 4.975862063138934e-05, + "loss": 0.141, + "step": 20470 + }, + { + "epoch": 0.744240133730649, + "grad_norm": 6.054372310638428, + "learning_rate": 4.975790072431316e-05, + "loss": 0.1766, + "step": 20480 + }, + { + "epoch": 0.7446035322334472, + "grad_norm": 1.0629233121871948, + "learning_rate": 4.975717975050713e-05, + "loss": 0.1641, + "step": 20490 + }, + { + "epoch": 0.7449669307362453, + "grad_norm": 2.4782843589782715, + "learning_rate": 4.97564577100023e-05, + "loss": 0.2186, + "step": 20500 + }, + { + "epoch": 0.7453303292390435, + "grad_norm": 1.5713534355163574, + "learning_rate": 4.975573460282979e-05, + "loss": 0.1535, + "step": 20510 + }, + { + "epoch": 0.7456937277418417, + "grad_norm": 0.7279618382453918, + "learning_rate": 4.975501042902078e-05, + "loss": 0.1372, + "step": 20520 + }, + { + "epoch": 0.7460571262446398, + "grad_norm": 5.573297500610352, + "learning_rate": 4.975428518860643e-05, + "loss": 0.161, + "step": 20530 + }, + { + "epoch": 0.746420524747438, + "grad_norm": 1.022141695022583, + "learning_rate": 4.975355888161801e-05, + "loss": 0.1645, + "step": 20540 + }, + { + "epoch": 0.7467839232502362, + "grad_norm": 2.9584996700286865, + "learning_rate": 4.9752831508086805e-05, + "loss": 0.2085, + "step": 20550 + }, + { + "epoch": 0.7471473217530343, + "grad_norm": 2.2749557495117188, + "learning_rate": 4.975210306804418e-05, + "loss": 0.1531, + "step": 20560 + }, + { + "epoch": 0.7475107202558325, + "grad_norm": 1.877822995185852, + "learning_rate": 4.9751373561521484e-05, + "loss": 0.1654, + "step": 20570 + }, + { + "epoch": 0.7478741187586307, + "grad_norm": 7.727886199951172, + "learning_rate": 4.975064298855017e-05, + "loss": 0.2026, + "step": 20580 + }, + { + "epoch": 0.7482375172614288, + "grad_norm": 1.2424033880233765, + "learning_rate": 4.974991134916171e-05, + "loss": 0.1834, + "step": 20590 + }, + { + "epoch": 0.748600915764227, + "grad_norm": 7.272613525390625, + "learning_rate": 4.974917864338764e-05, + "loss": 0.2266, + "step": 20600 + }, + { + "epoch": 0.7489643142670253, + "grad_norm": 0.6424925327301025, + "learning_rate": 4.974844487125952e-05, + "loss": 0.1496, + "step": 20610 + }, + { + "epoch": 0.7493277127698234, + "grad_norm": 2.064819097518921, + "learning_rate": 4.974771003280896e-05, + "loss": 0.192, + "step": 20620 + }, + { + "epoch": 0.7496911112726216, + "grad_norm": 2.55157470703125, + "learning_rate": 4.974697412806763e-05, + "loss": 0.1863, + "step": 20630 + }, + { + "epoch": 0.7500545097754198, + "grad_norm": 1.10732901096344, + "learning_rate": 4.974623715706723e-05, + "loss": 0.1452, + "step": 20640 + }, + { + "epoch": 0.7504179082782179, + "grad_norm": 6.665337562561035, + "learning_rate": 4.9745499119839526e-05, + "loss": 0.2393, + "step": 20650 + }, + { + "epoch": 0.7507813067810161, + "grad_norm": 2.315764904022217, + "learning_rate": 4.974476001641631e-05, + "loss": 0.1724, + "step": 20660 + }, + { + "epoch": 0.7511447052838143, + "grad_norm": 1.7643327713012695, + "learning_rate": 4.974401984682942e-05, + "loss": 0.1676, + "step": 20670 + }, + { + "epoch": 0.7515081037866124, + "grad_norm": 2.556265115737915, + "learning_rate": 4.974327861111075e-05, + "loss": 0.1706, + "step": 20680 + }, + { + "epoch": 0.7518715022894106, + "grad_norm": 1.0939987897872925, + "learning_rate": 4.9742536309292257e-05, + "loss": 0.1514, + "step": 20690 + }, + { + "epoch": 0.7522349007922088, + "grad_norm": 2.3087685108184814, + "learning_rate": 4.97417929414059e-05, + "loss": 0.2064, + "step": 20700 + }, + { + "epoch": 0.7525982992950069, + "grad_norm": 1.6968719959259033, + "learning_rate": 4.974104850748372e-05, + "loss": 0.65, + "step": 20710 + }, + { + "epoch": 0.7529616977978051, + "grad_norm": 1.3144559860229492, + "learning_rate": 4.974030300755779e-05, + "loss": 3.2825, + "step": 20720 + }, + { + "epoch": 0.7533250963006033, + "grad_norm": 2.346266031265259, + "learning_rate": 4.973955644166022e-05, + "loss": 0.1621, + "step": 20730 + }, + { + "epoch": 0.7536884948034014, + "grad_norm": 0.8026605248451233, + "learning_rate": 4.973880880982319e-05, + "loss": 0.1566, + "step": 20740 + }, + { + "epoch": 0.7540518933061996, + "grad_norm": 8.70439624786377, + "learning_rate": 4.973806011207891e-05, + "loss": 0.2671, + "step": 20750 + }, + { + "epoch": 0.7544152918089978, + "grad_norm": 0.9762817025184631, + "learning_rate": 4.973731034845964e-05, + "loss": 0.1692, + "step": 20760 + }, + { + "epoch": 0.7547786903117959, + "grad_norm": 1.3316736221313477, + "learning_rate": 4.973655951899768e-05, + "loss": 0.1605, + "step": 20770 + }, + { + "epoch": 0.7551420888145941, + "grad_norm": 1.9772186279296875, + "learning_rate": 4.9735807623725394e-05, + "loss": 0.1551, + "step": 20780 + }, + { + "epoch": 0.7555054873173922, + "grad_norm": 1.4639058113098145, + "learning_rate": 4.9735054662675154e-05, + "loss": 0.2075, + "step": 20790 + }, + { + "epoch": 0.7558688858201904, + "grad_norm": 10.605428695678711, + "learning_rate": 4.973430063587943e-05, + "loss": 0.2542, + "step": 20800 + }, + { + "epoch": 0.7562322843229886, + "grad_norm": 1.9553091526031494, + "learning_rate": 4.9733545543370684e-05, + "loss": 0.1353, + "step": 20810 + }, + { + "epoch": 0.7565956828257867, + "grad_norm": 2.2855403423309326, + "learning_rate": 4.9732789385181466e-05, + "loss": 0.5004, + "step": 20820 + }, + { + "epoch": 0.7569590813285849, + "grad_norm": 1.7468841075897217, + "learning_rate": 4.973203216134435e-05, + "loss": 0.1433, + "step": 20830 + }, + { + "epoch": 0.7573224798313831, + "grad_norm": 0.9522268772125244, + "learning_rate": 4.973127387189197e-05, + "loss": 0.1488, + "step": 20840 + }, + { + "epoch": 0.7576858783341812, + "grad_norm": 13.445122718811035, + "learning_rate": 4.9730514516856996e-05, + "loss": 0.2154, + "step": 20850 + }, + { + "epoch": 0.7580492768369794, + "grad_norm": 1.0712549686431885, + "learning_rate": 4.972975409627214e-05, + "loss": 0.144, + "step": 20860 + }, + { + "epoch": 0.7584126753397776, + "grad_norm": 0.6894069314002991, + "learning_rate": 4.972899261017017e-05, + "loss": 0.1612, + "step": 20870 + }, + { + "epoch": 0.7587760738425757, + "grad_norm": 2.059844970703125, + "learning_rate": 4.9728230058583893e-05, + "loss": 0.1664, + "step": 20880 + }, + { + "epoch": 0.7591394723453739, + "grad_norm": 2.0392911434173584, + "learning_rate": 4.972746644154616e-05, + "loss": 0.1991, + "step": 20890 + }, + { + "epoch": 0.7595028708481721, + "grad_norm": 2.9800570011138916, + "learning_rate": 4.972670175908989e-05, + "loss": 0.2725, + "step": 20900 + }, + { + "epoch": 0.7598662693509702, + "grad_norm": 2.390784502029419, + "learning_rate": 4.972593601124801e-05, + "loss": 0.3158, + "step": 20910 + }, + { + "epoch": 0.7602296678537684, + "grad_norm": 6.595739364624023, + "learning_rate": 4.972516919805352e-05, + "loss": 0.1658, + "step": 20920 + }, + { + "epoch": 0.7605930663565666, + "grad_norm": 2.2043120861053467, + "learning_rate": 4.972440131953947e-05, + "loss": 0.163, + "step": 20930 + }, + { + "epoch": 0.7609564648593647, + "grad_norm": 0.9223461747169495, + "learning_rate": 4.972363237573894e-05, + "loss": 0.1276, + "step": 20940 + }, + { + "epoch": 0.761319863362163, + "grad_norm": 12.165254592895508, + "learning_rate": 4.972286236668505e-05, + "loss": 0.2105, + "step": 20950 + }, + { + "epoch": 0.7616832618649612, + "grad_norm": 1.2093875408172607, + "learning_rate": 4.9722091292410984e-05, + "loss": 0.1697, + "step": 20960 + }, + { + "epoch": 0.7620466603677593, + "grad_norm": 0.8847984075546265, + "learning_rate": 4.9721396414828535e-05, + "loss": 3.043, + "step": 20970 + }, + { + "epoch": 0.7624100588705575, + "grad_norm": 1.6682274341583252, + "learning_rate": 4.9720623316727705e-05, + "loss": 0.1841, + "step": 20980 + }, + { + "epoch": 0.7627734573733557, + "grad_norm": 1.2780869007110596, + "learning_rate": 4.971984915350317e-05, + "loss": 0.1412, + "step": 20990 + }, + { + "epoch": 0.7631368558761538, + "grad_norm": 36.68233108520508, + "learning_rate": 4.97190739251883e-05, + "loss": 0.2059, + "step": 21000 + }, + { + "epoch": 0.7631368558761538, + "eval_loss": 0.3906314969062805, + "eval_runtime": 180.3915, + "eval_samples_per_second": 41.1, + "eval_steps_per_second": 5.139, + "eval_wer": 0.21157441864686768, + "step": 21000 + }, + { + "epoch": 0.763500254378952, + "grad_norm": 24.036775588989258, + "learning_rate": 4.971829763181647e-05, + "loss": 0.3942, + "step": 21010 + }, + { + "epoch": 0.7638636528817502, + "grad_norm": 1.6546601057052612, + "learning_rate": 4.971752027342115e-05, + "loss": 0.1555, + "step": 21020 + }, + { + "epoch": 0.7642270513845483, + "grad_norm": 3.100032091140747, + "learning_rate": 4.971674185003583e-05, + "loss": 0.1917, + "step": 21030 + }, + { + "epoch": 0.7645904498873465, + "grad_norm": 3.2824084758758545, + "learning_rate": 4.9715962361694045e-05, + "loss": 0.1744, + "step": 21040 + }, + { + "epoch": 0.7649538483901447, + "grad_norm": 7.680720329284668, + "learning_rate": 4.9715181808429376e-05, + "loss": 0.2567, + "step": 21050 + }, + { + "epoch": 0.7653172468929428, + "grad_norm": 1.5478154420852661, + "learning_rate": 4.971440019027547e-05, + "loss": 0.1949, + "step": 21060 + }, + { + "epoch": 0.765680645395741, + "grad_norm": 1.1294565200805664, + "learning_rate": 4.971361750726598e-05, + "loss": 0.1546, + "step": 21070 + }, + { + "epoch": 0.7660440438985391, + "grad_norm": 3.339749813079834, + "learning_rate": 4.971283375943465e-05, + "loss": 0.1784, + "step": 21080 + }, + { + "epoch": 0.7664074424013373, + "grad_norm": 1.9784200191497803, + "learning_rate": 4.9712048946815244e-05, + "loss": 0.8969, + "step": 21090 + }, + { + "epoch": 0.7667708409041355, + "grad_norm": 13.550655364990234, + "learning_rate": 4.971126306944157e-05, + "loss": 0.2037, + "step": 21100 + }, + { + "epoch": 0.7671342394069336, + "grad_norm": 60.52021408081055, + "learning_rate": 4.971047612734749e-05, + "loss": 1.0649, + "step": 21110 + }, + { + "epoch": 0.7674976379097318, + "grad_norm": 1.7544801235198975, + "learning_rate": 4.970968812056693e-05, + "loss": 0.1619, + "step": 21120 + }, + { + "epoch": 0.76786103641253, + "grad_norm": 2.0749471187591553, + "learning_rate": 4.970889904913382e-05, + "loss": 0.1934, + "step": 21130 + }, + { + "epoch": 0.7682244349153281, + "grad_norm": 2.33097767829895, + "learning_rate": 4.970810891308215e-05, + "loss": 0.3121, + "step": 21140 + }, + { + "epoch": 0.7685878334181263, + "grad_norm": 3.5586440563201904, + "learning_rate": 4.9707317712445996e-05, + "loss": 0.2198, + "step": 21150 + }, + { + "epoch": 0.7689512319209245, + "grad_norm": 1.7430351972579956, + "learning_rate": 4.970652544725942e-05, + "loss": 0.1884, + "step": 21160 + }, + { + "epoch": 0.7693146304237226, + "grad_norm": 1.2475924491882324, + "learning_rate": 4.9705732117556574e-05, + "loss": 0.183, + "step": 21170 + }, + { + "epoch": 0.7696780289265208, + "grad_norm": 1.369491457939148, + "learning_rate": 4.970493772337164e-05, + "loss": 0.1854, + "step": 21180 + }, + { + "epoch": 0.770041427429319, + "grad_norm": 1.8093339204788208, + "learning_rate": 4.970414226473883e-05, + "loss": 0.1389, + "step": 21190 + }, + { + "epoch": 0.7704048259321171, + "grad_norm": 15.3746919631958, + "learning_rate": 4.9703345741692425e-05, + "loss": 0.2603, + "step": 21200 + }, + { + "epoch": 0.7707682244349153, + "grad_norm": 0.9604819416999817, + "learning_rate": 4.970254815426675e-05, + "loss": 0.1663, + "step": 21210 + }, + { + "epoch": 0.7711316229377135, + "grad_norm": 1.3457413911819458, + "learning_rate": 4.970174950249617e-05, + "loss": 0.1784, + "step": 21220 + }, + { + "epoch": 0.7714950214405116, + "grad_norm": 3.19975209236145, + "learning_rate": 4.970094978641509e-05, + "loss": 0.2369, + "step": 21230 + }, + { + "epoch": 0.7718584199433098, + "grad_norm": 1.4974329471588135, + "learning_rate": 4.970014900605797e-05, + "loss": 0.1553, + "step": 21240 + }, + { + "epoch": 0.772221818446108, + "grad_norm": 6.426448345184326, + "learning_rate": 4.969934716145932e-05, + "loss": 0.1848, + "step": 21250 + }, + { + "epoch": 0.7725852169489061, + "grad_norm": 4.081672668457031, + "learning_rate": 4.969854425265368e-05, + "loss": 0.2135, + "step": 21260 + }, + { + "epoch": 0.7729486154517043, + "grad_norm": 0.7796603441238403, + "learning_rate": 4.9697740279675635e-05, + "loss": 0.2853, + "step": 21270 + }, + { + "epoch": 0.7733120139545026, + "grad_norm": 1.2303035259246826, + "learning_rate": 4.969693524255984e-05, + "loss": 0.5319, + "step": 21280 + }, + { + "epoch": 0.7736754124573006, + "grad_norm": 0.9134958386421204, + "learning_rate": 4.9696129141340986e-05, + "loss": 0.1789, + "step": 21290 + }, + { + "epoch": 0.7740388109600989, + "grad_norm": 1.8099846839904785, + "learning_rate": 4.969532197605379e-05, + "loss": 0.1967, + "step": 21300 + }, + { + "epoch": 0.7744022094628971, + "grad_norm": 3.75593900680542, + "learning_rate": 4.969451374673304e-05, + "loss": 0.1908, + "step": 21310 + }, + { + "epoch": 0.7747656079656952, + "grad_norm": 2.851921319961548, + "learning_rate": 4.969370445341355e-05, + "loss": 0.1616, + "step": 21320 + }, + { + "epoch": 0.7751290064684934, + "grad_norm": 2.978349447250366, + "learning_rate": 4.96928940961302e-05, + "loss": 0.1682, + "step": 21330 + }, + { + "epoch": 0.7754924049712916, + "grad_norm": 2.945326089859009, + "learning_rate": 4.96920826749179e-05, + "loss": 0.1897, + "step": 21340 + }, + { + "epoch": 0.7758558034740897, + "grad_norm": 5.529159069061279, + "learning_rate": 4.9691270189811614e-05, + "loss": 0.2351, + "step": 21350 + }, + { + "epoch": 0.7762192019768879, + "grad_norm": 0.816582441329956, + "learning_rate": 4.969045664084634e-05, + "loss": 0.255, + "step": 21360 + }, + { + "epoch": 0.776582600479686, + "grad_norm": 3.373413324356079, + "learning_rate": 4.968964202805715e-05, + "loss": 0.165, + "step": 21370 + }, + { + "epoch": 0.7769459989824842, + "grad_norm": 1.4986653327941895, + "learning_rate": 4.968882635147912e-05, + "loss": 0.1803, + "step": 21380 + }, + { + "epoch": 0.7773093974852824, + "grad_norm": 4.049030303955078, + "learning_rate": 4.968800961114741e-05, + "loss": 0.2312, + "step": 21390 + }, + { + "epoch": 0.7776727959880805, + "grad_norm": 1.8616725206375122, + "learning_rate": 4.968719180709721e-05, + "loss": 0.2038, + "step": 21400 + }, + { + "epoch": 0.7780361944908787, + "grad_norm": 0.7410339117050171, + "learning_rate": 4.968637293936374e-05, + "loss": 0.1736, + "step": 21410 + }, + { + "epoch": 0.7783995929936769, + "grad_norm": 0.9004227519035339, + "learning_rate": 4.968555300798231e-05, + "loss": 0.6926, + "step": 21420 + }, + { + "epoch": 0.778762991496475, + "grad_norm": 1.9912917613983154, + "learning_rate": 4.968473201298822e-05, + "loss": 0.183, + "step": 21430 + }, + { + "epoch": 0.7791263899992732, + "grad_norm": 1.5098110437393188, + "learning_rate": 4.968390995441686e-05, + "loss": 0.1555, + "step": 21440 + }, + { + "epoch": 0.7794897885020714, + "grad_norm": 1.5687317848205566, + "learning_rate": 4.9683086832303655e-05, + "loss": 0.199, + "step": 21450 + }, + { + "epoch": 0.7798531870048695, + "grad_norm": 1.456758975982666, + "learning_rate": 4.9682262646684054e-05, + "loss": 0.1573, + "step": 21460 + }, + { + "epoch": 0.7802165855076677, + "grad_norm": 1.152894377708435, + "learning_rate": 4.9681437397593575e-05, + "loss": 0.136, + "step": 21470 + }, + { + "epoch": 0.7805799840104659, + "grad_norm": 6.458597183227539, + "learning_rate": 4.968061108506777e-05, + "loss": 0.2111, + "step": 21480 + }, + { + "epoch": 0.780943382513264, + "grad_norm": 1.3398655652999878, + "learning_rate": 4.967978370914226e-05, + "loss": 0.1785, + "step": 21490 + }, + { + "epoch": 0.7813067810160622, + "grad_norm": 12.363832473754883, + "learning_rate": 4.967895526985267e-05, + "loss": 0.217, + "step": 21500 + }, + { + "epoch": 0.7816701795188604, + "grad_norm": 3.800936698913574, + "learning_rate": 4.967812576723471e-05, + "loss": 0.1533, + "step": 21510 + }, + { + "epoch": 0.7820335780216585, + "grad_norm": 0.9531782865524292, + "learning_rate": 4.967729520132411e-05, + "loss": 1.353, + "step": 21520 + }, + { + "epoch": 0.7823969765244567, + "grad_norm": 1.3066377639770508, + "learning_rate": 4.967646357215667e-05, + "loss": 0.1338, + "step": 21530 + }, + { + "epoch": 0.7827603750272549, + "grad_norm": 1.1814554929733276, + "learning_rate": 4.967563087976821e-05, + "loss": 0.1735, + "step": 21540 + }, + { + "epoch": 0.783123773530053, + "grad_norm": 4.6233367919921875, + "learning_rate": 4.967479712419461e-05, + "loss": 0.2266, + "step": 21550 + }, + { + "epoch": 0.7834871720328512, + "grad_norm": 1.366377353668213, + "learning_rate": 4.96739623054718e-05, + "loss": 0.1595, + "step": 21560 + }, + { + "epoch": 0.7838505705356494, + "grad_norm": 2.0722217559814453, + "learning_rate": 4.967312642363574e-05, + "loss": 0.1721, + "step": 21570 + }, + { + "epoch": 0.7842139690384475, + "grad_norm": 2.186340570449829, + "learning_rate": 4.967228947872245e-05, + "loss": 0.1653, + "step": 21580 + }, + { + "epoch": 0.7845773675412457, + "grad_norm": 2.4222512245178223, + "learning_rate": 4.9671451470767996e-05, + "loss": 0.1446, + "step": 21590 + }, + { + "epoch": 0.784940766044044, + "grad_norm": 62.15577697753906, + "learning_rate": 4.9670612399808467e-05, + "loss": 0.2911, + "step": 21600 + }, + { + "epoch": 0.784940766044044, + "eval_loss": 0.3627218008041382, + "eval_runtime": 179.8971, + "eval_samples_per_second": 41.212, + "eval_steps_per_second": 5.153, + "eval_wer": 0.21580409170947773, + "step": 21600 + }, + { + "epoch": 0.785304164546842, + "grad_norm": 1.236609935760498, + "learning_rate": 4.9669772265880044e-05, + "loss": 0.1417, + "step": 21610 + }, + { + "epoch": 0.7856675630496402, + "grad_norm": 1.2447402477264404, + "learning_rate": 4.96689310690189e-05, + "loss": 0.1508, + "step": 21620 + }, + { + "epoch": 0.7860309615524385, + "grad_norm": 4.567975997924805, + "learning_rate": 4.966808880926129e-05, + "loss": 0.3503, + "step": 21630 + }, + { + "epoch": 0.7863943600552366, + "grad_norm": 0.9699403047561646, + "learning_rate": 4.96672454866435e-05, + "loss": 0.1615, + "step": 21640 + }, + { + "epoch": 0.7867577585580348, + "grad_norm": 11.004621505737305, + "learning_rate": 4.966640110120187e-05, + "loss": 0.2604, + "step": 21650 + }, + { + "epoch": 0.7871211570608329, + "grad_norm": 1.3322606086730957, + "learning_rate": 4.9665555652972784e-05, + "loss": 0.1958, + "step": 21660 + }, + { + "epoch": 0.7874845555636311, + "grad_norm": 1.0020729303359985, + "learning_rate": 4.966470914199266e-05, + "loss": 0.1207, + "step": 21670 + }, + { + "epoch": 0.7878479540664293, + "grad_norm": 3.457019567489624, + "learning_rate": 4.9663861568297976e-05, + "loss": 0.3774, + "step": 21680 + }, + { + "epoch": 0.7882113525692274, + "grad_norm": 2.4993362426757812, + "learning_rate": 4.9663012931925254e-05, + "loss": 0.1537, + "step": 21690 + }, + { + "epoch": 0.7885747510720256, + "grad_norm": 11.104598999023438, + "learning_rate": 4.966216323291106e-05, + "loss": 0.2472, + "step": 21700 + }, + { + "epoch": 0.7889381495748238, + "grad_norm": 1.5027676820755005, + "learning_rate": 4.9661312471291996e-05, + "loss": 0.154, + "step": 21710 + }, + { + "epoch": 0.7893015480776219, + "grad_norm": 1.1929068565368652, + "learning_rate": 4.9660460647104726e-05, + "loss": 0.1416, + "step": 21720 + }, + { + "epoch": 0.7896649465804201, + "grad_norm": 17.008617401123047, + "learning_rate": 4.965960776038594e-05, + "loss": 0.3858, + "step": 21730 + }, + { + "epoch": 0.7900283450832183, + "grad_norm": 1.6043013334274292, + "learning_rate": 4.96587538111724e-05, + "loss": 0.1624, + "step": 21740 + }, + { + "epoch": 0.7903917435860164, + "grad_norm": 10.960922241210938, + "learning_rate": 4.96578987995009e-05, + "loss": 0.2034, + "step": 21750 + }, + { + "epoch": 0.7907551420888146, + "grad_norm": 1.4807969331741333, + "learning_rate": 4.965704272540826e-05, + "loss": 0.1491, + "step": 21760 + }, + { + "epoch": 0.7911185405916128, + "grad_norm": 0.9724571108818054, + "learning_rate": 4.965618558893139e-05, + "loss": 0.1455, + "step": 21770 + }, + { + "epoch": 0.7914819390944109, + "grad_norm": 2.6035313606262207, + "learning_rate": 4.965532739010722e-05, + "loss": 0.1696, + "step": 21780 + }, + { + "epoch": 0.7918453375972091, + "grad_norm": 0.7998749017715454, + "learning_rate": 4.9654468128972695e-05, + "loss": 0.1549, + "step": 21790 + }, + { + "epoch": 0.7922087361000073, + "grad_norm": 14.13917350769043, + "learning_rate": 4.965360780556487e-05, + "loss": 0.2124, + "step": 21800 + }, + { + "epoch": 0.7925721346028054, + "grad_norm": 1.88883638381958, + "learning_rate": 4.9652746419920804e-05, + "loss": 0.1475, + "step": 21810 + }, + { + "epoch": 0.7929355331056036, + "grad_norm": 1.5585650205612183, + "learning_rate": 4.965188397207761e-05, + "loss": 0.1534, + "step": 21820 + }, + { + "epoch": 0.7932989316084018, + "grad_norm": 2.6418206691741943, + "learning_rate": 4.965102046207244e-05, + "loss": 0.1608, + "step": 21830 + }, + { + "epoch": 0.7936623301111999, + "grad_norm": 1.1672085523605347, + "learning_rate": 4.965015588994251e-05, + "loss": 0.1596, + "step": 21840 + }, + { + "epoch": 0.7940257286139981, + "grad_norm": 3.009610652923584, + "learning_rate": 4.964929025572507e-05, + "loss": 0.1805, + "step": 21850 + }, + { + "epoch": 0.7943891271167963, + "grad_norm": 1.8774985074996948, + "learning_rate": 4.964842355945742e-05, + "loss": 0.1583, + "step": 21860 + }, + { + "epoch": 0.7947525256195944, + "grad_norm": 1.1219382286071777, + "learning_rate": 4.964755580117689e-05, + "loss": 0.1524, + "step": 21870 + }, + { + "epoch": 0.7951159241223926, + "grad_norm": 6.0511627197265625, + "learning_rate": 4.964668698092088e-05, + "loss": 0.2349, + "step": 21880 + }, + { + "epoch": 0.7954793226251908, + "grad_norm": 3.4487464427948, + "learning_rate": 4.9645817098726824e-05, + "loss": 0.1915, + "step": 21890 + }, + { + "epoch": 0.7958427211279889, + "grad_norm": 4.096559524536133, + "learning_rate": 4.9644946154632196e-05, + "loss": 0.2067, + "step": 21900 + }, + { + "epoch": 0.7962061196307871, + "grad_norm": 4.144627571105957, + "learning_rate": 4.9644074148674526e-05, + "loss": 0.1564, + "step": 21910 + }, + { + "epoch": 0.7965695181335853, + "grad_norm": 1.3851386308670044, + "learning_rate": 4.9643201080891384e-05, + "loss": 0.1656, + "step": 21920 + }, + { + "epoch": 0.7969329166363834, + "grad_norm": 1.3050576448440552, + "learning_rate": 4.9642326951320384e-05, + "loss": 0.1555, + "step": 21930 + }, + { + "epoch": 0.7972963151391816, + "grad_norm": 1.578134298324585, + "learning_rate": 4.96414517599992e-05, + "loss": 0.1637, + "step": 21940 + }, + { + "epoch": 0.7976597136419797, + "grad_norm": 10.813237190246582, + "learning_rate": 4.9640575506965535e-05, + "loss": 0.3143, + "step": 21950 + }, + { + "epoch": 0.798023112144778, + "grad_norm": 0.7118828892707825, + "learning_rate": 4.963969819225713e-05, + "loss": 0.1581, + "step": 21960 + }, + { + "epoch": 0.7983865106475762, + "grad_norm": 1.389856219291687, + "learning_rate": 4.963881981591182e-05, + "loss": 0.1466, + "step": 21970 + }, + { + "epoch": 0.7987499091503742, + "grad_norm": 1.1921494007110596, + "learning_rate": 4.963794037796741e-05, + "loss": 0.1604, + "step": 21980 + }, + { + "epoch": 0.7991133076531725, + "grad_norm": 4.355441093444824, + "learning_rate": 4.963705987846182e-05, + "loss": 0.1792, + "step": 21990 + }, + { + "epoch": 0.7994767061559707, + "grad_norm": 8.20235824584961, + "learning_rate": 4.963617831743298e-05, + "loss": 0.2314, + "step": 22000 + }, + { + "epoch": 0.7998401046587688, + "grad_norm": 1.3720426559448242, + "learning_rate": 4.963529569491887e-05, + "loss": 0.1378, + "step": 22010 + }, + { + "epoch": 0.800203503161567, + "grad_norm": 1.490679383277893, + "learning_rate": 4.963441201095752e-05, + "loss": 0.1505, + "step": 22020 + }, + { + "epoch": 0.8005669016643652, + "grad_norm": 1.576416254043579, + "learning_rate": 4.963352726558701e-05, + "loss": 0.1379, + "step": 22030 + }, + { + "epoch": 0.8009303001671633, + "grad_norm": 1.547780156135559, + "learning_rate": 4.9632641458845454e-05, + "loss": 0.1584, + "step": 22040 + }, + { + "epoch": 0.8012936986699615, + "grad_norm": 41.95133972167969, + "learning_rate": 4.963175459077102e-05, + "loss": 0.6762, + "step": 22050 + }, + { + "epoch": 0.8016570971727597, + "grad_norm": 0.8984355330467224, + "learning_rate": 4.963086666140192e-05, + "loss": 0.1513, + "step": 22060 + }, + { + "epoch": 0.8020204956755578, + "grad_norm": 1.6865235567092896, + "learning_rate": 4.9629977670776404e-05, + "loss": 0.1659, + "step": 22070 + }, + { + "epoch": 0.802383894178356, + "grad_norm": 5.291965007781982, + "learning_rate": 4.96290876189328e-05, + "loss": 0.1735, + "step": 22080 + }, + { + "epoch": 0.8027472926811542, + "grad_norm": 0.9124179482460022, + "learning_rate": 4.962819650590943e-05, + "loss": 0.163, + "step": 22090 + }, + { + "epoch": 0.8031106911839523, + "grad_norm": 5.151334762573242, + "learning_rate": 4.9627304331744705e-05, + "loss": 0.2997, + "step": 22100 + }, + { + "epoch": 0.8034740896867505, + "grad_norm": 0.7093039155006409, + "learning_rate": 4.9626411096477066e-05, + "loss": 0.1297, + "step": 22110 + }, + { + "epoch": 0.8038374881895487, + "grad_norm": 0.7643496990203857, + "learning_rate": 4.962551680014499e-05, + "loss": 0.1568, + "step": 22120 + }, + { + "epoch": 0.8042008866923468, + "grad_norm": 2.0619888305664062, + "learning_rate": 4.9624621442787005e-05, + "loss": 0.1685, + "step": 22130 + }, + { + "epoch": 0.804564285195145, + "grad_norm": 1.3836963176727295, + "learning_rate": 4.9623725024441704e-05, + "loss": 0.1597, + "step": 22140 + }, + { + "epoch": 0.8049276836979432, + "grad_norm": 10.014172554016113, + "learning_rate": 4.96228275451477e-05, + "loss": 0.2371, + "step": 22150 + }, + { + "epoch": 0.8052910822007413, + "grad_norm": 0.8201650381088257, + "learning_rate": 4.962192900494367e-05, + "loss": 0.1457, + "step": 22160 + }, + { + "epoch": 0.8056544807035395, + "grad_norm": 2.9909164905548096, + "learning_rate": 4.962102940386832e-05, + "loss": 0.1584, + "step": 22170 + }, + { + "epoch": 0.8060178792063377, + "grad_norm": 1.8986990451812744, + "learning_rate": 4.9620128741960414e-05, + "loss": 0.1521, + "step": 22180 + }, + { + "epoch": 0.8063812777091358, + "grad_norm": 1.2521679401397705, + "learning_rate": 4.9619227019258766e-05, + "loss": 0.1398, + "step": 22190 + }, + { + "epoch": 0.806744676211934, + "grad_norm": 9.087230682373047, + "learning_rate": 4.9618324235802214e-05, + "loss": 0.2414, + "step": 22200 + }, + { + "epoch": 0.806744676211934, + "eval_loss": 0.3814217448234558, + "eval_runtime": 180.9296, + "eval_samples_per_second": 40.977, + "eval_steps_per_second": 5.124, + "eval_wer": 0.21859059306188394, + "step": 22200 + }, + { + "epoch": 0.8071080747147322, + "grad_norm": 1.3065155744552612, + "learning_rate": 4.9617420391629666e-05, + "loss": 0.1382, + "step": 22210 + }, + { + "epoch": 0.8074714732175303, + "grad_norm": 1.0691299438476562, + "learning_rate": 4.961651548678006e-05, + "loss": 0.1692, + "step": 22220 + }, + { + "epoch": 0.8078348717203285, + "grad_norm": 2.515131711959839, + "learning_rate": 4.961560952129239e-05, + "loss": 0.1719, + "step": 22230 + }, + { + "epoch": 0.8081982702231267, + "grad_norm": 1.3650884628295898, + "learning_rate": 4.9614702495205686e-05, + "loss": 0.1918, + "step": 22240 + }, + { + "epoch": 0.8085616687259248, + "grad_norm": 4.730445384979248, + "learning_rate": 4.961379440855903e-05, + "loss": 0.2002, + "step": 22250 + }, + { + "epoch": 0.808925067228723, + "grad_norm": 1.0421544313430786, + "learning_rate": 4.9612885261391555e-05, + "loss": 0.1544, + "step": 22260 + }, + { + "epoch": 0.8092884657315211, + "grad_norm": 1.1957643032073975, + "learning_rate": 4.961197505374242e-05, + "loss": 0.1471, + "step": 22270 + }, + { + "epoch": 0.8096518642343193, + "grad_norm": 2.936429977416992, + "learning_rate": 4.961106378565086e-05, + "loss": 0.2068, + "step": 22280 + }, + { + "epoch": 0.8100152627371175, + "grad_norm": 2.0803070068359375, + "learning_rate": 4.961015145715612e-05, + "loss": 0.1496, + "step": 22290 + }, + { + "epoch": 0.8103786612399156, + "grad_norm": 10.564451217651367, + "learning_rate": 4.960923806829752e-05, + "loss": 0.2549, + "step": 22300 + }, + { + "epoch": 0.8107420597427138, + "grad_norm": 1.0569120645523071, + "learning_rate": 4.9608323619114406e-05, + "loss": 0.1624, + "step": 22310 + }, + { + "epoch": 0.8111054582455121, + "grad_norm": 1.4505226612091064, + "learning_rate": 4.960740810964619e-05, + "loss": 0.1523, + "step": 22320 + }, + { + "epoch": 0.8114688567483102, + "grad_norm": 5.100767135620117, + "learning_rate": 4.960649153993231e-05, + "loss": 0.1562, + "step": 22330 + }, + { + "epoch": 0.8118322552511084, + "grad_norm": 2.2787342071533203, + "learning_rate": 4.960557391001226e-05, + "loss": 0.1691, + "step": 22340 + }, + { + "epoch": 0.8121956537539066, + "grad_norm": 15.405048370361328, + "learning_rate": 4.960465521992558e-05, + "loss": 0.2542, + "step": 22350 + }, + { + "epoch": 0.8125590522567047, + "grad_norm": 0.7388777732849121, + "learning_rate": 4.9603735469711845e-05, + "loss": 0.1522, + "step": 22360 + }, + { + "epoch": 0.8129224507595029, + "grad_norm": 0.9490914344787598, + "learning_rate": 4.960281465941069e-05, + "loss": 0.1317, + "step": 22370 + }, + { + "epoch": 0.8132858492623011, + "grad_norm": 2.281085252761841, + "learning_rate": 4.960189278906179e-05, + "loss": 0.1503, + "step": 22380 + }, + { + "epoch": 0.8136492477650992, + "grad_norm": 0.9328985810279846, + "learning_rate": 4.960096985870486e-05, + "loss": 0.1556, + "step": 22390 + }, + { + "epoch": 0.8140126462678974, + "grad_norm": 4.4524617195129395, + "learning_rate": 4.960004586837967e-05, + "loss": 0.2387, + "step": 22400 + }, + { + "epoch": 0.8143760447706956, + "grad_norm": 1.5577040910720825, + "learning_rate": 4.959912081812603e-05, + "loss": 0.1557, + "step": 22410 + }, + { + "epoch": 0.8147394432734937, + "grad_norm": 2.358896493911743, + "learning_rate": 4.95981947079838e-05, + "loss": 0.2016, + "step": 22420 + }, + { + "epoch": 0.8151028417762919, + "grad_norm": 2.1001386642456055, + "learning_rate": 4.9597267537992885e-05, + "loss": 0.1587, + "step": 22430 + }, + { + "epoch": 0.8154662402790901, + "grad_norm": 2.7561607360839844, + "learning_rate": 4.959633930819323e-05, + "loss": 0.1616, + "step": 22440 + }, + { + "epoch": 0.8158296387818882, + "grad_norm": 4.204514980316162, + "learning_rate": 4.959541001862482e-05, + "loss": 0.6089, + "step": 22450 + }, + { + "epoch": 0.8161930372846864, + "grad_norm": 1.3738398551940918, + "learning_rate": 4.959447966932771e-05, + "loss": 0.1756, + "step": 22460 + }, + { + "epoch": 0.8165564357874846, + "grad_norm": 0.705806314945221, + "learning_rate": 4.959354826034197e-05, + "loss": 0.1213, + "step": 22470 + }, + { + "epoch": 0.8169198342902827, + "grad_norm": 2.053788661956787, + "learning_rate": 4.9592615791707755e-05, + "loss": 0.1765, + "step": 22480 + }, + { + "epoch": 0.8172832327930809, + "grad_norm": 2.0120911598205566, + "learning_rate": 4.959168226346521e-05, + "loss": 0.1444, + "step": 22490 + }, + { + "epoch": 0.8176466312958791, + "grad_norm": 6.552361011505127, + "learning_rate": 4.959074767565458e-05, + "loss": 0.2201, + "step": 22500 + }, + { + "epoch": 0.8180100297986772, + "grad_norm": 1.3007264137268066, + "learning_rate": 4.958981202831613e-05, + "loss": 0.1488, + "step": 22510 + }, + { + "epoch": 0.8183734283014754, + "grad_norm": 1.7885551452636719, + "learning_rate": 4.958887532149016e-05, + "loss": 2.6491, + "step": 22520 + }, + { + "epoch": 0.8187368268042736, + "grad_norm": 1.7092806100845337, + "learning_rate": 4.9587937555217054e-05, + "loss": 0.1946, + "step": 22530 + }, + { + "epoch": 0.8191002253070717, + "grad_norm": 2.56215238571167, + "learning_rate": 4.958699872953719e-05, + "loss": 0.1676, + "step": 22540 + }, + { + "epoch": 0.8194636238098699, + "grad_norm": 2.085753917694092, + "learning_rate": 4.958605884449104e-05, + "loss": 0.2038, + "step": 22550 + }, + { + "epoch": 0.819827022312668, + "grad_norm": 0.8225610852241516, + "learning_rate": 4.958511790011909e-05, + "loss": 0.5185, + "step": 22560 + }, + { + "epoch": 0.8201904208154662, + "grad_norm": 1.6775872707366943, + "learning_rate": 4.9584175896461884e-05, + "loss": 0.17, + "step": 22570 + }, + { + "epoch": 0.8205538193182644, + "grad_norm": 3.4285826683044434, + "learning_rate": 4.958323283356001e-05, + "loss": 0.164, + "step": 22580 + }, + { + "epoch": 0.8209172178210625, + "grad_norm": 1.892842411994934, + "learning_rate": 4.95822887114541e-05, + "loss": 1.2783, + "step": 22590 + }, + { + "epoch": 0.8212806163238607, + "grad_norm": 4.959444522857666, + "learning_rate": 4.9581343530184834e-05, + "loss": 0.2062, + "step": 22600 + }, + { + "epoch": 0.8216440148266589, + "grad_norm": 2.4584267139434814, + "learning_rate": 4.958039728979293e-05, + "loss": 0.1443, + "step": 22610 + }, + { + "epoch": 0.822007413329457, + "grad_norm": 1.118804693222046, + "learning_rate": 4.957944999031917e-05, + "loss": 0.16, + "step": 22620 + }, + { + "epoch": 0.8223708118322552, + "grad_norm": 1.5434421300888062, + "learning_rate": 4.9578501631804365e-05, + "loss": 0.2104, + "step": 22630 + }, + { + "epoch": 0.8227342103350535, + "grad_norm": 1.3116744756698608, + "learning_rate": 4.9577552214289374e-05, + "loss": 0.1326, + "step": 22640 + }, + { + "epoch": 0.8230976088378515, + "grad_norm": 11.34653377532959, + "learning_rate": 4.95766017378151e-05, + "loss": 0.2231, + "step": 22650 + }, + { + "epoch": 0.8234610073406498, + "grad_norm": 1.0379194021224976, + "learning_rate": 4.957565020242251e-05, + "loss": 0.1805, + "step": 22660 + }, + { + "epoch": 0.823824405843448, + "grad_norm": 1.8218019008636475, + "learning_rate": 4.957469760815259e-05, + "loss": 0.1287, + "step": 22670 + }, + { + "epoch": 0.8241878043462461, + "grad_norm": 1.1962164640426636, + "learning_rate": 4.957374395504638e-05, + "loss": 0.4115, + "step": 22680 + }, + { + "epoch": 0.8245512028490443, + "grad_norm": 1.9947481155395508, + "learning_rate": 4.957278924314499e-05, + "loss": 0.1407, + "step": 22690 + }, + { + "epoch": 0.8249146013518425, + "grad_norm": 25.343172073364258, + "learning_rate": 4.957183347248953e-05, + "loss": 0.4247, + "step": 22700 + }, + { + "epoch": 0.8252779998546406, + "grad_norm": 1.4444775581359863, + "learning_rate": 4.95708766431212e-05, + "loss": 0.1641, + "step": 22710 + }, + { + "epoch": 0.8256413983574388, + "grad_norm": 1.621640920639038, + "learning_rate": 4.9569918755081216e-05, + "loss": 0.1289, + "step": 22720 + }, + { + "epoch": 0.826004796860237, + "grad_norm": 1.018471360206604, + "learning_rate": 4.9568959808410854e-05, + "loss": 0.1694, + "step": 22730 + }, + { + "epoch": 0.8263681953630351, + "grad_norm": 3.1913223266601562, + "learning_rate": 4.9567999803151424e-05, + "loss": 0.1898, + "step": 22740 + }, + { + "epoch": 0.8267315938658333, + "grad_norm": 8.095772743225098, + "learning_rate": 4.956703873934431e-05, + "loss": 0.2246, + "step": 22750 + }, + { + "epoch": 0.8270949923686315, + "grad_norm": 1.738887906074524, + "learning_rate": 4.956607661703089e-05, + "loss": 0.1678, + "step": 22760 + }, + { + "epoch": 0.8274583908714296, + "grad_norm": 0.9688615202903748, + "learning_rate": 4.9565113436252644e-05, + "loss": 0.1341, + "step": 22770 + }, + { + "epoch": 0.8278217893742278, + "grad_norm": 2.2478010654449463, + "learning_rate": 4.956414919705106e-05, + "loss": 0.1823, + "step": 22780 + }, + { + "epoch": 0.828185187877026, + "grad_norm": 1.6718928813934326, + "learning_rate": 4.956318389946769e-05, + "loss": 0.1543, + "step": 22790 + }, + { + "epoch": 0.8285485863798241, + "grad_norm": 5.168727874755859, + "learning_rate": 4.956221754354412e-05, + "loss": 0.1795, + "step": 22800 + }, + { + "epoch": 0.8285485863798241, + "eval_loss": 0.3908107876777649, + "eval_runtime": 180.5873, + "eval_samples_per_second": 41.055, + "eval_steps_per_second": 5.133, + "eval_wer": 0.20674569317624847, + "step": 22800 + }, + { + "epoch": 0.8289119848826223, + "grad_norm": 0.9549854397773743, + "learning_rate": 4.956125012932199e-05, + "loss": 0.1559, + "step": 22810 + }, + { + "epoch": 0.8292753833854205, + "grad_norm": 3.2057716846466064, + "learning_rate": 4.9560281656842977e-05, + "loss": 0.1675, + "step": 22820 + }, + { + "epoch": 0.8296387818882186, + "grad_norm": 1.7775851488113403, + "learning_rate": 4.955931212614882e-05, + "loss": 0.1997, + "step": 22830 + }, + { + "epoch": 0.8300021803910168, + "grad_norm": 1.7028132677078247, + "learning_rate": 4.9558341537281274e-05, + "loss": 0.1505, + "step": 22840 + }, + { + "epoch": 0.8303655788938149, + "grad_norm": 2.7027060985565186, + "learning_rate": 4.955736989028218e-05, + "loss": 0.2009, + "step": 22850 + }, + { + "epoch": 0.8307289773966131, + "grad_norm": 1.8419814109802246, + "learning_rate": 4.955639718519339e-05, + "loss": 0.1355, + "step": 22860 + }, + { + "epoch": 0.8310923758994113, + "grad_norm": 0.8633226156234741, + "learning_rate": 4.955542342205682e-05, + "loss": 0.178, + "step": 22870 + }, + { + "epoch": 0.8314557744022094, + "grad_norm": 6.966017723083496, + "learning_rate": 4.955444860091442e-05, + "loss": 0.1885, + "step": 22880 + }, + { + "epoch": 0.8318191729050076, + "grad_norm": 1.9565801620483398, + "learning_rate": 4.955347272180819e-05, + "loss": 0.1485, + "step": 22890 + }, + { + "epoch": 0.8321825714078058, + "grad_norm": 22.704593658447266, + "learning_rate": 4.9552495784780196e-05, + "loss": 0.2294, + "step": 22900 + }, + { + "epoch": 0.8325459699106039, + "grad_norm": 2.0515658855438232, + "learning_rate": 4.95515177898725e-05, + "loss": 0.166, + "step": 22910 + }, + { + "epoch": 0.8329093684134021, + "grad_norm": 2.9277150630950928, + "learning_rate": 4.9550538737127275e-05, + "loss": 0.8898, + "step": 22920 + }, + { + "epoch": 0.8332727669162003, + "grad_norm": 3.9280052185058594, + "learning_rate": 4.9549558626586676e-05, + "loss": 0.171, + "step": 22930 + }, + { + "epoch": 0.8336361654189984, + "grad_norm": 2.5431272983551025, + "learning_rate": 4.954857745829294e-05, + "loss": 0.1539, + "step": 22940 + }, + { + "epoch": 0.8339995639217966, + "grad_norm": 2.815434694290161, + "learning_rate": 4.954759523228835e-05, + "loss": 0.2126, + "step": 22950 + }, + { + "epoch": 0.8343629624245948, + "grad_norm": 0.6958141922950745, + "learning_rate": 4.9546611948615224e-05, + "loss": 0.2069, + "step": 22960 + }, + { + "epoch": 0.8347263609273929, + "grad_norm": 0.7068191766738892, + "learning_rate": 4.9545627607315924e-05, + "loss": 0.1287, + "step": 22970 + }, + { + "epoch": 0.8350897594301911, + "grad_norm": 1.8746801614761353, + "learning_rate": 4.954464220843287e-05, + "loss": 0.1488, + "step": 22980 + }, + { + "epoch": 0.8354531579329894, + "grad_norm": 1.5134693384170532, + "learning_rate": 4.95436557520085e-05, + "loss": 0.1337, + "step": 22990 + }, + { + "epoch": 0.8358165564357874, + "grad_norm": 4.778042316436768, + "learning_rate": 4.9542668238085344e-05, + "loss": 0.2172, + "step": 23000 + }, + { + "epoch": 0.8361799549385857, + "grad_norm": 1.074409008026123, + "learning_rate": 4.9541679666705924e-05, + "loss": 0.1696, + "step": 23010 + }, + { + "epoch": 0.8365433534413839, + "grad_norm": 1.6725049018859863, + "learning_rate": 4.954069003791286e-05, + "loss": 0.136, + "step": 23020 + }, + { + "epoch": 0.836906751944182, + "grad_norm": 3.194450616836548, + "learning_rate": 4.953969935174877e-05, + "loss": 0.2067, + "step": 23030 + }, + { + "epoch": 0.8372701504469802, + "grad_norm": 7.7923150062561035, + "learning_rate": 4.9538707608256345e-05, + "loss": 0.1938, + "step": 23040 + }, + { + "epoch": 0.8376335489497784, + "grad_norm": 8.767574310302734, + "learning_rate": 4.953771480747833e-05, + "loss": 0.2473, + "step": 23050 + }, + { + "epoch": 0.8379969474525765, + "grad_norm": 1.3911685943603516, + "learning_rate": 4.953672094945748e-05, + "loss": 0.1497, + "step": 23060 + }, + { + "epoch": 0.8383603459553747, + "grad_norm": 0.7775372266769409, + "learning_rate": 4.953572603423662e-05, + "loss": 0.7581, + "step": 23070 + }, + { + "epoch": 0.8387237444581729, + "grad_norm": 2.6937413215637207, + "learning_rate": 4.9534730061858634e-05, + "loss": 0.1849, + "step": 23080 + }, + { + "epoch": 0.839087142960971, + "grad_norm": 0.7375633716583252, + "learning_rate": 4.953373303236642e-05, + "loss": 0.1706, + "step": 23090 + }, + { + "epoch": 0.8394505414637692, + "grad_norm": 3.070746421813965, + "learning_rate": 4.953273494580295e-05, + "loss": 0.2114, + "step": 23100 + }, + { + "epoch": 0.8398139399665674, + "grad_norm": 0.7470118403434753, + "learning_rate": 4.953173580221121e-05, + "loss": 0.13, + "step": 23110 + }, + { + "epoch": 0.8401773384693655, + "grad_norm": 1.040595531463623, + "learning_rate": 4.953073560163426e-05, + "loss": 0.2088, + "step": 23120 + }, + { + "epoch": 0.8405407369721637, + "grad_norm": 3.9858949184417725, + "learning_rate": 4.95297343441152e-05, + "loss": 0.1528, + "step": 23130 + }, + { + "epoch": 0.8409041354749618, + "grad_norm": 1.4031178951263428, + "learning_rate": 4.952873202969716e-05, + "loss": 2.5826, + "step": 23140 + }, + { + "epoch": 0.84126753397776, + "grad_norm": 16.660646438598633, + "learning_rate": 4.952772865842332e-05, + "loss": 0.3101, + "step": 23150 + }, + { + "epoch": 0.8416309324805582, + "grad_norm": 1.21910560131073, + "learning_rate": 4.952672423033693e-05, + "loss": 0.1326, + "step": 23160 + }, + { + "epoch": 0.8419943309833563, + "grad_norm": 1.4494057893753052, + "learning_rate": 4.952571874548126e-05, + "loss": 0.1567, + "step": 23170 + }, + { + "epoch": 0.8423577294861545, + "grad_norm": 1.1903733015060425, + "learning_rate": 4.952471220389964e-05, + "loss": 0.1537, + "step": 23180 + }, + { + "epoch": 0.8427211279889527, + "grad_norm": 1.0293620824813843, + "learning_rate": 4.9523704605635414e-05, + "loss": 0.1695, + "step": 23190 + }, + { + "epoch": 0.8430845264917508, + "grad_norm": 9.536385536193848, + "learning_rate": 4.9522695950732025e-05, + "loss": 0.2702, + "step": 23200 + }, + { + "epoch": 0.843447924994549, + "grad_norm": 1.1565468311309814, + "learning_rate": 4.9521686239232915e-05, + "loss": 0.1452, + "step": 23210 + }, + { + "epoch": 0.8438113234973472, + "grad_norm": 1.0805953741073608, + "learning_rate": 4.9520675471181586e-05, + "loss": 0.1478, + "step": 23220 + }, + { + "epoch": 0.8441747220001453, + "grad_norm": 2.7216696739196777, + "learning_rate": 4.95196636466216e-05, + "loss": 0.1965, + "step": 23230 + }, + { + "epoch": 0.8445381205029435, + "grad_norm": 2.2064578533172607, + "learning_rate": 4.9518650765596564e-05, + "loss": 0.213, + "step": 23240 + }, + { + "epoch": 0.8449015190057417, + "grad_norm": 11.686285972595215, + "learning_rate": 4.951763682815009e-05, + "loss": 0.2929, + "step": 23250 + }, + { + "epoch": 0.8452649175085398, + "grad_norm": 1.6271568536758423, + "learning_rate": 4.9516621834325885e-05, + "loss": 0.1406, + "step": 23260 + }, + { + "epoch": 0.845628316011338, + "grad_norm": 2.791619300842285, + "learning_rate": 4.951560578416767e-05, + "loss": 0.1431, + "step": 23270 + }, + { + "epoch": 0.8459917145141362, + "grad_norm": 1.9396895170211792, + "learning_rate": 4.951458867771923e-05, + "loss": 0.1516, + "step": 23280 + }, + { + "epoch": 0.8463551130169343, + "grad_norm": 0.9364364147186279, + "learning_rate": 4.951357051502439e-05, + "loss": 0.1935, + "step": 23290 + }, + { + "epoch": 0.8467185115197325, + "grad_norm": 2.275146007537842, + "learning_rate": 4.9512551296127005e-05, + "loss": 0.1832, + "step": 23300 + }, + { + "epoch": 0.8470819100225307, + "grad_norm": 1.4089415073394775, + "learning_rate": 4.951153102107101e-05, + "loss": 0.1511, + "step": 23310 + }, + { + "epoch": 0.8474453085253288, + "grad_norm": 1.2446107864379883, + "learning_rate": 4.951050968990035e-05, + "loss": 0.282, + "step": 23320 + }, + { + "epoch": 0.847808707028127, + "grad_norm": 2.595438241958618, + "learning_rate": 4.950948730265905e-05, + "loss": 0.1643, + "step": 23330 + }, + { + "epoch": 0.8481721055309253, + "grad_norm": 1.1884585618972778, + "learning_rate": 4.950846385939114e-05, + "loss": 0.1445, + "step": 23340 + }, + { + "epoch": 0.8485355040337234, + "grad_norm": 33.609004974365234, + "learning_rate": 4.9507439360140716e-05, + "loss": 0.185, + "step": 23350 + }, + { + "epoch": 0.8488989025365216, + "grad_norm": 0.573637068271637, + "learning_rate": 4.950641380495194e-05, + "loss": 0.1417, + "step": 23360 + }, + { + "epoch": 0.8492623010393198, + "grad_norm": 1.1126424074172974, + "learning_rate": 4.9505387193868975e-05, + "loss": 0.1592, + "step": 23370 + }, + { + "epoch": 0.8496256995421179, + "grad_norm": 2.466045379638672, + "learning_rate": 4.9504359526936074e-05, + "loss": 0.1507, + "step": 23380 + }, + { + "epoch": 0.8499890980449161, + "grad_norm": 1.273472547531128, + "learning_rate": 4.95033308041975e-05, + "loss": 0.174, + "step": 23390 + }, + { + "epoch": 0.8503524965477143, + "grad_norm": 5.497190475463867, + "learning_rate": 4.9502301025697595e-05, + "loss": 0.2269, + "step": 23400 + }, + { + "epoch": 0.8503524965477143, + "eval_loss": 0.3661801218986511, + "eval_runtime": 181.0852, + "eval_samples_per_second": 40.942, + "eval_steps_per_second": 5.119, + "eval_wer": 0.198767404287763, + "step": 23400 + }, + { + "epoch": 0.8507158950505124, + "grad_norm": 0.740798830986023, + "learning_rate": 4.950127019148071e-05, + "loss": 0.148, + "step": 23410 + }, + { + "epoch": 0.8510792935533106, + "grad_norm": 1.7785030603408813, + "learning_rate": 4.950023830159127e-05, + "loss": 0.175, + "step": 23420 + }, + { + "epoch": 0.8514426920561087, + "grad_norm": 0.7675313949584961, + "learning_rate": 4.949920535607374e-05, + "loss": 0.1635, + "step": 23430 + }, + { + "epoch": 0.8518060905589069, + "grad_norm": 0.9880558252334595, + "learning_rate": 4.9498171354972617e-05, + "loss": 0.1732, + "step": 23440 + }, + { + "epoch": 0.8521694890617051, + "grad_norm": 5.804686069488525, + "learning_rate": 4.9497136298332454e-05, + "loss": 0.2142, + "step": 23450 + }, + { + "epoch": 0.8525328875645032, + "grad_norm": 1.063359022140503, + "learning_rate": 4.949610018619785e-05, + "loss": 0.1529, + "step": 23460 + }, + { + "epoch": 0.8528962860673014, + "grad_norm": 1.9043885469436646, + "learning_rate": 4.949506301861344e-05, + "loss": 0.1633, + "step": 23470 + }, + { + "epoch": 0.8532596845700996, + "grad_norm": 2.0380702018737793, + "learning_rate": 4.9494024795623926e-05, + "loss": 0.1595, + "step": 23480 + }, + { + "epoch": 0.8536230830728977, + "grad_norm": 1.65935218334198, + "learning_rate": 4.949298551727403e-05, + "loss": 0.1526, + "step": 23490 + }, + { + "epoch": 0.8539864815756959, + "grad_norm": 1.7575215101242065, + "learning_rate": 4.9491945183608536e-05, + "loss": 0.1924, + "step": 23500 + }, + { + "epoch": 0.8543498800784941, + "grad_norm": 2.332193374633789, + "learning_rate": 4.949090379467226e-05, + "loss": 0.1536, + "step": 23510 + }, + { + "epoch": 0.8547132785812922, + "grad_norm": 1.0475032329559326, + "learning_rate": 4.948986135051009e-05, + "loss": 0.1322, + "step": 23520 + }, + { + "epoch": 0.8550766770840904, + "grad_norm": 3.1753509044647217, + "learning_rate": 4.948881785116692e-05, + "loss": 0.1457, + "step": 23530 + }, + { + "epoch": 0.8554400755868886, + "grad_norm": 0.7468664646148682, + "learning_rate": 4.948777329668772e-05, + "loss": 0.1385, + "step": 23540 + }, + { + "epoch": 0.8558034740896867, + "grad_norm": 6.77406120300293, + "learning_rate": 4.9486727687117507e-05, + "loss": 0.19, + "step": 23550 + }, + { + "epoch": 0.8561668725924849, + "grad_norm": 1.6008226871490479, + "learning_rate": 4.9485681022501316e-05, + "loss": 0.1609, + "step": 23560 + }, + { + "epoch": 0.8565302710952831, + "grad_norm": 1.1062623262405396, + "learning_rate": 4.948463330288425e-05, + "loss": 0.1624, + "step": 23570 + }, + { + "epoch": 0.8568936695980812, + "grad_norm": 1.6599873304367065, + "learning_rate": 4.948358452831145e-05, + "loss": 0.1532, + "step": 23580 + }, + { + "epoch": 0.8572570681008794, + "grad_norm": 1.264592170715332, + "learning_rate": 4.9482534698828106e-05, + "loss": 0.1696, + "step": 23590 + }, + { + "epoch": 0.8576204666036776, + "grad_norm": 2.027796745300293, + "learning_rate": 4.948148381447945e-05, + "loss": 0.1913, + "step": 23600 + }, + { + "epoch": 0.8579838651064757, + "grad_norm": 1.3213417530059814, + "learning_rate": 4.948043187531076e-05, + "loss": 0.1517, + "step": 23610 + }, + { + "epoch": 0.8583472636092739, + "grad_norm": 1.6190669536590576, + "learning_rate": 4.9479378881367366e-05, + "loss": 0.1517, + "step": 23620 + }, + { + "epoch": 0.8587106621120721, + "grad_norm": 5.381803512573242, + "learning_rate": 4.947832483269464e-05, + "loss": 0.1504, + "step": 23630 + }, + { + "epoch": 0.8590740606148702, + "grad_norm": 3.4807474613189697, + "learning_rate": 4.947726972933798e-05, + "loss": 0.1887, + "step": 23640 + }, + { + "epoch": 0.8594374591176684, + "grad_norm": 4.890349864959717, + "learning_rate": 4.947621357134287e-05, + "loss": 0.219, + "step": 23650 + }, + { + "epoch": 0.8598008576204667, + "grad_norm": 1.1006419658660889, + "learning_rate": 4.947515635875479e-05, + "loss": 0.1743, + "step": 23660 + }, + { + "epoch": 0.8601642561232647, + "grad_norm": 0.9933237433433533, + "learning_rate": 4.9474098091619314e-05, + "loss": 0.1294, + "step": 23670 + }, + { + "epoch": 0.860527654626063, + "grad_norm": 3.392524480819702, + "learning_rate": 4.947303876998203e-05, + "loss": 0.1784, + "step": 23680 + }, + { + "epoch": 0.8608910531288612, + "grad_norm": 1.466454029083252, + "learning_rate": 4.947197839388857e-05, + "loss": 0.1828, + "step": 23690 + }, + { + "epoch": 0.8612544516316593, + "grad_norm": 3.670731544494629, + "learning_rate": 4.947091696338465e-05, + "loss": 0.1772, + "step": 23700 + }, + { + "epoch": 0.8616178501344575, + "grad_norm": 1.3586241006851196, + "learning_rate": 4.9469854478515976e-05, + "loss": 0.1512, + "step": 23710 + }, + { + "epoch": 0.8619812486372556, + "grad_norm": 0.8312864303588867, + "learning_rate": 4.9468790939328336e-05, + "loss": 0.1582, + "step": 23720 + }, + { + "epoch": 0.8623446471400538, + "grad_norm": 0.9825647473335266, + "learning_rate": 4.946772634586756e-05, + "loss": 0.1662, + "step": 23730 + }, + { + "epoch": 0.862708045642852, + "grad_norm": 2.7960050106048584, + "learning_rate": 4.94666606981795e-05, + "loss": 0.226, + "step": 23740 + }, + { + "epoch": 0.8630714441456501, + "grad_norm": 5.3017683029174805, + "learning_rate": 4.94655939963101e-05, + "loss": 0.2065, + "step": 23750 + }, + { + "epoch": 0.8634348426484483, + "grad_norm": 1.0958201885223389, + "learning_rate": 4.946452624030529e-05, + "loss": 0.2177, + "step": 23760 + }, + { + "epoch": 0.8637982411512465, + "grad_norm": 1.0320892333984375, + "learning_rate": 4.94634574302111e-05, + "loss": 0.1263, + "step": 23770 + }, + { + "epoch": 0.8641616396540446, + "grad_norm": 1.0401560068130493, + "learning_rate": 4.946238756607356e-05, + "loss": 0.6474, + "step": 23780 + }, + { + "epoch": 0.8645250381568428, + "grad_norm": 1.378184199333191, + "learning_rate": 4.9461316647938785e-05, + "loss": 0.1783, + "step": 23790 + }, + { + "epoch": 0.864888436659641, + "grad_norm": 7.429476261138916, + "learning_rate": 4.9460244675852906e-05, + "loss": 0.2744, + "step": 23800 + }, + { + "epoch": 0.8652518351624391, + "grad_norm": 2.2409234046936035, + "learning_rate": 4.945917164986211e-05, + "loss": 0.2088, + "step": 23810 + }, + { + "epoch": 0.8656152336652373, + "grad_norm": 1.1307353973388672, + "learning_rate": 4.945809757001264e-05, + "loss": 0.1311, + "step": 23820 + }, + { + "epoch": 0.8659786321680355, + "grad_norm": 1.6061898469924927, + "learning_rate": 4.945702243635077e-05, + "loss": 0.1683, + "step": 23830 + }, + { + "epoch": 0.8663420306708336, + "grad_norm": 1.0011060237884521, + "learning_rate": 4.945594624892281e-05, + "loss": 0.8323, + "step": 23840 + }, + { + "epoch": 0.8667054291736318, + "grad_norm": 6.631030082702637, + "learning_rate": 4.9454869007775154e-05, + "loss": 0.177, + "step": 23850 + }, + { + "epoch": 0.86706882767643, + "grad_norm": 2.8532910346984863, + "learning_rate": 4.9453790712954195e-05, + "loss": 0.145, + "step": 23860 + }, + { + "epoch": 0.8674322261792281, + "grad_norm": 2.6437554359436035, + "learning_rate": 4.945271136450641e-05, + "loss": 0.1496, + "step": 23870 + }, + { + "epoch": 0.8677956246820263, + "grad_norm": 3.0070180892944336, + "learning_rate": 4.945163096247829e-05, + "loss": 0.1582, + "step": 23880 + }, + { + "epoch": 0.8681590231848245, + "grad_norm": 0.8612903356552124, + "learning_rate": 4.9450549506916386e-05, + "loss": 0.157, + "step": 23890 + }, + { + "epoch": 0.8685224216876226, + "grad_norm": 9.475138664245605, + "learning_rate": 4.94494669978673e-05, + "loss": 0.312, + "step": 23900 + }, + { + "epoch": 0.8688858201904208, + "grad_norm": 0.789193868637085, + "learning_rate": 4.944838343537768e-05, + "loss": 0.1385, + "step": 23910 + }, + { + "epoch": 0.869249218693219, + "grad_norm": 0.9372280240058899, + "learning_rate": 4.94472988194942e-05, + "loss": 0.1581, + "step": 23920 + }, + { + "epoch": 0.8696126171960171, + "grad_norm": 4.738519191741943, + "learning_rate": 4.94462131502636e-05, + "loss": 0.1693, + "step": 23930 + }, + { + "epoch": 0.8699760156988153, + "grad_norm": 0.9660571217536926, + "learning_rate": 4.9445126427732654e-05, + "loss": 0.1578, + "step": 23940 + }, + { + "epoch": 0.8703394142016135, + "grad_norm": 8.137104034423828, + "learning_rate": 4.944403865194818e-05, + "loss": 0.1857, + "step": 23950 + }, + { + "epoch": 0.8707028127044116, + "grad_norm": 1.1240946054458618, + "learning_rate": 4.944294982295706e-05, + "loss": 0.2508, + "step": 23960 + }, + { + "epoch": 0.8710662112072098, + "grad_norm": 3.6192643642425537, + "learning_rate": 4.94418599408062e-05, + "loss": 0.1354, + "step": 23970 + }, + { + "epoch": 0.871429609710008, + "grad_norm": 2.76771879196167, + "learning_rate": 4.944076900554256e-05, + "loss": 0.1638, + "step": 23980 + }, + { + "epoch": 0.8717930082128061, + "grad_norm": 1.734529972076416, + "learning_rate": 4.9439677017213143e-05, + "loss": 0.1414, + "step": 23990 + }, + { + "epoch": 0.8721564067156043, + "grad_norm": 6.897458553314209, + "learning_rate": 4.9438583975864996e-05, + "loss": 0.2154, + "step": 24000 + }, + { + "epoch": 0.8721564067156043, + "eval_loss": 0.37997984886169434, + "eval_runtime": 180.3101, + "eval_samples_per_second": 41.118, + "eval_steps_per_second": 5.141, + "eval_wer": 0.20322399114128561, + "step": 24000 + }, + { + "epoch": 0.8725198052184026, + "grad_norm": 1.5639888048171997, + "learning_rate": 4.943748988154523e-05, + "loss": 0.1372, + "step": 24010 + }, + { + "epoch": 0.8728832037212007, + "grad_norm": 4.484424114227295, + "learning_rate": 4.943639473430096e-05, + "loss": 0.3205, + "step": 24020 + }, + { + "epoch": 0.8732466022239989, + "grad_norm": 1.9517849683761597, + "learning_rate": 4.9435298534179396e-05, + "loss": 0.2085, + "step": 24030 + }, + { + "epoch": 0.873610000726797, + "grad_norm": 1.3041925430297852, + "learning_rate": 4.943420128122776e-05, + "loss": 0.1446, + "step": 24040 + }, + { + "epoch": 0.8739733992295952, + "grad_norm": 29.67850685119629, + "learning_rate": 4.943310297549332e-05, + "loss": 0.2643, + "step": 24050 + }, + { + "epoch": 0.8743367977323934, + "grad_norm": 4.462527751922607, + "learning_rate": 4.9432003617023405e-05, + "loss": 0.2067, + "step": 24060 + }, + { + "epoch": 0.8747001962351915, + "grad_norm": 1.2176992893218994, + "learning_rate": 4.9430903205865384e-05, + "loss": 0.1353, + "step": 24070 + }, + { + "epoch": 0.8750635947379897, + "grad_norm": 2.044191360473633, + "learning_rate": 4.9429801742066675e-05, + "loss": 0.1632, + "step": 24080 + }, + { + "epoch": 0.8754269932407879, + "grad_norm": 3.0303845405578613, + "learning_rate": 4.942869922567473e-05, + "loss": 0.1533, + "step": 24090 + }, + { + "epoch": 0.875790391743586, + "grad_norm": 4.44179105758667, + "learning_rate": 4.942759565673705e-05, + "loss": 0.2054, + "step": 24100 + }, + { + "epoch": 0.8761537902463842, + "grad_norm": 2.158686637878418, + "learning_rate": 4.942649103530119e-05, + "loss": 0.1457, + "step": 24110 + }, + { + "epoch": 0.8765171887491824, + "grad_norm": 5.875476837158203, + "learning_rate": 4.942538536141473e-05, + "loss": 0.1941, + "step": 24120 + }, + { + "epoch": 0.8768805872519805, + "grad_norm": 1.7252172231674194, + "learning_rate": 4.9424278635125335e-05, + "loss": 0.155, + "step": 24130 + }, + { + "epoch": 0.8772439857547787, + "grad_norm": 1.6594487428665161, + "learning_rate": 4.9423170856480674e-05, + "loss": 0.1736, + "step": 24140 + }, + { + "epoch": 0.8776073842575769, + "grad_norm": 6.2919697761535645, + "learning_rate": 4.9422062025528474e-05, + "loss": 0.2313, + "step": 24150 + }, + { + "epoch": 0.877970782760375, + "grad_norm": 2.1133229732513428, + "learning_rate": 4.942095214231651e-05, + "loss": 0.1642, + "step": 24160 + }, + { + "epoch": 0.8783341812631732, + "grad_norm": 1.02867591381073, + "learning_rate": 4.941984120689262e-05, + "loss": 0.1554, + "step": 24170 + }, + { + "epoch": 0.8786975797659714, + "grad_norm": 1.7262704372406006, + "learning_rate": 4.941872921930465e-05, + "loss": 0.1428, + "step": 24180 + }, + { + "epoch": 0.8790609782687695, + "grad_norm": 1.095211386680603, + "learning_rate": 4.9417616179600526e-05, + "loss": 0.1683, + "step": 24190 + }, + { + "epoch": 0.8794243767715677, + "grad_norm": 9.772414207458496, + "learning_rate": 4.94165020878282e-05, + "loss": 0.2224, + "step": 24200 + }, + { + "epoch": 0.8797877752743659, + "grad_norm": 0.6741021871566772, + "learning_rate": 4.9415386944035665e-05, + "loss": 0.7216, + "step": 24210 + }, + { + "epoch": 0.880151173777164, + "grad_norm": 0.6714327335357666, + "learning_rate": 4.941427074827098e-05, + "loss": 0.1321, + "step": 24220 + }, + { + "epoch": 0.8805145722799622, + "grad_norm": 9.116118431091309, + "learning_rate": 4.941315350058223e-05, + "loss": 0.1738, + "step": 24230 + }, + { + "epoch": 0.8808779707827604, + "grad_norm": 1.119581937789917, + "learning_rate": 4.941203520101757e-05, + "loss": 0.1076, + "step": 24240 + }, + { + "epoch": 0.8812413692855585, + "grad_norm": 1.5630614757537842, + "learning_rate": 4.941091584962516e-05, + "loss": 0.1734, + "step": 24250 + }, + { + "epoch": 0.8816047677883567, + "grad_norm": 3.4376001358032227, + "learning_rate": 4.940979544645325e-05, + "loss": 0.1567, + "step": 24260 + }, + { + "epoch": 0.8819681662911549, + "grad_norm": 1.1688649654388428, + "learning_rate": 4.94086739915501e-05, + "loss": 0.137, + "step": 24270 + }, + { + "epoch": 0.882331564793953, + "grad_norm": 2.02235746383667, + "learning_rate": 4.9407551484964035e-05, + "loss": 0.1718, + "step": 24280 + }, + { + "epoch": 0.8826949632967512, + "grad_norm": 1.7484105825424194, + "learning_rate": 4.940642792674341e-05, + "loss": 0.1973, + "step": 24290 + }, + { + "epoch": 0.8830583617995494, + "grad_norm": 7.056839942932129, + "learning_rate": 4.940530331693666e-05, + "loss": 0.1916, + "step": 24300 + }, + { + "epoch": 0.8834217603023475, + "grad_norm": 1.4804614782333374, + "learning_rate": 4.940417765559221e-05, + "loss": 0.1418, + "step": 24310 + }, + { + "epoch": 0.8837851588051457, + "grad_norm": 1.3168327808380127, + "learning_rate": 4.940305094275859e-05, + "loss": 0.1466, + "step": 24320 + }, + { + "epoch": 0.8841485573079438, + "grad_norm": 2.4612350463867188, + "learning_rate": 4.9401923178484325e-05, + "loss": 0.1956, + "step": 24330 + }, + { + "epoch": 0.884511955810742, + "grad_norm": 0.8389832973480225, + "learning_rate": 4.9400794362818005e-05, + "loss": 0.1751, + "step": 24340 + }, + { + "epoch": 0.8848753543135403, + "grad_norm": 2.618521213531494, + "learning_rate": 4.939966449580828e-05, + "loss": 0.2133, + "step": 24350 + }, + { + "epoch": 0.8852387528163383, + "grad_norm": 0.767784833908081, + "learning_rate": 4.9398533577503826e-05, + "loss": 0.1256, + "step": 24360 + }, + { + "epoch": 0.8856021513191366, + "grad_norm": 1.7649836540222168, + "learning_rate": 4.939740160795336e-05, + "loss": 0.1925, + "step": 24370 + }, + { + "epoch": 0.8859655498219348, + "grad_norm": 2.182840347290039, + "learning_rate": 4.9396268587205685e-05, + "loss": 0.184, + "step": 24380 + }, + { + "epoch": 0.8863289483247329, + "grad_norm": 1.6524356603622437, + "learning_rate": 4.939513451530958e-05, + "loss": 0.1582, + "step": 24390 + }, + { + "epoch": 0.8866923468275311, + "grad_norm": 13.93655776977539, + "learning_rate": 4.939399939231394e-05, + "loss": 0.1813, + "step": 24400 + }, + { + "epoch": 0.8870557453303293, + "grad_norm": 1.9153752326965332, + "learning_rate": 4.939286321826766e-05, + "loss": 0.2093, + "step": 24410 + }, + { + "epoch": 0.8874191438331274, + "grad_norm": 1.9444178342819214, + "learning_rate": 4.9391725993219685e-05, + "loss": 0.1489, + "step": 24420 + }, + { + "epoch": 0.8877825423359256, + "grad_norm": 2.9371562004089355, + "learning_rate": 4.939058771721903e-05, + "loss": 0.1648, + "step": 24430 + }, + { + "epoch": 0.8881459408387238, + "grad_norm": 3.127439498901367, + "learning_rate": 4.938944839031473e-05, + "loss": 0.1756, + "step": 24440 + }, + { + "epoch": 0.8885093393415219, + "grad_norm": 11.735489845275879, + "learning_rate": 4.938830801255588e-05, + "loss": 0.2049, + "step": 24450 + }, + { + "epoch": 0.8888727378443201, + "grad_norm": 1.0685577392578125, + "learning_rate": 4.938716658399161e-05, + "loss": 0.147, + "step": 24460 + }, + { + "epoch": 0.8892361363471183, + "grad_norm": 3.6975417137145996, + "learning_rate": 4.93860241046711e-05, + "loss": 0.1402, + "step": 24470 + }, + { + "epoch": 0.8895995348499164, + "grad_norm": 1.703731894493103, + "learning_rate": 4.938488057464358e-05, + "loss": 0.1418, + "step": 24480 + }, + { + "epoch": 0.8899629333527146, + "grad_norm": 1.5911983251571655, + "learning_rate": 4.938373599395831e-05, + "loss": 0.1268, + "step": 24490 + }, + { + "epoch": 0.8903263318555128, + "grad_norm": 5.278975486755371, + "learning_rate": 4.9382590362664613e-05, + "loss": 0.2388, + "step": 24500 + }, + { + "epoch": 0.8906897303583109, + "grad_norm": 1.673403263092041, + "learning_rate": 4.9381443680811865e-05, + "loss": 0.1568, + "step": 24510 + }, + { + "epoch": 0.8910531288611091, + "grad_norm": 0.5384930968284607, + "learning_rate": 4.938029594844945e-05, + "loss": 0.1364, + "step": 24520 + }, + { + "epoch": 0.8914165273639073, + "grad_norm": 1.4231863021850586, + "learning_rate": 4.937914716562683e-05, + "loss": 0.1358, + "step": 24530 + }, + { + "epoch": 0.8917799258667054, + "grad_norm": 1.2151052951812744, + "learning_rate": 4.937799733239349e-05, + "loss": 0.1673, + "step": 24540 + }, + { + "epoch": 0.8921433243695036, + "grad_norm": 9.278292655944824, + "learning_rate": 4.937684644879899e-05, + "loss": 0.2505, + "step": 24550 + }, + { + "epoch": 0.8925067228723018, + "grad_norm": 2.3570127487182617, + "learning_rate": 4.937569451489291e-05, + "loss": 0.1447, + "step": 24560 + }, + { + "epoch": 0.8928701213750999, + "grad_norm": 0.44337037205696106, + "learning_rate": 4.937454153072488e-05, + "loss": 0.2015, + "step": 24570 + }, + { + "epoch": 0.8932335198778981, + "grad_norm": 2.4552314281463623, + "learning_rate": 4.937338749634458e-05, + "loss": 0.1838, + "step": 24580 + }, + { + "epoch": 0.8935969183806963, + "grad_norm": 0.9864338636398315, + "learning_rate": 4.937223241180174e-05, + "loss": 0.1356, + "step": 24590 + }, + { + "epoch": 0.8939603168834944, + "grad_norm": 8.218843460083008, + "learning_rate": 4.937107627714612e-05, + "loss": 0.2109, + "step": 24600 + }, + { + "epoch": 0.8939603168834944, + "eval_loss": 0.38069987297058105, + "eval_runtime": 180.4244, + "eval_samples_per_second": 41.092, + "eval_steps_per_second": 5.138, + "eval_wer": 0.20163559460489772, + "step": 24600 + }, + { + "epoch": 0.8943237153862926, + "grad_norm": 0.7269652485847473, + "learning_rate": 4.936991909242753e-05, + "loss": 0.1756, + "step": 24610 + }, + { + "epoch": 0.8946871138890907, + "grad_norm": 0.9835095405578613, + "learning_rate": 4.9368760857695836e-05, + "loss": 0.1297, + "step": 24620 + }, + { + "epoch": 0.8950505123918889, + "grad_norm": 3.5632708072662354, + "learning_rate": 4.9367601573000944e-05, + "loss": 0.146, + "step": 24630 + }, + { + "epoch": 0.8954139108946871, + "grad_norm": 0.7898311614990234, + "learning_rate": 4.93664412383928e-05, + "loss": 0.1693, + "step": 24640 + }, + { + "epoch": 0.8957773093974852, + "grad_norm": 3.8220248222351074, + "learning_rate": 4.93652798539214e-05, + "loss": 0.1739, + "step": 24650 + }, + { + "epoch": 0.8961407079002834, + "grad_norm": 0.7946699857711792, + "learning_rate": 4.936411741963678e-05, + "loss": 0.1271, + "step": 24660 + }, + { + "epoch": 0.8965041064030816, + "grad_norm": 1.5677101612091064, + "learning_rate": 4.936295393558903e-05, + "loss": 0.1365, + "step": 24670 + }, + { + "epoch": 0.8968675049058797, + "grad_norm": 18.39532470703125, + "learning_rate": 4.9361789401828285e-05, + "loss": 0.2035, + "step": 24680 + }, + { + "epoch": 0.897230903408678, + "grad_norm": 2.577984094619751, + "learning_rate": 4.93606238184047e-05, + "loss": 0.127, + "step": 24690 + }, + { + "epoch": 0.8975943019114762, + "grad_norm": 3.4822871685028076, + "learning_rate": 4.9359457185368515e-05, + "loss": 0.2335, + "step": 24700 + }, + { + "epoch": 0.8979577004142743, + "grad_norm": 1.6475412845611572, + "learning_rate": 4.935828950277e-05, + "loss": 0.1581, + "step": 24710 + }, + { + "epoch": 0.8983210989170725, + "grad_norm": 2.0972635746002197, + "learning_rate": 4.9357120770659446e-05, + "loss": 0.1608, + "step": 24720 + }, + { + "epoch": 0.8986844974198707, + "grad_norm": 3.194946050643921, + "learning_rate": 4.9355950989087226e-05, + "loss": 0.1911, + "step": 24730 + }, + { + "epoch": 0.8990478959226688, + "grad_norm": 1.1382654905319214, + "learning_rate": 4.9354780158103744e-05, + "loss": 0.1671, + "step": 24740 + }, + { + "epoch": 0.899411294425467, + "grad_norm": 7.309133052825928, + "learning_rate": 4.9353608277759433e-05, + "loss": 0.192, + "step": 24750 + }, + { + "epoch": 0.8997746929282652, + "grad_norm": 1.0215349197387695, + "learning_rate": 4.9352435348104786e-05, + "loss": 0.1713, + "step": 24760 + }, + { + "epoch": 0.9001380914310633, + "grad_norm": 2.319836378097534, + "learning_rate": 4.935126136919035e-05, + "loss": 0.1441, + "step": 24770 + }, + { + "epoch": 0.9005014899338615, + "grad_norm": 3.443413496017456, + "learning_rate": 4.9350086341066716e-05, + "loss": 0.2136, + "step": 24780 + }, + { + "epoch": 0.9008648884366597, + "grad_norm": 0.9862478971481323, + "learning_rate": 4.934891026378449e-05, + "loss": 0.134, + "step": 24790 + }, + { + "epoch": 0.9012282869394578, + "grad_norm": 10.3681640625, + "learning_rate": 4.934773313739435e-05, + "loss": 0.3034, + "step": 24800 + }, + { + "epoch": 0.901591685442256, + "grad_norm": 0.9848408102989197, + "learning_rate": 4.9346554961947014e-05, + "loss": 0.1503, + "step": 24810 + }, + { + "epoch": 0.9019550839450542, + "grad_norm": 1.3456752300262451, + "learning_rate": 4.934537573749326e-05, + "loss": 2.2881, + "step": 24820 + }, + { + "epoch": 0.9023184824478523, + "grad_norm": 0.8639931082725525, + "learning_rate": 4.9344195464083884e-05, + "loss": 0.1565, + "step": 24830 + }, + { + "epoch": 0.9026818809506505, + "grad_norm": 1.1297109127044678, + "learning_rate": 4.9343014141769744e-05, + "loss": 0.1338, + "step": 24840 + }, + { + "epoch": 0.9030452794534487, + "grad_norm": 20.8160343170166, + "learning_rate": 4.934183177060173e-05, + "loss": 0.2155, + "step": 24850 + }, + { + "epoch": 0.9034086779562468, + "grad_norm": 0.8113746643066406, + "learning_rate": 4.9340648350630804e-05, + "loss": 0.126, + "step": 24860 + }, + { + "epoch": 0.903772076459045, + "grad_norm": 1.7760541439056396, + "learning_rate": 4.9339463881907946e-05, + "loss": 0.119, + "step": 24870 + }, + { + "epoch": 0.9041354749618432, + "grad_norm": 1.8657050132751465, + "learning_rate": 4.933827836448418e-05, + "loss": 0.1772, + "step": 24880 + }, + { + "epoch": 0.9044988734646413, + "grad_norm": 1.2576991319656372, + "learning_rate": 4.9337091798410594e-05, + "loss": 0.1609, + "step": 24890 + }, + { + "epoch": 0.9048622719674395, + "grad_norm": 4.8249311447143555, + "learning_rate": 4.933590418373833e-05, + "loss": 1.7033, + "step": 24900 + }, + { + "epoch": 0.9052256704702376, + "grad_norm": 1.065819501876831, + "learning_rate": 4.9334715520518526e-05, + "loss": 0.1559, + "step": 24910 + }, + { + "epoch": 0.9055890689730358, + "grad_norm": 0.961330771446228, + "learning_rate": 4.933352580880242e-05, + "loss": 0.1459, + "step": 24920 + }, + { + "epoch": 0.905952467475834, + "grad_norm": 2.0911202430725098, + "learning_rate": 4.933233504864126e-05, + "loss": 0.2173, + "step": 24930 + }, + { + "epoch": 0.9063158659786321, + "grad_norm": 0.5074183940887451, + "learning_rate": 4.933114324008636e-05, + "loss": 0.1544, + "step": 24940 + }, + { + "epoch": 0.9066792644814303, + "grad_norm": 3.663172483444214, + "learning_rate": 4.932995038318907e-05, + "loss": 0.2042, + "step": 24950 + }, + { + "epoch": 0.9070426629842285, + "grad_norm": 1.691545844078064, + "learning_rate": 4.9328756478000784e-05, + "loss": 0.1616, + "step": 24960 + }, + { + "epoch": 0.9074060614870266, + "grad_norm": 1.6613342761993408, + "learning_rate": 4.9327561524572944e-05, + "loss": 0.1212, + "step": 24970 + }, + { + "epoch": 0.9077694599898248, + "grad_norm": 2.5737128257751465, + "learning_rate": 4.9326365522957044e-05, + "loss": 0.1753, + "step": 24980 + }, + { + "epoch": 0.908132858492623, + "grad_norm": 1.717429280281067, + "learning_rate": 4.932516847320459e-05, + "loss": 0.1436, + "step": 24990 + }, + { + "epoch": 0.9084962569954211, + "grad_norm": 13.324812889099121, + "learning_rate": 4.9323970375367186e-05, + "loss": 0.1983, + "step": 25000 + }, + { + "epoch": 0.9088596554982193, + "grad_norm": 1.374232530593872, + "learning_rate": 4.932277122949644e-05, + "loss": 0.1588, + "step": 25010 + }, + { + "epoch": 0.9092230540010175, + "grad_norm": 1.1790850162506104, + "learning_rate": 4.932157103564402e-05, + "loss": 0.1603, + "step": 25020 + }, + { + "epoch": 0.9095864525038156, + "grad_norm": 2.7326996326446533, + "learning_rate": 4.932036979386165e-05, + "loss": 0.1656, + "step": 25030 + }, + { + "epoch": 0.9099498510066139, + "grad_norm": 1.2364397048950195, + "learning_rate": 4.931916750420107e-05, + "loss": 0.2311, + "step": 25040 + }, + { + "epoch": 0.9103132495094121, + "grad_norm": 3.7070934772491455, + "learning_rate": 4.9317964166714095e-05, + "loss": 0.2286, + "step": 25050 + }, + { + "epoch": 0.9106766480122102, + "grad_norm": 2.05336594581604, + "learning_rate": 4.931675978145256e-05, + "loss": 0.1404, + "step": 25060 + }, + { + "epoch": 0.9110400465150084, + "grad_norm": 1.3064135313034058, + "learning_rate": 4.931555434846837e-05, + "loss": 0.1395, + "step": 25070 + }, + { + "epoch": 0.9114034450178066, + "grad_norm": 1.252254843711853, + "learning_rate": 4.931434786781346e-05, + "loss": 0.1595, + "step": 25080 + }, + { + "epoch": 0.9117668435206047, + "grad_norm": 1.399654507637024, + "learning_rate": 4.931314033953981e-05, + "loss": 0.1495, + "step": 25090 + }, + { + "epoch": 0.9121302420234029, + "grad_norm": 9.340110778808594, + "learning_rate": 4.931193176369945e-05, + "loss": 0.2489, + "step": 25100 + }, + { + "epoch": 0.9124936405262011, + "grad_norm": 1.4071942567825317, + "learning_rate": 4.931072214034445e-05, + "loss": 0.1409, + "step": 25110 + }, + { + "epoch": 0.9128570390289992, + "grad_norm": 1.5617743730545044, + "learning_rate": 4.9309511469526934e-05, + "loss": 0.2026, + "step": 25120 + }, + { + "epoch": 0.9132204375317974, + "grad_norm": 1.4382219314575195, + "learning_rate": 4.930829975129906e-05, + "loss": 0.1426, + "step": 25130 + }, + { + "epoch": 0.9135838360345956, + "grad_norm": 1.0388094186782837, + "learning_rate": 4.930708698571303e-05, + "loss": 0.132, + "step": 25140 + }, + { + "epoch": 0.9139472345373937, + "grad_norm": 3.9398436546325684, + "learning_rate": 4.9305873172821126e-05, + "loss": 0.2257, + "step": 25150 + }, + { + "epoch": 0.9143106330401919, + "grad_norm": 2.5586395263671875, + "learning_rate": 4.930465831267562e-05, + "loss": 0.1508, + "step": 25160 + }, + { + "epoch": 0.9146740315429901, + "grad_norm": 1.6908849477767944, + "learning_rate": 4.930344240532886e-05, + "loss": 0.1407, + "step": 25170 + }, + { + "epoch": 0.9150374300457882, + "grad_norm": 3.980564594268799, + "learning_rate": 4.930222545083324e-05, + "loss": 0.1749, + "step": 25180 + }, + { + "epoch": 0.9154008285485864, + "grad_norm": 1.7451142072677612, + "learning_rate": 4.930100744924119e-05, + "loss": 0.1415, + "step": 25190 + }, + { + "epoch": 0.9157642270513845, + "grad_norm": 11.09490966796875, + "learning_rate": 4.9299788400605194e-05, + "loss": 0.248, + "step": 25200 + }, + { + "epoch": 0.9157642270513845, + "eval_loss": 0.36305877566337585, + "eval_runtime": 180.0742, + "eval_samples_per_second": 41.172, + "eval_steps_per_second": 5.148, + "eval_wer": 0.19528200845934612, + "step": 25200 + }, + { + "epoch": 0.9161276255541827, + "grad_norm": 0.9552545547485352, + "learning_rate": 4.929856830497778e-05, + "loss": 0.1765, + "step": 25210 + }, + { + "epoch": 0.9164910240569809, + "grad_norm": 1.0652204751968384, + "learning_rate": 4.929734716241151e-05, + "loss": 0.1412, + "step": 25220 + }, + { + "epoch": 0.916854422559779, + "grad_norm": 2.473240375518799, + "learning_rate": 4.929612497295899e-05, + "loss": 0.1511, + "step": 25230 + }, + { + "epoch": 0.9172178210625772, + "grad_norm": 2.0563089847564697, + "learning_rate": 4.929490173667291e-05, + "loss": 0.1562, + "step": 25240 + }, + { + "epoch": 0.9175812195653754, + "grad_norm": 5.446952819824219, + "learning_rate": 4.929367745360593e-05, + "loss": 0.2416, + "step": 25250 + }, + { + "epoch": 0.9179446180681735, + "grad_norm": 0.724795937538147, + "learning_rate": 4.929245212381085e-05, + "loss": 0.1554, + "step": 25260 + }, + { + "epoch": 0.9183080165709717, + "grad_norm": 1.0962814092636108, + "learning_rate": 4.929122574734043e-05, + "loss": 0.1567, + "step": 25270 + }, + { + "epoch": 0.9186714150737699, + "grad_norm": 1.3689608573913574, + "learning_rate": 4.9289998324247524e-05, + "loss": 0.1498, + "step": 25280 + }, + { + "epoch": 0.919034813576568, + "grad_norm": 3.039569139480591, + "learning_rate": 4.9288769854585015e-05, + "loss": 0.1666, + "step": 25290 + }, + { + "epoch": 0.9193982120793662, + "grad_norm": 10.71928882598877, + "learning_rate": 4.928754033840583e-05, + "loss": 0.2487, + "step": 25300 + }, + { + "epoch": 0.9197616105821644, + "grad_norm": 0.47624918818473816, + "learning_rate": 4.928630977576295e-05, + "loss": 0.1457, + "step": 25310 + }, + { + "epoch": 0.9201250090849625, + "grad_norm": 1.2840664386749268, + "learning_rate": 4.9285078166709386e-05, + "loss": 0.1437, + "step": 25320 + }, + { + "epoch": 0.9204884075877607, + "grad_norm": 2.118415117263794, + "learning_rate": 4.928384551129822e-05, + "loss": 0.1861, + "step": 25330 + }, + { + "epoch": 0.9208518060905589, + "grad_norm": 0.8363248109817505, + "learning_rate": 4.928261180958255e-05, + "loss": 0.1494, + "step": 25340 + }, + { + "epoch": 0.921215204593357, + "grad_norm": 20.23488998413086, + "learning_rate": 4.928137706161553e-05, + "loss": 0.2414, + "step": 25350 + }, + { + "epoch": 0.9215786030961552, + "grad_norm": 1.1590826511383057, + "learning_rate": 4.928014126745037e-05, + "loss": 0.162, + "step": 25360 + }, + { + "epoch": 0.9219420015989535, + "grad_norm": 1.1986241340637207, + "learning_rate": 4.9278904427140315e-05, + "loss": 0.1323, + "step": 25370 + }, + { + "epoch": 0.9223054001017515, + "grad_norm": 5.075083255767822, + "learning_rate": 4.927766654073864e-05, + "loss": 0.2944, + "step": 25380 + }, + { + "epoch": 0.9226687986045498, + "grad_norm": 3.1853582859039307, + "learning_rate": 4.927642760829871e-05, + "loss": 0.1792, + "step": 25390 + }, + { + "epoch": 0.923032197107348, + "grad_norm": 5.919759273529053, + "learning_rate": 4.927518762987388e-05, + "loss": 0.2182, + "step": 25400 + }, + { + "epoch": 0.9233955956101461, + "grad_norm": 1.4639918804168701, + "learning_rate": 4.927394660551759e-05, + "loss": 0.1277, + "step": 25410 + }, + { + "epoch": 0.9237589941129443, + "grad_norm": 1.205178141593933, + "learning_rate": 4.927270453528331e-05, + "loss": 0.1197, + "step": 25420 + }, + { + "epoch": 0.9241223926157425, + "grad_norm": 1.4328303337097168, + "learning_rate": 4.927146141922455e-05, + "loss": 0.1522, + "step": 25430 + }, + { + "epoch": 0.9244857911185406, + "grad_norm": 0.6114678382873535, + "learning_rate": 4.927021725739488e-05, + "loss": 0.1661, + "step": 25440 + }, + { + "epoch": 0.9248491896213388, + "grad_norm": 34.13093566894531, + "learning_rate": 4.92689720498479e-05, + "loss": 0.2852, + "step": 25450 + }, + { + "epoch": 0.925212588124137, + "grad_norm": 0.9967424273490906, + "learning_rate": 4.9267725796637256e-05, + "loss": 0.1433, + "step": 25460 + }, + { + "epoch": 0.9255759866269351, + "grad_norm": 1.0493268966674805, + "learning_rate": 4.926647849781666e-05, + "loss": 0.1361, + "step": 25470 + }, + { + "epoch": 0.9259393851297333, + "grad_norm": 2.582016944885254, + "learning_rate": 4.926523015343985e-05, + "loss": 0.1829, + "step": 25480 + }, + { + "epoch": 0.9263027836325314, + "grad_norm": 5.122225284576416, + "learning_rate": 4.92639807635606e-05, + "loss": 0.1148, + "step": 25490 + }, + { + "epoch": 0.9266661821353296, + "grad_norm": 8.054966926574707, + "learning_rate": 4.9262730328232755e-05, + "loss": 0.2363, + "step": 25500 + }, + { + "epoch": 0.9270295806381278, + "grad_norm": 3.3668735027313232, + "learning_rate": 4.926147884751018e-05, + "loss": 0.1311, + "step": 25510 + }, + { + "epoch": 0.9273929791409259, + "grad_norm": 1.0643728971481323, + "learning_rate": 4.926022632144681e-05, + "loss": 0.1318, + "step": 25520 + }, + { + "epoch": 0.9277563776437241, + "grad_norm": 1.632354497909546, + "learning_rate": 4.9258972750096614e-05, + "loss": 0.1958, + "step": 25530 + }, + { + "epoch": 0.9281197761465223, + "grad_norm": 0.7638659477233887, + "learning_rate": 4.9257718133513586e-05, + "loss": 0.168, + "step": 25540 + }, + { + "epoch": 0.9284831746493204, + "grad_norm": 4.14115571975708, + "learning_rate": 4.9256462471751796e-05, + "loss": 0.1976, + "step": 25550 + }, + { + "epoch": 0.9288465731521186, + "grad_norm": 39.925689697265625, + "learning_rate": 4.925520576486534e-05, + "loss": 0.67, + "step": 25560 + }, + { + "epoch": 0.9292099716549168, + "grad_norm": 1.349623441696167, + "learning_rate": 4.9253948012908366e-05, + "loss": 0.1475, + "step": 25570 + }, + { + "epoch": 0.9295733701577149, + "grad_norm": 10.941556930541992, + "learning_rate": 4.925268921593508e-05, + "loss": 0.1696, + "step": 25580 + }, + { + "epoch": 0.9299367686605131, + "grad_norm": 1.5406817197799683, + "learning_rate": 4.925142937399969e-05, + "loss": 0.1444, + "step": 25590 + }, + { + "epoch": 0.9303001671633113, + "grad_norm": 3.9542319774627686, + "learning_rate": 4.925016848715651e-05, + "loss": 0.216, + "step": 25600 + }, + { + "epoch": 0.9306635656661094, + "grad_norm": 2.0055665969848633, + "learning_rate": 4.924890655545984e-05, + "loss": 0.1248, + "step": 25610 + }, + { + "epoch": 0.9310269641689076, + "grad_norm": 2.145512819290161, + "learning_rate": 4.924764357896408e-05, + "loss": 0.1278, + "step": 25620 + }, + { + "epoch": 0.9313903626717058, + "grad_norm": 6.076485633850098, + "learning_rate": 4.924637955772361e-05, + "loss": 0.1586, + "step": 25630 + }, + { + "epoch": 0.9317537611745039, + "grad_norm": 0.9482760429382324, + "learning_rate": 4.924511449179293e-05, + "loss": 0.1547, + "step": 25640 + }, + { + "epoch": 0.9321171596773021, + "grad_norm": 2.335090398788452, + "learning_rate": 4.924384838122653e-05, + "loss": 0.1709, + "step": 25650 + }, + { + "epoch": 0.9324805581801003, + "grad_norm": 2.1309449672698975, + "learning_rate": 4.924258122607895e-05, + "loss": 0.1425, + "step": 25660 + }, + { + "epoch": 0.9328439566828984, + "grad_norm": 1.092887282371521, + "learning_rate": 4.924131302640482e-05, + "loss": 0.1578, + "step": 25670 + }, + { + "epoch": 0.9332073551856966, + "grad_norm": 0.7325641512870789, + "learning_rate": 4.9240043782258746e-05, + "loss": 0.1473, + "step": 25680 + }, + { + "epoch": 0.9335707536884948, + "grad_norm": 1.296338677406311, + "learning_rate": 4.9238773493695443e-05, + "loss": 0.2279, + "step": 25690 + }, + { + "epoch": 0.9339341521912929, + "grad_norm": 1.196590542793274, + "learning_rate": 4.923750216076963e-05, + "loss": 0.1524, + "step": 25700 + }, + { + "epoch": 0.9342975506940912, + "grad_norm": 1.5417845249176025, + "learning_rate": 4.923622978353608e-05, + "loss": 0.1385, + "step": 25710 + }, + { + "epoch": 0.9346609491968894, + "grad_norm": 1.4865704774856567, + "learning_rate": 4.923495636204963e-05, + "loss": 0.1435, + "step": 25720 + }, + { + "epoch": 0.9350243476996875, + "grad_norm": 1.6445010900497437, + "learning_rate": 4.923368189636513e-05, + "loss": 0.223, + "step": 25730 + }, + { + "epoch": 0.9353877462024857, + "grad_norm": 0.6629343032836914, + "learning_rate": 4.9232406386537505e-05, + "loss": 0.1479, + "step": 25740 + }, + { + "epoch": 0.9357511447052839, + "grad_norm": 8.440834999084473, + "learning_rate": 4.923112983262171e-05, + "loss": 0.7624, + "step": 25750 + }, + { + "epoch": 0.936114543208082, + "grad_norm": 1.088809847831726, + "learning_rate": 4.922985223467274e-05, + "loss": 0.134, + "step": 25760 + }, + { + "epoch": 0.9364779417108802, + "grad_norm": 1.1839587688446045, + "learning_rate": 4.922857359274565e-05, + "loss": 0.1284, + "step": 25770 + }, + { + "epoch": 0.9368413402136783, + "grad_norm": 2.278588056564331, + "learning_rate": 4.922729390689553e-05, + "loss": 0.1873, + "step": 25780 + }, + { + "epoch": 0.9372047387164765, + "grad_norm": 1.6524765491485596, + "learning_rate": 4.9226013177177515e-05, + "loss": 0.1769, + "step": 25790 + }, + { + "epoch": 0.9375681372192747, + "grad_norm": 18.044713973999023, + "learning_rate": 4.922473140364679e-05, + "loss": 0.2122, + "step": 25800 + }, + { + "epoch": 0.9375681372192747, + "eval_loss": 0.3881298005580902, + "eval_runtime": 180.2979, + "eval_samples_per_second": 41.121, + "eval_steps_per_second": 5.141, + "eval_wer": 0.1963348884491804, + "step": 25800 + }, + { + "epoch": 0.9379315357220728, + "grad_norm": 1.1691884994506836, + "learning_rate": 4.9223448586358576e-05, + "loss": 0.1573, + "step": 25810 + }, + { + "epoch": 0.938294934224871, + "grad_norm": 1.1012376546859741, + "learning_rate": 4.9222164725368156e-05, + "loss": 0.1511, + "step": 25820 + }, + { + "epoch": 0.9386583327276692, + "grad_norm": 2.1937880516052246, + "learning_rate": 4.9220879820730844e-05, + "loss": 0.1684, + "step": 25830 + }, + { + "epoch": 0.9390217312304673, + "grad_norm": 1.5964059829711914, + "learning_rate": 4.921959387250199e-05, + "loss": 0.1897, + "step": 25840 + }, + { + "epoch": 0.9393851297332655, + "grad_norm": 6.693167209625244, + "learning_rate": 4.921830688073701e-05, + "loss": 0.2155, + "step": 25850 + }, + { + "epoch": 0.9397485282360637, + "grad_norm": 1.679046869277954, + "learning_rate": 4.921701884549136e-05, + "loss": 0.1566, + "step": 25860 + }, + { + "epoch": 0.9401119267388618, + "grad_norm": 0.648047924041748, + "learning_rate": 4.9215729766820536e-05, + "loss": 0.1398, + "step": 25870 + }, + { + "epoch": 0.94047532524166, + "grad_norm": 0.7286267876625061, + "learning_rate": 4.921443964478007e-05, + "loss": 0.1598, + "step": 25880 + }, + { + "epoch": 0.9408387237444582, + "grad_norm": 1.3676726818084717, + "learning_rate": 4.921314847942555e-05, + "loss": 0.1627, + "step": 25890 + }, + { + "epoch": 0.9412021222472563, + "grad_norm": 11.982099533081055, + "learning_rate": 4.921185627081263e-05, + "loss": 0.2181, + "step": 25900 + }, + { + "epoch": 0.9415655207500545, + "grad_norm": 0.8863544464111328, + "learning_rate": 4.9210563018996955e-05, + "loss": 0.1296, + "step": 25910 + }, + { + "epoch": 0.9419289192528527, + "grad_norm": 0.8388992547988892, + "learning_rate": 4.9209268724034265e-05, + "loss": 0.1406, + "step": 25920 + }, + { + "epoch": 0.9422923177556508, + "grad_norm": 2.4800333976745605, + "learning_rate": 4.9207973385980324e-05, + "loss": 0.1694, + "step": 25930 + }, + { + "epoch": 0.942655716258449, + "grad_norm": 4.2597174644470215, + "learning_rate": 4.920667700489093e-05, + "loss": 0.9439, + "step": 25940 + }, + { + "epoch": 0.9430191147612472, + "grad_norm": 5.32108736038208, + "learning_rate": 4.920537958082196e-05, + "loss": 0.1745, + "step": 25950 + }, + { + "epoch": 0.9433825132640453, + "grad_norm": 1.3563112020492554, + "learning_rate": 4.9204081113829316e-05, + "loss": 0.1554, + "step": 25960 + }, + { + "epoch": 0.9437459117668435, + "grad_norm": 8.575587272644043, + "learning_rate": 4.9202781603968926e-05, + "loss": 0.2015, + "step": 25970 + }, + { + "epoch": 0.9441093102696417, + "grad_norm": 6.85026216506958, + "learning_rate": 4.920148105129679e-05, + "loss": 0.1548, + "step": 25980 + }, + { + "epoch": 0.9444727087724398, + "grad_norm": 1.2886810302734375, + "learning_rate": 4.9200179455868944e-05, + "loss": 0.136, + "step": 25990 + }, + { + "epoch": 0.944836107275238, + "grad_norm": 2.0779457092285156, + "learning_rate": 4.919887681774148e-05, + "loss": 0.1744, + "step": 26000 + }, + { + "epoch": 0.9451995057780362, + "grad_norm": Infinity, + "learning_rate": 4.919770355196496e-05, + "loss": 2.7706, + "step": 26010 + }, + { + "epoch": 0.9455629042808343, + "grad_norm": 0.9514101147651672, + "learning_rate": 4.919639893286285e-05, + "loss": 0.1435, + "step": 26020 + }, + { + "epoch": 0.9459263027836325, + "grad_norm": 0.3761270046234131, + "learning_rate": 4.9195093271224016e-05, + "loss": 0.1525, + "step": 26030 + }, + { + "epoch": 0.9462897012864308, + "grad_norm": 1.2147834300994873, + "learning_rate": 4.919378656710469e-05, + "loss": 0.1922, + "step": 26040 + }, + { + "epoch": 0.9466530997892288, + "grad_norm": 15.408570289611816, + "learning_rate": 4.919247882056119e-05, + "loss": 0.2773, + "step": 26050 + }, + { + "epoch": 0.947016498292027, + "grad_norm": 2.2306370735168457, + "learning_rate": 4.919117003164985e-05, + "loss": 0.1446, + "step": 26060 + }, + { + "epoch": 0.9473798967948253, + "grad_norm": 1.3414242267608643, + "learning_rate": 4.918986020042706e-05, + "loss": 0.1484, + "step": 26070 + }, + { + "epoch": 0.9477432952976234, + "grad_norm": 1.9740337133407593, + "learning_rate": 4.9188549326949275e-05, + "loss": 0.1845, + "step": 26080 + }, + { + "epoch": 0.9481066938004216, + "grad_norm": 0.7002670764923096, + "learning_rate": 4.9187237411272955e-05, + "loss": 0.1559, + "step": 26090 + }, + { + "epoch": 0.9484700923032197, + "grad_norm": 8.308074951171875, + "learning_rate": 4.9185924453454635e-05, + "loss": 0.223, + "step": 26100 + }, + { + "epoch": 0.9488334908060179, + "grad_norm": 0.8129051327705383, + "learning_rate": 4.9184610453550884e-05, + "loss": 0.1459, + "step": 26110 + }, + { + "epoch": 0.9491968893088161, + "grad_norm": 1.5998592376708984, + "learning_rate": 4.918329541161831e-05, + "loss": 0.1394, + "step": 26120 + }, + { + "epoch": 0.9495602878116142, + "grad_norm": 1.8726842403411865, + "learning_rate": 4.918197932771359e-05, + "loss": 0.1859, + "step": 26130 + }, + { + "epoch": 0.9499236863144124, + "grad_norm": 1.1915557384490967, + "learning_rate": 4.9180662201893424e-05, + "loss": 0.1621, + "step": 26140 + }, + { + "epoch": 0.9502870848172106, + "grad_norm": 6.970126152038574, + "learning_rate": 4.917934403421455e-05, + "loss": 0.2613, + "step": 26150 + }, + { + "epoch": 0.9506504833200087, + "grad_norm": 1.0738050937652588, + "learning_rate": 4.9178024824733776e-05, + "loss": 0.1383, + "step": 26160 + }, + { + "epoch": 0.9510138818228069, + "grad_norm": 2.1130123138427734, + "learning_rate": 4.9176704573507933e-05, + "loss": 0.222, + "step": 26170 + }, + { + "epoch": 0.9513772803256051, + "grad_norm": 3.1722593307495117, + "learning_rate": 4.9175383280593925e-05, + "loss": 0.1624, + "step": 26180 + }, + { + "epoch": 0.9517406788284032, + "grad_norm": 0.9101456999778748, + "learning_rate": 4.917406094604865e-05, + "loss": 0.16, + "step": 26190 + }, + { + "epoch": 0.9521040773312014, + "grad_norm": 3.284672260284424, + "learning_rate": 4.917273756992911e-05, + "loss": 0.1817, + "step": 26200 + }, + { + "epoch": 0.9524674758339996, + "grad_norm": 2.2083284854888916, + "learning_rate": 4.917141315229232e-05, + "loss": 0.169, + "step": 26210 + }, + { + "epoch": 0.9528308743367977, + "grad_norm": 4.354351997375488, + "learning_rate": 4.9170087693195335e-05, + "loss": 0.2629, + "step": 26220 + }, + { + "epoch": 0.9531942728395959, + "grad_norm": 2.520522117614746, + "learning_rate": 4.916876119269526e-05, + "loss": 0.1852, + "step": 26230 + }, + { + "epoch": 0.9535576713423941, + "grad_norm": 0.8573399186134338, + "learning_rate": 4.9167433650849264e-05, + "loss": 0.1524, + "step": 26240 + }, + { + "epoch": 0.9539210698451922, + "grad_norm": 6.314918041229248, + "learning_rate": 4.916610506771454e-05, + "loss": 0.2685, + "step": 26250 + }, + { + "epoch": 0.9542844683479904, + "grad_norm": 3.0610973834991455, + "learning_rate": 4.916477544334833e-05, + "loss": 0.1374, + "step": 26260 + }, + { + "epoch": 0.9546478668507886, + "grad_norm": 0.9085964560508728, + "learning_rate": 4.916344477780793e-05, + "loss": 0.1754, + "step": 26270 + }, + { + "epoch": 0.9550112653535867, + "grad_norm": 3.8550243377685547, + "learning_rate": 4.916211307115067e-05, + "loss": 0.3546, + "step": 26280 + }, + { + "epoch": 0.9553746638563849, + "grad_norm": 5.278194904327393, + "learning_rate": 4.916078032343392e-05, + "loss": 0.1298, + "step": 26290 + }, + { + "epoch": 0.9557380623591831, + "grad_norm": 2.707965612411499, + "learning_rate": 4.9159446534715116e-05, + "loss": 0.1689, + "step": 26300 + }, + { + "epoch": 0.9561014608619812, + "grad_norm": 1.3821223974227905, + "learning_rate": 4.9158111705051716e-05, + "loss": 0.2117, + "step": 26310 + }, + { + "epoch": 0.9564648593647794, + "grad_norm": 1.0195057392120361, + "learning_rate": 4.915677583450123e-05, + "loss": 0.1151, + "step": 26320 + }, + { + "epoch": 0.9568282578675776, + "grad_norm": 2.078343629837036, + "learning_rate": 4.915543892312124e-05, + "loss": 0.1433, + "step": 26330 + }, + { + "epoch": 0.9571916563703757, + "grad_norm": 1.6972254514694214, + "learning_rate": 4.915410097096932e-05, + "loss": 0.1307, + "step": 26340 + }, + { + "epoch": 0.9575550548731739, + "grad_norm": 4.440702438354492, + "learning_rate": 4.915276197810313e-05, + "loss": 0.1806, + "step": 26350 + }, + { + "epoch": 0.9579184533759721, + "grad_norm": 0.778567373752594, + "learning_rate": 4.9151421944580374e-05, + "loss": 0.177, + "step": 26360 + }, + { + "epoch": 0.9582818518787702, + "grad_norm": 1.2955224514007568, + "learning_rate": 4.915008087045877e-05, + "loss": 0.1395, + "step": 26370 + }, + { + "epoch": 0.9586452503815684, + "grad_norm": 2.077195405960083, + "learning_rate": 4.9148738755796104e-05, + "loss": 0.1583, + "step": 26380 + }, + { + "epoch": 0.9590086488843665, + "grad_norm": 0.8736408352851868, + "learning_rate": 4.914739560065021e-05, + "loss": 0.1284, + "step": 26390 + }, + { + "epoch": 0.9593720473871648, + "grad_norm": 2.9465060234069824, + "learning_rate": 4.914605140507895e-05, + "loss": 0.1929, + "step": 26400 + }, + { + "epoch": 0.9593720473871648, + "eval_loss": 0.3894718587398529, + "eval_runtime": 180.7535, + "eval_samples_per_second": 41.017, + "eval_steps_per_second": 5.129, + "eval_wer": 0.18666836095630548, + "step": 26400 + }, + { + "epoch": 0.959735445889963, + "grad_norm": 0.7856747508049011, + "learning_rate": 4.9144706169140256e-05, + "loss": 0.1548, + "step": 26410 + }, + { + "epoch": 0.960098844392761, + "grad_norm": 3.3245174884796143, + "learning_rate": 4.914335989289208e-05, + "loss": 0.1328, + "step": 26420 + }, + { + "epoch": 0.9604622428955593, + "grad_norm": 4.848336219787598, + "learning_rate": 4.914201257639243e-05, + "loss": 0.145, + "step": 26430 + }, + { + "epoch": 0.9608256413983575, + "grad_norm": 2.2661678791046143, + "learning_rate": 4.9140664219699344e-05, + "loss": 0.1482, + "step": 26440 + }, + { + "epoch": 0.9611890399011556, + "grad_norm": 6.279752731323242, + "learning_rate": 4.913931482287094e-05, + "loss": 0.2087, + "step": 26450 + }, + { + "epoch": 0.9615524384039538, + "grad_norm": 0.9201165437698364, + "learning_rate": 4.913796438596534e-05, + "loss": 0.1641, + "step": 26460 + }, + { + "epoch": 0.961915836906752, + "grad_norm": 1.0935853719711304, + "learning_rate": 4.9136612909040746e-05, + "loss": 0.1678, + "step": 26470 + }, + { + "epoch": 0.9622792354095501, + "grad_norm": 5.511369705200195, + "learning_rate": 4.913526039215538e-05, + "loss": 0.2284, + "step": 26480 + }, + { + "epoch": 0.9626426339123483, + "grad_norm": 0.8109707832336426, + "learning_rate": 4.913390683536751e-05, + "loss": 0.1314, + "step": 26490 + }, + { + "epoch": 0.9630060324151465, + "grad_norm": 20.594274520874023, + "learning_rate": 4.9132552238735464e-05, + "loss": 0.2612, + "step": 26500 + }, + { + "epoch": 0.9633694309179446, + "grad_norm": 40.1435661315918, + "learning_rate": 4.913119660231761e-05, + "loss": 0.5943, + "step": 26510 + }, + { + "epoch": 0.9637328294207428, + "grad_norm": 0.6886749863624573, + "learning_rate": 4.912983992617235e-05, + "loss": 0.1445, + "step": 26520 + }, + { + "epoch": 0.964096227923541, + "grad_norm": 4.847496032714844, + "learning_rate": 4.912848221035815e-05, + "loss": 0.1645, + "step": 26530 + }, + { + "epoch": 0.9644596264263391, + "grad_norm": 1.3625943660736084, + "learning_rate": 4.912712345493349e-05, + "loss": 0.1403, + "step": 26540 + }, + { + "epoch": 0.9648230249291373, + "grad_norm": 6.022468090057373, + "learning_rate": 4.9125763659956934e-05, + "loss": 0.2215, + "step": 26550 + }, + { + "epoch": 0.9651864234319355, + "grad_norm": 1.5577186346054077, + "learning_rate": 4.912440282548706e-05, + "loss": 0.1401, + "step": 26560 + }, + { + "epoch": 0.9655498219347336, + "grad_norm": 1.5680512189865112, + "learning_rate": 4.91230409515825e-05, + "loss": 0.1373, + "step": 26570 + }, + { + "epoch": 0.9659132204375318, + "grad_norm": 2.3043782711029053, + "learning_rate": 4.912167803830193e-05, + "loss": 0.1501, + "step": 26580 + }, + { + "epoch": 0.96627661894033, + "grad_norm": 1.5168496370315552, + "learning_rate": 4.912031408570409e-05, + "loss": 0.1665, + "step": 26590 + }, + { + "epoch": 0.9666400174431281, + "grad_norm": 10.529095649719238, + "learning_rate": 4.911894909384773e-05, + "loss": 0.1642, + "step": 26600 + }, + { + "epoch": 0.9670034159459263, + "grad_norm": 2.458815097808838, + "learning_rate": 4.911758306279167e-05, + "loss": 3.3307, + "step": 26610 + }, + { + "epoch": 0.9673668144487245, + "grad_norm": 1.2745519876480103, + "learning_rate": 4.911621599259477e-05, + "loss": 0.1681, + "step": 26620 + }, + { + "epoch": 0.9677302129515226, + "grad_norm": 1.877960443496704, + "learning_rate": 4.911484788331593e-05, + "loss": 0.1445, + "step": 26630 + }, + { + "epoch": 0.9680936114543208, + "grad_norm": 1.3567255735397339, + "learning_rate": 4.911347873501408e-05, + "loss": 0.132, + "step": 26640 + }, + { + "epoch": 0.968457009957119, + "grad_norm": 3.44063138961792, + "learning_rate": 4.911210854774825e-05, + "loss": 0.205, + "step": 26650 + }, + { + "epoch": 0.9688204084599171, + "grad_norm": 5.335951805114746, + "learning_rate": 4.911073732157744e-05, + "loss": 0.1475, + "step": 26660 + }, + { + "epoch": 0.9691838069627153, + "grad_norm": 3.0675578117370605, + "learning_rate": 4.910936505656074e-05, + "loss": 0.1602, + "step": 26670 + }, + { + "epoch": 0.9695472054655134, + "grad_norm": 5.95693826675415, + "learning_rate": 4.910799175275729e-05, + "loss": 0.1888, + "step": 26680 + }, + { + "epoch": 0.9699106039683116, + "grad_norm": 1.7128913402557373, + "learning_rate": 4.910661741022625e-05, + "loss": 0.1402, + "step": 26690 + }, + { + "epoch": 0.9702740024711098, + "grad_norm": 11.855730056762695, + "learning_rate": 4.9105242029026844e-05, + "loss": 0.1939, + "step": 26700 + }, + { + "epoch": 0.9706374009739079, + "grad_norm": 2.21028208732605, + "learning_rate": 4.910386560921831e-05, + "loss": 0.1365, + "step": 26710 + }, + { + "epoch": 0.9710007994767061, + "grad_norm": 3.6761391162872314, + "learning_rate": 4.910248815085998e-05, + "loss": 0.1661, + "step": 26720 + }, + { + "epoch": 0.9713641979795044, + "grad_norm": 1.9474952220916748, + "learning_rate": 4.9101109654011196e-05, + "loss": 0.1176, + "step": 26730 + }, + { + "epoch": 0.9717275964823024, + "grad_norm": 4.190001010894775, + "learning_rate": 4.909973011873135e-05, + "loss": 0.1426, + "step": 26740 + }, + { + "epoch": 0.9720909949851007, + "grad_norm": 2.785562753677368, + "learning_rate": 4.909834954507987e-05, + "loss": 0.201, + "step": 26750 + }, + { + "epoch": 0.9724543934878989, + "grad_norm": 2.296952724456787, + "learning_rate": 4.909696793311625e-05, + "loss": 1.5478, + "step": 26760 + }, + { + "epoch": 0.972817791990697, + "grad_norm": 1.471690058708191, + "learning_rate": 4.909558528290002e-05, + "loss": 0.1254, + "step": 26770 + }, + { + "epoch": 0.9731811904934952, + "grad_norm": 5.213918685913086, + "learning_rate": 4.9094340010048675e-05, + "loss": 0.8881, + "step": 26780 + }, + { + "epoch": 0.9735445889962934, + "grad_norm": 1.5338894128799438, + "learning_rate": 4.909295538731665e-05, + "loss": 0.1621, + "step": 26790 + }, + { + "epoch": 0.9739079874990915, + "grad_norm": 4.493140697479248, + "learning_rate": 4.909156972650491e-05, + "loss": 0.1653, + "step": 26800 + }, + { + "epoch": 0.9742713860018897, + "grad_norm": 0.9602924585342407, + "learning_rate": 4.909018302767313e-05, + "loss": 0.1388, + "step": 26810 + }, + { + "epoch": 0.9746347845046879, + "grad_norm": 1.038445234298706, + "learning_rate": 4.9088795290881085e-05, + "loss": 0.145, + "step": 26820 + }, + { + "epoch": 0.974998183007486, + "grad_norm": 3.1368119716644287, + "learning_rate": 4.908740651618856e-05, + "loss": 0.1732, + "step": 26830 + }, + { + "epoch": 0.9753615815102842, + "grad_norm": 0.6875894069671631, + "learning_rate": 4.908601670365539e-05, + "loss": 0.1582, + "step": 26840 + }, + { + "epoch": 0.9757249800130824, + "grad_norm": 14.604360580444336, + "learning_rate": 4.908462585334146e-05, + "loss": 0.228, + "step": 26850 + }, + { + "epoch": 0.9760883785158805, + "grad_norm": 1.819300889968872, + "learning_rate": 4.9083233965306694e-05, + "loss": 0.5066, + "step": 26860 + }, + { + "epoch": 0.9764517770186787, + "grad_norm": 5.712610721588135, + "learning_rate": 4.908184103961106e-05, + "loss": 0.2109, + "step": 26870 + }, + { + "epoch": 0.9768151755214769, + "grad_norm": 2.8377017974853516, + "learning_rate": 4.908044707631459e-05, + "loss": 0.1417, + "step": 26880 + }, + { + "epoch": 0.977178574024275, + "grad_norm": 1.0483819246292114, + "learning_rate": 4.907905207547733e-05, + "loss": 0.1526, + "step": 26890 + }, + { + "epoch": 0.9775419725270732, + "grad_norm": 3.996112823486328, + "learning_rate": 4.907765603715938e-05, + "loss": 0.6109, + "step": 26900 + }, + { + "epoch": 0.9779053710298714, + "grad_norm": 1.336004614830017, + "learning_rate": 4.907625896142091e-05, + "loss": 0.8939, + "step": 26910 + }, + { + "epoch": 0.9782687695326695, + "grad_norm": 0.9394060373306274, + "learning_rate": 4.907486084832211e-05, + "loss": 0.1515, + "step": 26920 + }, + { + "epoch": 0.9786321680354677, + "grad_norm": 2.356201410293579, + "learning_rate": 4.907346169792321e-05, + "loss": 0.1567, + "step": 26930 + }, + { + "epoch": 0.9789955665382659, + "grad_norm": 0.926143229007721, + "learning_rate": 4.907206151028449e-05, + "loss": 0.1669, + "step": 26940 + }, + { + "epoch": 0.979358965041064, + "grad_norm": 4.815629482269287, + "learning_rate": 4.90706602854663e-05, + "loss": 0.2068, + "step": 26950 + }, + { + "epoch": 0.9797223635438622, + "grad_norm": 1.3679453134536743, + "learning_rate": 4.906925802352899e-05, + "loss": 0.15, + "step": 26960 + }, + { + "epoch": 0.9800857620466603, + "grad_norm": 1.1221717596054077, + "learning_rate": 4.9067854724533e-05, + "loss": 0.223, + "step": 26970 + }, + { + "epoch": 0.9804491605494585, + "grad_norm": 2.198657512664795, + "learning_rate": 4.906645038853878e-05, + "loss": 0.1662, + "step": 26980 + }, + { + "epoch": 0.9808125590522567, + "grad_norm": 1.7014293670654297, + "learning_rate": 4.906504501560684e-05, + "loss": 0.1601, + "step": 26990 + }, + { + "epoch": 0.9811759575550548, + "grad_norm": 8.204177856445312, + "learning_rate": 4.906363860579774e-05, + "loss": 0.2345, + "step": 27000 + }, + { + "epoch": 0.9811759575550548, + "eval_loss": 0.3534720242023468, + "eval_runtime": 180.0448, + "eval_samples_per_second": 41.179, + "eval_steps_per_second": 5.149, + "eval_wer": 0.19012652712981284, + "step": 27000 + }, + { + "epoch": 0.981539356057853, + "grad_norm": 1.1557930707931519, + "learning_rate": 4.906223115917207e-05, + "loss": 0.1357, + "step": 27010 + }, + { + "epoch": 0.9819027545606512, + "grad_norm": 0.7808053493499756, + "learning_rate": 4.906082267579047e-05, + "loss": 0.1366, + "step": 27020 + }, + { + "epoch": 0.9822661530634493, + "grad_norm": 1.4547855854034424, + "learning_rate": 4.9059413155713626e-05, + "loss": 0.1472, + "step": 27030 + }, + { + "epoch": 0.9826295515662475, + "grad_norm": 0.5997300148010254, + "learning_rate": 4.9058002599002275e-05, + "loss": 0.1455, + "step": 27040 + }, + { + "epoch": 0.9829929500690457, + "grad_norm": 12.663732528686523, + "learning_rate": 4.90565910057172e-05, + "loss": 0.2728, + "step": 27050 + }, + { + "epoch": 0.9833563485718438, + "grad_norm": 0.9098420739173889, + "learning_rate": 4.9055178375919196e-05, + "loss": 0.1467, + "step": 27060 + }, + { + "epoch": 0.983719747074642, + "grad_norm": 3.4135582447052, + "learning_rate": 4.9053764709669156e-05, + "loss": 0.2571, + "step": 27070 + }, + { + "epoch": 0.9840831455774403, + "grad_norm": 1.7984899282455444, + "learning_rate": 4.905235000702798e-05, + "loss": 0.2026, + "step": 27080 + }, + { + "epoch": 0.9844465440802384, + "grad_norm": 1.2022316455841064, + "learning_rate": 4.9050934268056615e-05, + "loss": 0.1378, + "step": 27090 + }, + { + "epoch": 0.9848099425830366, + "grad_norm": 1.499773621559143, + "learning_rate": 4.9049517492816066e-05, + "loss": 0.1765, + "step": 27100 + }, + { + "epoch": 0.9851733410858348, + "grad_norm": 0.8122308254241943, + "learning_rate": 4.9048099681367377e-05, + "loss": 0.1422, + "step": 27110 + }, + { + "epoch": 0.9855367395886329, + "grad_norm": 1.203873872756958, + "learning_rate": 4.904668083377164e-05, + "loss": 0.1436, + "step": 27120 + }, + { + "epoch": 0.9859001380914311, + "grad_norm": 1.6162346601486206, + "learning_rate": 4.9045260950089976e-05, + "loss": 0.1667, + "step": 27130 + }, + { + "epoch": 0.9862635365942293, + "grad_norm": 0.5100680589675903, + "learning_rate": 4.904384003038358e-05, + "loss": 0.1573, + "step": 27140 + }, + { + "epoch": 0.9866269350970274, + "grad_norm": 6.364781379699707, + "learning_rate": 4.904241807471366e-05, + "loss": 0.2245, + "step": 27150 + }, + { + "epoch": 0.9869903335998256, + "grad_norm": 0.6124529242515564, + "learning_rate": 4.9040995083141495e-05, + "loss": 0.2296, + "step": 27160 + }, + { + "epoch": 0.9873537321026238, + "grad_norm": 1.3477269411087036, + "learning_rate": 4.903957105572838e-05, + "loss": 0.1173, + "step": 27170 + }, + { + "epoch": 0.9877171306054219, + "grad_norm": 0.8505461812019348, + "learning_rate": 4.903814599253569e-05, + "loss": 0.1913, + "step": 27180 + }, + { + "epoch": 0.9880805291082201, + "grad_norm": 0.928269624710083, + "learning_rate": 4.903671989362481e-05, + "loss": 0.1449, + "step": 27190 + }, + { + "epoch": 0.9884439276110183, + "grad_norm": 9.115983963012695, + "learning_rate": 4.903529275905719e-05, + "loss": 0.2025, + "step": 27200 + }, + { + "epoch": 0.9888073261138164, + "grad_norm": 0.8631362318992615, + "learning_rate": 4.903386458889434e-05, + "loss": 0.1313, + "step": 27210 + }, + { + "epoch": 0.9891707246166146, + "grad_norm": 1.5814399719238281, + "learning_rate": 4.9032435383197764e-05, + "loss": 0.1547, + "step": 27220 + }, + { + "epoch": 0.9895341231194128, + "grad_norm": 2.2507669925689697, + "learning_rate": 4.9031005142029054e-05, + "loss": 2.0754, + "step": 27230 + }, + { + "epoch": 0.9898975216222109, + "grad_norm": 2.0611305236816406, + "learning_rate": 4.902957386544984e-05, + "loss": 0.1351, + "step": 27240 + }, + { + "epoch": 0.9902609201250091, + "grad_norm": 4.219666481018066, + "learning_rate": 4.9028141553521785e-05, + "loss": 0.194, + "step": 27250 + }, + { + "epoch": 0.9906243186278072, + "grad_norm": 2.4156904220581055, + "learning_rate": 4.90267082063066e-05, + "loss": 1.9594, + "step": 27260 + }, + { + "epoch": 0.9909877171306054, + "grad_norm": 4.805545806884766, + "learning_rate": 4.9025273823866046e-05, + "loss": 0.1608, + "step": 27270 + }, + { + "epoch": 0.9913511156334036, + "grad_norm": 3.431521415710449, + "learning_rate": 4.902383840626193e-05, + "loss": 0.1439, + "step": 27280 + }, + { + "epoch": 0.9917145141362017, + "grad_norm": 0.9847241640090942, + "learning_rate": 4.902240195355609e-05, + "loss": 0.1568, + "step": 27290 + }, + { + "epoch": 0.9920779126389999, + "grad_norm": 4.65169095993042, + "learning_rate": 4.9020964465810426e-05, + "loss": 0.2039, + "step": 27300 + }, + { + "epoch": 0.9924413111417981, + "grad_norm": 1.466956377029419, + "learning_rate": 4.9019525943086865e-05, + "loss": 0.1649, + "step": 27310 + }, + { + "epoch": 0.9928047096445962, + "grad_norm": 4.803518772125244, + "learning_rate": 4.901808638544739e-05, + "loss": 0.1645, + "step": 27320 + }, + { + "epoch": 0.9931681081473944, + "grad_norm": 3.4496331214904785, + "learning_rate": 4.901664579295404e-05, + "loss": 0.1751, + "step": 27330 + }, + { + "epoch": 0.9935315066501926, + "grad_norm": 0.9507334232330322, + "learning_rate": 4.9015204165668866e-05, + "loss": 0.1228, + "step": 27340 + }, + { + "epoch": 0.9938949051529907, + "grad_norm": 5.97396993637085, + "learning_rate": 4.901376150365399e-05, + "loss": 0.2178, + "step": 27350 + }, + { + "epoch": 0.9942583036557889, + "grad_norm": 1.7720214128494263, + "learning_rate": 4.9012317806971573e-05, + "loss": 0.1468, + "step": 27360 + }, + { + "epoch": 0.9946217021585871, + "grad_norm": 0.9194307923316956, + "learning_rate": 4.9010873075683825e-05, + "loss": 0.1481, + "step": 27370 + }, + { + "epoch": 0.9949851006613852, + "grad_norm": 2.8458971977233887, + "learning_rate": 4.9009427309852986e-05, + "loss": 0.1402, + "step": 27380 + }, + { + "epoch": 0.9953484991641834, + "grad_norm": 1.9232338666915894, + "learning_rate": 4.900798050954134e-05, + "loss": 0.155, + "step": 27390 + }, + { + "epoch": 0.9957118976669816, + "grad_norm": 4.017787456512451, + "learning_rate": 4.900653267481125e-05, + "loss": 0.2279, + "step": 27400 + }, + { + "epoch": 0.9960752961697797, + "grad_norm": 0.714726448059082, + "learning_rate": 4.9005083805725064e-05, + "loss": 0.1271, + "step": 27410 + }, + { + "epoch": 0.996438694672578, + "grad_norm": 0.8059016466140747, + "learning_rate": 4.900363390234524e-05, + "loss": 0.138, + "step": 27420 + }, + { + "epoch": 0.9968020931753762, + "grad_norm": 2.650024175643921, + "learning_rate": 4.9002182964734234e-05, + "loss": 0.1483, + "step": 27430 + }, + { + "epoch": 0.9971654916781743, + "grad_norm": 1.200749397277832, + "learning_rate": 4.900073099295456e-05, + "loss": 0.9832, + "step": 27440 + }, + { + "epoch": 0.9975288901809725, + "grad_norm": 3.3051798343658447, + "learning_rate": 4.8999277987068785e-05, + "loss": 0.2136, + "step": 27450 + }, + { + "epoch": 0.9978922886837707, + "grad_norm": 1.3630801439285278, + "learning_rate": 4.899782394713951e-05, + "loss": 0.2136, + "step": 27460 + }, + { + "epoch": 0.9982556871865688, + "grad_norm": 2.5952398777008057, + "learning_rate": 4.899636887322939e-05, + "loss": 0.1515, + "step": 27470 + }, + { + "epoch": 0.998619085689367, + "grad_norm": 5.025683879852295, + "learning_rate": 4.8994912765401116e-05, + "loss": 0.1862, + "step": 27480 + }, + { + "epoch": 0.9989824841921652, + "grad_norm": 1.1604958772659302, + "learning_rate": 4.8993455623717415e-05, + "loss": 0.1433, + "step": 27490 + }, + { + "epoch": 0.9993458826949633, + "grad_norm": 14.864492416381836, + "learning_rate": 4.899199744824109e-05, + "loss": 0.1752, + "step": 27500 + }, + { + "epoch": 0.9997092811977615, + "grad_norm": 1.072911024093628, + "learning_rate": 4.8990538239034956e-05, + "loss": 0.1524, + "step": 27510 + }, + { + "epoch": 1.0000726797005597, + "grad_norm": 0.41248244047164917, + "learning_rate": 4.898907799616188e-05, + "loss": 0.1457, + "step": 27520 + }, + { + "epoch": 1.000436078203358, + "grad_norm": 1.0402699708938599, + "learning_rate": 4.89876167196848e-05, + "loss": 0.1394, + "step": 27530 + }, + { + "epoch": 1.0007994767061559, + "grad_norm": 0.8177555203437805, + "learning_rate": 4.8986154409666654e-05, + "loss": 0.1134, + "step": 27540 + }, + { + "epoch": 1.001162875208954, + "grad_norm": 1.1209142208099365, + "learning_rate": 4.8984691066170465e-05, + "loss": 0.1574, + "step": 27550 + }, + { + "epoch": 1.0015262737117523, + "grad_norm": 1.4969863891601562, + "learning_rate": 4.8983226689259264e-05, + "loss": 0.1144, + "step": 27560 + }, + { + "epoch": 1.0018896722145505, + "grad_norm": 0.8014885783195496, + "learning_rate": 4.898176127899617e-05, + "loss": 0.1616, + "step": 27570 + }, + { + "epoch": 1.0022530707173487, + "grad_norm": 1.1477352380752563, + "learning_rate": 4.89802948354443e-05, + "loss": 0.1209, + "step": 27580 + }, + { + "epoch": 1.002616469220147, + "grad_norm": 1.0199166536331177, + "learning_rate": 4.897882735866686e-05, + "loss": 0.1422, + "step": 27590 + }, + { + "epoch": 1.002979867722945, + "grad_norm": 0.8987438678741455, + "learning_rate": 4.897735884872705e-05, + "loss": 0.1328, + "step": 27600 + }, + { + "epoch": 1.002979867722945, + "eval_loss": 0.38110727071762085, + "eval_runtime": 180.6687, + "eval_samples_per_second": 41.036, + "eval_steps_per_second": 5.131, + "eval_wer": 0.18068691342785048, + "step": 27600 + }, + { + "epoch": 1.003343266225743, + "grad_norm": 1.1313180923461914, + "learning_rate": 4.897588930568817e-05, + "loss": 0.1395, + "step": 27610 + }, + { + "epoch": 1.0037066647285413, + "grad_norm": 0.6739907264709473, + "learning_rate": 4.8974418729613526e-05, + "loss": 0.2011, + "step": 27620 + }, + { + "epoch": 1.0040700632313395, + "grad_norm": 0.986926257610321, + "learning_rate": 4.8972947120566475e-05, + "loss": 0.1212, + "step": 27630 + }, + { + "epoch": 1.0044334617341377, + "grad_norm": 0.795300304889679, + "learning_rate": 4.8971474478610437e-05, + "loss": 0.1404, + "step": 27640 + }, + { + "epoch": 1.004796860236936, + "grad_norm": 1.7036499977111816, + "learning_rate": 4.897000080380885e-05, + "loss": 0.1677, + "step": 27650 + }, + { + "epoch": 1.005160258739734, + "grad_norm": 1.4313631057739258, + "learning_rate": 4.896852609622521e-05, + "loss": 0.1188, + "step": 27660 + }, + { + "epoch": 1.0055236572425321, + "grad_norm": 0.5508180260658264, + "learning_rate": 4.896705035592306e-05, + "loss": 0.1627, + "step": 27670 + }, + { + "epoch": 1.0058870557453303, + "grad_norm": 2.3307416439056396, + "learning_rate": 4.896557358296599e-05, + "loss": 0.1173, + "step": 27680 + }, + { + "epoch": 1.0062504542481285, + "grad_norm": 3.0311474800109863, + "learning_rate": 4.896409577741762e-05, + "loss": 0.1176, + "step": 27690 + }, + { + "epoch": 1.0066138527509267, + "grad_norm": 1.8580576181411743, + "learning_rate": 4.896261693934163e-05, + "loss": 0.1647, + "step": 27700 + }, + { + "epoch": 1.0069772512537247, + "grad_norm": 1.094754934310913, + "learning_rate": 4.896113706880174e-05, + "loss": 0.1137, + "step": 27710 + }, + { + "epoch": 1.007340649756523, + "grad_norm": 0.8240002393722534, + "learning_rate": 4.89596561658617e-05, + "loss": 0.1835, + "step": 27720 + }, + { + "epoch": 1.0077040482593211, + "grad_norm": 1.4678568840026855, + "learning_rate": 4.895817423058533e-05, + "loss": 0.1612, + "step": 27730 + }, + { + "epoch": 1.0080674467621193, + "grad_norm": 0.8481863737106323, + "learning_rate": 4.8956691263036473e-05, + "loss": 0.1211, + "step": 27740 + }, + { + "epoch": 1.0084308452649176, + "grad_norm": 5.045682907104492, + "learning_rate": 4.895520726327903e-05, + "loss": 0.2747, + "step": 27750 + }, + { + "epoch": 1.0087942437677158, + "grad_norm": 2.3443167209625244, + "learning_rate": 4.895372223137694e-05, + "loss": 0.1437, + "step": 27760 + }, + { + "epoch": 1.0091576422705137, + "grad_norm": 1.6994588375091553, + "learning_rate": 4.895223616739418e-05, + "loss": 0.16, + "step": 27770 + }, + { + "epoch": 1.009521040773312, + "grad_norm": 2.073699712753296, + "learning_rate": 4.8950749071394794e-05, + "loss": 0.1341, + "step": 27780 + }, + { + "epoch": 1.0098844392761102, + "grad_norm": 1.1939536333084106, + "learning_rate": 4.894926094344284e-05, + "loss": 0.1284, + "step": 27790 + }, + { + "epoch": 1.0102478377789084, + "grad_norm": 1.4820387363433838, + "learning_rate": 4.8947771783602444e-05, + "loss": 0.1644, + "step": 27800 + }, + { + "epoch": 1.0106112362817066, + "grad_norm": 1.8140612840652466, + "learning_rate": 4.894628159193778e-05, + "loss": 0.1681, + "step": 27810 + }, + { + "epoch": 1.0109746347845048, + "grad_norm": 1.7120946645736694, + "learning_rate": 4.894479036851303e-05, + "loss": 0.2066, + "step": 27820 + }, + { + "epoch": 1.0113380332873028, + "grad_norm": 1.0871057510375977, + "learning_rate": 4.894329811339247e-05, + "loss": 0.1428, + "step": 27830 + }, + { + "epoch": 1.011701431790101, + "grad_norm": 0.897597074508667, + "learning_rate": 4.8941804826640375e-05, + "loss": 0.1202, + "step": 27840 + }, + { + "epoch": 1.0120648302928992, + "grad_norm": 1.2489410638809204, + "learning_rate": 4.89403105083211e-05, + "loss": 0.1435, + "step": 27850 + }, + { + "epoch": 1.0124282287956974, + "grad_norm": 1.043281078338623, + "learning_rate": 4.893881515849902e-05, + "loss": 0.1657, + "step": 27860 + }, + { + "epoch": 1.0127916272984956, + "grad_norm": 1.0345379114151, + "learning_rate": 4.893731877723857e-05, + "loss": 0.1669, + "step": 27870 + }, + { + "epoch": 1.0131550258012938, + "grad_norm": 3.5156590938568115, + "learning_rate": 4.893582136460423e-05, + "loss": 0.1356, + "step": 27880 + }, + { + "epoch": 1.0135184243040918, + "grad_norm": 1.0468858480453491, + "learning_rate": 4.893432292066051e-05, + "loss": 0.1201, + "step": 27890 + }, + { + "epoch": 1.01388182280689, + "grad_norm": 1.3872016668319702, + "learning_rate": 4.893282344547197e-05, + "loss": 0.1472, + "step": 27900 + }, + { + "epoch": 1.0142452213096882, + "grad_norm": 0.83976811170578, + "learning_rate": 4.893132293910322e-05, + "loss": 0.1467, + "step": 27910 + }, + { + "epoch": 1.0146086198124864, + "grad_norm": 0.625514566898346, + "learning_rate": 4.892982140161892e-05, + "loss": 0.1661, + "step": 27920 + }, + { + "epoch": 1.0149720183152846, + "grad_norm": 1.0802186727523804, + "learning_rate": 4.892831883308375e-05, + "loss": 0.1444, + "step": 27930 + }, + { + "epoch": 1.0153354168180828, + "grad_norm": 0.567722499370575, + "learning_rate": 4.892681523356246e-05, + "loss": 0.1003, + "step": 27940 + }, + { + "epoch": 1.0156988153208808, + "grad_norm": 1.1036186218261719, + "learning_rate": 4.892531060311985e-05, + "loss": 0.1438, + "step": 27950 + }, + { + "epoch": 1.016062213823679, + "grad_norm": 1.2610325813293457, + "learning_rate": 4.892380494182071e-05, + "loss": 0.1478, + "step": 27960 + }, + { + "epoch": 1.0164256123264772, + "grad_norm": 4.7541913986206055, + "learning_rate": 4.892229824972995e-05, + "loss": 0.2068, + "step": 27970 + }, + { + "epoch": 1.0167890108292754, + "grad_norm": 14.794916152954102, + "learning_rate": 4.8920790526912464e-05, + "loss": 0.4626, + "step": 27980 + }, + { + "epoch": 1.0171524093320736, + "grad_norm": 0.8083056807518005, + "learning_rate": 4.891928177343323e-05, + "loss": 0.4919, + "step": 27990 + }, + { + "epoch": 1.0175158078348718, + "grad_norm": 1.1072735786437988, + "learning_rate": 4.8917771989357246e-05, + "loss": 0.2863, + "step": 28000 + }, + { + "epoch": 1.0178792063376698, + "grad_norm": 0.8811991810798645, + "learning_rate": 4.891626117474957e-05, + "loss": 0.1361, + "step": 28010 + }, + { + "epoch": 1.018242604840468, + "grad_norm": 0.43256062269210815, + "learning_rate": 4.8914749329675294e-05, + "loss": 0.1668, + "step": 28020 + }, + { + "epoch": 1.0186060033432662, + "grad_norm": 1.7490280866622925, + "learning_rate": 4.891323645419956e-05, + "loss": 0.1328, + "step": 28030 + }, + { + "epoch": 1.0189694018460644, + "grad_norm": 1.5770010948181152, + "learning_rate": 4.891172254838755e-05, + "loss": 0.1429, + "step": 28040 + }, + { + "epoch": 1.0193328003488626, + "grad_norm": 0.5603241920471191, + "learning_rate": 4.8910207612304495e-05, + "loss": 0.1319, + "step": 28050 + }, + { + "epoch": 1.0196961988516606, + "grad_norm": 1.5490175485610962, + "learning_rate": 4.890869164601566e-05, + "loss": 0.1292, + "step": 28060 + }, + { + "epoch": 1.0200595973544588, + "grad_norm": 0.7562422752380371, + "learning_rate": 4.8907174649586376e-05, + "loss": 0.1978, + "step": 28070 + }, + { + "epoch": 1.020422995857257, + "grad_norm": 2.67669415473938, + "learning_rate": 4.8905656623082e-05, + "loss": 0.1367, + "step": 28080 + }, + { + "epoch": 1.0207863943600552, + "grad_norm": 1.4589964151382446, + "learning_rate": 4.890413756656793e-05, + "loss": 0.136, + "step": 28090 + }, + { + "epoch": 1.0211497928628535, + "grad_norm": 0.5042529702186584, + "learning_rate": 4.8902617480109626e-05, + "loss": 0.1768, + "step": 28100 + }, + { + "epoch": 1.0215131913656517, + "grad_norm": 3.3886609077453613, + "learning_rate": 4.890109636377258e-05, + "loss": 0.1827, + "step": 28110 + }, + { + "epoch": 1.0218765898684496, + "grad_norm": 0.8882365226745605, + "learning_rate": 4.889957421762234e-05, + "loss": 0.2176, + "step": 28120 + }, + { + "epoch": 1.0222399883712479, + "grad_norm": 1.5471583604812622, + "learning_rate": 4.889805104172447e-05, + "loss": 0.1934, + "step": 28130 + }, + { + "epoch": 1.022603386874046, + "grad_norm": 1.221699595451355, + "learning_rate": 4.889652683614461e-05, + "loss": 0.1217, + "step": 28140 + }, + { + "epoch": 1.0229667853768443, + "grad_norm": 1.1075172424316406, + "learning_rate": 4.8895001600948444e-05, + "loss": 2.944, + "step": 28150 + }, + { + "epoch": 1.0233301838796425, + "grad_norm": 0.9731149077415466, + "learning_rate": 4.889347533620167e-05, + "loss": 0.1123, + "step": 28160 + }, + { + "epoch": 1.0236935823824407, + "grad_norm": 0.8448407649993896, + "learning_rate": 4.889194804197006e-05, + "loss": 0.1755, + "step": 28170 + }, + { + "epoch": 1.0240569808852387, + "grad_norm": 0.8480188250541687, + "learning_rate": 4.8890419718319414e-05, + "loss": 0.1434, + "step": 28180 + }, + { + "epoch": 1.0244203793880369, + "grad_norm": 1.5608705282211304, + "learning_rate": 4.8888890365315584e-05, + "loss": 0.1309, + "step": 28190 + }, + { + "epoch": 1.024783777890835, + "grad_norm": 7.765607833862305, + "learning_rate": 4.888735998302447e-05, + "loss": 0.1584, + "step": 28200 + }, + { + "epoch": 1.024783777890835, + "eval_loss": 0.3628901541233063, + "eval_runtime": 180.7805, + "eval_samples_per_second": 41.011, + "eval_steps_per_second": 5.128, + "eval_wer": 0.1830921996115236, + "step": 28200 + }, + { + "epoch": 1.0251471763936333, + "grad_norm": 2.208989381790161, + "learning_rate": 4.8885828571512e-05, + "loss": 0.1206, + "step": 28210 + }, + { + "epoch": 1.0255105748964315, + "grad_norm": 0.4320629835128784, + "learning_rate": 4.8884296130844166e-05, + "loss": 0.2776, + "step": 28220 + }, + { + "epoch": 1.0258739733992297, + "grad_norm": 1.4430392980575562, + "learning_rate": 4.888276266108699e-05, + "loss": 0.1181, + "step": 28230 + }, + { + "epoch": 1.0262373719020277, + "grad_norm": 0.893260657787323, + "learning_rate": 4.888122816230655e-05, + "loss": 0.1141, + "step": 28240 + }, + { + "epoch": 1.026600770404826, + "grad_norm": 1.9237782955169678, + "learning_rate": 4.887969263456895e-05, + "loss": 0.1676, + "step": 28250 + }, + { + "epoch": 1.026964168907624, + "grad_norm": 1.0318949222564697, + "learning_rate": 4.8878156077940376e-05, + "loss": 0.1256, + "step": 28260 + }, + { + "epoch": 1.0273275674104223, + "grad_norm": 0.8919249773025513, + "learning_rate": 4.8876618492487e-05, + "loss": 0.2314, + "step": 28270 + }, + { + "epoch": 1.0276909659132205, + "grad_norm": 1.31845223903656, + "learning_rate": 4.8875079878275085e-05, + "loss": 0.1414, + "step": 28280 + }, + { + "epoch": 1.0280543644160187, + "grad_norm": 8.070326805114746, + "learning_rate": 4.887354023537094e-05, + "loss": 0.2391, + "step": 28290 + }, + { + "epoch": 1.0284177629188167, + "grad_norm": 0.7600485682487488, + "learning_rate": 4.887199956384088e-05, + "loss": 0.164, + "step": 28300 + }, + { + "epoch": 1.028781161421615, + "grad_norm": 1.0197162628173828, + "learning_rate": 4.88704578637513e-05, + "loss": 0.1324, + "step": 28310 + }, + { + "epoch": 1.0291445599244131, + "grad_norm": 0.5989790558815002, + "learning_rate": 4.886891513516861e-05, + "loss": 0.2162, + "step": 28320 + }, + { + "epoch": 1.0295079584272113, + "grad_norm": 1.2145419120788574, + "learning_rate": 4.88673713781593e-05, + "loss": 1.7629, + "step": 28330 + }, + { + "epoch": 1.0298713569300095, + "grad_norm": 0.7220103740692139, + "learning_rate": 4.8865826592789876e-05, + "loss": 0.105, + "step": 28340 + }, + { + "epoch": 1.0302347554328075, + "grad_norm": 1.2737821340560913, + "learning_rate": 4.88642807791269e-05, + "loss": 0.172, + "step": 28350 + }, + { + "epoch": 1.0305981539356057, + "grad_norm": 2.3391408920288086, + "learning_rate": 4.886273393723698e-05, + "loss": 0.1431, + "step": 28360 + }, + { + "epoch": 1.030961552438404, + "grad_norm": 1.1937615871429443, + "learning_rate": 4.8861186067186756e-05, + "loss": 0.1776, + "step": 28370 + }, + { + "epoch": 1.0313249509412021, + "grad_norm": 0.5789287090301514, + "learning_rate": 4.885963716904292e-05, + "loss": 0.1412, + "step": 28380 + }, + { + "epoch": 1.0316883494440003, + "grad_norm": 1.2566107511520386, + "learning_rate": 4.885808724287221e-05, + "loss": 0.1284, + "step": 28390 + }, + { + "epoch": 1.0320517479467985, + "grad_norm": 5.225760459899902, + "learning_rate": 4.885653628874141e-05, + "loss": 0.1411, + "step": 28400 + }, + { + "epoch": 1.0324151464495965, + "grad_norm": 1.2525557279586792, + "learning_rate": 4.885498430671735e-05, + "loss": 0.1372, + "step": 28410 + }, + { + "epoch": 1.0327785449523947, + "grad_norm": 0.5048568844795227, + "learning_rate": 4.885343129686688e-05, + "loss": 0.1595, + "step": 28420 + }, + { + "epoch": 1.033141943455193, + "grad_norm": 0.8768513202667236, + "learning_rate": 4.8851877259256933e-05, + "loss": 0.286, + "step": 28430 + }, + { + "epoch": 1.0335053419579912, + "grad_norm": 1.2799090147018433, + "learning_rate": 4.885032219395446e-05, + "loss": 0.1431, + "step": 28440 + }, + { + "epoch": 1.0338687404607894, + "grad_norm": 0.9944593906402588, + "learning_rate": 4.8848766101026466e-05, + "loss": 0.13, + "step": 28450 + }, + { + "epoch": 1.0342321389635876, + "grad_norm": 1.3601889610290527, + "learning_rate": 4.8847208980539994e-05, + "loss": 0.1379, + "step": 28460 + }, + { + "epoch": 1.0345955374663856, + "grad_norm": 0.6347102522850037, + "learning_rate": 4.884565083256213e-05, + "loss": 0.1833, + "step": 28470 + }, + { + "epoch": 1.0349589359691838, + "grad_norm": NaN, + "learning_rate": 4.884424762093241e-05, + "loss": 3.779, + "step": 28480 + }, + { + "epoch": 1.035322334471982, + "grad_norm": 1.6947808265686035, + "learning_rate": 4.8842687520905906e-05, + "loss": 0.1571, + "step": 28490 + }, + { + "epoch": 1.0356857329747802, + "grad_norm": 4.521624565124512, + "learning_rate": 4.884112639358283e-05, + "loss": 0.1429, + "step": 28500 + }, + { + "epoch": 1.0360491314775784, + "grad_norm": 1.9370489120483398, + "learning_rate": 4.883956423903044e-05, + "loss": 0.1375, + "step": 28510 + }, + { + "epoch": 1.0364125299803766, + "grad_norm": 2.3492047786712646, + "learning_rate": 4.883800105731606e-05, + "loss": 0.1496, + "step": 28520 + }, + { + "epoch": 1.0367759284831746, + "grad_norm": 1.1862452030181885, + "learning_rate": 4.8836436848507026e-05, + "loss": 0.1239, + "step": 28530 + }, + { + "epoch": 1.0371393269859728, + "grad_norm": 2.223708391189575, + "learning_rate": 4.883487161267074e-05, + "loss": 0.1159, + "step": 28540 + }, + { + "epoch": 1.037502725488771, + "grad_norm": 5.854187965393066, + "learning_rate": 4.8833305349874636e-05, + "loss": 0.1732, + "step": 28550 + }, + { + "epoch": 1.0378661239915692, + "grad_norm": 1.4000542163848877, + "learning_rate": 4.883173806018621e-05, + "loss": 0.1428, + "step": 28560 + }, + { + "epoch": 1.0382295224943674, + "grad_norm": 1.8862130641937256, + "learning_rate": 4.883016974367298e-05, + "loss": 0.2339, + "step": 28570 + }, + { + "epoch": 1.0385929209971656, + "grad_norm": 1.701545238494873, + "learning_rate": 4.8828600400402525e-05, + "loss": 0.2063, + "step": 28580 + }, + { + "epoch": 1.0389563194999636, + "grad_norm": 3.8795692920684814, + "learning_rate": 4.8827030030442466e-05, + "loss": 0.1317, + "step": 28590 + }, + { + "epoch": 1.0393197180027618, + "grad_norm": 1.0597456693649292, + "learning_rate": 4.882545863386046e-05, + "loss": 0.1783, + "step": 28600 + }, + { + "epoch": 1.03968311650556, + "grad_norm": 0.8949028849601746, + "learning_rate": 4.88238862107242e-05, + "loss": 0.1453, + "step": 28610 + }, + { + "epoch": 1.0400465150083582, + "grad_norm": 0.6270145773887634, + "learning_rate": 4.8822312761101456e-05, + "loss": 0.2118, + "step": 28620 + }, + { + "epoch": 1.0404099135111564, + "grad_norm": 0.8819754719734192, + "learning_rate": 4.8820738285060016e-05, + "loss": 0.1398, + "step": 28630 + }, + { + "epoch": 1.0407733120139544, + "grad_norm": 1.5963236093521118, + "learning_rate": 4.881916278266772e-05, + "loss": 0.1592, + "step": 28640 + }, + { + "epoch": 1.0411367105167526, + "grad_norm": 1.2960532903671265, + "learning_rate": 4.8817586253992445e-05, + "loss": 0.2044, + "step": 28650 + }, + { + "epoch": 1.0415001090195508, + "grad_norm": 1.6735124588012695, + "learning_rate": 4.881600869910212e-05, + "loss": 0.143, + "step": 28660 + }, + { + "epoch": 1.041863507522349, + "grad_norm": 1.2382493019104004, + "learning_rate": 4.8814430118064724e-05, + "loss": 0.182, + "step": 28670 + }, + { + "epoch": 1.0422269060251472, + "grad_norm": 1.614788293838501, + "learning_rate": 4.881285051094826e-05, + "loss": 0.149, + "step": 28680 + }, + { + "epoch": 1.0425903045279454, + "grad_norm": 1.549124002456665, + "learning_rate": 4.88112698778208e-05, + "loss": 0.1238, + "step": 28690 + }, + { + "epoch": 1.0429537030307434, + "grad_norm": 0.8877584338188171, + "learning_rate": 4.8809688218750435e-05, + "loss": 0.1541, + "step": 28700 + }, + { + "epoch": 1.0433171015335416, + "grad_norm": 1.1061103343963623, + "learning_rate": 4.8808105533805325e-05, + "loss": 0.1209, + "step": 28710 + }, + { + "epoch": 1.0436805000363398, + "grad_norm": 1.8957878351211548, + "learning_rate": 4.880652182305365e-05, + "loss": 0.1739, + "step": 28720 + }, + { + "epoch": 1.044043898539138, + "grad_norm": 0.9069591164588928, + "learning_rate": 4.880493708656366e-05, + "loss": 0.2014, + "step": 28730 + }, + { + "epoch": 1.0444072970419362, + "grad_norm": 0.7086552381515503, + "learning_rate": 4.880335132440364e-05, + "loss": 0.1149, + "step": 28740 + }, + { + "epoch": 1.0447706955447345, + "grad_norm": 0.5514993667602539, + "learning_rate": 4.8801764536641883e-05, + "loss": 0.163, + "step": 28750 + }, + { + "epoch": 1.0451340940475324, + "grad_norm": 0.5786269903182983, + "learning_rate": 4.880017672334679e-05, + "loss": 0.126, + "step": 28760 + }, + { + "epoch": 1.0454974925503306, + "grad_norm": 0.8554352521896362, + "learning_rate": 4.879858788458676e-05, + "loss": 0.2564, + "step": 28770 + }, + { + "epoch": 1.0458608910531288, + "grad_norm": 3.329148769378662, + "learning_rate": 4.8796998020430253e-05, + "loss": 0.1297, + "step": 28780 + }, + { + "epoch": 1.046224289555927, + "grad_norm": 1.1520358324050903, + "learning_rate": 4.879540713094578e-05, + "loss": 0.1156, + "step": 28790 + }, + { + "epoch": 1.0465876880587253, + "grad_norm": 1.6375194787979126, + "learning_rate": 4.879381521620187e-05, + "loss": 0.1418, + "step": 28800 + }, + { + "epoch": 1.0465876880587253, + "eval_loss": 0.35767313838005066, + "eval_runtime": 180.3335, + "eval_samples_per_second": 41.113, + "eval_steps_per_second": 5.14, + "eval_wer": 0.18009693757147785, + "step": 28800 + }, + { + "epoch": 1.0469510865615235, + "grad_norm": 2.6099300384521484, + "learning_rate": 4.879222227626712e-05, + "loss": 2.0354, + "step": 28810 + }, + { + "epoch": 1.0473144850643215, + "grad_norm": 0.9497049450874329, + "learning_rate": 4.879062831121017e-05, + "loss": 0.2014, + "step": 28820 + }, + { + "epoch": 1.0476778835671197, + "grad_norm": 1.100393533706665, + "learning_rate": 4.878903332109969e-05, + "loss": 0.1294, + "step": 28830 + }, + { + "epoch": 1.0480412820699179, + "grad_norm": 0.46238216757774353, + "learning_rate": 4.87874373060044e-05, + "loss": 0.1103, + "step": 28840 + }, + { + "epoch": 1.048404680572716, + "grad_norm": 1.111619234085083, + "learning_rate": 4.8785840265993085e-05, + "loss": 0.1635, + "step": 28850 + }, + { + "epoch": 1.0487680790755143, + "grad_norm": 1.8693902492523193, + "learning_rate": 4.8784242201134534e-05, + "loss": 0.1145, + "step": 28860 + }, + { + "epoch": 1.0491314775783125, + "grad_norm": 0.5382725596427917, + "learning_rate": 4.878264311149762e-05, + "loss": 0.1699, + "step": 28870 + }, + { + "epoch": 1.0494948760811105, + "grad_norm": 1.3384134769439697, + "learning_rate": 4.878104299715123e-05, + "loss": 0.1479, + "step": 28880 + }, + { + "epoch": 1.0498582745839087, + "grad_norm": 3.7112338542938232, + "learning_rate": 4.87794418581643e-05, + "loss": 2.828, + "step": 28890 + }, + { + "epoch": 1.0502216730867069, + "grad_norm": 0.8874093890190125, + "learning_rate": 4.8777839694605844e-05, + "loss": 0.1274, + "step": 28900 + }, + { + "epoch": 1.050585071589505, + "grad_norm": 2.278064489364624, + "learning_rate": 4.877623650654487e-05, + "loss": 0.1298, + "step": 28910 + }, + { + "epoch": 1.0509484700923033, + "grad_norm": 0.8750000596046448, + "learning_rate": 4.877463229405046e-05, + "loss": 0.2705, + "step": 28920 + }, + { + "epoch": 1.0513118685951013, + "grad_norm": 0.5634777545928955, + "learning_rate": 4.8773027057191735e-05, + "loss": 0.13, + "step": 28930 + }, + { + "epoch": 1.0516752670978995, + "grad_norm": 1.1990102529525757, + "learning_rate": 4.877142079603786e-05, + "loss": 0.1115, + "step": 28940 + }, + { + "epoch": 1.0520386656006977, + "grad_norm": 5.793541431427002, + "learning_rate": 4.8769813510658035e-05, + "loss": 0.1909, + "step": 28950 + }, + { + "epoch": 1.052402064103496, + "grad_norm": 1.0433887243270874, + "learning_rate": 4.876820520112153e-05, + "loss": 0.1225, + "step": 28960 + }, + { + "epoch": 1.0527654626062941, + "grad_norm": 0.8786159753799438, + "learning_rate": 4.8766595867497624e-05, + "loss": 0.1772, + "step": 28970 + }, + { + "epoch": 1.0531288611090923, + "grad_norm": 1.1270724534988403, + "learning_rate": 4.8764985509855664e-05, + "loss": 0.1683, + "step": 28980 + }, + { + "epoch": 1.0534922596118903, + "grad_norm": 0.9916827082633972, + "learning_rate": 4.876337412826504e-05, + "loss": 0.1834, + "step": 28990 + }, + { + "epoch": 1.0538556581146885, + "grad_norm": 1.1295456886291504, + "learning_rate": 4.876176172279517e-05, + "loss": 0.1677, + "step": 29000 + }, + { + "epoch": 1.0542190566174867, + "grad_norm": 1.625546932220459, + "learning_rate": 4.876014829351553e-05, + "loss": 0.1374, + "step": 29010 + }, + { + "epoch": 1.054582455120285, + "grad_norm": 0.4282989799976349, + "learning_rate": 4.875853384049564e-05, + "loss": 0.1826, + "step": 29020 + }, + { + "epoch": 1.0549458536230831, + "grad_norm": 0.8806937336921692, + "learning_rate": 4.875691836380507e-05, + "loss": 0.116, + "step": 29030 + }, + { + "epoch": 1.0553092521258813, + "grad_norm": 0.5082537531852722, + "learning_rate": 4.87553018635134e-05, + "loss": 0.1867, + "step": 29040 + }, + { + "epoch": 1.0556726506286793, + "grad_norm": 3.172614336013794, + "learning_rate": 4.875368433969031e-05, + "loss": 0.1872, + "step": 29050 + }, + { + "epoch": 1.0560360491314775, + "grad_norm": 4.570537090301514, + "learning_rate": 4.875206579240546e-05, + "loss": 0.1501, + "step": 29060 + }, + { + "epoch": 1.0563994476342757, + "grad_norm": 0.9751003980636597, + "learning_rate": 4.875044622172862e-05, + "loss": 0.1533, + "step": 29070 + }, + { + "epoch": 1.056762846137074, + "grad_norm": 0.9446988701820374, + "learning_rate": 4.874882562772955e-05, + "loss": 0.1462, + "step": 29080 + }, + { + "epoch": 1.0571262446398721, + "grad_norm": 5.769078254699707, + "learning_rate": 4.8747204010478086e-05, + "loss": 0.1796, + "step": 29090 + }, + { + "epoch": 1.0574896431426704, + "grad_norm": 6.486478328704834, + "learning_rate": 4.8745581370044094e-05, + "loss": 0.1674, + "step": 29100 + }, + { + "epoch": 1.0578530416454683, + "grad_norm": 0.622352123260498, + "learning_rate": 4.874395770649748e-05, + "loss": 0.137, + "step": 29110 + }, + { + "epoch": 1.0582164401482665, + "grad_norm": 0.5244133472442627, + "learning_rate": 4.8742333019908215e-05, + "loss": 0.2035, + "step": 29120 + }, + { + "epoch": 1.0585798386510648, + "grad_norm": 1.7058534622192383, + "learning_rate": 4.87407073103463e-05, + "loss": 0.1408, + "step": 29130 + }, + { + "epoch": 1.058943237153863, + "grad_norm": 0.9428019523620605, + "learning_rate": 4.873908057788177e-05, + "loss": 0.1128, + "step": 29140 + }, + { + "epoch": 1.0593066356566612, + "grad_norm": 0.4694746136665344, + "learning_rate": 4.8737452822584724e-05, + "loss": 0.142, + "step": 29150 + }, + { + "epoch": 1.0596700341594594, + "grad_norm": 1.3985977172851562, + "learning_rate": 4.873582404452529e-05, + "loss": 0.1169, + "step": 29160 + }, + { + "epoch": 1.0600334326622574, + "grad_norm": 0.8285462856292725, + "learning_rate": 4.873419424377366e-05, + "loss": 0.1945, + "step": 29170 + }, + { + "epoch": 1.0603968311650556, + "grad_norm": 1.657012939453125, + "learning_rate": 4.8732563420400037e-05, + "loss": 0.1904, + "step": 29180 + }, + { + "epoch": 1.0607602296678538, + "grad_norm": 1.4633735418319702, + "learning_rate": 4.87309315744747e-05, + "loss": 0.1183, + "step": 29190 + }, + { + "epoch": 1.061123628170652, + "grad_norm": 0.9722393155097961, + "learning_rate": 4.872929870606796e-05, + "loss": 0.1608, + "step": 29200 + }, + { + "epoch": 1.0614870266734502, + "grad_norm": 0.6080673933029175, + "learning_rate": 4.872766481525016e-05, + "loss": 0.1198, + "step": 29210 + }, + { + "epoch": 1.0618504251762482, + "grad_norm": 0.7502457499504089, + "learning_rate": 4.8726029902091715e-05, + "loss": 0.1893, + "step": 29220 + }, + { + "epoch": 1.0622138236790464, + "grad_norm": 1.7775638103485107, + "learning_rate": 4.8724393966663054e-05, + "loss": 1.4777, + "step": 29230 + }, + { + "epoch": 1.0625772221818446, + "grad_norm": 1.1095236539840698, + "learning_rate": 4.8722757009034666e-05, + "loss": 0.1443, + "step": 29240 + }, + { + "epoch": 1.0629406206846428, + "grad_norm": 0.6879424452781677, + "learning_rate": 4.872111902927709e-05, + "loss": 0.2048, + "step": 29250 + }, + { + "epoch": 1.063304019187441, + "grad_norm": 1.2532442808151245, + "learning_rate": 4.8719480027460895e-05, + "loss": 0.1343, + "step": 29260 + }, + { + "epoch": 1.0636674176902392, + "grad_norm": 1.0296350717544556, + "learning_rate": 4.87178400036567e-05, + "loss": 0.1656, + "step": 29270 + }, + { + "epoch": 1.0640308161930372, + "grad_norm": 1.0346356630325317, + "learning_rate": 4.871619895793517e-05, + "loss": 0.1466, + "step": 29280 + }, + { + "epoch": 1.0643942146958354, + "grad_norm": 1.9428579807281494, + "learning_rate": 4.8714556890367e-05, + "loss": 0.1496, + "step": 29290 + }, + { + "epoch": 1.0647576131986336, + "grad_norm": 2.6400890350341797, + "learning_rate": 4.871291380102295e-05, + "loss": 0.1857, + "step": 29300 + }, + { + "epoch": 1.0651210117014318, + "grad_norm": 7.17543888092041, + "learning_rate": 4.8711269689973826e-05, + "loss": 0.1242, + "step": 29310 + }, + { + "epoch": 1.06548441020423, + "grad_norm": 1.8619358539581299, + "learning_rate": 4.870962455729045e-05, + "loss": 0.2137, + "step": 29320 + }, + { + "epoch": 1.0658478087070282, + "grad_norm": 4.936455726623535, + "learning_rate": 4.8707978403043716e-05, + "loss": 0.1551, + "step": 29330 + }, + { + "epoch": 1.0662112072098262, + "grad_norm": 1.2196155786514282, + "learning_rate": 4.8706331227304533e-05, + "loss": 0.184, + "step": 29340 + }, + { + "epoch": 1.0665746057126244, + "grad_norm": 2.0982654094696045, + "learning_rate": 4.87046830301439e-05, + "loss": 0.1566, + "step": 29350 + }, + { + "epoch": 1.0669380042154226, + "grad_norm": 1.0265774726867676, + "learning_rate": 4.8703033811632806e-05, + "loss": 0.1263, + "step": 29360 + }, + { + "epoch": 1.0673014027182208, + "grad_norm": 2.413862705230713, + "learning_rate": 4.870138357184233e-05, + "loss": 0.135, + "step": 29370 + }, + { + "epoch": 1.067664801221019, + "grad_norm": 0.800736665725708, + "learning_rate": 4.869973231084356e-05, + "loss": 0.1169, + "step": 29380 + }, + { + "epoch": 1.0680281997238172, + "grad_norm": 4.07125186920166, + "learning_rate": 4.8698080028707647e-05, + "loss": 0.1745, + "step": 29390 + }, + { + "epoch": 1.0683915982266152, + "grad_norm": 1.1997871398925781, + "learning_rate": 4.8696426725505784e-05, + "loss": 0.1427, + "step": 29400 + }, + { + "epoch": 1.0683915982266152, + "eval_loss": 0.3479246199131012, + "eval_runtime": 180.3605, + "eval_samples_per_second": 41.107, + "eval_steps_per_second": 5.14, + "eval_wer": 0.1802693920525714, + "step": 29400 + }, + { + "epoch": 1.0687549967294134, + "grad_norm": 1.5684832334518433, + "learning_rate": 4.8694772401309205e-05, + "loss": 0.1491, + "step": 29410 + }, + { + "epoch": 1.0691183952322116, + "grad_norm": 1.3784462213516235, + "learning_rate": 4.8693117056189194e-05, + "loss": 0.1741, + "step": 29420 + }, + { + "epoch": 1.0694817937350098, + "grad_norm": 2.0766236782073975, + "learning_rate": 4.869146069021707e-05, + "loss": 0.1375, + "step": 29430 + }, + { + "epoch": 1.069845192237808, + "grad_norm": 0.6553940773010254, + "learning_rate": 4.86898033034642e-05, + "loss": 0.1371, + "step": 29440 + }, + { + "epoch": 1.0702085907406063, + "grad_norm": 0.9652252197265625, + "learning_rate": 4.868814489600199e-05, + "loss": 0.1446, + "step": 29450 + }, + { + "epoch": 1.0705719892434042, + "grad_norm": 1.123075008392334, + "learning_rate": 4.8686485467901896e-05, + "loss": 0.1628, + "step": 29460 + }, + { + "epoch": 1.0709353877462024, + "grad_norm": 1.3370702266693115, + "learning_rate": 4.868482501923543e-05, + "loss": 0.1822, + "step": 29470 + }, + { + "epoch": 1.0712987862490007, + "grad_norm": 1.1716543436050415, + "learning_rate": 4.868316355007412e-05, + "loss": 0.7928, + "step": 29480 + }, + { + "epoch": 1.0716621847517989, + "grad_norm": 2.255791187286377, + "learning_rate": 4.868150106048955e-05, + "loss": 0.3897, + "step": 29490 + }, + { + "epoch": 1.072025583254597, + "grad_norm": 2.2386605739593506, + "learning_rate": 4.8679837550553366e-05, + "loss": 0.174, + "step": 29500 + }, + { + "epoch": 1.072388981757395, + "grad_norm": 0.7938382625579834, + "learning_rate": 4.867817302033724e-05, + "loss": 0.1274, + "step": 29510 + }, + { + "epoch": 1.0727523802601933, + "grad_norm": 0.8619611263275146, + "learning_rate": 4.8676507469912866e-05, + "loss": 0.1756, + "step": 29520 + }, + { + "epoch": 1.0731157787629915, + "grad_norm": 5.2337727546691895, + "learning_rate": 4.867484089935205e-05, + "loss": 0.1131, + "step": 29530 + }, + { + "epoch": 1.0734791772657897, + "grad_norm": 1.339237093925476, + "learning_rate": 4.867317330872656e-05, + "loss": 0.2385, + "step": 29540 + }, + { + "epoch": 1.0738425757685879, + "grad_norm": 0.9693569540977478, + "learning_rate": 4.8671504698108266e-05, + "loss": 0.1455, + "step": 29550 + }, + { + "epoch": 1.074205974271386, + "grad_norm": 0.8684889674186707, + "learning_rate": 4.866983506756906e-05, + "loss": 0.1516, + "step": 29560 + }, + { + "epoch": 1.074569372774184, + "grad_norm": 0.5518342852592468, + "learning_rate": 4.866816441718088e-05, + "loss": 0.2068, + "step": 29570 + }, + { + "epoch": 1.0749327712769823, + "grad_norm": 1.049777865409851, + "learning_rate": 4.86664927470157e-05, + "loss": 0.1473, + "step": 29580 + }, + { + "epoch": 1.0752961697797805, + "grad_norm": 3.8799684047698975, + "learning_rate": 4.8664820057145556e-05, + "loss": 0.1398, + "step": 29590 + }, + { + "epoch": 1.0756595682825787, + "grad_norm": 0.9927829504013062, + "learning_rate": 4.866314634764252e-05, + "loss": 2.0386, + "step": 29600 + }, + { + "epoch": 1.076022966785377, + "grad_norm": 1.2022935152053833, + "learning_rate": 4.86614716185787e-05, + "loss": 0.1539, + "step": 29610 + }, + { + "epoch": 1.076386365288175, + "grad_norm": 0.7556710243225098, + "learning_rate": 4.865979587002625e-05, + "loss": 0.1718, + "step": 29620 + }, + { + "epoch": 1.076749763790973, + "grad_norm": 1.0953086614608765, + "learning_rate": 4.865811910205738e-05, + "loss": 0.1537, + "step": 29630 + }, + { + "epoch": 1.0771131622937713, + "grad_norm": 0.49788376688957214, + "learning_rate": 4.865644131474434e-05, + "loss": 0.1436, + "step": 29640 + }, + { + "epoch": 1.0774765607965695, + "grad_norm": 2.5504343509674072, + "learning_rate": 4.865476250815941e-05, + "loss": 0.1535, + "step": 29650 + }, + { + "epoch": 1.0778399592993677, + "grad_norm": 1.280085802078247, + "learning_rate": 4.865308268237492e-05, + "loss": 0.1389, + "step": 29660 + }, + { + "epoch": 1.078203357802166, + "grad_norm": 0.4341859519481659, + "learning_rate": 4.865140183746326e-05, + "loss": 0.2016, + "step": 29670 + }, + { + "epoch": 1.0785667563049641, + "grad_norm": 0.744679868221283, + "learning_rate": 4.864971997349685e-05, + "loss": 0.1216, + "step": 29680 + }, + { + "epoch": 1.078930154807762, + "grad_norm": 1.449559211730957, + "learning_rate": 4.8648037090548154e-05, + "loss": 0.1202, + "step": 29690 + }, + { + "epoch": 1.0792935533105603, + "grad_norm": 3.58284330368042, + "learning_rate": 4.8646353188689674e-05, + "loss": 0.145, + "step": 29700 + }, + { + "epoch": 1.0796569518133585, + "grad_norm": 1.7318589687347412, + "learning_rate": 4.864466826799398e-05, + "loss": 0.1361, + "step": 29710 + }, + { + "epoch": 1.0800203503161567, + "grad_norm": 0.44806694984436035, + "learning_rate": 4.864298232853364e-05, + "loss": 0.212, + "step": 29720 + }, + { + "epoch": 1.080383748818955, + "grad_norm": 0.8236504197120667, + "learning_rate": 4.864129537038132e-05, + "loss": 0.1446, + "step": 29730 + }, + { + "epoch": 1.0807471473217531, + "grad_norm": 0.920353889465332, + "learning_rate": 4.863960739360971e-05, + "loss": 0.1472, + "step": 29740 + }, + { + "epoch": 1.0811105458245511, + "grad_norm": 1.415685772895813, + "learning_rate": 4.8637918398291514e-05, + "loss": 1.9346, + "step": 29750 + }, + { + "epoch": 1.0814739443273493, + "grad_norm": 0.7517853379249573, + "learning_rate": 4.8636228384499524e-05, + "loss": 0.1276, + "step": 29760 + }, + { + "epoch": 1.0818373428301475, + "grad_norm": 0.5632757544517517, + "learning_rate": 4.8634537352306554e-05, + "loss": 0.1385, + "step": 29770 + }, + { + "epoch": 1.0822007413329457, + "grad_norm": 1.5767742395401, + "learning_rate": 4.8632845301785455e-05, + "loss": 0.1451, + "step": 29780 + }, + { + "epoch": 1.082564139835744, + "grad_norm": 0.7501896619796753, + "learning_rate": 4.8631152233009146e-05, + "loss": 0.1337, + "step": 29790 + }, + { + "epoch": 1.082927538338542, + "grad_norm": 0.7235280871391296, + "learning_rate": 4.862945814605056e-05, + "loss": 0.1727, + "step": 29800 + }, + { + "epoch": 1.0832909368413401, + "grad_norm": 0.9608789682388306, + "learning_rate": 4.86277630409827e-05, + "loss": 0.1346, + "step": 29810 + }, + { + "epoch": 1.0836543353441384, + "grad_norm": 0.5176007151603699, + "learning_rate": 4.862606691787859e-05, + "loss": 0.1477, + "step": 29820 + }, + { + "epoch": 1.0840177338469366, + "grad_norm": 1.1901780366897583, + "learning_rate": 4.862436977681133e-05, + "loss": 0.1478, + "step": 29830 + }, + { + "epoch": 1.0843811323497348, + "grad_norm": 4.09995698928833, + "learning_rate": 4.8622671617854026e-05, + "loss": 0.1369, + "step": 29840 + }, + { + "epoch": 1.084744530852533, + "grad_norm": 10.049054145812988, + "learning_rate": 4.8620972441079855e-05, + "loss": 0.8392, + "step": 29850 + }, + { + "epoch": 1.085107929355331, + "grad_norm": 1.131095051765442, + "learning_rate": 4.861927224656202e-05, + "loss": 0.1404, + "step": 29860 + }, + { + "epoch": 1.0854713278581292, + "grad_norm": 1.2740205526351929, + "learning_rate": 4.861757103437379e-05, + "loss": 0.1726, + "step": 29870 + }, + { + "epoch": 1.0858347263609274, + "grad_norm": 0.9203113317489624, + "learning_rate": 4.861586880458845e-05, + "loss": 0.1088, + "step": 29880 + }, + { + "epoch": 1.0861981248637256, + "grad_norm": 0.8646379113197327, + "learning_rate": 4.8614165557279345e-05, + "loss": 0.124, + "step": 29890 + }, + { + "epoch": 1.0865615233665238, + "grad_norm": 1.283758521080017, + "learning_rate": 4.861246129251987e-05, + "loss": 0.1616, + "step": 29900 + }, + { + "epoch": 1.086924921869322, + "grad_norm": 2.0251550674438477, + "learning_rate": 4.861075601038345e-05, + "loss": 0.1475, + "step": 29910 + }, + { + "epoch": 1.08728832037212, + "grad_norm": 0.7173452973365784, + "learning_rate": 4.860904971094356e-05, + "loss": 0.182, + "step": 29920 + }, + { + "epoch": 1.0876517188749182, + "grad_norm": 0.7154909372329712, + "learning_rate": 4.8607342394273725e-05, + "loss": 0.1263, + "step": 29930 + }, + { + "epoch": 1.0880151173777164, + "grad_norm": 2.5288286209106445, + "learning_rate": 4.860563406044749e-05, + "loss": 0.1289, + "step": 29940 + }, + { + "epoch": 1.0883785158805146, + "grad_norm": 1.4772063493728638, + "learning_rate": 4.860392470953848e-05, + "loss": 0.1556, + "step": 29950 + }, + { + "epoch": 1.0887419143833128, + "grad_norm": 1.623298168182373, + "learning_rate": 4.8602214341620346e-05, + "loss": 0.1493, + "step": 29960 + }, + { + "epoch": 1.089105312886111, + "grad_norm": 0.4302707016468048, + "learning_rate": 4.860050295676676e-05, + "loss": 0.2303, + "step": 29970 + }, + { + "epoch": 1.089468711388909, + "grad_norm": 1.4086140394210815, + "learning_rate": 4.8598790555051474e-05, + "loss": 0.1549, + "step": 29980 + }, + { + "epoch": 1.0898321098917072, + "grad_norm": 1.1924636363983154, + "learning_rate": 4.859707713654828e-05, + "loss": 0.1426, + "step": 29990 + }, + { + "epoch": 1.0901955083945054, + "grad_norm": 0.8468578457832336, + "learning_rate": 4.859536270133097e-05, + "loss": 0.1607, + "step": 30000 + }, + { + "epoch": 1.0901955083945054, + "eval_loss": 0.38150739669799805, + "eval_runtime": 180.8598, + "eval_samples_per_second": 40.993, + "eval_steps_per_second": 5.126, + "eval_wer": 0.18088659756385353, + "step": 30000 + }, + { + "epoch": 1.0905589068973036, + "grad_norm": 1.3293052911758423, + "learning_rate": 4.859364724947345e-05, + "loss": 0.1199, + "step": 30010 + }, + { + "epoch": 1.0909223054001018, + "grad_norm": 1.421976923942566, + "learning_rate": 4.859193078104961e-05, + "loss": 0.1932, + "step": 30020 + }, + { + "epoch": 1.0912857039029, + "grad_norm": 5.226151466369629, + "learning_rate": 4.8590213296133415e-05, + "loss": 0.1365, + "step": 30030 + }, + { + "epoch": 1.091649102405698, + "grad_norm": 1.6307711601257324, + "learning_rate": 4.8588494794798866e-05, + "loss": 0.1533, + "step": 30040 + }, + { + "epoch": 1.0920125009084962, + "grad_norm": 1.190746784210205, + "learning_rate": 4.858677527712e-05, + "loss": 0.1701, + "step": 30050 + }, + { + "epoch": 1.0923758994112944, + "grad_norm": 1.1558239459991455, + "learning_rate": 4.858505474317091e-05, + "loss": 0.1792, + "step": 30060 + }, + { + "epoch": 1.0927392979140926, + "grad_norm": 0.5284643769264221, + "learning_rate": 4.858333319302573e-05, + "loss": 0.1789, + "step": 30070 + }, + { + "epoch": 1.0931026964168908, + "grad_norm": 0.7858747243881226, + "learning_rate": 4.858161062675863e-05, + "loss": 0.1456, + "step": 30080 + }, + { + "epoch": 1.0934660949196888, + "grad_norm": 1.2685805559158325, + "learning_rate": 4.857988704444383e-05, + "loss": 0.1326, + "step": 30090 + }, + { + "epoch": 1.093829493422487, + "grad_norm": 0.9551296830177307, + "learning_rate": 4.8578162446155595e-05, + "loss": 0.327, + "step": 30100 + }, + { + "epoch": 1.0941928919252852, + "grad_norm": 3.6769495010375977, + "learning_rate": 4.857643683196823e-05, + "loss": 0.1111, + "step": 30110 + }, + { + "epoch": 1.0945562904280834, + "grad_norm": 0.757580041885376, + "learning_rate": 4.8574710201956095e-05, + "loss": 0.1622, + "step": 30120 + }, + { + "epoch": 1.0949196889308817, + "grad_norm": 0.762323796749115, + "learning_rate": 4.857298255619357e-05, + "loss": 0.1218, + "step": 30130 + }, + { + "epoch": 1.0952830874336799, + "grad_norm": 0.6065217852592468, + "learning_rate": 4.85712538947551e-05, + "loss": 0.1297, + "step": 30140 + }, + { + "epoch": 1.0956464859364778, + "grad_norm": 1.1257789134979248, + "learning_rate": 4.856952421771517e-05, + "loss": 0.1862, + "step": 30150 + }, + { + "epoch": 1.096009884439276, + "grad_norm": 1.128233790397644, + "learning_rate": 4.85677935251483e-05, + "loss": 0.1377, + "step": 30160 + }, + { + "epoch": 1.0963732829420743, + "grad_norm": 0.48844701051712036, + "learning_rate": 4.856606181712906e-05, + "loss": 0.1967, + "step": 30170 + }, + { + "epoch": 1.0967366814448725, + "grad_norm": 0.682921290397644, + "learning_rate": 4.856432909373206e-05, + "loss": 0.1268, + "step": 30180 + }, + { + "epoch": 1.0971000799476707, + "grad_norm": 0.8049948215484619, + "learning_rate": 4.856259535503197e-05, + "loss": 0.0971, + "step": 30190 + }, + { + "epoch": 1.0974634784504689, + "grad_norm": 0.6435711979866028, + "learning_rate": 4.8560860601103485e-05, + "loss": 1.0273, + "step": 30200 + }, + { + "epoch": 1.0978268769532669, + "grad_norm": 1.014172911643982, + "learning_rate": 4.855912483202134e-05, + "loss": 0.1137, + "step": 30210 + }, + { + "epoch": 1.098190275456065, + "grad_norm": 1.4760230779647827, + "learning_rate": 4.8557388047860334e-05, + "loss": 0.1585, + "step": 30220 + }, + { + "epoch": 1.0985536739588633, + "grad_norm": 1.4756141901016235, + "learning_rate": 4.855565024869529e-05, + "loss": 0.1261, + "step": 30230 + }, + { + "epoch": 1.0989170724616615, + "grad_norm": 2.1977133750915527, + "learning_rate": 4.8553911434601085e-05, + "loss": 0.1532, + "step": 30240 + }, + { + "epoch": 1.0992804709644597, + "grad_norm": 1.9084991216659546, + "learning_rate": 4.855217160565265e-05, + "loss": 0.1611, + "step": 30250 + }, + { + "epoch": 1.099643869467258, + "grad_norm": 1.3657923936843872, + "learning_rate": 4.855043076192494e-05, + "loss": 0.1569, + "step": 30260 + }, + { + "epoch": 1.1000072679700559, + "grad_norm": 0.6824470162391663, + "learning_rate": 4.8548688903492943e-05, + "loss": 0.1632, + "step": 30270 + }, + { + "epoch": 1.100370666472854, + "grad_norm": 0.595958948135376, + "learning_rate": 4.854694603043175e-05, + "loss": 0.1394, + "step": 30280 + }, + { + "epoch": 1.1007340649756523, + "grad_norm": 1.1626547574996948, + "learning_rate": 4.854520214281642e-05, + "loss": 0.1193, + "step": 30290 + }, + { + "epoch": 1.1010974634784505, + "grad_norm": 1.2703717947006226, + "learning_rate": 4.8543457240722104e-05, + "loss": 0.1949, + "step": 30300 + }, + { + "epoch": 1.1014608619812487, + "grad_norm": 1.7159488201141357, + "learning_rate": 4.854171132422399e-05, + "loss": 0.1524, + "step": 30310 + }, + { + "epoch": 1.101824260484047, + "grad_norm": 0.7651236057281494, + "learning_rate": 4.85399643933973e-05, + "loss": 0.1992, + "step": 30320 + }, + { + "epoch": 1.102187658986845, + "grad_norm": 0.7985833287239075, + "learning_rate": 4.8538216448317286e-05, + "loss": 0.1363, + "step": 30330 + }, + { + "epoch": 1.102551057489643, + "grad_norm": 1.2583733797073364, + "learning_rate": 4.853646748905928e-05, + "loss": 0.1215, + "step": 30340 + }, + { + "epoch": 1.1029144559924413, + "grad_norm": 1.1982141733169556, + "learning_rate": 4.853471751569864e-05, + "loss": 0.1656, + "step": 30350 + }, + { + "epoch": 1.1032778544952395, + "grad_norm": 2.399423599243164, + "learning_rate": 4.853296652831075e-05, + "loss": 0.1164, + "step": 30360 + }, + { + "epoch": 1.1036412529980377, + "grad_norm": 1.5785446166992188, + "learning_rate": 4.853121452697107e-05, + "loss": 0.1682, + "step": 30370 + }, + { + "epoch": 1.1040046515008357, + "grad_norm": 1.3818514347076416, + "learning_rate": 4.852946151175508e-05, + "loss": 0.5349, + "step": 30380 + }, + { + "epoch": 1.104368050003634, + "grad_norm": 1.6894676685333252, + "learning_rate": 4.8527707482738305e-05, + "loss": 0.1314, + "step": 30390 + }, + { + "epoch": 1.1047314485064321, + "grad_norm": 1.1517245769500732, + "learning_rate": 4.852595243999633e-05, + "loss": 0.1515, + "step": 30400 + }, + { + "epoch": 1.1050948470092303, + "grad_norm": 1.2149289846420288, + "learning_rate": 4.852419638360477e-05, + "loss": 0.2572, + "step": 30410 + }, + { + "epoch": 1.1054582455120285, + "grad_norm": 0.8241190314292908, + "learning_rate": 4.852243931363929e-05, + "loss": 0.2045, + "step": 30420 + }, + { + "epoch": 1.1058216440148267, + "grad_norm": 0.8909230828285217, + "learning_rate": 4.852068123017559e-05, + "loss": 0.1281, + "step": 30430 + }, + { + "epoch": 1.1061850425176247, + "grad_norm": 0.7718971967697144, + "learning_rate": 4.8518922133289424e-05, + "loss": 0.1207, + "step": 30440 + }, + { + "epoch": 1.106548441020423, + "grad_norm": 3.393324136734009, + "learning_rate": 4.8517162023056575e-05, + "loss": 0.4812, + "step": 30450 + }, + { + "epoch": 1.1069118395232211, + "grad_norm": 1.5000587701797485, + "learning_rate": 4.85154008995529e-05, + "loss": 0.1246, + "step": 30460 + }, + { + "epoch": 1.1072752380260193, + "grad_norm": 1.3177014589309692, + "learning_rate": 4.8513638762854264e-05, + "loss": 0.2352, + "step": 30470 + }, + { + "epoch": 1.1076386365288176, + "grad_norm": 0.8771611452102661, + "learning_rate": 4.8511875613036596e-05, + "loss": 1.4369, + "step": 30480 + }, + { + "epoch": 1.1080020350316158, + "grad_norm": 0.46630170941352844, + "learning_rate": 4.8510111450175865e-05, + "loss": 0.1184, + "step": 30490 + }, + { + "epoch": 1.1083654335344137, + "grad_norm": 1.582541823387146, + "learning_rate": 4.850834627434808e-05, + "loss": 0.154, + "step": 30500 + }, + { + "epoch": 1.108728832037212, + "grad_norm": 0.9425756335258484, + "learning_rate": 4.850658008562929e-05, + "loss": 0.1632, + "step": 30510 + }, + { + "epoch": 1.1090922305400102, + "grad_norm": 0.6453799605369568, + "learning_rate": 4.8504812884095616e-05, + "loss": 0.2024, + "step": 30520 + }, + { + "epoch": 1.1094556290428084, + "grad_norm": 0.8643505573272705, + "learning_rate": 4.850304466982317e-05, + "loss": 0.1373, + "step": 30530 + }, + { + "epoch": 1.1098190275456066, + "grad_norm": 0.6463938355445862, + "learning_rate": 4.850127544288816e-05, + "loss": 0.1308, + "step": 30540 + }, + { + "epoch": 1.1101824260484048, + "grad_norm": 1.2465693950653076, + "learning_rate": 4.8499505203366816e-05, + "loss": 0.1628, + "step": 30550 + }, + { + "epoch": 1.1105458245512028, + "grad_norm": 1.085317850112915, + "learning_rate": 4.84977339513354e-05, + "loss": 0.1533, + "step": 30560 + }, + { + "epoch": 1.110909223054001, + "grad_norm": 0.5834909081459045, + "learning_rate": 4.849596168687022e-05, + "loss": 0.1811, + "step": 30570 + }, + { + "epoch": 1.1112726215567992, + "grad_norm": 1.407309889793396, + "learning_rate": 4.849418841004766e-05, + "loss": 0.1621, + "step": 30580 + }, + { + "epoch": 1.1116360200595974, + "grad_norm": 1.0903669595718384, + "learning_rate": 4.8492414120944116e-05, + "loss": 0.271, + "step": 30590 + }, + { + "epoch": 1.1119994185623956, + "grad_norm": 1.6495404243469238, + "learning_rate": 4.8490638819636036e-05, + "loss": 0.1602, + "step": 30600 + }, + { + "epoch": 1.1119994185623956, + "eval_loss": 0.3786245882511139, + "eval_runtime": 180.2563, + "eval_samples_per_second": 41.13, + "eval_steps_per_second": 5.143, + "eval_wer": 0.17622125002269137, + "step": 30600 + }, + { + "epoch": 1.1123628170651938, + "grad_norm": 1.6046833992004395, + "learning_rate": 4.8488862506199905e-05, + "loss": 0.142, + "step": 30610 + }, + { + "epoch": 1.1127262155679918, + "grad_norm": 0.7779229879379272, + "learning_rate": 4.848708518071226e-05, + "loss": 0.1556, + "step": 30620 + }, + { + "epoch": 1.11308961407079, + "grad_norm": 6.0123677253723145, + "learning_rate": 4.848530684324969e-05, + "loss": 0.1379, + "step": 30630 + }, + { + "epoch": 1.1134530125735882, + "grad_norm": 1.1593163013458252, + "learning_rate": 4.8483527493888796e-05, + "loss": 0.1091, + "step": 30640 + }, + { + "epoch": 1.1138164110763864, + "grad_norm": 1.1061301231384277, + "learning_rate": 4.848174713270627e-05, + "loss": 0.1521, + "step": 30650 + }, + { + "epoch": 1.1141798095791846, + "grad_norm": 2.006169080734253, + "learning_rate": 4.8479965759778804e-05, + "loss": 0.1188, + "step": 30660 + }, + { + "epoch": 1.1145432080819826, + "grad_norm": 0.632653534412384, + "learning_rate": 4.8478183375183154e-05, + "loss": 0.2039, + "step": 30670 + }, + { + "epoch": 1.1149066065847808, + "grad_norm": 2.2631378173828125, + "learning_rate": 4.847639997899611e-05, + "loss": 0.1324, + "step": 30680 + }, + { + "epoch": 1.115270005087579, + "grad_norm": 0.7694458365440369, + "learning_rate": 4.847461557129454e-05, + "loss": 0.1043, + "step": 30690 + }, + { + "epoch": 1.1156334035903772, + "grad_norm": 1.5386550426483154, + "learning_rate": 4.847283015215529e-05, + "loss": 0.1605, + "step": 30700 + }, + { + "epoch": 1.1159968020931754, + "grad_norm": 0.9068945646286011, + "learning_rate": 4.847104372165531e-05, + "loss": 0.1178, + "step": 30710 + }, + { + "epoch": 1.1163602005959736, + "grad_norm": 1.4700278043746948, + "learning_rate": 4.8469256279871564e-05, + "loss": 0.1458, + "step": 30720 + }, + { + "epoch": 1.1167235990987716, + "grad_norm": 1.125613808631897, + "learning_rate": 4.846746782688108e-05, + "loss": 0.1212, + "step": 30730 + }, + { + "epoch": 1.1170869976015698, + "grad_norm": 1.081297516822815, + "learning_rate": 4.846567836276089e-05, + "loss": 0.1218, + "step": 30740 + }, + { + "epoch": 1.117450396104368, + "grad_norm": 0.6549712419509888, + "learning_rate": 4.846388788758812e-05, + "loss": 0.1684, + "step": 30750 + }, + { + "epoch": 1.1178137946071662, + "grad_norm": 0.7256012558937073, + "learning_rate": 4.84620964014399e-05, + "loss": 0.1425, + "step": 30760 + }, + { + "epoch": 1.1181771931099644, + "grad_norm": 0.6661650538444519, + "learning_rate": 4.846030390439343e-05, + "loss": 0.2043, + "step": 30770 + }, + { + "epoch": 1.1185405916127626, + "grad_norm": 2.5043599605560303, + "learning_rate": 4.845851039652594e-05, + "loss": 0.1337, + "step": 30780 + }, + { + "epoch": 1.1189039901155606, + "grad_norm": 1.7362638711929321, + "learning_rate": 4.84567158779147e-05, + "loss": 0.1146, + "step": 30790 + }, + { + "epoch": 1.1192673886183588, + "grad_norm": 2.156850576400757, + "learning_rate": 4.845492034863703e-05, + "loss": 0.1402, + "step": 30800 + }, + { + "epoch": 1.119630787121157, + "grad_norm": 6.733970642089844, + "learning_rate": 4.8453123808770295e-05, + "loss": 0.137, + "step": 30810 + }, + { + "epoch": 1.1199941856239553, + "grad_norm": 1.2163270711898804, + "learning_rate": 4.84513262583919e-05, + "loss": 0.2038, + "step": 30820 + }, + { + "epoch": 1.1203575841267535, + "grad_norm": 1.0911026000976562, + "learning_rate": 4.84495276975793e-05, + "loss": 0.1247, + "step": 30830 + }, + { + "epoch": 1.1207209826295517, + "grad_norm": 8.4699125289917, + "learning_rate": 4.844772812640998e-05, + "loss": 0.1883, + "step": 30840 + }, + { + "epoch": 1.1210843811323496, + "grad_norm": 1.9448401927947998, + "learning_rate": 4.8445927544961486e-05, + "loss": 0.1259, + "step": 30850 + }, + { + "epoch": 1.1214477796351479, + "grad_norm": 1.2070740461349487, + "learning_rate": 4.844412595331139e-05, + "loss": 0.1478, + "step": 30860 + }, + { + "epoch": 1.121811178137946, + "grad_norm": 0.5514017939567566, + "learning_rate": 4.844232335153733e-05, + "loss": 0.2209, + "step": 30870 + }, + { + "epoch": 1.1221745766407443, + "grad_norm": 0.6462703943252563, + "learning_rate": 4.844051973971696e-05, + "loss": 0.1182, + "step": 30880 + }, + { + "epoch": 1.1225379751435425, + "grad_norm": 0.9222347140312195, + "learning_rate": 4.8438715117927995e-05, + "loss": 0.1079, + "step": 30890 + }, + { + "epoch": 1.1229013736463407, + "grad_norm": 1.1663174629211426, + "learning_rate": 4.8436909486248196e-05, + "loss": 0.1516, + "step": 30900 + }, + { + "epoch": 1.1232647721491387, + "grad_norm": 0.8301449418067932, + "learning_rate": 4.8435102844755356e-05, + "loss": 0.1204, + "step": 30910 + }, + { + "epoch": 1.1236281706519369, + "grad_norm": 0.8328074216842651, + "learning_rate": 4.8433295193527305e-05, + "loss": 0.1432, + "step": 30920 + }, + { + "epoch": 1.123991569154735, + "grad_norm": 1.0741894245147705, + "learning_rate": 4.843148653264195e-05, + "loss": 0.1286, + "step": 30930 + }, + { + "epoch": 1.1243549676575333, + "grad_norm": 1.5792789459228516, + "learning_rate": 4.842967686217721e-05, + "loss": 0.1235, + "step": 30940 + }, + { + "epoch": 1.1247183661603315, + "grad_norm": 10.680551528930664, + "learning_rate": 4.8427866182211056e-05, + "loss": 0.2624, + "step": 30950 + }, + { + "epoch": 1.1250817646631295, + "grad_norm": 0.753760576248169, + "learning_rate": 4.8426054492821503e-05, + "loss": 0.1185, + "step": 30960 + }, + { + "epoch": 1.1254451631659277, + "grad_norm": 0.681735098361969, + "learning_rate": 4.8424241794086614e-05, + "loss": 0.1249, + "step": 30970 + }, + { + "epoch": 1.125808561668726, + "grad_norm": 1.0460690259933472, + "learning_rate": 4.842242808608449e-05, + "loss": 0.1437, + "step": 30980 + }, + { + "epoch": 1.126171960171524, + "grad_norm": 0.8870137929916382, + "learning_rate": 4.8420613368893275e-05, + "loss": 0.1321, + "step": 30990 + }, + { + "epoch": 1.1265353586743223, + "grad_norm": 1.765331506729126, + "learning_rate": 4.841879764259116e-05, + "loss": 0.1722, + "step": 31000 + }, + { + "epoch": 1.1268987571771205, + "grad_norm": 2.9972617626190186, + "learning_rate": 4.841698090725638e-05, + "loss": 0.095, + "step": 31010 + }, + { + "epoch": 1.1272621556799187, + "grad_norm": 0.7516260743141174, + "learning_rate": 4.841516316296722e-05, + "loss": 0.2073, + "step": 31020 + }, + { + "epoch": 1.1276255541827167, + "grad_norm": 7.545155048370361, + "learning_rate": 4.841334440980197e-05, + "loss": 0.2066, + "step": 31030 + }, + { + "epoch": 1.127988952685515, + "grad_norm": 0.7127543687820435, + "learning_rate": 4.841152464783903e-05, + "loss": 0.1483, + "step": 31040 + }, + { + "epoch": 1.1283523511883131, + "grad_norm": 0.8178777694702148, + "learning_rate": 4.8409703877156786e-05, + "loss": 0.122, + "step": 31050 + }, + { + "epoch": 1.1287157496911113, + "grad_norm": 2.2176194190979004, + "learning_rate": 4.84078820978337e-05, + "loss": 0.1074, + "step": 31060 + }, + { + "epoch": 1.1290791481939095, + "grad_norm": 0.5790374279022217, + "learning_rate": 4.8406059309948246e-05, + "loss": 0.2401, + "step": 31070 + }, + { + "epoch": 1.1294425466967075, + "grad_norm": 1.335080623626709, + "learning_rate": 4.840423551357899e-05, + "loss": 0.1412, + "step": 31080 + }, + { + "epoch": 1.1298059451995057, + "grad_norm": 2.9304592609405518, + "learning_rate": 4.840241070880449e-05, + "loss": 0.1137, + "step": 31090 + }, + { + "epoch": 1.130169343702304, + "grad_norm": 0.6828371286392212, + "learning_rate": 4.840058489570338e-05, + "loss": 0.162, + "step": 31100 + }, + { + "epoch": 1.1305327422051021, + "grad_norm": 0.7623898983001709, + "learning_rate": 4.8398758074354334e-05, + "loss": 0.1164, + "step": 31110 + }, + { + "epoch": 1.1308961407079003, + "grad_norm": 0.44123783707618713, + "learning_rate": 4.8396930244836045e-05, + "loss": 0.1378, + "step": 31120 + }, + { + "epoch": 1.1312595392106983, + "grad_norm": 0.880264937877655, + "learning_rate": 4.839510140722728e-05, + "loss": 0.1168, + "step": 31130 + }, + { + "epoch": 1.1316229377134965, + "grad_norm": 0.9946479797363281, + "learning_rate": 4.839327156160684e-05, + "loss": 0.1099, + "step": 31140 + }, + { + "epoch": 1.1319863362162947, + "grad_norm": 1.5061123371124268, + "learning_rate": 4.8391440708053565e-05, + "loss": 0.1167, + "step": 31150 + }, + { + "epoch": 1.132349734719093, + "grad_norm": 3.687218427658081, + "learning_rate": 4.838960884664633e-05, + "loss": 0.1141, + "step": 31160 + }, + { + "epoch": 1.1327131332218912, + "grad_norm": 1.015309453010559, + "learning_rate": 4.838777597746408e-05, + "loss": 0.1851, + "step": 31170 + }, + { + "epoch": 1.1330765317246894, + "grad_norm": 0.9765021204948425, + "learning_rate": 4.838594210058577e-05, + "loss": 0.1276, + "step": 31180 + }, + { + "epoch": 1.1334399302274876, + "grad_norm": 0.986419677734375, + "learning_rate": 4.838410721609041e-05, + "loss": 0.107, + "step": 31190 + }, + { + "epoch": 1.1338033287302856, + "grad_norm": 0.850581169128418, + "learning_rate": 4.838227132405709e-05, + "loss": 0.1441, + "step": 31200 + }, + { + "epoch": 1.1338033287302856, + "eval_loss": 0.3806535005569458, + "eval_runtime": 180.0975, + "eval_samples_per_second": 41.167, + "eval_steps_per_second": 5.147, + "eval_wer": 0.17877176103254852, + "step": 31200 + }, + { + "epoch": 1.1341667272330838, + "grad_norm": 0.5492041110992432, + "learning_rate": 4.8380434424564885e-05, + "loss": 0.1644, + "step": 31210 + }, + { + "epoch": 1.134530125735882, + "grad_norm": 0.6244884729385376, + "learning_rate": 4.837859651769295e-05, + "loss": 0.1754, + "step": 31220 + }, + { + "epoch": 1.1348935242386802, + "grad_norm": 0.7327109575271606, + "learning_rate": 4.837675760352047e-05, + "loss": 0.1182, + "step": 31230 + }, + { + "epoch": 1.1352569227414784, + "grad_norm": 1.8642997741699219, + "learning_rate": 4.837491768212669e-05, + "loss": 0.1262, + "step": 31240 + }, + { + "epoch": 1.1356203212442764, + "grad_norm": 0.7738135457038879, + "learning_rate": 4.837307675359086e-05, + "loss": 0.1733, + "step": 31250 + }, + { + "epoch": 1.1359837197470746, + "grad_norm": 1.2241661548614502, + "learning_rate": 4.837123481799232e-05, + "loss": 0.158, + "step": 31260 + }, + { + "epoch": 1.1363471182498728, + "grad_norm": 0.5471898317337036, + "learning_rate": 4.836939187541043e-05, + "loss": 0.1745, + "step": 31270 + }, + { + "epoch": 1.136710516752671, + "grad_norm": 1.110005497932434, + "learning_rate": 4.836754792592459e-05, + "loss": 0.14, + "step": 31280 + }, + { + "epoch": 1.1370739152554692, + "grad_norm": 18.33467674255371, + "learning_rate": 4.836570296961425e-05, + "loss": 0.4864, + "step": 31290 + }, + { + "epoch": 1.1374373137582674, + "grad_norm": 2.1457314491271973, + "learning_rate": 4.836385700655891e-05, + "loss": 0.1431, + "step": 31300 + }, + { + "epoch": 1.1378007122610656, + "grad_norm": 1.3444671630859375, + "learning_rate": 4.8362010036838096e-05, + "loss": 0.1287, + "step": 31310 + }, + { + "epoch": 1.1381641107638636, + "grad_norm": 1.0178183317184448, + "learning_rate": 4.8360162060531395e-05, + "loss": 0.1984, + "step": 31320 + }, + { + "epoch": 1.1385275092666618, + "grad_norm": 1.013101577758789, + "learning_rate": 4.835831307771842e-05, + "loss": 0.1354, + "step": 31330 + }, + { + "epoch": 1.13889090776946, + "grad_norm": 1.1397134065628052, + "learning_rate": 4.8356463088478855e-05, + "loss": 0.1149, + "step": 31340 + }, + { + "epoch": 1.1392543062722582, + "grad_norm": 0.6582014560699463, + "learning_rate": 4.835461209289239e-05, + "loss": 0.1581, + "step": 31350 + }, + { + "epoch": 1.1396177047750564, + "grad_norm": 6.679111480712891, + "learning_rate": 4.835276009103878e-05, + "loss": 0.1136, + "step": 31360 + }, + { + "epoch": 1.1399811032778544, + "grad_norm": 1.710073709487915, + "learning_rate": 4.835090708299784e-05, + "loss": 0.1523, + "step": 31370 + }, + { + "epoch": 1.1403445017806526, + "grad_norm": 0.8167402148246765, + "learning_rate": 4.834905306884939e-05, + "loss": 0.1333, + "step": 31380 + }, + { + "epoch": 1.1407079002834508, + "grad_norm": 1.0377804040908813, + "learning_rate": 4.834719804867332e-05, + "loss": 0.1484, + "step": 31390 + }, + { + "epoch": 1.141071298786249, + "grad_norm": 0.6715871691703796, + "learning_rate": 4.8345342022549556e-05, + "loss": 0.133, + "step": 31400 + }, + { + "epoch": 1.1414346972890472, + "grad_norm": 0.8593924641609192, + "learning_rate": 4.834348499055807e-05, + "loss": 0.1381, + "step": 31410 + }, + { + "epoch": 1.1417980957918452, + "grad_norm": 1.4667985439300537, + "learning_rate": 4.834162695277887e-05, + "loss": 0.2026, + "step": 31420 + }, + { + "epoch": 1.1421614942946434, + "grad_norm": 1.1011070013046265, + "learning_rate": 4.8339767909292014e-05, + "loss": 0.1324, + "step": 31430 + }, + { + "epoch": 1.1425248927974416, + "grad_norm": 0.6192152500152588, + "learning_rate": 4.83379078601776e-05, + "loss": 0.116, + "step": 31440 + }, + { + "epoch": 1.1428882913002398, + "grad_norm": 0.7945598363876343, + "learning_rate": 4.8336046805515775e-05, + "loss": 0.1652, + "step": 31450 + }, + { + "epoch": 1.143251689803038, + "grad_norm": 0.9201329350471497, + "learning_rate": 4.833418474538672e-05, + "loss": 0.1279, + "step": 31460 + }, + { + "epoch": 1.1436150883058362, + "grad_norm": 0.7190477252006531, + "learning_rate": 4.833232167987067e-05, + "loss": 0.1603, + "step": 31470 + }, + { + "epoch": 1.1439784868086345, + "grad_norm": 0.92894047498703, + "learning_rate": 4.83304576090479e-05, + "loss": 0.1268, + "step": 31480 + }, + { + "epoch": 1.1443418853114324, + "grad_norm": 0.7764700055122375, + "learning_rate": 4.8328592532998716e-05, + "loss": 0.1307, + "step": 31490 + }, + { + "epoch": 1.1447052838142306, + "grad_norm": 1.3679301738739014, + "learning_rate": 4.832672645180348e-05, + "loss": 0.1734, + "step": 31500 + }, + { + "epoch": 1.1450686823170289, + "grad_norm": 2.3595213890075684, + "learning_rate": 4.832485936554261e-05, + "loss": 0.1335, + "step": 31510 + }, + { + "epoch": 1.145432080819827, + "grad_norm": 0.610569953918457, + "learning_rate": 4.832299127429653e-05, + "loss": 0.249, + "step": 31520 + }, + { + "epoch": 1.1457954793226253, + "grad_norm": 1.4595023393630981, + "learning_rate": 4.832112217814575e-05, + "loss": 0.1103, + "step": 31530 + }, + { + "epoch": 1.1461588778254233, + "grad_norm": 5.723475933074951, + "learning_rate": 4.831925207717077e-05, + "loss": 0.126, + "step": 31540 + }, + { + "epoch": 1.1465222763282215, + "grad_norm": 1.8982267379760742, + "learning_rate": 4.8317380971452205e-05, + "loss": 0.1422, + "step": 31550 + }, + { + "epoch": 1.1468856748310197, + "grad_norm": 0.8732501268386841, + "learning_rate": 4.831550886107066e-05, + "loss": 0.1364, + "step": 31560 + }, + { + "epoch": 1.1472490733338179, + "grad_norm": 1.0855740308761597, + "learning_rate": 4.831363574610679e-05, + "loss": 0.1728, + "step": 31570 + }, + { + "epoch": 1.147612471836616, + "grad_norm": 1.5332953929901123, + "learning_rate": 4.8311761626641304e-05, + "loss": 0.1336, + "step": 31580 + }, + { + "epoch": 1.1479758703394143, + "grad_norm": 0.5183860063552856, + "learning_rate": 4.8309886502754954e-05, + "loss": 0.6253, + "step": 31590 + }, + { + "epoch": 1.1483392688422125, + "grad_norm": 0.7912465929985046, + "learning_rate": 4.830801037452853e-05, + "loss": 0.1644, + "step": 31600 + }, + { + "epoch": 1.1487026673450105, + "grad_norm": 1.5750758647918701, + "learning_rate": 4.8306133242042875e-05, + "loss": 0.1787, + "step": 31610 + }, + { + "epoch": 1.1490660658478087, + "grad_norm": 0.5864933133125305, + "learning_rate": 4.830425510537886e-05, + "loss": 0.1751, + "step": 31620 + }, + { + "epoch": 1.1494294643506069, + "grad_norm": 0.9228208661079407, + "learning_rate": 4.830237596461741e-05, + "loss": 0.8842, + "step": 31630 + }, + { + "epoch": 1.149792862853405, + "grad_norm": 1.0034486055374146, + "learning_rate": 4.8300495819839486e-05, + "loss": 0.1047, + "step": 31640 + }, + { + "epoch": 1.1501562613562033, + "grad_norm": 1.125537395477295, + "learning_rate": 4.82986146711261e-05, + "loss": 0.1447, + "step": 31650 + }, + { + "epoch": 1.1505196598590013, + "grad_norm": 2.6315014362335205, + "learning_rate": 4.829673251855831e-05, + "loss": 0.116, + "step": 31660 + }, + { + "epoch": 1.1508830583617995, + "grad_norm": 0.8134027719497681, + "learning_rate": 4.82948493622172e-05, + "loss": 0.1889, + "step": 31670 + }, + { + "epoch": 1.1512464568645977, + "grad_norm": 1.003691554069519, + "learning_rate": 4.8292965202183916e-05, + "loss": 0.1477, + "step": 31680 + }, + { + "epoch": 1.151609855367396, + "grad_norm": 1.7551583051681519, + "learning_rate": 4.829108003853964e-05, + "loss": 0.1156, + "step": 31690 + }, + { + "epoch": 1.1519732538701941, + "grad_norm": 1.5970351696014404, + "learning_rate": 4.8289193871365594e-05, + "loss": 0.3698, + "step": 31700 + }, + { + "epoch": 1.152336652372992, + "grad_norm": 4.339359760284424, + "learning_rate": 4.828730670074305e-05, + "loss": 0.1259, + "step": 31710 + }, + { + "epoch": 1.1527000508757903, + "grad_norm": 0.76881343126297, + "learning_rate": 4.828541852675331e-05, + "loss": 0.3217, + "step": 31720 + }, + { + "epoch": 1.1530634493785885, + "grad_norm": 3.1044371128082275, + "learning_rate": 4.828352934947774e-05, + "loss": 0.13, + "step": 31730 + }, + { + "epoch": 1.1534268478813867, + "grad_norm": 0.5273496508598328, + "learning_rate": 4.828163916899774e-05, + "loss": 0.1197, + "step": 31740 + }, + { + "epoch": 1.153790246384185, + "grad_norm": 0.8816530108451843, + "learning_rate": 4.827974798539473e-05, + "loss": 0.1633, + "step": 31750 + }, + { + "epoch": 1.1541536448869831, + "grad_norm": 1.220786452293396, + "learning_rate": 4.827785579875022e-05, + "loss": 0.1293, + "step": 31760 + }, + { + "epoch": 1.1545170433897813, + "grad_norm": 2.700749397277832, + "learning_rate": 4.827596260914572e-05, + "loss": 0.1427, + "step": 31770 + }, + { + "epoch": 1.1548804418925793, + "grad_norm": 1.6649949550628662, + "learning_rate": 4.827406841666281e-05, + "loss": 3.8743, + "step": 31780 + }, + { + "epoch": 1.1552438403953775, + "grad_norm": 1.2381266355514526, + "learning_rate": 4.827217322138311e-05, + "loss": 0.1322, + "step": 31790 + }, + { + "epoch": 1.1556072388981757, + "grad_norm": 0.6668787598609924, + "learning_rate": 4.8270277023388255e-05, + "loss": 0.1566, + "step": 31800 + }, + { + "epoch": 1.1556072388981757, + "eval_loss": 0.35771968960762024, + "eval_runtime": 257.3302, + "eval_samples_per_second": 28.811, + "eval_steps_per_second": 3.602, + "eval_wer": 0.1810772051482201, + "step": 31800 + }, + { + "epoch": 1.155970637400974, + "grad_norm": 1.1917558908462524, + "learning_rate": 4.826837982275996e-05, + "loss": 0.1274, + "step": 31810 + }, + { + "epoch": 1.1563340359037722, + "grad_norm": 0.5347509980201721, + "learning_rate": 4.8266481619579973e-05, + "loss": 0.1872, + "step": 31820 + }, + { + "epoch": 1.1566974344065701, + "grad_norm": 1.023681402206421, + "learning_rate": 4.8264582413930076e-05, + "loss": 0.1505, + "step": 31830 + }, + { + "epoch": 1.1570608329093683, + "grad_norm": 1.00868821144104, + "learning_rate": 4.82626822058921e-05, + "loss": 0.1364, + "step": 31840 + }, + { + "epoch": 1.1574242314121665, + "grad_norm": 1.4557231664657593, + "learning_rate": 4.8260780995547905e-05, + "loss": 0.1799, + "step": 31850 + }, + { + "epoch": 1.1577876299149648, + "grad_norm": 1.1228946447372437, + "learning_rate": 4.8258878782979434e-05, + "loss": 0.1392, + "step": 31860 + }, + { + "epoch": 1.158151028417763, + "grad_norm": 0.6818620562553406, + "learning_rate": 4.825716593483377e-05, + "loss": 0.1876, + "step": 31870 + }, + { + "epoch": 1.1585144269205612, + "grad_norm": 1.4734445810317993, + "learning_rate": 4.8255261818264976e-05, + "loss": 0.1502, + "step": 31880 + }, + { + "epoch": 1.1588778254233594, + "grad_norm": 0.8915801048278809, + "learning_rate": 4.825335669970969e-05, + "loss": 0.1207, + "step": 31890 + }, + { + "epoch": 1.1592412239261574, + "grad_norm": 0.324372798204422, + "learning_rate": 4.825145057925e-05, + "loss": 0.1674, + "step": 31900 + }, + { + "epoch": 1.1596046224289556, + "grad_norm": 1.1831437349319458, + "learning_rate": 4.824954345696803e-05, + "loss": 0.1192, + "step": 31910 + }, + { + "epoch": 1.1599680209317538, + "grad_norm": 0.5911235809326172, + "learning_rate": 4.824763533294596e-05, + "loss": 0.1652, + "step": 31920 + }, + { + "epoch": 1.160331419434552, + "grad_norm": 2.4116334915161133, + "learning_rate": 4.8245726207265997e-05, + "loss": 0.1297, + "step": 31930 + }, + { + "epoch": 1.1606948179373502, + "grad_norm": 0.5179087519645691, + "learning_rate": 4.8243816080010404e-05, + "loss": 0.1066, + "step": 31940 + }, + { + "epoch": 1.1610582164401482, + "grad_norm": 0.6537795066833496, + "learning_rate": 4.824190495126148e-05, + "loss": 0.1409, + "step": 31950 + }, + { + "epoch": 1.1614216149429464, + "grad_norm": 1.0357365608215332, + "learning_rate": 4.823999282110155e-05, + "loss": 0.1146, + "step": 31960 + }, + { + "epoch": 1.1617850134457446, + "grad_norm": 0.4709915220737457, + "learning_rate": 4.823807968961303e-05, + "loss": 0.1726, + "step": 31970 + }, + { + "epoch": 1.1621484119485428, + "grad_norm": 0.9699262380599976, + "learning_rate": 4.823616555687833e-05, + "loss": 0.125, + "step": 31980 + }, + { + "epoch": 1.162511810451341, + "grad_norm": 1.2052152156829834, + "learning_rate": 4.8234250422979946e-05, + "loss": 0.1164, + "step": 31990 + }, + { + "epoch": 1.1628752089541392, + "grad_norm": 1.1892735958099365, + "learning_rate": 4.823233428800037e-05, + "loss": 0.1412, + "step": 32000 + }, + { + "epoch": 1.1632386074569372, + "grad_norm": 0.7427589893341064, + "learning_rate": 4.8230417152022165e-05, + "loss": 0.1315, + "step": 32010 + }, + { + "epoch": 1.1636020059597354, + "grad_norm": 0.7582072019577026, + "learning_rate": 4.8228499015127945e-05, + "loss": 0.1783, + "step": 32020 + }, + { + "epoch": 1.1639654044625336, + "grad_norm": 1.1409790515899658, + "learning_rate": 4.8226579877400345e-05, + "loss": 0.1303, + "step": 32030 + }, + { + "epoch": 1.1643288029653318, + "grad_norm": 1.1382596492767334, + "learning_rate": 4.822465973892206e-05, + "loss": 0.1426, + "step": 32040 + }, + { + "epoch": 1.16469220146813, + "grad_norm": 1.27096688747406, + "learning_rate": 4.822273859977583e-05, + "loss": 0.1505, + "step": 32050 + }, + { + "epoch": 1.1650555999709282, + "grad_norm": 1.5508397817611694, + "learning_rate": 4.822081646004441e-05, + "loss": 0.1366, + "step": 32060 + }, + { + "epoch": 1.1654189984737262, + "grad_norm": 0.2970573604106903, + "learning_rate": 4.821889331981063e-05, + "loss": 0.1505, + "step": 32070 + }, + { + "epoch": 1.1657823969765244, + "grad_norm": 0.9228662848472595, + "learning_rate": 4.821696917915736e-05, + "loss": 0.1112, + "step": 32080 + }, + { + "epoch": 1.1661457954793226, + "grad_norm": 0.770660936832428, + "learning_rate": 4.821504403816748e-05, + "loss": 0.1284, + "step": 32090 + }, + { + "epoch": 1.1665091939821208, + "grad_norm": 0.8875694274902344, + "learning_rate": 4.8213117896923954e-05, + "loss": 0.1421, + "step": 32100 + }, + { + "epoch": 1.166872592484919, + "grad_norm": 0.9725656509399414, + "learning_rate": 4.821119075550978e-05, + "loss": 0.1269, + "step": 32110 + }, + { + "epoch": 1.167235990987717, + "grad_norm": 0.5882539749145508, + "learning_rate": 4.820926261400797e-05, + "loss": 0.159, + "step": 32120 + }, + { + "epoch": 1.1675993894905152, + "grad_norm": 0.9551408886909485, + "learning_rate": 4.820733347250162e-05, + "loss": 0.1788, + "step": 32130 + }, + { + "epoch": 1.1679627879933134, + "grad_norm": 0.929642915725708, + "learning_rate": 4.820540333107384e-05, + "loss": 0.1329, + "step": 32140 + }, + { + "epoch": 1.1683261864961116, + "grad_norm": 1.707643747329712, + "learning_rate": 4.8203472189807795e-05, + "loss": 0.1317, + "step": 32150 + }, + { + "epoch": 1.1686895849989098, + "grad_norm": 1.401150107383728, + "learning_rate": 4.82015400487867e-05, + "loss": 0.126, + "step": 32160 + }, + { + "epoch": 1.169052983501708, + "grad_norm": 0.7058550715446472, + "learning_rate": 4.8199606908093785e-05, + "loss": 0.1811, + "step": 32170 + }, + { + "epoch": 1.1694163820045063, + "grad_norm": 1.2024914026260376, + "learning_rate": 4.8197672767812366e-05, + "loss": 0.1524, + "step": 32180 + }, + { + "epoch": 1.1697797805073042, + "grad_norm": 0.8119955062866211, + "learning_rate": 4.819573762802575e-05, + "loss": 0.1281, + "step": 32190 + }, + { + "epoch": 1.1701431790101025, + "grad_norm": 0.8531884551048279, + "learning_rate": 4.8193801488817336e-05, + "loss": 0.1362, + "step": 32200 + }, + { + "epoch": 1.1705065775129007, + "grad_norm": 0.7170140743255615, + "learning_rate": 4.819186435027054e-05, + "loss": 0.1276, + "step": 32210 + }, + { + "epoch": 1.1708699760156989, + "grad_norm": 1.7031468152999878, + "learning_rate": 4.8189926212468825e-05, + "loss": 0.1369, + "step": 32220 + }, + { + "epoch": 1.171233374518497, + "grad_norm": 1.639916181564331, + "learning_rate": 4.81879870754957e-05, + "loss": 0.1318, + "step": 32230 + }, + { + "epoch": 1.171596773021295, + "grad_norm": 1.0148886442184448, + "learning_rate": 4.8186046939434716e-05, + "loss": 0.119, + "step": 32240 + }, + { + "epoch": 1.1719601715240933, + "grad_norm": 1.4640549421310425, + "learning_rate": 4.818410580436947e-05, + "loss": 0.1603, + "step": 32250 + }, + { + "epoch": 1.1723235700268915, + "grad_norm": 1.0362626314163208, + "learning_rate": 4.818216367038358e-05, + "loss": 0.1318, + "step": 32260 + }, + { + "epoch": 1.1726869685296897, + "grad_norm": 0.507990837097168, + "learning_rate": 4.818022053756076e-05, + "loss": 0.1681, + "step": 32270 + }, + { + "epoch": 1.1730503670324879, + "grad_norm": 0.7118284106254578, + "learning_rate": 4.81782764059847e-05, + "loss": 0.1129, + "step": 32280 + }, + { + "epoch": 1.173413765535286, + "grad_norm": 2.637918472290039, + "learning_rate": 4.8176331275739175e-05, + "loss": 0.1866, + "step": 32290 + }, + { + "epoch": 1.173777164038084, + "grad_norm": 1.5417594909667969, + "learning_rate": 4.817438514690801e-05, + "loss": 0.1382, + "step": 32300 + }, + { + "epoch": 1.1741405625408823, + "grad_norm": 1.4842432737350464, + "learning_rate": 4.817243801957503e-05, + "loss": 0.1381, + "step": 32310 + }, + { + "epoch": 1.1745039610436805, + "grad_norm": 2.0502350330352783, + "learning_rate": 4.817048989382415e-05, + "loss": 0.1515, + "step": 32320 + }, + { + "epoch": 1.1748673595464787, + "grad_norm": 1.8963838815689087, + "learning_rate": 4.81685407697393e-05, + "loss": 0.1453, + "step": 32330 + }, + { + "epoch": 1.175230758049277, + "grad_norm": 0.6867222785949707, + "learning_rate": 4.8166590647404466e-05, + "loss": 0.1566, + "step": 32340 + }, + { + "epoch": 1.175594156552075, + "grad_norm": 1.4324911832809448, + "learning_rate": 4.8164639526903665e-05, + "loss": 0.1261, + "step": 32350 + }, + { + "epoch": 1.175957555054873, + "grad_norm": 4.706410884857178, + "learning_rate": 4.8162687408320963e-05, + "loss": 0.1168, + "step": 32360 + }, + { + "epoch": 1.1763209535576713, + "grad_norm": 0.6849080324172974, + "learning_rate": 4.8160734291740476e-05, + "loss": 0.1587, + "step": 32370 + }, + { + "epoch": 1.1766843520604695, + "grad_norm": 3.180955171585083, + "learning_rate": 4.815878017724636e-05, + "loss": 0.1312, + "step": 32380 + }, + { + "epoch": 1.1770477505632677, + "grad_norm": 0.5583860278129578, + "learning_rate": 4.81568250649228e-05, + "loss": 0.1385, + "step": 32390 + }, + { + "epoch": 1.177411149066066, + "grad_norm": 0.8250964283943176, + "learning_rate": 4.8154868954854036e-05, + "loss": 0.1393, + "step": 32400 + }, + { + "epoch": 1.177411149066066, + "eval_loss": 0.38513997197151184, + "eval_runtime": 179.4965, + "eval_samples_per_second": 41.304, + "eval_steps_per_second": 5.164, + "eval_wer": 0.18090475066712655, + "step": 32400 + }, + { + "epoch": 1.177774547568864, + "grad_norm": 2.3377466201782227, + "learning_rate": 4.815291184712437e-05, + "loss": 0.1197, + "step": 32410 + }, + { + "epoch": 1.1781379460716621, + "grad_norm": 0.7508591413497925, + "learning_rate": 4.81509537418181e-05, + "loss": 0.1786, + "step": 32420 + }, + { + "epoch": 1.1785013445744603, + "grad_norm": 0.8103131651878357, + "learning_rate": 4.81489946390196e-05, + "loss": 0.1617, + "step": 32430 + }, + { + "epoch": 1.1788647430772585, + "grad_norm": 1.2582241296768188, + "learning_rate": 4.814703453881329e-05, + "loss": 0.1326, + "step": 32440 + }, + { + "epoch": 1.1792281415800567, + "grad_norm": 1.110107660293579, + "learning_rate": 4.8145073441283613e-05, + "loss": 0.1504, + "step": 32450 + }, + { + "epoch": 1.179591540082855, + "grad_norm": 0.9912093281745911, + "learning_rate": 4.814311134651509e-05, + "loss": 0.133, + "step": 32460 + }, + { + "epoch": 1.1799549385856531, + "grad_norm": 1.1711434125900269, + "learning_rate": 4.814114825459223e-05, + "loss": 0.1328, + "step": 32470 + }, + { + "epoch": 1.1803183370884511, + "grad_norm": 3.884737491607666, + "learning_rate": 4.813918416559963e-05, + "loss": 0.1225, + "step": 32480 + }, + { + "epoch": 1.1806817355912493, + "grad_norm": 0.9459224939346313, + "learning_rate": 4.8137219079621906e-05, + "loss": 0.1507, + "step": 32490 + }, + { + "epoch": 1.1810451340940475, + "grad_norm": 1.7159967422485352, + "learning_rate": 4.813525299674374e-05, + "loss": 0.1823, + "step": 32500 + }, + { + "epoch": 1.1814085325968458, + "grad_norm": 1.3824647665023804, + "learning_rate": 4.8133285917049844e-05, + "loss": 0.1393, + "step": 32510 + }, + { + "epoch": 1.181771931099644, + "grad_norm": 214.12107849121094, + "learning_rate": 4.813131784062496e-05, + "loss": 4.0762, + "step": 32520 + }, + { + "epoch": 1.182135329602442, + "grad_norm": 2.5384116172790527, + "learning_rate": 4.812934876755389e-05, + "loss": 0.1379, + "step": 32530 + }, + { + "epoch": 1.1824987281052401, + "grad_norm": 1.4254207611083984, + "learning_rate": 4.812737869792148e-05, + "loss": 0.1461, + "step": 32540 + }, + { + "epoch": 1.1828621266080384, + "grad_norm": 1.571662187576294, + "learning_rate": 4.812540763181261e-05, + "loss": 0.178, + "step": 32550 + }, + { + "epoch": 1.1832255251108366, + "grad_norm": 5.712926864624023, + "learning_rate": 4.8123435569312206e-05, + "loss": 0.1071, + "step": 32560 + }, + { + "epoch": 1.1835889236136348, + "grad_norm": 0.8147953152656555, + "learning_rate": 4.812146251050523e-05, + "loss": 0.1211, + "step": 32570 + }, + { + "epoch": 1.183952322116433, + "grad_norm": 1.1877583265304565, + "learning_rate": 4.8119488455476714e-05, + "loss": 0.1668, + "step": 32580 + }, + { + "epoch": 1.184315720619231, + "grad_norm": 0.7466074824333191, + "learning_rate": 4.8117513404311686e-05, + "loss": 0.098, + "step": 32590 + }, + { + "epoch": 1.1846791191220292, + "grad_norm": 1.6904805898666382, + "learning_rate": 4.8115537357095265e-05, + "loss": 0.1626, + "step": 32600 + }, + { + "epoch": 1.1850425176248274, + "grad_norm": 0.879503607749939, + "learning_rate": 4.811356031391259e-05, + "loss": 0.1129, + "step": 32610 + }, + { + "epoch": 1.1854059161276256, + "grad_norm": 2.447317600250244, + "learning_rate": 4.811158227484883e-05, + "loss": 0.1255, + "step": 32620 + }, + { + "epoch": 1.1857693146304238, + "grad_norm": 0.9513424038887024, + "learning_rate": 4.810960323998922e-05, + "loss": 0.1347, + "step": 32630 + }, + { + "epoch": 1.186132713133222, + "grad_norm": 0.46179428696632385, + "learning_rate": 4.810762320941903e-05, + "loss": 0.1002, + "step": 32640 + }, + { + "epoch": 1.18649611163602, + "grad_norm": 0.7595782279968262, + "learning_rate": 4.8105642183223585e-05, + "loss": 0.1585, + "step": 32650 + }, + { + "epoch": 1.1868595101388182, + "grad_norm": 1.8892844915390015, + "learning_rate": 4.8103660161488216e-05, + "loss": 0.1475, + "step": 32660 + }, + { + "epoch": 1.1872229086416164, + "grad_norm": 3.290606737136841, + "learning_rate": 4.810167714429834e-05, + "loss": 0.1603, + "step": 32670 + }, + { + "epoch": 1.1875863071444146, + "grad_norm": 1.3222955465316772, + "learning_rate": 4.809969313173939e-05, + "loss": 0.1251, + "step": 32680 + }, + { + "epoch": 1.1879497056472128, + "grad_norm": 1.8568757772445679, + "learning_rate": 4.809770812389686e-05, + "loss": 0.1517, + "step": 32690 + }, + { + "epoch": 1.1883131041500108, + "grad_norm": 1.3318365812301636, + "learning_rate": 4.8095722120856255e-05, + "loss": 0.1778, + "step": 32700 + }, + { + "epoch": 1.188676502652809, + "grad_norm": 1.513069748878479, + "learning_rate": 4.8093735122703164e-05, + "loss": 0.1325, + "step": 32710 + }, + { + "epoch": 1.1890399011556072, + "grad_norm": 0.37486693263053894, + "learning_rate": 4.809174712952319e-05, + "loss": 0.1482, + "step": 32720 + }, + { + "epoch": 1.1894032996584054, + "grad_norm": 3.7855522632598877, + "learning_rate": 4.8089758141402e-05, + "loss": 0.1237, + "step": 32730 + }, + { + "epoch": 1.1897666981612036, + "grad_norm": 0.6902849674224854, + "learning_rate": 4.8087768158425285e-05, + "loss": 0.1099, + "step": 32740 + }, + { + "epoch": 1.1901300966640018, + "grad_norm": 0.6842343211174011, + "learning_rate": 4.808577718067878e-05, + "loss": 0.17, + "step": 32750 + }, + { + "epoch": 1.1904934951668, + "grad_norm": 0.9745518565177917, + "learning_rate": 4.808378520824829e-05, + "loss": 0.1446, + "step": 32760 + }, + { + "epoch": 1.190856893669598, + "grad_norm": 1.468474268913269, + "learning_rate": 4.808179224121962e-05, + "loss": 0.1563, + "step": 32770 + }, + { + "epoch": 1.1912202921723962, + "grad_norm": 1.6509790420532227, + "learning_rate": 4.807979827967864e-05, + "loss": 0.1416, + "step": 32780 + }, + { + "epoch": 1.1915836906751944, + "grad_norm": 0.9928446412086487, + "learning_rate": 4.8077803323711277e-05, + "loss": 1.6808, + "step": 32790 + }, + { + "epoch": 1.1919470891779926, + "grad_norm": 3.463270425796509, + "learning_rate": 4.807580737340348e-05, + "loss": 0.1462, + "step": 32800 + }, + { + "epoch": 1.1923104876807908, + "grad_norm": 1.0357753038406372, + "learning_rate": 4.807381042884125e-05, + "loss": 0.1475, + "step": 32810 + }, + { + "epoch": 1.1926738861835888, + "grad_norm": 0.6824864745140076, + "learning_rate": 4.807181249011062e-05, + "loss": 0.1884, + "step": 32820 + }, + { + "epoch": 1.193037284686387, + "grad_norm": 8.779791831970215, + "learning_rate": 4.8069813557297685e-05, + "loss": 0.128, + "step": 32830 + }, + { + "epoch": 1.1934006831891852, + "grad_norm": 1.07723867893219, + "learning_rate": 4.806781363048856e-05, + "loss": 0.1433, + "step": 32840 + }, + { + "epoch": 1.1937640816919834, + "grad_norm": 1.9113037586212158, + "learning_rate": 4.806581270976942e-05, + "loss": 0.1575, + "step": 32850 + }, + { + "epoch": 1.1941274801947817, + "grad_norm": 1.2443821430206299, + "learning_rate": 4.806381079522648e-05, + "loss": 0.1585, + "step": 32860 + }, + { + "epoch": 1.1944908786975799, + "grad_norm": 0.46389827132225037, + "learning_rate": 4.8061807886946e-05, + "loss": 0.2578, + "step": 32870 + }, + { + "epoch": 1.1948542772003778, + "grad_norm": 0.9189543128013611, + "learning_rate": 4.8059803985014274e-05, + "loss": 0.125, + "step": 32880 + }, + { + "epoch": 1.195217675703176, + "grad_norm": 0.8623115420341492, + "learning_rate": 4.805779908951763e-05, + "loss": 0.1094, + "step": 32890 + }, + { + "epoch": 1.1955810742059743, + "grad_norm": 0.5328871607780457, + "learning_rate": 4.805579320054247e-05, + "loss": 0.142, + "step": 32900 + }, + { + "epoch": 1.1959444727087725, + "grad_norm": 0.8360912799835205, + "learning_rate": 4.805378631817522e-05, + "loss": 0.1465, + "step": 32910 + }, + { + "epoch": 1.1963078712115707, + "grad_norm": 0.4089026153087616, + "learning_rate": 4.805177844250234e-05, + "loss": 0.1779, + "step": 32920 + }, + { + "epoch": 1.1966712697143689, + "grad_norm": 1.4934437274932861, + "learning_rate": 4.8049769573610336e-05, + "loss": 0.1577, + "step": 32930 + }, + { + "epoch": 1.1970346682171669, + "grad_norm": 0.991147518157959, + "learning_rate": 4.8047759711585784e-05, + "loss": 0.1302, + "step": 32940 + }, + { + "epoch": 1.197398066719965, + "grad_norm": 4.548572540283203, + "learning_rate": 4.804574885651526e-05, + "loss": 0.1184, + "step": 32950 + }, + { + "epoch": 1.1977614652227633, + "grad_norm": 1.7906454801559448, + "learning_rate": 4.8043737008485424e-05, + "loss": 0.138, + "step": 32960 + }, + { + "epoch": 1.1981248637255615, + "grad_norm": 0.4827491044998169, + "learning_rate": 4.804172416758294e-05, + "loss": 0.1573, + "step": 32970 + }, + { + "epoch": 1.1984882622283597, + "grad_norm": 0.8055851459503174, + "learning_rate": 4.803971033389455e-05, + "loss": 0.1203, + "step": 32980 + }, + { + "epoch": 1.1988516607311577, + "grad_norm": 0.7492426633834839, + "learning_rate": 4.8037695507507016e-05, + "loss": 0.1158, + "step": 32990 + }, + { + "epoch": 1.1992150592339559, + "grad_norm": 0.8737430572509766, + "learning_rate": 4.8035679688507154e-05, + "loss": 0.1672, + "step": 33000 + }, + { + "epoch": 1.1992150592339559, + "eval_loss": 0.36745160818099976, + "eval_runtime": 180.2517, + "eval_samples_per_second": 41.131, + "eval_steps_per_second": 5.143, + "eval_wer": 0.17665692450124348, + "step": 33000 + }, + { + "epoch": 1.199578457736754, + "grad_norm": 0.9123022556304932, + "learning_rate": 4.803366287698182e-05, + "loss": 0.1182, + "step": 33010 + }, + { + "epoch": 1.1999418562395523, + "grad_norm": 0.5147042870521545, + "learning_rate": 4.803164507301789e-05, + "loss": 0.1293, + "step": 33020 + }, + { + "epoch": 1.2003052547423505, + "grad_norm": 2.508376359939575, + "learning_rate": 4.8029626276702336e-05, + "loss": 0.1518, + "step": 33030 + }, + { + "epoch": 1.2006686532451487, + "grad_norm": 1.3006081581115723, + "learning_rate": 4.802760648812213e-05, + "loss": 0.1503, + "step": 33040 + }, + { + "epoch": 1.201032051747947, + "grad_norm": 1.490337610244751, + "learning_rate": 4.802558570736427e-05, + "loss": 0.1589, + "step": 33050 + }, + { + "epoch": 1.201395450250745, + "grad_norm": 0.6895734667778015, + "learning_rate": 4.802356393451587e-05, + "loss": 0.137, + "step": 33060 + }, + { + "epoch": 1.201758848753543, + "grad_norm": 0.45895853638648987, + "learning_rate": 4.8021541169664006e-05, + "loss": 0.2112, + "step": 33070 + }, + { + "epoch": 1.2021222472563413, + "grad_norm": 1.6609526872634888, + "learning_rate": 4.801951741289585e-05, + "loss": 0.1392, + "step": 33080 + }, + { + "epoch": 1.2024856457591395, + "grad_norm": 1.1131823062896729, + "learning_rate": 4.801749266429858e-05, + "loss": 0.124, + "step": 33090 + }, + { + "epoch": 1.2028490442619377, + "grad_norm": 0.547478973865509, + "learning_rate": 4.8015466923959465e-05, + "loss": 0.166, + "step": 33100 + }, + { + "epoch": 1.2032124427647357, + "grad_norm": 0.778753936290741, + "learning_rate": 4.801344019196576e-05, + "loss": 0.1414, + "step": 33110 + }, + { + "epoch": 1.203575841267534, + "grad_norm": 1.1527098417282104, + "learning_rate": 4.801141246840481e-05, + "loss": 0.1719, + "step": 33120 + }, + { + "epoch": 1.2039392397703321, + "grad_norm": 0.9628286361694336, + "learning_rate": 4.800938375336395e-05, + "loss": 0.1168, + "step": 33130 + }, + { + "epoch": 1.2043026382731303, + "grad_norm": 1.7359286546707153, + "learning_rate": 4.8007354046930624e-05, + "loss": 0.1145, + "step": 33140 + }, + { + "epoch": 1.2046660367759285, + "grad_norm": 0.8443882465362549, + "learning_rate": 4.8005323349192276e-05, + "loss": 0.138, + "step": 33150 + }, + { + "epoch": 1.2050294352787267, + "grad_norm": 1.166198968887329, + "learning_rate": 4.8003291660236396e-05, + "loss": 0.164, + "step": 33160 + }, + { + "epoch": 1.2053928337815247, + "grad_norm": 0.42992278933525085, + "learning_rate": 4.800125898015052e-05, + "loss": 0.1786, + "step": 33170 + }, + { + "epoch": 1.205756232284323, + "grad_norm": 0.7348678112030029, + "learning_rate": 4.799922530902223e-05, + "loss": 0.1175, + "step": 33180 + }, + { + "epoch": 1.2061196307871211, + "grad_norm": 1.4282450675964355, + "learning_rate": 4.799719064693917e-05, + "loss": 0.1397, + "step": 33190 + }, + { + "epoch": 1.2064830292899194, + "grad_norm": 0.9985376596450806, + "learning_rate": 4.7995154993988974e-05, + "loss": 0.1382, + "step": 33200 + }, + { + "epoch": 1.2068464277927176, + "grad_norm": 0.7168998718261719, + "learning_rate": 4.799311835025937e-05, + "loss": 0.1123, + "step": 33210 + }, + { + "epoch": 1.2072098262955158, + "grad_norm": 0.521123468875885, + "learning_rate": 4.799108071583811e-05, + "loss": 0.1753, + "step": 33220 + }, + { + "epoch": 1.2075732247983137, + "grad_norm": 1.0951159000396729, + "learning_rate": 4.7989042090812976e-05, + "loss": 0.1182, + "step": 33230 + }, + { + "epoch": 1.207936623301112, + "grad_norm": 1.108727216720581, + "learning_rate": 4.798700247527182e-05, + "loss": 1.4247, + "step": 33240 + }, + { + "epoch": 1.2083000218039102, + "grad_norm": 0.4534373879432678, + "learning_rate": 4.7984961869302516e-05, + "loss": 0.1715, + "step": 33250 + }, + { + "epoch": 1.2086634203067084, + "grad_norm": 0.6849185824394226, + "learning_rate": 4.798292027299298e-05, + "loss": 0.1367, + "step": 33260 + }, + { + "epoch": 1.2090268188095066, + "grad_norm": 0.8563576340675354, + "learning_rate": 4.7980877686431195e-05, + "loss": 0.2058, + "step": 33270 + }, + { + "epoch": 1.2093902173123046, + "grad_norm": 0.5488440990447998, + "learning_rate": 4.797883410970514e-05, + "loss": 0.1246, + "step": 33280 + }, + { + "epoch": 1.2097536158151028, + "grad_norm": 0.5783109068870544, + "learning_rate": 4.7976789542902895e-05, + "loss": 0.1135, + "step": 33290 + }, + { + "epoch": 1.210117014317901, + "grad_norm": 2.218514919281006, + "learning_rate": 4.7974743986112536e-05, + "loss": 0.8269, + "step": 33300 + }, + { + "epoch": 1.2104804128206992, + "grad_norm": 1.6320664882659912, + "learning_rate": 4.79726974394222e-05, + "loss": 0.1185, + "step": 33310 + }, + { + "epoch": 1.2108438113234974, + "grad_norm": 1.287618637084961, + "learning_rate": 4.797064990292007e-05, + "loss": 0.1815, + "step": 33320 + }, + { + "epoch": 1.2112072098262956, + "grad_norm": 2.3232581615448, + "learning_rate": 4.796860137669437e-05, + "loss": 0.1285, + "step": 33330 + }, + { + "epoch": 1.2115706083290938, + "grad_norm": 1.2804290056228638, + "learning_rate": 4.796655186083335e-05, + "loss": 0.1339, + "step": 33340 + }, + { + "epoch": 1.2119340068318918, + "grad_norm": 0.6492500901222229, + "learning_rate": 4.796450135542534e-05, + "loss": 0.1278, + "step": 33350 + }, + { + "epoch": 1.21229740533469, + "grad_norm": 1.7094756364822388, + "learning_rate": 4.796244986055867e-05, + "loss": 0.1337, + "step": 33360 + }, + { + "epoch": 1.2126608038374882, + "grad_norm": 1.5763776302337646, + "learning_rate": 4.796039737632173e-05, + "loss": 0.2283, + "step": 33370 + }, + { + "epoch": 1.2130242023402864, + "grad_norm": 0.631926417350769, + "learning_rate": 4.795834390280296e-05, + "loss": 0.2165, + "step": 33380 + }, + { + "epoch": 1.2133876008430846, + "grad_norm": 1.4329982995986938, + "learning_rate": 4.795628944009084e-05, + "loss": 0.1255, + "step": 33390 + }, + { + "epoch": 1.2137509993458826, + "grad_norm": 0.5400133728981018, + "learning_rate": 4.795423398827389e-05, + "loss": 0.1361, + "step": 33400 + }, + { + "epoch": 1.2141143978486808, + "grad_norm": 0.8651421070098877, + "learning_rate": 4.795217754744067e-05, + "loss": 0.1336, + "step": 33410 + }, + { + "epoch": 1.214477796351479, + "grad_norm": 0.32640397548675537, + "learning_rate": 4.795012011767977e-05, + "loss": 0.208, + "step": 33420 + }, + { + "epoch": 1.2148411948542772, + "grad_norm": 2.425781726837158, + "learning_rate": 4.794806169907987e-05, + "loss": 0.1107, + "step": 33430 + }, + { + "epoch": 1.2152045933570754, + "grad_norm": 1.9098165035247803, + "learning_rate": 4.794600229172963e-05, + "loss": 0.1087, + "step": 33440 + }, + { + "epoch": 1.2155679918598736, + "grad_norm": 1.4842039346694946, + "learning_rate": 4.794394189571779e-05, + "loss": 0.141, + "step": 33450 + }, + { + "epoch": 1.2159313903626716, + "grad_norm": 1.6379314661026, + "learning_rate": 4.794188051113313e-05, + "loss": 0.1382, + "step": 33460 + }, + { + "epoch": 1.2162947888654698, + "grad_norm": 1.6831467151641846, + "learning_rate": 4.7939818138064474e-05, + "loss": 0.1564, + "step": 33470 + }, + { + "epoch": 1.216658187368268, + "grad_norm": 0.4303675889968872, + "learning_rate": 4.793775477660067e-05, + "loss": 0.1153, + "step": 33480 + }, + { + "epoch": 1.2170215858710662, + "grad_norm": 0.5871365070343018, + "learning_rate": 4.7935690426830624e-05, + "loss": 0.1122, + "step": 33490 + }, + { + "epoch": 1.2173849843738644, + "grad_norm": 0.7488551735877991, + "learning_rate": 4.7933625088843287e-05, + "loss": 1.8561, + "step": 33500 + }, + { + "epoch": 1.2177483828766626, + "grad_norm": 1.4515953063964844, + "learning_rate": 4.793155876272764e-05, + "loss": 0.127, + "step": 33510 + }, + { + "epoch": 1.2181117813794606, + "grad_norm": 0.9288650155067444, + "learning_rate": 4.7929491448572716e-05, + "loss": 0.2149, + "step": 33520 + }, + { + "epoch": 1.2184751798822588, + "grad_norm": 1.544545292854309, + "learning_rate": 4.792742314646759e-05, + "loss": 0.6921, + "step": 33530 + }, + { + "epoch": 1.218838578385057, + "grad_norm": 1.1275858879089355, + "learning_rate": 4.792535385650138e-05, + "loss": 0.1592, + "step": 33540 + }, + { + "epoch": 1.2192019768878553, + "grad_norm": 0.7861330509185791, + "learning_rate": 4.7923283578763236e-05, + "loss": 0.156, + "step": 33550 + }, + { + "epoch": 1.2195653753906535, + "grad_norm": 1.7547698020935059, + "learning_rate": 4.792121231334237e-05, + "loss": 0.135, + "step": 33560 + }, + { + "epoch": 1.2199287738934514, + "grad_norm": 0.9989791512489319, + "learning_rate": 4.7919140060328014e-05, + "loss": 0.2015, + "step": 33570 + }, + { + "epoch": 1.2202921723962497, + "grad_norm": 0.8089576959609985, + "learning_rate": 4.791706681980945e-05, + "loss": 2.7874, + "step": 33580 + }, + { + "epoch": 1.2206555708990479, + "grad_norm": 1.2729178667068481, + "learning_rate": 4.791499259187603e-05, + "loss": 0.1749, + "step": 33590 + }, + { + "epoch": 1.221018969401846, + "grad_norm": 1.6203336715698242, + "learning_rate": 4.7912917376617106e-05, + "loss": 0.1524, + "step": 33600 + }, + { + "epoch": 1.221018969401846, + "eval_loss": 0.3562403917312622, + "eval_runtime": 180.3906, + "eval_samples_per_second": 41.1, + "eval_steps_per_second": 5.139, + "eval_wer": 0.1815582623849547, + "step": 33600 + }, + { + "epoch": 1.2213823679046443, + "grad_norm": 1.8868520259857178, + "learning_rate": 4.7910841174122104e-05, + "loss": 0.1514, + "step": 33610 + }, + { + "epoch": 1.2217457664074425, + "grad_norm": 1.3601691722869873, + "learning_rate": 4.7908763984480465e-05, + "loss": 0.1675, + "step": 33620 + }, + { + "epoch": 1.2221091649102407, + "grad_norm": 1.2268040180206299, + "learning_rate": 4.790668580778169e-05, + "loss": 0.1363, + "step": 33630 + }, + { + "epoch": 1.2224725634130387, + "grad_norm": 1.918747901916504, + "learning_rate": 4.790460664411534e-05, + "loss": 0.1397, + "step": 33640 + }, + { + "epoch": 1.2228359619158369, + "grad_norm": 0.6259877681732178, + "learning_rate": 4.790252649357098e-05, + "loss": 0.1555, + "step": 33650 + }, + { + "epoch": 1.223199360418635, + "grad_norm": 2.5940511226654053, + "learning_rate": 4.7900445356238235e-05, + "loss": 0.1508, + "step": 33660 + }, + { + "epoch": 1.2235627589214333, + "grad_norm": 1.1692243814468384, + "learning_rate": 4.7898363232206785e-05, + "loss": 0.1642, + "step": 33670 + }, + { + "epoch": 1.2239261574242315, + "grad_norm": 1.459763526916504, + "learning_rate": 4.789628012156633e-05, + "loss": 0.1325, + "step": 33680 + }, + { + "epoch": 1.2242895559270295, + "grad_norm": 0.4898362159729004, + "learning_rate": 4.789419602440663e-05, + "loss": 0.1604, + "step": 33690 + }, + { + "epoch": 1.2246529544298277, + "grad_norm": 1.6771429777145386, + "learning_rate": 4.7892110940817495e-05, + "loss": 0.217, + "step": 33700 + }, + { + "epoch": 1.225016352932626, + "grad_norm": 1.0040748119354248, + "learning_rate": 4.789002487088874e-05, + "loss": 0.1428, + "step": 33710 + }, + { + "epoch": 1.225379751435424, + "grad_norm": 0.5210689306259155, + "learning_rate": 4.788793781471025e-05, + "loss": 0.1777, + "step": 33720 + }, + { + "epoch": 1.2257431499382223, + "grad_norm": 2.0783729553222656, + "learning_rate": 4.788584977237196e-05, + "loss": 0.1373, + "step": 33730 + }, + { + "epoch": 1.2261065484410205, + "grad_norm": 0.8238822221755981, + "learning_rate": 4.788376074396384e-05, + "loss": 0.1246, + "step": 33740 + }, + { + "epoch": 1.2264699469438185, + "grad_norm": 1.1031908988952637, + "learning_rate": 4.7881670729575875e-05, + "loss": 0.1488, + "step": 33750 + }, + { + "epoch": 1.2268333454466167, + "grad_norm": 1.3136149644851685, + "learning_rate": 4.787957972929814e-05, + "loss": 0.1382, + "step": 33760 + }, + { + "epoch": 1.227196743949415, + "grad_norm": 0.9418723583221436, + "learning_rate": 4.7877487743220726e-05, + "loss": 0.1531, + "step": 33770 + }, + { + "epoch": 1.2275601424522131, + "grad_norm": 1.3498002290725708, + "learning_rate": 4.7875394771433755e-05, + "loss": 0.1345, + "step": 33780 + }, + { + "epoch": 1.2279235409550113, + "grad_norm": 1.0489355325698853, + "learning_rate": 4.7873300814027415e-05, + "loss": 0.1522, + "step": 33790 + }, + { + "epoch": 1.2282869394578095, + "grad_norm": 1.1034955978393555, + "learning_rate": 4.7871205871091926e-05, + "loss": 0.1721, + "step": 33800 + }, + { + "epoch": 1.2286503379606075, + "grad_norm": 1.1162317991256714, + "learning_rate": 4.786910994271756e-05, + "loss": 0.1774, + "step": 33810 + }, + { + "epoch": 1.2290137364634057, + "grad_norm": 0.6511724591255188, + "learning_rate": 4.786701302899461e-05, + "loss": 0.1491, + "step": 33820 + }, + { + "epoch": 1.229377134966204, + "grad_norm": 0.730034589767456, + "learning_rate": 4.786491513001343e-05, + "loss": 0.1304, + "step": 33830 + }, + { + "epoch": 1.2297405334690021, + "grad_norm": 0.3531613051891327, + "learning_rate": 4.786281624586441e-05, + "loss": 0.1022, + "step": 33840 + }, + { + "epoch": 1.2301039319718003, + "grad_norm": 0.8404261469841003, + "learning_rate": 4.786071637663798e-05, + "loss": 0.1366, + "step": 33850 + }, + { + "epoch": 1.2304673304745983, + "grad_norm": 1.0911661386489868, + "learning_rate": 4.785861552242462e-05, + "loss": 0.1132, + "step": 33860 + }, + { + "epoch": 1.2308307289773965, + "grad_norm": 0.9053283333778381, + "learning_rate": 4.785651368331485e-05, + "loss": 0.1393, + "step": 33870 + }, + { + "epoch": 1.2311941274801947, + "grad_norm": 1.065520167350769, + "learning_rate": 4.7854410859399236e-05, + "loss": 0.1277, + "step": 33880 + }, + { + "epoch": 1.231557525982993, + "grad_norm": 0.3727855384349823, + "learning_rate": 4.785230705076837e-05, + "loss": 0.1213, + "step": 33890 + }, + { + "epoch": 1.2319209244857912, + "grad_norm": 1.7203010320663452, + "learning_rate": 4.78502022575129e-05, + "loss": 0.1735, + "step": 33900 + }, + { + "epoch": 1.2322843229885894, + "grad_norm": 0.7186889052391052, + "learning_rate": 4.7848096479723516e-05, + "loss": 0.1195, + "step": 33910 + }, + { + "epoch": 1.2326477214913876, + "grad_norm": 1.0675809383392334, + "learning_rate": 4.784598971749095e-05, + "loss": 0.1699, + "step": 33920 + }, + { + "epoch": 1.2330111199941856, + "grad_norm": 1.6882377862930298, + "learning_rate": 4.784388197090597e-05, + "loss": 0.1275, + "step": 33930 + }, + { + "epoch": 1.2333745184969838, + "grad_norm": 0.4500318765640259, + "learning_rate": 4.78417732400594e-05, + "loss": 0.1248, + "step": 33940 + }, + { + "epoch": 1.233737916999782, + "grad_norm": 1.0862751007080078, + "learning_rate": 4.783966352504209e-05, + "loss": 0.1585, + "step": 33950 + }, + { + "epoch": 1.2341013155025802, + "grad_norm": 0.9130736589431763, + "learning_rate": 4.7837552825944943e-05, + "loss": 0.1489, + "step": 33960 + }, + { + "epoch": 1.2344647140053784, + "grad_norm": 0.47646433115005493, + "learning_rate": 4.783544114285891e-05, + "loss": 0.135, + "step": 33970 + }, + { + "epoch": 1.2348281125081764, + "grad_norm": 0.7090937495231628, + "learning_rate": 4.783332847587495e-05, + "loss": 0.1231, + "step": 33980 + }, + { + "epoch": 1.2351915110109746, + "grad_norm": 2.1009280681610107, + "learning_rate": 4.7831214825084117e-05, + "loss": 0.1239, + "step": 33990 + }, + { + "epoch": 1.2355549095137728, + "grad_norm": 0.6040928363800049, + "learning_rate": 4.782910019057747e-05, + "loss": 0.1757, + "step": 34000 + }, + { + "epoch": 1.235918308016571, + "grad_norm": 3.8224098682403564, + "learning_rate": 4.782698457244612e-05, + "loss": 0.1201, + "step": 34010 + }, + { + "epoch": 1.2362817065193692, + "grad_norm": 0.4506910741329193, + "learning_rate": 4.782486797078122e-05, + "loss": 0.1381, + "step": 34020 + }, + { + "epoch": 1.2366451050221674, + "grad_norm": 1.0445079803466797, + "learning_rate": 4.782275038567398e-05, + "loss": 0.1386, + "step": 34030 + }, + { + "epoch": 1.2370085035249654, + "grad_norm": 1.1579469442367554, + "learning_rate": 4.7820631817215625e-05, + "loss": 0.1221, + "step": 34040 + }, + { + "epoch": 1.2373719020277636, + "grad_norm": 1.023468017578125, + "learning_rate": 4.781851226549743e-05, + "loss": 0.1524, + "step": 34050 + }, + { + "epoch": 1.2377353005305618, + "grad_norm": 1.0542868375778198, + "learning_rate": 4.781639173061074e-05, + "loss": 0.1268, + "step": 34060 + }, + { + "epoch": 1.23809869903336, + "grad_norm": 0.7573347687721252, + "learning_rate": 4.7814270212646915e-05, + "loss": 0.2058, + "step": 34070 + }, + { + "epoch": 1.2384620975361582, + "grad_norm": 1.2218323945999146, + "learning_rate": 4.781214771169736e-05, + "loss": 0.1141, + "step": 34080 + }, + { + "epoch": 1.2388254960389564, + "grad_norm": 0.7725077867507935, + "learning_rate": 4.781002422785352e-05, + "loss": 0.1221, + "step": 34090 + }, + { + "epoch": 1.2391888945417544, + "grad_norm": 2.2234578132629395, + "learning_rate": 4.78078997612069e-05, + "loss": 1.9034, + "step": 34100 + }, + { + "epoch": 1.2395522930445526, + "grad_norm": 1.260764718055725, + "learning_rate": 4.780577431184902e-05, + "loss": 0.1205, + "step": 34110 + }, + { + "epoch": 1.2399156915473508, + "grad_norm": 0.5173097252845764, + "learning_rate": 4.780364787987148e-05, + "loss": 0.2101, + "step": 34120 + }, + { + "epoch": 1.240279090050149, + "grad_norm": 0.9755317568778992, + "learning_rate": 4.780152046536588e-05, + "loss": 0.1041, + "step": 34130 + }, + { + "epoch": 1.2406424885529472, + "grad_norm": 1.4319573640823364, + "learning_rate": 4.77993920684239e-05, + "loss": 0.1382, + "step": 34140 + }, + { + "epoch": 1.2410058870557452, + "grad_norm": 0.8623887896537781, + "learning_rate": 4.7797262689137224e-05, + "loss": 0.1646, + "step": 34150 + }, + { + "epoch": 1.2413692855585434, + "grad_norm": 1.1775789260864258, + "learning_rate": 4.779513232759762e-05, + "loss": 0.124, + "step": 34160 + }, + { + "epoch": 1.2417326840613416, + "grad_norm": 23.601593017578125, + "learning_rate": 4.779300098389687e-05, + "loss": 0.304, + "step": 34170 + }, + { + "epoch": 1.2420960825641398, + "grad_norm": 0.9336787462234497, + "learning_rate": 4.77908686581268e-05, + "loss": 0.1376, + "step": 34180 + }, + { + "epoch": 1.242459481066938, + "grad_norm": 0.7417952418327332, + "learning_rate": 4.77887353503793e-05, + "loss": 0.1208, + "step": 34190 + }, + { + "epoch": 1.2428228795697362, + "grad_norm": 1.48567795753479, + "learning_rate": 4.778660106074626e-05, + "loss": 0.1198, + "step": 34200 + }, + { + "epoch": 1.2428228795697362, + "eval_loss": 0.3608033359050751, + "eval_runtime": 180.8757, + "eval_samples_per_second": 40.989, + "eval_steps_per_second": 5.125, + "eval_wer": 0.18227530996423838, + "step": 34200 + }, + { + "epoch": 1.2431862780725345, + "grad_norm": 4.077025890350342, + "learning_rate": 4.778446578931967e-05, + "loss": 0.13, + "step": 34210 + }, + { + "epoch": 1.2435496765753324, + "grad_norm": 1.267830729484558, + "learning_rate": 4.7782329536191504e-05, + "loss": 0.183, + "step": 34220 + }, + { + "epoch": 1.2439130750781306, + "grad_norm": 0.9263830780982971, + "learning_rate": 4.778019230145383e-05, + "loss": 0.135, + "step": 34230 + }, + { + "epoch": 1.2442764735809289, + "grad_norm": 1.3920031785964966, + "learning_rate": 4.777805408519872e-05, + "loss": 0.1485, + "step": 34240 + }, + { + "epoch": 1.244639872083727, + "grad_norm": 1.263641357421875, + "learning_rate": 4.7775914887518306e-05, + "loss": 0.1503, + "step": 34250 + }, + { + "epoch": 1.2450032705865253, + "grad_norm": 1.514445185661316, + "learning_rate": 4.777377470850475e-05, + "loss": 0.1604, + "step": 34260 + }, + { + "epoch": 1.2453666690893233, + "grad_norm": 0.9733619093894958, + "learning_rate": 4.7771633548250266e-05, + "loss": 0.1674, + "step": 34270 + }, + { + "epoch": 1.2457300675921215, + "grad_norm": 1.6468124389648438, + "learning_rate": 4.776949140684712e-05, + "loss": 0.1229, + "step": 34280 + }, + { + "epoch": 1.2460934660949197, + "grad_norm": 0.9954056739807129, + "learning_rate": 4.77673482843876e-05, + "loss": 0.1237, + "step": 34290 + }, + { + "epoch": 1.2464568645977179, + "grad_norm": 1.7785327434539795, + "learning_rate": 4.776520418096406e-05, + "loss": 0.1784, + "step": 34300 + }, + { + "epoch": 1.246820263100516, + "grad_norm": 1.952333688735962, + "learning_rate": 4.776305909666886e-05, + "loss": 0.1355, + "step": 34310 + }, + { + "epoch": 1.2471836616033143, + "grad_norm": 0.7019221782684326, + "learning_rate": 4.7760913031594445e-05, + "loss": 0.1856, + "step": 34320 + }, + { + "epoch": 1.2475470601061123, + "grad_norm": 2.3900887966156006, + "learning_rate": 4.775876598583327e-05, + "loss": 0.1279, + "step": 34330 + }, + { + "epoch": 1.2479104586089105, + "grad_norm": 2.4521565437316895, + "learning_rate": 4.7756617959477834e-05, + "loss": 0.1384, + "step": 34340 + }, + { + "epoch": 1.2482738571117087, + "grad_norm": 1.043819546699524, + "learning_rate": 4.7754468952620704e-05, + "loss": 0.1485, + "step": 34350 + }, + { + "epoch": 1.248637255614507, + "grad_norm": 2.2905571460723877, + "learning_rate": 4.775231896535446e-05, + "loss": 0.1342, + "step": 34360 + }, + { + "epoch": 1.249000654117305, + "grad_norm": 1.3930597305297852, + "learning_rate": 4.7750167997771756e-05, + "loss": 0.1989, + "step": 34370 + }, + { + "epoch": 1.2493640526201033, + "grad_norm": 1.1254252195358276, + "learning_rate": 4.7748016049965255e-05, + "loss": 0.113, + "step": 34380 + }, + { + "epoch": 1.2497274511229013, + "grad_norm": 0.8257030248641968, + "learning_rate": 4.774586312202768e-05, + "loss": 0.1212, + "step": 34390 + }, + { + "epoch": 1.2500908496256995, + "grad_norm": 1.2986866235733032, + "learning_rate": 4.774370921405179e-05, + "loss": 0.1446, + "step": 34400 + }, + { + "epoch": 1.2504542481284977, + "grad_norm": 2.2006325721740723, + "learning_rate": 4.77415543261304e-05, + "loss": 0.1567, + "step": 34410 + }, + { + "epoch": 1.250817646631296, + "grad_norm": 0.6778092384338379, + "learning_rate": 4.7739398458356335e-05, + "loss": 0.1768, + "step": 34420 + }, + { + "epoch": 1.2511810451340941, + "grad_norm": 1.152696132659912, + "learning_rate": 4.773724161082251e-05, + "loss": 0.1024, + "step": 34430 + }, + { + "epoch": 1.251544443636892, + "grad_norm": 2.375783681869507, + "learning_rate": 4.7735083783621835e-05, + "loss": 0.2642, + "step": 34440 + }, + { + "epoch": 1.2519078421396903, + "grad_norm": 1.3765895366668701, + "learning_rate": 4.77329249768473e-05, + "loss": 0.1861, + "step": 34450 + }, + { + "epoch": 1.2522712406424885, + "grad_norm": 1.7743607759475708, + "learning_rate": 4.773076519059191e-05, + "loss": 0.1429, + "step": 34460 + }, + { + "epoch": 1.2526346391452867, + "grad_norm": 1.4731152057647705, + "learning_rate": 4.772860442494872e-05, + "loss": 0.1508, + "step": 34470 + }, + { + "epoch": 1.252998037648085, + "grad_norm": 1.5612653493881226, + "learning_rate": 4.7726442680010836e-05, + "loss": 0.1291, + "step": 34480 + }, + { + "epoch": 1.2533614361508831, + "grad_norm": 1.9972872734069824, + "learning_rate": 4.77242799558714e-05, + "loss": 0.1154, + "step": 34490 + }, + { + "epoch": 1.2537248346536813, + "grad_norm": 0.7144235372543335, + "learning_rate": 4.772211625262359e-05, + "loss": 0.1793, + "step": 34500 + }, + { + "epoch": 1.2540882331564793, + "grad_norm": 10.059864044189453, + "learning_rate": 4.7719951570360636e-05, + "loss": 0.1346, + "step": 34510 + }, + { + "epoch": 1.2544516316592775, + "grad_norm": 1.0801091194152832, + "learning_rate": 4.771778590917581e-05, + "loss": 0.1918, + "step": 34520 + }, + { + "epoch": 1.2548150301620757, + "grad_norm": 2.0628061294555664, + "learning_rate": 4.771561926916242e-05, + "loss": 0.1477, + "step": 34530 + }, + { + "epoch": 1.255178428664874, + "grad_norm": 2.5143215656280518, + "learning_rate": 4.771345165041381e-05, + "loss": 0.1226, + "step": 34540 + }, + { + "epoch": 1.2555418271676722, + "grad_norm": 1.197352409362793, + "learning_rate": 4.7711283053023394e-05, + "loss": 0.152, + "step": 34550 + }, + { + "epoch": 1.2559052256704701, + "grad_norm": 0.9427943825721741, + "learning_rate": 4.7709113477084595e-05, + "loss": 0.1359, + "step": 34560 + }, + { + "epoch": 1.2562686241732683, + "grad_norm": 1.0930500030517578, + "learning_rate": 4.770694292269089e-05, + "loss": 0.1659, + "step": 34570 + }, + { + "epoch": 1.2566320226760666, + "grad_norm": 0.7914316654205322, + "learning_rate": 4.770477138993581e-05, + "loss": 0.1224, + "step": 34580 + }, + { + "epoch": 1.2569954211788648, + "grad_norm": 0.6064370274543762, + "learning_rate": 4.770259887891292e-05, + "loss": 0.1153, + "step": 34590 + }, + { + "epoch": 1.257358819681663, + "grad_norm": 0.8653318285942078, + "learning_rate": 4.770042538971581e-05, + "loss": 0.1715, + "step": 34600 + }, + { + "epoch": 1.257722218184461, + "grad_norm": 0.5470715761184692, + "learning_rate": 4.7698250922438145e-05, + "loss": 0.1447, + "step": 34610 + }, + { + "epoch": 1.2580856166872594, + "grad_norm": 0.8058337569236755, + "learning_rate": 4.769607547717361e-05, + "loss": 0.1742, + "step": 34620 + }, + { + "epoch": 1.2584490151900574, + "grad_norm": 2.5231611728668213, + "learning_rate": 4.7693899054015926e-05, + "loss": 1.9069, + "step": 34630 + }, + { + "epoch": 1.2588124136928556, + "grad_norm": 0.603464663028717, + "learning_rate": 4.7691721653058886e-05, + "loss": 0.1244, + "step": 34640 + }, + { + "epoch": 1.2591758121956538, + "grad_norm": 0.7844828963279724, + "learning_rate": 4.76895432743963e-05, + "loss": 0.3787, + "step": 34650 + }, + { + "epoch": 1.259539210698452, + "grad_norm": 0.7887173295021057, + "learning_rate": 4.7687363918122016e-05, + "loss": 0.1268, + "step": 34660 + }, + { + "epoch": 1.2599026092012502, + "grad_norm": 0.669452965259552, + "learning_rate": 4.768518358432994e-05, + "loss": 0.1572, + "step": 34670 + }, + { + "epoch": 1.2602660077040482, + "grad_norm": 1.193303108215332, + "learning_rate": 4.768300227311403e-05, + "loss": 0.5754, + "step": 34680 + }, + { + "epoch": 1.2606294062068464, + "grad_norm": 0.8210042715072632, + "learning_rate": 4.7680819984568246e-05, + "loss": 0.1372, + "step": 34690 + }, + { + "epoch": 1.2609928047096446, + "grad_norm": 2.98244309425354, + "learning_rate": 4.767863671878663e-05, + "loss": 0.2028, + "step": 34700 + }, + { + "epoch": 1.2613562032124428, + "grad_norm": 1.3739604949951172, + "learning_rate": 4.767645247586325e-05, + "loss": 0.164, + "step": 34710 + }, + { + "epoch": 1.261719601715241, + "grad_norm": 0.6770296096801758, + "learning_rate": 4.7674267255892226e-05, + "loss": 0.2234, + "step": 34720 + }, + { + "epoch": 1.262083000218039, + "grad_norm": 0.5827689170837402, + "learning_rate": 4.767208105896769e-05, + "loss": 0.1405, + "step": 34730 + }, + { + "epoch": 1.2624463987208372, + "grad_norm": 0.7818326354026794, + "learning_rate": 4.766989388518385e-05, + "loss": 0.1194, + "step": 34740 + }, + { + "epoch": 1.2628097972236354, + "grad_norm": 0.8514626026153564, + "learning_rate": 4.7667705734634946e-05, + "loss": 0.1529, + "step": 34750 + }, + { + "epoch": 1.2631731957264336, + "grad_norm": 0.7973842024803162, + "learning_rate": 4.766551660741525e-05, + "loss": 0.1224, + "step": 34760 + }, + { + "epoch": 1.2635365942292318, + "grad_norm": 1.019089937210083, + "learning_rate": 4.766332650361909e-05, + "loss": 0.1768, + "step": 34770 + }, + { + "epoch": 1.26389999273203, + "grad_norm": 1.0458087921142578, + "learning_rate": 4.766113542334082e-05, + "loss": 0.1382, + "step": 34780 + }, + { + "epoch": 1.2642633912348282, + "grad_norm": 1.0272470712661743, + "learning_rate": 4.765894336667486e-05, + "loss": 0.1272, + "step": 34790 + }, + { + "epoch": 1.2646267897376262, + "grad_norm": 2.0589025020599365, + "learning_rate": 4.765675033371565e-05, + "loss": 0.1682, + "step": 34800 + }, + { + "epoch": 1.2646267897376262, + "eval_loss": 0.3476085662841797, + "eval_runtime": 181.248, + "eval_samples_per_second": 40.905, + "eval_steps_per_second": 5.115, + "eval_wer": 0.17651169967505945, + "step": 34800 + }, + { + "epoch": 1.2649901882404244, + "grad_norm": 1.1303410530090332, + "learning_rate": 4.7654556324557685e-05, + "loss": 0.1348, + "step": 34810 + }, + { + "epoch": 1.2653535867432226, + "grad_norm": 0.799231231212616, + "learning_rate": 4.765236133929549e-05, + "loss": 0.1645, + "step": 34820 + }, + { + "epoch": 1.2657169852460208, + "grad_norm": 1.2402738332748413, + "learning_rate": 4.765016537802364e-05, + "loss": 0.1235, + "step": 34830 + }, + { + "epoch": 1.266080383748819, + "grad_norm": 4.433220386505127, + "learning_rate": 4.7647968440836753e-05, + "loss": 0.1706, + "step": 34840 + }, + { + "epoch": 1.266443782251617, + "grad_norm": 0.8201845288276672, + "learning_rate": 4.764577052782949e-05, + "loss": 0.1704, + "step": 34850 + }, + { + "epoch": 1.2668071807544152, + "grad_norm": 1.2809802293777466, + "learning_rate": 4.764357163909655e-05, + "loss": 0.1368, + "step": 34860 + }, + { + "epoch": 1.2671705792572134, + "grad_norm": 0.7995765209197998, + "learning_rate": 4.7641371774732676e-05, + "loss": 0.1773, + "step": 34870 + }, + { + "epoch": 1.2675339777600116, + "grad_norm": 1.1338168382644653, + "learning_rate": 4.763917093483264e-05, + "loss": 0.138, + "step": 34880 + }, + { + "epoch": 1.2678973762628098, + "grad_norm": 0.85684734582901, + "learning_rate": 4.763696911949129e-05, + "loss": 0.1387, + "step": 34890 + }, + { + "epoch": 1.2682607747656078, + "grad_norm": 1.212156057357788, + "learning_rate": 4.763476632880348e-05, + "loss": 0.1377, + "step": 34900 + }, + { + "epoch": 1.2686241732684063, + "grad_norm": 2.2248573303222656, + "learning_rate": 4.7632562562864125e-05, + "loss": 0.1295, + "step": 34910 + }, + { + "epoch": 1.2689875717712042, + "grad_norm": 1.2567734718322754, + "learning_rate": 4.763035782176818e-05, + "loss": 0.2109, + "step": 34920 + }, + { + "epoch": 1.2693509702740025, + "grad_norm": 0.9226292967796326, + "learning_rate": 4.7628152105610624e-05, + "loss": 0.1315, + "step": 34930 + }, + { + "epoch": 1.2697143687768007, + "grad_norm": 0.9735257029533386, + "learning_rate": 4.762594541448651e-05, + "loss": 0.1139, + "step": 34940 + }, + { + "epoch": 1.2700777672795989, + "grad_norm": 3.8411102294921875, + "learning_rate": 4.7623737748490914e-05, + "loss": 0.5175, + "step": 34950 + }, + { + "epoch": 1.270441165782397, + "grad_norm": 1.0780479907989502, + "learning_rate": 4.762152910771895e-05, + "loss": 0.1226, + "step": 34960 + }, + { + "epoch": 1.270804564285195, + "grad_norm": 0.7885404229164124, + "learning_rate": 4.761931949226579e-05, + "loss": 0.1821, + "step": 34970 + }, + { + "epoch": 1.2711679627879933, + "grad_norm": 3.387125015258789, + "learning_rate": 4.761710890222663e-05, + "loss": 0.1608, + "step": 34980 + }, + { + "epoch": 1.2715313612907915, + "grad_norm": 0.9549399614334106, + "learning_rate": 4.761489733769672e-05, + "loss": 0.1071, + "step": 34990 + }, + { + "epoch": 1.2718947597935897, + "grad_norm": 1.3453798294067383, + "learning_rate": 4.761268479877134e-05, + "loss": 0.1466, + "step": 35000 + }, + { + "epoch": 1.2722581582963879, + "grad_norm": 0.9733071327209473, + "learning_rate": 4.761047128554584e-05, + "loss": 0.1646, + "step": 35010 + }, + { + "epoch": 1.2726215567991859, + "grad_norm": 0.5874946713447571, + "learning_rate": 4.760825679811557e-05, + "loss": 0.2291, + "step": 35020 + }, + { + "epoch": 1.272984955301984, + "grad_norm": 1.9506993293762207, + "learning_rate": 4.7606041336575965e-05, + "loss": 0.2256, + "step": 35030 + }, + { + "epoch": 1.2733483538047823, + "grad_norm": 1.2343640327453613, + "learning_rate": 4.760382490102247e-05, + "loss": 0.133, + "step": 35040 + }, + { + "epoch": 1.2737117523075805, + "grad_norm": 0.38314980268478394, + "learning_rate": 4.7601607491550574e-05, + "loss": 0.1682, + "step": 35050 + }, + { + "epoch": 1.2740751508103787, + "grad_norm": 0.928424596786499, + "learning_rate": 4.7599389108255846e-05, + "loss": 0.0998, + "step": 35060 + }, + { + "epoch": 1.274438549313177, + "grad_norm": 1.730793833732605, + "learning_rate": 4.7597169751233833e-05, + "loss": 0.1682, + "step": 35070 + }, + { + "epoch": 1.2748019478159751, + "grad_norm": 1.0121824741363525, + "learning_rate": 4.7594949420580184e-05, + "loss": 0.1093, + "step": 35080 + }, + { + "epoch": 1.275165346318773, + "grad_norm": 1.4093934297561646, + "learning_rate": 4.759272811639055e-05, + "loss": 0.156, + "step": 35090 + }, + { + "epoch": 1.2755287448215713, + "grad_norm": 0.9377339482307434, + "learning_rate": 4.759050583876066e-05, + "loss": 0.1577, + "step": 35100 + }, + { + "epoch": 1.2758921433243695, + "grad_norm": 1.6535552740097046, + "learning_rate": 4.7588282587786246e-05, + "loss": 0.1117, + "step": 35110 + }, + { + "epoch": 1.2762555418271677, + "grad_norm": 0.7115573287010193, + "learning_rate": 4.75860583635631e-05, + "loss": 0.1443, + "step": 35120 + }, + { + "epoch": 1.276618940329966, + "grad_norm": 1.6995899677276611, + "learning_rate": 4.7583833166187065e-05, + "loss": 0.126, + "step": 35130 + }, + { + "epoch": 1.276982338832764, + "grad_norm": 1.9350817203521729, + "learning_rate": 4.7581606995754005e-05, + "loss": 0.1224, + "step": 35140 + }, + { + "epoch": 1.2773457373355621, + "grad_norm": 1.8662594556808472, + "learning_rate": 4.757937985235985e-05, + "loss": 0.1735, + "step": 35150 + }, + { + "epoch": 1.2777091358383603, + "grad_norm": 3.6977062225341797, + "learning_rate": 4.7577151736100554e-05, + "loss": 0.162, + "step": 35160 + }, + { + "epoch": 1.2780725343411585, + "grad_norm": 0.5185838341712952, + "learning_rate": 4.757492264707213e-05, + "loss": 0.1593, + "step": 35170 + }, + { + "epoch": 1.2784359328439567, + "grad_norm": 0.6665944457054138, + "learning_rate": 4.7572692585370596e-05, + "loss": 0.1144, + "step": 35180 + }, + { + "epoch": 1.2787993313467547, + "grad_norm": 1.1617207527160645, + "learning_rate": 4.757046155109206e-05, + "loss": 0.1162, + "step": 35190 + }, + { + "epoch": 1.2791627298495531, + "grad_norm": 1.3124502897262573, + "learning_rate": 4.756822954433264e-05, + "loss": 0.1441, + "step": 35200 + }, + { + "epoch": 1.2795261283523511, + "grad_norm": 1.4907313585281372, + "learning_rate": 4.756599656518851e-05, + "loss": 0.1275, + "step": 35210 + }, + { + "epoch": 1.2798895268551493, + "grad_norm": 1.0033677816390991, + "learning_rate": 4.756376261375587e-05, + "loss": 0.1795, + "step": 35220 + }, + { + "epoch": 1.2802529253579475, + "grad_norm": 0.9439616799354553, + "learning_rate": 4.756152769013099e-05, + "loss": 0.1158, + "step": 35230 + }, + { + "epoch": 1.2806163238607458, + "grad_norm": 2.641103506088257, + "learning_rate": 4.755929179441016e-05, + "loss": 0.1102, + "step": 35240 + }, + { + "epoch": 1.280979722363544, + "grad_norm": 1.1864644289016724, + "learning_rate": 4.7557054926689694e-05, + "loss": 0.136, + "step": 35250 + }, + { + "epoch": 1.281343120866342, + "grad_norm": 0.7801216244697571, + "learning_rate": 4.755481708706601e-05, + "loss": 0.1436, + "step": 35260 + }, + { + "epoch": 1.2817065193691402, + "grad_norm": 0.5771633386611938, + "learning_rate": 4.7552578275635494e-05, + "loss": 0.1687, + "step": 35270 + }, + { + "epoch": 1.2820699178719384, + "grad_norm": 1.247269630432129, + "learning_rate": 4.755033849249463e-05, + "loss": 0.1168, + "step": 35280 + }, + { + "epoch": 1.2824333163747366, + "grad_norm": 1.3845196962356567, + "learning_rate": 4.7548097737739905e-05, + "loss": 0.1122, + "step": 35290 + }, + { + "epoch": 1.2827967148775348, + "grad_norm": 1.6793280839920044, + "learning_rate": 4.754585601146788e-05, + "loss": 0.168, + "step": 35300 + }, + { + "epoch": 1.2831601133803328, + "grad_norm": 0.8970069289207458, + "learning_rate": 4.754361331377514e-05, + "loss": 0.1172, + "step": 35310 + }, + { + "epoch": 1.283523511883131, + "grad_norm": 0.6038461923599243, + "learning_rate": 4.7541369644758315e-05, + "loss": 0.1962, + "step": 35320 + }, + { + "epoch": 1.2838869103859292, + "grad_norm": 0.9473531246185303, + "learning_rate": 4.753912500451407e-05, + "loss": 0.1288, + "step": 35330 + }, + { + "epoch": 1.2842503088887274, + "grad_norm": 1.5339337587356567, + "learning_rate": 4.753687939313912e-05, + "loss": 0.1644, + "step": 35340 + }, + { + "epoch": 1.2846137073915256, + "grad_norm": 0.9454206228256226, + "learning_rate": 4.753463281073023e-05, + "loss": 0.1319, + "step": 35350 + }, + { + "epoch": 1.2849771058943238, + "grad_norm": 4.068907260894775, + "learning_rate": 4.753238525738419e-05, + "loss": 0.1241, + "step": 35360 + }, + { + "epoch": 1.285340504397122, + "grad_norm": 1.1616491079330444, + "learning_rate": 4.753013673319784e-05, + "loss": 0.2266, + "step": 35370 + }, + { + "epoch": 1.28570390289992, + "grad_norm": 1.232142448425293, + "learning_rate": 4.7527887238268065e-05, + "loss": 0.1438, + "step": 35380 + }, + { + "epoch": 1.2860673014027182, + "grad_norm": 0.5053390860557556, + "learning_rate": 4.7525636772691775e-05, + "loss": 0.1214, + "step": 35390 + }, + { + "epoch": 1.2864306999055164, + "grad_norm": 0.9241679310798645, + "learning_rate": 4.752338533656594e-05, + "loss": 0.3857, + "step": 35400 + }, + { + "epoch": 1.2864306999055164, + "eval_loss": 0.33814677596092224, + "eval_runtime": 180.0316, + "eval_samples_per_second": 41.182, + "eval_steps_per_second": 5.149, + "eval_wer": 0.1815401092816817, + "step": 35400 + }, + { + "epoch": 1.2867940984083146, + "grad_norm": 1.1576555967330933, + "learning_rate": 4.7521132929987575e-05, + "loss": 0.1429, + "step": 35410 + }, + { + "epoch": 1.2871574969111128, + "grad_norm": 0.4765828549861908, + "learning_rate": 4.751887955305372e-05, + "loss": 0.1976, + "step": 35420 + }, + { + "epoch": 1.2875208954139108, + "grad_norm": 0.8831065893173218, + "learning_rate": 4.751662520586148e-05, + "loss": 0.1543, + "step": 35430 + }, + { + "epoch": 1.287884293916709, + "grad_norm": 1.9074327945709229, + "learning_rate": 4.751436988850796e-05, + "loss": 0.133, + "step": 35440 + }, + { + "epoch": 1.2882476924195072, + "grad_norm": 1.6553431749343872, + "learning_rate": 4.7512113601090356e-05, + "loss": 0.1731, + "step": 35450 + }, + { + "epoch": 1.2886110909223054, + "grad_norm": 1.2409085035324097, + "learning_rate": 4.750985634370587e-05, + "loss": 0.119, + "step": 35460 + }, + { + "epoch": 1.2889744894251036, + "grad_norm": 4.575315475463867, + "learning_rate": 4.7507598116451763e-05, + "loss": 0.2287, + "step": 35470 + }, + { + "epoch": 1.2893378879279016, + "grad_norm": 1.1517298221588135, + "learning_rate": 4.7505338919425334e-05, + "loss": 0.1263, + "step": 35480 + }, + { + "epoch": 1.2897012864307, + "grad_norm": 0.8862209320068359, + "learning_rate": 4.7503078752723935e-05, + "loss": 0.9246, + "step": 35490 + }, + { + "epoch": 1.290064684933498, + "grad_norm": 2.2056024074554443, + "learning_rate": 4.750081761644493e-05, + "loss": 0.1316, + "step": 35500 + }, + { + "epoch": 1.2904280834362962, + "grad_norm": 1.9249043464660645, + "learning_rate": 4.749855551068576e-05, + "loss": 0.1341, + "step": 35510 + }, + { + "epoch": 1.2907914819390944, + "grad_norm": 0.4050438404083252, + "learning_rate": 4.749629243554387e-05, + "loss": 0.1876, + "step": 35520 + }, + { + "epoch": 1.2911548804418926, + "grad_norm": 0.8166261911392212, + "learning_rate": 4.74940283911168e-05, + "loss": 0.1141, + "step": 35530 + }, + { + "epoch": 1.2915182789446908, + "grad_norm": 0.4988127648830414, + "learning_rate": 4.749176337750206e-05, + "loss": 0.1548, + "step": 35540 + }, + { + "epoch": 1.2918816774474888, + "grad_norm": 1.80185067653656, + "learning_rate": 4.748949739479728e-05, + "loss": 0.1658, + "step": 35550 + }, + { + "epoch": 1.292245075950287, + "grad_norm": 0.9549736380577087, + "learning_rate": 4.748723044310006e-05, + "loss": 0.1373, + "step": 35560 + }, + { + "epoch": 1.2926084744530852, + "grad_norm": 1.3219162225723267, + "learning_rate": 4.74849625225081e-05, + "loss": 0.2101, + "step": 35570 + }, + { + "epoch": 1.2929718729558835, + "grad_norm": 1.4803717136383057, + "learning_rate": 4.74826936331191e-05, + "loss": 0.121, + "step": 35580 + }, + { + "epoch": 1.2933352714586817, + "grad_norm": 2.6438705921173096, + "learning_rate": 4.7480423775030834e-05, + "loss": 0.13, + "step": 35590 + }, + { + "epoch": 1.2936986699614796, + "grad_norm": 2.5969061851501465, + "learning_rate": 4.7478152948341094e-05, + "loss": 0.165, + "step": 35600 + }, + { + "epoch": 1.2940620684642778, + "grad_norm": 0.6594710946083069, + "learning_rate": 4.74758811531477e-05, + "loss": 0.3067, + "step": 35610 + }, + { + "epoch": 1.294425466967076, + "grad_norm": 1.1262328624725342, + "learning_rate": 4.747360838954858e-05, + "loss": 0.1782, + "step": 35620 + }, + { + "epoch": 1.2947888654698743, + "grad_norm": 0.9950854182243347, + "learning_rate": 4.747133465764163e-05, + "loss": 0.1254, + "step": 35630 + }, + { + "epoch": 1.2951522639726725, + "grad_norm": 0.7515049576759338, + "learning_rate": 4.746905995752482e-05, + "loss": 0.144, + "step": 35640 + }, + { + "epoch": 1.2955156624754707, + "grad_norm": 0.9141899347305298, + "learning_rate": 4.746678428929616e-05, + "loss": 0.1263, + "step": 35650 + }, + { + "epoch": 1.2958790609782689, + "grad_norm": 1.5138301849365234, + "learning_rate": 4.74645076530537e-05, + "loss": 0.143, + "step": 35660 + }, + { + "epoch": 1.2962424594810669, + "grad_norm": 0.6234374046325684, + "learning_rate": 4.746223004889554e-05, + "loss": 0.1492, + "step": 35670 + }, + { + "epoch": 1.296605857983865, + "grad_norm": 0.6530427932739258, + "learning_rate": 4.745995147691981e-05, + "loss": 0.1579, + "step": 35680 + }, + { + "epoch": 1.2969692564866633, + "grad_norm": 0.9193394780158997, + "learning_rate": 4.745767193722468e-05, + "loss": 0.141, + "step": 35690 + }, + { + "epoch": 1.2973326549894615, + "grad_norm": 0.8602085709571838, + "learning_rate": 4.745539142990837e-05, + "loss": 0.1302, + "step": 35700 + }, + { + "epoch": 1.2976960534922597, + "grad_norm": 0.7672144174575806, + "learning_rate": 4.745310995506914e-05, + "loss": 0.1632, + "step": 35710 + }, + { + "epoch": 1.2980594519950577, + "grad_norm": 0.728992760181427, + "learning_rate": 4.74508275128053e-05, + "loss": 0.161, + "step": 35720 + }, + { + "epoch": 1.2984228504978559, + "grad_norm": 0.923298716545105, + "learning_rate": 4.7448544103215164e-05, + "loss": 0.125, + "step": 35730 + }, + { + "epoch": 1.298786249000654, + "grad_norm": 0.6956040859222412, + "learning_rate": 4.744625972639715e-05, + "loss": 0.1071, + "step": 35740 + }, + { + "epoch": 1.2991496475034523, + "grad_norm": 0.7756535410881042, + "learning_rate": 4.7443974382449664e-05, + "loss": 0.1452, + "step": 35750 + }, + { + "epoch": 1.2995130460062505, + "grad_norm": 1.3024572134017944, + "learning_rate": 4.7441688071471174e-05, + "loss": 0.1328, + "step": 35760 + }, + { + "epoch": 1.2998764445090485, + "grad_norm": 1.8605810403823853, + "learning_rate": 4.7439400793560196e-05, + "loss": 0.2063, + "step": 35770 + }, + { + "epoch": 1.300239843011847, + "grad_norm": 1.6558598279953003, + "learning_rate": 4.743711254881528e-05, + "loss": 0.1282, + "step": 35780 + }, + { + "epoch": 1.300603241514645, + "grad_norm": 0.8223969340324402, + "learning_rate": 4.743482333733501e-05, + "loss": 0.1348, + "step": 35790 + }, + { + "epoch": 1.300966640017443, + "grad_norm": 1.5702069997787476, + "learning_rate": 4.743253315921803e-05, + "loss": 0.1656, + "step": 35800 + }, + { + "epoch": 1.3013300385202413, + "grad_norm": 0.6384185552597046, + "learning_rate": 4.743024201456301e-05, + "loss": 0.1369, + "step": 35810 + }, + { + "epoch": 1.3016934370230395, + "grad_norm": 1.1003926992416382, + "learning_rate": 4.7427949903468667e-05, + "loss": 0.1606, + "step": 35820 + }, + { + "epoch": 1.3020568355258377, + "grad_norm": 1.1869399547576904, + "learning_rate": 4.742565682603376e-05, + "loss": 0.131, + "step": 35830 + }, + { + "epoch": 1.3024202340286357, + "grad_norm": 1.0088342428207397, + "learning_rate": 4.7423362782357096e-05, + "loss": 0.1183, + "step": 35840 + }, + { + "epoch": 1.302783632531434, + "grad_norm": 0.8730582594871521, + "learning_rate": 4.7421067772537506e-05, + "loss": 0.1228, + "step": 35850 + }, + { + "epoch": 1.3031470310342321, + "grad_norm": 1.3678339719772339, + "learning_rate": 4.7418771796673886e-05, + "loss": 0.119, + "step": 35860 + }, + { + "epoch": 1.3035104295370303, + "grad_norm": 0.5349250435829163, + "learning_rate": 4.7416474854865154e-05, + "loss": 0.1571, + "step": 35870 + }, + { + "epoch": 1.3038738280398285, + "grad_norm": 1.4240535497665405, + "learning_rate": 4.741417694721028e-05, + "loss": 0.1221, + "step": 35880 + }, + { + "epoch": 1.3042372265426265, + "grad_norm": 2.4048521518707275, + "learning_rate": 4.741187807380827e-05, + "loss": 0.1265, + "step": 35890 + }, + { + "epoch": 1.3046006250454247, + "grad_norm": 0.9620640277862549, + "learning_rate": 4.740957823475818e-05, + "loss": 0.1601, + "step": 35900 + }, + { + "epoch": 1.304964023548223, + "grad_norm": 2.351884603500366, + "learning_rate": 4.740727743015909e-05, + "loss": 0.1134, + "step": 35910 + }, + { + "epoch": 1.3053274220510211, + "grad_norm": 0.6285625100135803, + "learning_rate": 4.7404975660110146e-05, + "loss": 0.4726, + "step": 35920 + }, + { + "epoch": 1.3056908205538194, + "grad_norm": 0.6645105481147766, + "learning_rate": 4.740267292471051e-05, + "loss": 0.1163, + "step": 35930 + }, + { + "epoch": 1.3060542190566176, + "grad_norm": 1.6493772268295288, + "learning_rate": 4.7400369224059415e-05, + "loss": 1.652, + "step": 35940 + }, + { + "epoch": 1.3064176175594158, + "grad_norm": 0.6978940367698669, + "learning_rate": 4.739806455825611e-05, + "loss": 0.1534, + "step": 35950 + }, + { + "epoch": 1.3067810160622138, + "grad_norm": 3.241497039794922, + "learning_rate": 4.739575892739989e-05, + "loss": 0.1196, + "step": 35960 + }, + { + "epoch": 1.307144414565012, + "grad_norm": 1.1746867895126343, + "learning_rate": 4.739345233159011e-05, + "loss": 0.1576, + "step": 35970 + }, + { + "epoch": 1.3075078130678102, + "grad_norm": 0.9227213859558105, + "learning_rate": 4.7391144770926144e-05, + "loss": 0.5262, + "step": 35980 + }, + { + "epoch": 1.3078712115706084, + "grad_norm": 2.3250370025634766, + "learning_rate": 4.738883624550741e-05, + "loss": 0.1231, + "step": 35990 + }, + { + "epoch": 1.3082346100734066, + "grad_norm": 1.344184398651123, + "learning_rate": 4.738652675543339e-05, + "loss": 0.1508, + "step": 36000 + }, + { + "epoch": 1.3082346100734066, + "eval_loss": 0.3749592900276184, + "eval_runtime": 180.4784, + "eval_samples_per_second": 41.08, + "eval_steps_per_second": 5.136, + "eval_wer": 0.17836331620890591, + "step": 36000 + }, + { + "epoch": 1.3085980085762046, + "grad_norm": 2.1917757987976074, + "learning_rate": 4.738421630080358e-05, + "loss": 0.1232, + "step": 36010 + }, + { + "epoch": 1.3089614070790028, + "grad_norm": 0.7760763764381409, + "learning_rate": 4.738190488171753e-05, + "loss": 0.1965, + "step": 36020 + }, + { + "epoch": 1.309324805581801, + "grad_norm": 1.6439956426620483, + "learning_rate": 4.737959249827484e-05, + "loss": 0.13, + "step": 36030 + }, + { + "epoch": 1.3096882040845992, + "grad_norm": 1.612452507019043, + "learning_rate": 4.7377279150575137e-05, + "loss": 0.1335, + "step": 36040 + }, + { + "epoch": 1.3100516025873974, + "grad_norm": 0.9884024858474731, + "learning_rate": 4.737496483871809e-05, + "loss": 1.4829, + "step": 36050 + }, + { + "epoch": 1.3104150010901954, + "grad_norm": 0.8376805782318115, + "learning_rate": 4.737264956280342e-05, + "loss": 0.1502, + "step": 36060 + }, + { + "epoch": 1.3107783995929938, + "grad_norm": 0.7544919848442078, + "learning_rate": 4.7370333322930884e-05, + "loss": 0.1799, + "step": 36070 + }, + { + "epoch": 1.3111417980957918, + "grad_norm": 1.0415360927581787, + "learning_rate": 4.736801611920028e-05, + "loss": 0.133, + "step": 36080 + }, + { + "epoch": 1.31150519659859, + "grad_norm": 1.3284482955932617, + "learning_rate": 4.736569795171144e-05, + "loss": 0.6017, + "step": 36090 + }, + { + "epoch": 1.3118685951013882, + "grad_norm": 0.766444742679596, + "learning_rate": 4.736337882056425e-05, + "loss": 0.1629, + "step": 36100 + }, + { + "epoch": 1.3122319936041864, + "grad_norm": 1.8423712253570557, + "learning_rate": 4.7361058725858645e-05, + "loss": 0.1417, + "step": 36110 + }, + { + "epoch": 1.3125953921069846, + "grad_norm": 0.7671094536781311, + "learning_rate": 4.735873766769458e-05, + "loss": 0.1498, + "step": 36120 + }, + { + "epoch": 1.3129587906097826, + "grad_norm": 0.7939559817314148, + "learning_rate": 4.735641564617206e-05, + "loss": 0.1101, + "step": 36130 + }, + { + "epoch": 1.3133221891125808, + "grad_norm": 1.1651771068572998, + "learning_rate": 4.735409266139113e-05, + "loss": 0.1478, + "step": 36140 + }, + { + "epoch": 1.313685587615379, + "grad_norm": 0.3433835506439209, + "learning_rate": 4.735176871345188e-05, + "loss": 0.1775, + "step": 36150 + }, + { + "epoch": 1.3140489861181772, + "grad_norm": 1.2903847694396973, + "learning_rate": 4.734944380245445e-05, + "loss": 0.129, + "step": 36160 + }, + { + "epoch": 1.3144123846209754, + "grad_norm": 1.4051779508590698, + "learning_rate": 4.734711792849901e-05, + "loss": 0.2257, + "step": 36170 + }, + { + "epoch": 1.3147757831237734, + "grad_norm": 0.7781183123588562, + "learning_rate": 4.734479109168577e-05, + "loss": 0.1072, + "step": 36180 + }, + { + "epoch": 1.3151391816265716, + "grad_norm": 3.805746078491211, + "learning_rate": 4.734246329211498e-05, + "loss": 0.1327, + "step": 36190 + }, + { + "epoch": 1.3155025801293698, + "grad_norm": 1.0254390239715576, + "learning_rate": 4.734013452988694e-05, + "loss": 0.1479, + "step": 36200 + }, + { + "epoch": 1.315865978632168, + "grad_norm": 1.2095835208892822, + "learning_rate": 4.7337804805101994e-05, + "loss": 0.1286, + "step": 36210 + }, + { + "epoch": 1.3162293771349662, + "grad_norm": 0.7073403596878052, + "learning_rate": 4.733547411786052e-05, + "loss": 0.1385, + "step": 36220 + }, + { + "epoch": 1.3165927756377644, + "grad_norm": 6.6172709465026855, + "learning_rate": 4.7333142468262924e-05, + "loss": 0.1202, + "step": 36230 + }, + { + "epoch": 1.3169561741405627, + "grad_norm": 0.9846429228782654, + "learning_rate": 4.733080985640969e-05, + "loss": 0.1202, + "step": 36240 + }, + { + "epoch": 1.3173195726433606, + "grad_norm": 1.7326525449752808, + "learning_rate": 4.7328476282401313e-05, + "loss": 0.1269, + "step": 36250 + }, + { + "epoch": 1.3176829711461588, + "grad_norm": 1.7362529039382935, + "learning_rate": 4.7326141746338334e-05, + "loss": 0.139, + "step": 36260 + }, + { + "epoch": 1.318046369648957, + "grad_norm": 0.5070465207099915, + "learning_rate": 4.732380624832135e-05, + "loss": 0.1963, + "step": 36270 + }, + { + "epoch": 1.3184097681517553, + "grad_norm": 2.0635170936584473, + "learning_rate": 4.7321469788450976e-05, + "loss": 0.1251, + "step": 36280 + }, + { + "epoch": 1.3187731666545535, + "grad_norm": 17.00756072998047, + "learning_rate": 4.731913236682789e-05, + "loss": 0.4956, + "step": 36290 + }, + { + "epoch": 1.3191365651573514, + "grad_norm": 0.4699925184249878, + "learning_rate": 4.7316793983552806e-05, + "loss": 0.1397, + "step": 36300 + }, + { + "epoch": 1.3194999636601497, + "grad_norm": 1.5378074645996094, + "learning_rate": 4.731445463872647e-05, + "loss": 0.1444, + "step": 36310 + }, + { + "epoch": 1.3198633621629479, + "grad_norm": 0.6688012480735779, + "learning_rate": 4.731211433244967e-05, + "loss": 0.2393, + "step": 36320 + }, + { + "epoch": 1.320226760665746, + "grad_norm": 1.1277016401290894, + "learning_rate": 4.7309773064823274e-05, + "loss": 0.6115, + "step": 36330 + }, + { + "epoch": 1.3205901591685443, + "grad_norm": 0.7446697354316711, + "learning_rate": 4.7307430835948114e-05, + "loss": 0.1687, + "step": 36340 + }, + { + "epoch": 1.3209535576713423, + "grad_norm": 1.6629223823547363, + "learning_rate": 4.730508764592514e-05, + "loss": 0.1681, + "step": 36350 + }, + { + "epoch": 1.3213169561741407, + "grad_norm": 1.4008903503417969, + "learning_rate": 4.73027434948553e-05, + "loss": 0.1343, + "step": 36360 + }, + { + "epoch": 1.3216803546769387, + "grad_norm": 0.6811515092849731, + "learning_rate": 4.7300398382839586e-05, + "loss": 0.186, + "step": 36370 + }, + { + "epoch": 1.3220437531797369, + "grad_norm": 0.822640597820282, + "learning_rate": 4.7298052309979055e-05, + "loss": 0.1336, + "step": 36380 + }, + { + "epoch": 1.322407151682535, + "grad_norm": 1.145392894744873, + "learning_rate": 4.729570527637479e-05, + "loss": 0.1267, + "step": 36390 + }, + { + "epoch": 1.3227705501853333, + "grad_norm": 1.6381548643112183, + "learning_rate": 4.729335728212792e-05, + "loss": 0.1213, + "step": 36400 + }, + { + "epoch": 1.3231339486881315, + "grad_norm": 1.882562518119812, + "learning_rate": 4.729100832733959e-05, + "loss": 0.1384, + "step": 36410 + }, + { + "epoch": 1.3234973471909295, + "grad_norm": 0.7675313949584961, + "learning_rate": 4.728865841211103e-05, + "loss": 0.1554, + "step": 36420 + }, + { + "epoch": 1.3238607456937277, + "grad_norm": 0.900806725025177, + "learning_rate": 4.728630753654349e-05, + "loss": 0.1174, + "step": 36430 + }, + { + "epoch": 1.324224144196526, + "grad_norm": 1.4791582822799683, + "learning_rate": 4.7283955700738235e-05, + "loss": 0.1449, + "step": 36440 + }, + { + "epoch": 1.324587542699324, + "grad_norm": 0.5582447052001953, + "learning_rate": 4.728160290479663e-05, + "loss": 0.1754, + "step": 36450 + }, + { + "epoch": 1.3249509412021223, + "grad_norm": 2.113154411315918, + "learning_rate": 4.727924914882002e-05, + "loss": 0.1091, + "step": 36460 + }, + { + "epoch": 1.3253143397049203, + "grad_norm": 0.9747204184532166, + "learning_rate": 4.727689443290985e-05, + "loss": 0.144, + "step": 36470 + }, + { + "epoch": 1.3256777382077185, + "grad_norm": 0.6118887662887573, + "learning_rate": 4.727453875716755e-05, + "loss": 0.1144, + "step": 36480 + }, + { + "epoch": 1.3260411367105167, + "grad_norm": 1.146438717842102, + "learning_rate": 4.727218212169464e-05, + "loss": 0.1138, + "step": 36490 + }, + { + "epoch": 1.326404535213315, + "grad_norm": 1.2453789710998535, + "learning_rate": 4.7269824526592636e-05, + "loss": 0.1642, + "step": 36500 + }, + { + "epoch": 1.3267679337161131, + "grad_norm": 2.001384973526001, + "learning_rate": 4.726746597196313e-05, + "loss": 0.1317, + "step": 36510 + }, + { + "epoch": 1.3271313322189113, + "grad_norm": 0.4389583170413971, + "learning_rate": 4.726510645790775e-05, + "loss": 0.1477, + "step": 36520 + }, + { + "epoch": 1.3274947307217095, + "grad_norm": 0.5064995884895325, + "learning_rate": 4.726274598452815e-05, + "loss": 0.1208, + "step": 36530 + }, + { + "epoch": 1.3278581292245075, + "grad_norm": 1.937470555305481, + "learning_rate": 4.726038455192603e-05, + "loss": 0.1425, + "step": 36540 + }, + { + "epoch": 1.3282215277273057, + "grad_norm": 0.7836539149284363, + "learning_rate": 4.725802216020315e-05, + "loss": 0.1942, + "step": 36550 + }, + { + "epoch": 1.328584926230104, + "grad_norm": 0.7476559281349182, + "learning_rate": 4.725565880946129e-05, + "loss": 0.1269, + "step": 36560 + }, + { + "epoch": 1.3289483247329021, + "grad_norm": 1.584917426109314, + "learning_rate": 4.725329449980227e-05, + "loss": 0.1939, + "step": 36570 + }, + { + "epoch": 1.3293117232357003, + "grad_norm": 1.6503409147262573, + "learning_rate": 4.7250929231327975e-05, + "loss": 0.1281, + "step": 36580 + }, + { + "epoch": 1.3296751217384983, + "grad_norm": 1.2938586473464966, + "learning_rate": 4.72485630041403e-05, + "loss": 0.1505, + "step": 36590 + }, + { + "epoch": 1.3300385202412965, + "grad_norm": 0.7093682289123535, + "learning_rate": 4.724619581834121e-05, + "loss": 0.1517, + "step": 36600 + }, + { + "epoch": 1.3300385202412965, + "eval_loss": 0.3578657805919647, + "eval_runtime": 181.1919, + "eval_samples_per_second": 40.918, + "eval_steps_per_second": 5.116, + "eval_wer": 0.17275400729754753, + "step": 36600 + }, + { + "epoch": 1.3304019187440947, + "grad_norm": 2.326840877532959, + "learning_rate": 4.72438276740327e-05, + "loss": 0.1333, + "step": 36610 + }, + { + "epoch": 1.330765317246893, + "grad_norm": 1.0283209085464478, + "learning_rate": 4.7241458571316794e-05, + "loss": 0.177, + "step": 36620 + }, + { + "epoch": 1.3311287157496912, + "grad_norm": 1.416473388671875, + "learning_rate": 4.7239325559518525e-05, + "loss": 3.9838, + "step": 36630 + }, + { + "epoch": 1.3314921142524891, + "grad_norm": 0.40842917561531067, + "learning_rate": 4.7236954636109833e-05, + "loss": 0.1116, + "step": 36640 + }, + { + "epoch": 1.3318555127552876, + "grad_norm": 2.1062543392181396, + "learning_rate": 4.7234582754589886e-05, + "loss": 0.121, + "step": 36650 + }, + { + "epoch": 1.3322189112580856, + "grad_norm": 0.6768646836280823, + "learning_rate": 4.723220991506088e-05, + "loss": 0.1271, + "step": 36660 + }, + { + "epoch": 1.3325823097608838, + "grad_norm": 1.1778359413146973, + "learning_rate": 4.7229836117625044e-05, + "loss": 0.1289, + "step": 36670 + }, + { + "epoch": 1.332945708263682, + "grad_norm": 2.1446762084960938, + "learning_rate": 4.7227461362384664e-05, + "loss": 0.163, + "step": 36680 + }, + { + "epoch": 1.3333091067664802, + "grad_norm": 0.9764724373817444, + "learning_rate": 4.7225085649442063e-05, + "loss": 0.1131, + "step": 36690 + }, + { + "epoch": 1.3336725052692784, + "grad_norm": 0.6134273409843445, + "learning_rate": 4.72227089788996e-05, + "loss": 0.1269, + "step": 36700 + }, + { + "epoch": 1.3340359037720764, + "grad_norm": 0.8482096195220947, + "learning_rate": 4.722033135085967e-05, + "loss": 0.1211, + "step": 36710 + }, + { + "epoch": 1.3343993022748746, + "grad_norm": 1.1198707818984985, + "learning_rate": 4.7217952765424734e-05, + "loss": 0.1681, + "step": 36720 + }, + { + "epoch": 1.3347627007776728, + "grad_norm": 1.5771534442901611, + "learning_rate": 4.721557322269725e-05, + "loss": 0.7935, + "step": 36730 + }, + { + "epoch": 1.335126099280471, + "grad_norm": 1.8235740661621094, + "learning_rate": 4.721319272277977e-05, + "loss": 0.1249, + "step": 36740 + }, + { + "epoch": 1.3354894977832692, + "grad_norm": 2.7422354221343994, + "learning_rate": 4.7210811265774845e-05, + "loss": 0.1637, + "step": 36750 + }, + { + "epoch": 1.3358528962860672, + "grad_norm": 3.130943775177002, + "learning_rate": 4.720842885178509e-05, + "loss": 0.1437, + "step": 36760 + }, + { + "epoch": 1.3362162947888654, + "grad_norm": 0.48167362809181213, + "learning_rate": 4.720604548091316e-05, + "loss": 0.2081, + "step": 36770 + }, + { + "epoch": 1.3365796932916636, + "grad_norm": 1.3520551919937134, + "learning_rate": 4.720366115326174e-05, + "loss": 0.1566, + "step": 36780 + }, + { + "epoch": 1.3369430917944618, + "grad_norm": 0.9920271635055542, + "learning_rate": 4.720127586893355e-05, + "loss": 0.0995, + "step": 36790 + }, + { + "epoch": 1.33730649029726, + "grad_norm": 0.6343932747840881, + "learning_rate": 4.7198889628031376e-05, + "loss": 0.6462, + "step": 36800 + }, + { + "epoch": 1.3376698888000582, + "grad_norm": 0.8678078651428223, + "learning_rate": 4.719650243065804e-05, + "loss": 0.1224, + "step": 36810 + }, + { + "epoch": 1.3380332873028564, + "grad_norm": 1.1642274856567383, + "learning_rate": 4.719411427691639e-05, + "loss": 0.1438, + "step": 36820 + }, + { + "epoch": 1.3383966858056544, + "grad_norm": 0.6214116811752319, + "learning_rate": 4.719172516690932e-05, + "loss": 0.1172, + "step": 36830 + }, + { + "epoch": 1.3387600843084526, + "grad_norm": 0.9811148643493652, + "learning_rate": 4.7189335100739764e-05, + "loss": 0.1229, + "step": 36840 + }, + { + "epoch": 1.3391234828112508, + "grad_norm": 0.6965753436088562, + "learning_rate": 4.718694407851072e-05, + "loss": 0.1432, + "step": 36850 + }, + { + "epoch": 1.339486881314049, + "grad_norm": 2.3423101902008057, + "learning_rate": 4.718455210032519e-05, + "loss": 0.1578, + "step": 36860 + }, + { + "epoch": 1.3398502798168472, + "grad_norm": 1.1584868431091309, + "learning_rate": 4.718215916628625e-05, + "loss": 0.1792, + "step": 36870 + }, + { + "epoch": 1.3402136783196452, + "grad_norm": 1.1057560443878174, + "learning_rate": 4.717976527649698e-05, + "loss": 0.129, + "step": 36880 + }, + { + "epoch": 1.3405770768224434, + "grad_norm": 1.6027841567993164, + "learning_rate": 4.7177370431060554e-05, + "loss": 0.0984, + "step": 36890 + }, + { + "epoch": 1.3409404753252416, + "grad_norm": 1.4970412254333496, + "learning_rate": 4.717497463008014e-05, + "loss": 0.1764, + "step": 36900 + }, + { + "epoch": 1.3413038738280398, + "grad_norm": 7.802013397216797, + "learning_rate": 4.717257787365897e-05, + "loss": 0.1511, + "step": 36910 + }, + { + "epoch": 1.341667272330838, + "grad_norm": 0.6998898983001709, + "learning_rate": 4.717018016190031e-05, + "loss": 0.1874, + "step": 36920 + }, + { + "epoch": 1.342030670833636, + "grad_norm": 1.1323654651641846, + "learning_rate": 4.716778149490747e-05, + "loss": 0.0968, + "step": 36930 + }, + { + "epoch": 1.3423940693364345, + "grad_norm": 0.9213439226150513, + "learning_rate": 4.716538187278379e-05, + "loss": 0.11, + "step": 36940 + }, + { + "epoch": 1.3427574678392324, + "grad_norm": 3.241694688796997, + "learning_rate": 4.7162981295632676e-05, + "loss": 0.1695, + "step": 36950 + }, + { + "epoch": 1.3431208663420307, + "grad_norm": 2.2264153957366943, + "learning_rate": 4.716057976355755e-05, + "loss": 0.1568, + "step": 36960 + }, + { + "epoch": 1.3434842648448289, + "grad_norm": 2.439816474914551, + "learning_rate": 4.715817727666189e-05, + "loss": 0.2052, + "step": 36970 + }, + { + "epoch": 1.343847663347627, + "grad_norm": 0.8145691752433777, + "learning_rate": 4.715577383504921e-05, + "loss": 0.1501, + "step": 36980 + }, + { + "epoch": 1.3442110618504253, + "grad_norm": 0.8044644594192505, + "learning_rate": 4.7153369438823074e-05, + "loss": 0.1088, + "step": 36990 + }, + { + "epoch": 1.3445744603532233, + "grad_norm": 1.2818701267242432, + "learning_rate": 4.715096408808707e-05, + "loss": 0.1492, + "step": 37000 + }, + { + "epoch": 1.3449378588560215, + "grad_norm": 3.879460096359253, + "learning_rate": 4.714855778294482e-05, + "loss": 0.1407, + "step": 37010 + }, + { + "epoch": 1.3453012573588197, + "grad_norm": 0.7606347799301147, + "learning_rate": 4.714615052350004e-05, + "loss": 0.2055, + "step": 37020 + }, + { + "epoch": 1.3456646558616179, + "grad_norm": 14.105179786682129, + "learning_rate": 4.714374230985642e-05, + "loss": 0.2572, + "step": 37030 + }, + { + "epoch": 1.346028054364416, + "grad_norm": 0.9659761786460876, + "learning_rate": 4.714133314211774e-05, + "loss": 0.1126, + "step": 37040 + }, + { + "epoch": 1.346391452867214, + "grad_norm": 0.8018509745597839, + "learning_rate": 4.7138923020387785e-05, + "loss": 0.1731, + "step": 37050 + }, + { + "epoch": 1.3467548513700123, + "grad_norm": 1.450352668762207, + "learning_rate": 4.7136511944770414e-05, + "loss": 0.139, + "step": 37060 + }, + { + "epoch": 1.3471182498728105, + "grad_norm": 0.4030288755893707, + "learning_rate": 4.71340999153695e-05, + "loss": 0.1317, + "step": 37070 + }, + { + "epoch": 1.3474816483756087, + "grad_norm": 1.5737247467041016, + "learning_rate": 4.713168693228898e-05, + "loss": 0.1183, + "step": 37080 + }, + { + "epoch": 1.347845046878407, + "grad_norm": 0.9841533303260803, + "learning_rate": 4.712927299563281e-05, + "loss": 0.126, + "step": 37090 + }, + { + "epoch": 1.348208445381205, + "grad_norm": 0.9880457520484924, + "learning_rate": 4.7126858105505004e-05, + "loss": 0.1518, + "step": 37100 + }, + { + "epoch": 1.3485718438840033, + "grad_norm": 1.250982403755188, + "learning_rate": 4.7124442262009605e-05, + "loss": 0.1241, + "step": 37110 + }, + { + "epoch": 1.3489352423868013, + "grad_norm": 0.8015254139900208, + "learning_rate": 4.712202546525071e-05, + "loss": 0.1294, + "step": 37120 + }, + { + "epoch": 1.3492986408895995, + "grad_norm": 1.3923901319503784, + "learning_rate": 4.711960771533245e-05, + "loss": 0.1273, + "step": 37130 + }, + { + "epoch": 1.3496620393923977, + "grad_norm": 1.7166532278060913, + "learning_rate": 4.7117189012359e-05, + "loss": 0.1309, + "step": 37140 + }, + { + "epoch": 1.350025437895196, + "grad_norm": 1.0079472064971924, + "learning_rate": 4.711476935643456e-05, + "loss": 0.1275, + "step": 37150 + }, + { + "epoch": 1.3503888363979941, + "grad_norm": 0.7986971139907837, + "learning_rate": 4.71123487476634e-05, + "loss": 0.1422, + "step": 37160 + }, + { + "epoch": 1.350752234900792, + "grad_norm": 0.3768475353717804, + "learning_rate": 4.71099271861498e-05, + "loss": 0.1943, + "step": 37170 + }, + { + "epoch": 1.3511156334035903, + "grad_norm": 1.1428521871566772, + "learning_rate": 4.7107504671998115e-05, + "loss": 0.1335, + "step": 37180 + }, + { + "epoch": 1.3514790319063885, + "grad_norm": 0.627876341342926, + "learning_rate": 4.7105081205312715e-05, + "loss": 0.1301, + "step": 37190 + }, + { + "epoch": 1.3518424304091867, + "grad_norm": 1.823798656463623, + "learning_rate": 4.710265678619801e-05, + "loss": 0.1732, + "step": 37200 + }, + { + "epoch": 1.3518424304091867, + "eval_loss": 0.3695838451385498, + "eval_runtime": 181.0815, + "eval_samples_per_second": 40.943, + "eval_steps_per_second": 5.119, + "eval_wer": 0.17419717900775136, + "step": 37200 + }, + { + "epoch": 1.352205828911985, + "grad_norm": 1.1491807699203491, + "learning_rate": 4.710023141475846e-05, + "loss": 0.1267, + "step": 37210 + }, + { + "epoch": 1.352569227414783, + "grad_norm": 0.9230825901031494, + "learning_rate": 4.709780509109858e-05, + "loss": 0.1339, + "step": 37220 + }, + { + "epoch": 1.3529326259175813, + "grad_norm": 1.4637092351913452, + "learning_rate": 4.7095377815322893e-05, + "loss": 0.1199, + "step": 37230 + }, + { + "epoch": 1.3532960244203793, + "grad_norm": 2.766608476638794, + "learning_rate": 4.7092949587536e-05, + "loss": 0.1153, + "step": 37240 + }, + { + "epoch": 1.3536594229231775, + "grad_norm": 0.6508689522743225, + "learning_rate": 4.7090520407842516e-05, + "loss": 0.1263, + "step": 37250 + }, + { + "epoch": 1.3540228214259757, + "grad_norm": 1.2959270477294922, + "learning_rate": 4.70880902763471e-05, + "loss": 0.207, + "step": 37260 + }, + { + "epoch": 1.354386219928774, + "grad_norm": 0.4432971477508545, + "learning_rate": 4.708565919315447e-05, + "loss": 0.189, + "step": 37270 + }, + { + "epoch": 1.3547496184315722, + "grad_norm": 1.1359493732452393, + "learning_rate": 4.708322715836936e-05, + "loss": 0.1161, + "step": 37280 + }, + { + "epoch": 1.3551130169343701, + "grad_norm": 1.1669936180114746, + "learning_rate": 4.708079417209657e-05, + "loss": 0.1298, + "step": 37290 + }, + { + "epoch": 1.3554764154371683, + "grad_norm": 1.0905638933181763, + "learning_rate": 4.707836023444092e-05, + "loss": 0.1763, + "step": 37300 + }, + { + "epoch": 1.3558398139399666, + "grad_norm": 1.1597601175308228, + "learning_rate": 4.707592534550729e-05, + "loss": 0.1346, + "step": 37310 + }, + { + "epoch": 1.3562032124427648, + "grad_norm": 0.3999848961830139, + "learning_rate": 4.707348950540057e-05, + "loss": 0.1614, + "step": 37320 + }, + { + "epoch": 1.356566610945563, + "grad_norm": 1.9580241441726685, + "learning_rate": 4.7071052714225736e-05, + "loss": 0.134, + "step": 37330 + }, + { + "epoch": 1.356930009448361, + "grad_norm": 0.7617779970169067, + "learning_rate": 4.7068614972087764e-05, + "loss": 0.1372, + "step": 37340 + }, + { + "epoch": 1.3572934079511592, + "grad_norm": 1.103390097618103, + "learning_rate": 4.706617627909169e-05, + "loss": 0.1162, + "step": 37350 + }, + { + "epoch": 1.3576568064539574, + "grad_norm": 1.1971250772476196, + "learning_rate": 4.70637366353426e-05, + "loss": 0.1358, + "step": 37360 + }, + { + "epoch": 1.3580202049567556, + "grad_norm": 0.47730955481529236, + "learning_rate": 4.70612960409456e-05, + "loss": 0.1518, + "step": 37370 + }, + { + "epoch": 1.3583836034595538, + "grad_norm": 1.2747211456298828, + "learning_rate": 4.705885449600584e-05, + "loss": 0.1264, + "step": 37380 + }, + { + "epoch": 1.358747001962352, + "grad_norm": 1.5373166799545288, + "learning_rate": 4.705641200062854e-05, + "loss": 0.1362, + "step": 37390 + }, + { + "epoch": 1.3591104004651502, + "grad_norm": 7.401641368865967, + "learning_rate": 4.705396855491891e-05, + "loss": 0.1524, + "step": 37400 + }, + { + "epoch": 1.3594737989679482, + "grad_norm": 1.0285519361495972, + "learning_rate": 4.705152415898225e-05, + "loss": 0.1343, + "step": 37410 + }, + { + "epoch": 1.3598371974707464, + "grad_norm": 0.9249128699302673, + "learning_rate": 4.704907881292387e-05, + "loss": 0.1857, + "step": 37420 + }, + { + "epoch": 1.3602005959735446, + "grad_norm": 1.0541716814041138, + "learning_rate": 4.7046632516849135e-05, + "loss": 0.1519, + "step": 37430 + }, + { + "epoch": 1.3605639944763428, + "grad_norm": 0.9401641488075256, + "learning_rate": 4.704418527086345e-05, + "loss": 0.1239, + "step": 37440 + }, + { + "epoch": 1.360927392979141, + "grad_norm": 1.179436445236206, + "learning_rate": 4.7041737075072254e-05, + "loss": 0.1345, + "step": 37450 + }, + { + "epoch": 1.361290791481939, + "grad_norm": 0.5075955390930176, + "learning_rate": 4.703928792958103e-05, + "loss": 0.1119, + "step": 37460 + }, + { + "epoch": 1.3616541899847372, + "grad_norm": 1.7815593481063843, + "learning_rate": 4.7036837834495306e-05, + "loss": 0.1378, + "step": 37470 + }, + { + "epoch": 1.3620175884875354, + "grad_norm": 1.0368989706039429, + "learning_rate": 4.7034386789920646e-05, + "loss": 0.1193, + "step": 37480 + }, + { + "epoch": 1.3623809869903336, + "grad_norm": 0.9185715913772583, + "learning_rate": 4.703193479596266e-05, + "loss": 0.1539, + "step": 37490 + }, + { + "epoch": 1.3627443854931318, + "grad_norm": 1.1198723316192627, + "learning_rate": 4.7029481852726996e-05, + "loss": 0.1483, + "step": 37500 + }, + { + "epoch": 1.3631077839959298, + "grad_norm": 0.8500091433525085, + "learning_rate": 4.702702796031934e-05, + "loss": 0.1742, + "step": 37510 + }, + { + "epoch": 1.3634711824987282, + "grad_norm": 1.0554280281066895, + "learning_rate": 4.7024573118845414e-05, + "loss": 0.1406, + "step": 37520 + }, + { + "epoch": 1.3638345810015262, + "grad_norm": 1.524234414100647, + "learning_rate": 4.702211732841101e-05, + "loss": 0.1242, + "step": 37530 + }, + { + "epoch": 1.3641979795043244, + "grad_norm": 1.3234226703643799, + "learning_rate": 4.701966058912191e-05, + "loss": 0.1114, + "step": 37540 + }, + { + "epoch": 1.3645613780071226, + "grad_norm": 4.683910846710205, + "learning_rate": 4.701720290108399e-05, + "loss": 0.1377, + "step": 37550 + }, + { + "epoch": 1.3649247765099208, + "grad_norm": 1.4473618268966675, + "learning_rate": 4.701474426440313e-05, + "loss": 0.1136, + "step": 37560 + }, + { + "epoch": 1.365288175012719, + "grad_norm": 1.2548261880874634, + "learning_rate": 4.701228467918527e-05, + "loss": 0.1376, + "step": 37570 + }, + { + "epoch": 1.365651573515517, + "grad_norm": 0.5335317850112915, + "learning_rate": 4.7009824145536385e-05, + "loss": 0.1103, + "step": 37580 + }, + { + "epoch": 1.3660149720183152, + "grad_norm": 2.0894274711608887, + "learning_rate": 4.700736266356249e-05, + "loss": 0.2852, + "step": 37590 + }, + { + "epoch": 1.3663783705211134, + "grad_norm": 4.296121120452881, + "learning_rate": 4.700490023336963e-05, + "loss": 0.1475, + "step": 37600 + }, + { + "epoch": 1.3667417690239116, + "grad_norm": 3.073425054550171, + "learning_rate": 4.700243685506393e-05, + "loss": 0.1254, + "step": 37610 + }, + { + "epoch": 1.3671051675267099, + "grad_norm": 0.5121023058891296, + "learning_rate": 4.69999725287515e-05, + "loss": 0.1737, + "step": 37620 + }, + { + "epoch": 1.3674685660295078, + "grad_norm": 0.8064444661140442, + "learning_rate": 4.699750725453853e-05, + "loss": 0.1045, + "step": 37630 + }, + { + "epoch": 1.367831964532306, + "grad_norm": 0.5956308841705322, + "learning_rate": 4.699504103253124e-05, + "loss": 0.1059, + "step": 37640 + }, + { + "epoch": 1.3681953630351043, + "grad_norm": 1.8260743618011475, + "learning_rate": 4.699257386283589e-05, + "loss": 0.1677, + "step": 37650 + }, + { + "epoch": 1.3685587615379025, + "grad_norm": 2.3063583374023438, + "learning_rate": 4.699010574555879e-05, + "loss": 0.1406, + "step": 37660 + }, + { + "epoch": 1.3689221600407007, + "grad_norm": 0.5565524697303772, + "learning_rate": 4.698763668080627e-05, + "loss": 0.1556, + "step": 37670 + }, + { + "epoch": 1.3692855585434989, + "grad_norm": 1.148147702217102, + "learning_rate": 4.698516666868471e-05, + "loss": 0.124, + "step": 37680 + }, + { + "epoch": 1.369648957046297, + "grad_norm": 1.5487512350082397, + "learning_rate": 4.698269570930055e-05, + "loss": 0.1478, + "step": 37690 + }, + { + "epoch": 1.370012355549095, + "grad_norm": 1.8273712396621704, + "learning_rate": 4.698022380276024e-05, + "loss": 0.1591, + "step": 37700 + }, + { + "epoch": 1.3703757540518933, + "grad_norm": 1.7402414083480835, + "learning_rate": 4.6977750949170294e-05, + "loss": 0.1075, + "step": 37710 + }, + { + "epoch": 1.3707391525546915, + "grad_norm": 0.7992825508117676, + "learning_rate": 4.697527714863726e-05, + "loss": 0.1653, + "step": 37720 + }, + { + "epoch": 1.3711025510574897, + "grad_norm": 5.170393943786621, + "learning_rate": 4.697280240126772e-05, + "loss": 0.1563, + "step": 37730 + }, + { + "epoch": 1.371465949560288, + "grad_norm": 1.3735640048980713, + "learning_rate": 4.697032670716831e-05, + "loss": 0.1231, + "step": 37740 + }, + { + "epoch": 1.3718293480630859, + "grad_norm": 1.8720015287399292, + "learning_rate": 4.696785006644569e-05, + "loss": 1.526, + "step": 37750 + }, + { + "epoch": 1.372192746565884, + "grad_norm": 1.9550750255584717, + "learning_rate": 4.696537247920657e-05, + "loss": 0.1503, + "step": 37760 + }, + { + "epoch": 1.3725561450686823, + "grad_norm": 0.5374103784561157, + "learning_rate": 4.6962893945557704e-05, + "loss": 0.1949, + "step": 37770 + }, + { + "epoch": 1.3729195435714805, + "grad_norm": 1.097432255744934, + "learning_rate": 4.6960414465605876e-05, + "loss": 0.1157, + "step": 37780 + }, + { + "epoch": 1.3732829420742787, + "grad_norm": 0.40494269132614136, + "learning_rate": 4.695793403945793e-05, + "loss": 0.126, + "step": 37790 + }, + { + "epoch": 1.3736463405770767, + "grad_norm": 1.9734747409820557, + "learning_rate": 4.695545266722073e-05, + "loss": 0.1457, + "step": 37800 + }, + { + "epoch": 1.3736463405770767, + "eval_loss": 0.3492252230644226, + "eval_runtime": 181.0909, + "eval_samples_per_second": 40.941, + "eval_steps_per_second": 5.119, + "eval_wer": 0.18146749686858968, + "step": 37800 + }, + { + "epoch": 1.3740097390798751, + "grad_norm": 19.56266212463379, + "learning_rate": 4.6952970349001204e-05, + "loss": 0.421, + "step": 37810 + }, + { + "epoch": 1.374373137582673, + "grad_norm": 0.8468944430351257, + "learning_rate": 4.695048708490628e-05, + "loss": 0.2035, + "step": 37820 + }, + { + "epoch": 1.3747365360854713, + "grad_norm": 0.6463280320167542, + "learning_rate": 4.6948002875042976e-05, + "loss": 0.1323, + "step": 37830 + }, + { + "epoch": 1.3750999345882695, + "grad_norm": 1.1540967226028442, + "learning_rate": 4.694551771951831e-05, + "loss": 1.2856, + "step": 37840 + }, + { + "epoch": 1.3754633330910677, + "grad_norm": 1.295023798942566, + "learning_rate": 4.6943031618439374e-05, + "loss": 0.7144, + "step": 37850 + }, + { + "epoch": 1.375826731593866, + "grad_norm": 1.5403015613555908, + "learning_rate": 4.694054457191328e-05, + "loss": 0.1429, + "step": 37860 + }, + { + "epoch": 1.376190130096664, + "grad_norm": 1.434574842453003, + "learning_rate": 4.693805658004718e-05, + "loss": 0.1686, + "step": 37870 + }, + { + "epoch": 1.3765535285994621, + "grad_norm": 0.6648684740066528, + "learning_rate": 4.693556764294829e-05, + "loss": 0.1098, + "step": 37880 + }, + { + "epoch": 1.3769169271022603, + "grad_norm": 0.7901143431663513, + "learning_rate": 4.6933077760723824e-05, + "loss": 0.115, + "step": 37890 + }, + { + "epoch": 1.3772803256050585, + "grad_norm": 0.9131706953048706, + "learning_rate": 4.693058693348108e-05, + "loss": 0.1266, + "step": 37900 + }, + { + "epoch": 1.3776437241078567, + "grad_norm": 1.2103451490402222, + "learning_rate": 4.692809516132738e-05, + "loss": 0.1205, + "step": 37910 + }, + { + "epoch": 1.3780071226106547, + "grad_norm": 0.42282989621162415, + "learning_rate": 4.6925602444370075e-05, + "loss": 0.1753, + "step": 37920 + }, + { + "epoch": 1.3783705211134532, + "grad_norm": 0.51373291015625, + "learning_rate": 4.692310878271658e-05, + "loss": 0.1635, + "step": 37930 + }, + { + "epoch": 1.3787339196162511, + "grad_norm": 0.7226901650428772, + "learning_rate": 4.692061417647431e-05, + "loss": 0.1226, + "step": 37940 + }, + { + "epoch": 1.3790973181190493, + "grad_norm": 5.131813049316406, + "learning_rate": 4.6918118625750784e-05, + "loss": 0.1682, + "step": 37950 + }, + { + "epoch": 1.3794607166218475, + "grad_norm": 1.30665922164917, + "learning_rate": 4.6915622130653506e-05, + "loss": 0.128, + "step": 37960 + }, + { + "epoch": 1.3798241151246458, + "grad_norm": 0.3638138175010681, + "learning_rate": 4.691312469129006e-05, + "loss": 0.1852, + "step": 37970 + }, + { + "epoch": 1.380187513627444, + "grad_norm": 1.5417994260787964, + "learning_rate": 4.691062630776802e-05, + "loss": 0.119, + "step": 37980 + }, + { + "epoch": 1.380550912130242, + "grad_norm": 1.4151712656021118, + "learning_rate": 4.6908126980195055e-05, + "loss": 0.1079, + "step": 37990 + }, + { + "epoch": 1.3809143106330402, + "grad_norm": 1.137370228767395, + "learning_rate": 4.6905626708678855e-05, + "loss": 0.2152, + "step": 38000 + }, + { + "epoch": 1.3812777091358384, + "grad_norm": 1.0430890321731567, + "learning_rate": 4.690312549332714e-05, + "loss": 0.1243, + "step": 38010 + }, + { + "epoch": 1.3816411076386366, + "grad_norm": 1.260365605354309, + "learning_rate": 4.690062333424767e-05, + "loss": 0.1755, + "step": 38020 + }, + { + "epoch": 1.3820045061414348, + "grad_norm": 0.8367292284965515, + "learning_rate": 4.689812023154827e-05, + "loss": 1.1881, + "step": 38030 + }, + { + "epoch": 1.3823679046442328, + "grad_norm": 1.2440451383590698, + "learning_rate": 4.6895616185336775e-05, + "loss": 0.1292, + "step": 38040 + }, + { + "epoch": 1.382731303147031, + "grad_norm": 2.23522686958313, + "learning_rate": 4.6893111195721094e-05, + "loss": 0.1491, + "step": 38050 + }, + { + "epoch": 1.3830947016498292, + "grad_norm": 1.0018017292022705, + "learning_rate": 4.6890605262809145e-05, + "loss": 0.112, + "step": 38060 + }, + { + "epoch": 1.3834581001526274, + "grad_norm": 0.6445533037185669, + "learning_rate": 4.68880983867089e-05, + "loss": 0.1597, + "step": 38070 + }, + { + "epoch": 1.3838214986554256, + "grad_norm": 1.5876944065093994, + "learning_rate": 4.6885590567528375e-05, + "loss": 0.1341, + "step": 38080 + }, + { + "epoch": 1.3841848971582236, + "grad_norm": 1.5150282382965088, + "learning_rate": 4.6883081805375616e-05, + "loss": 0.1167, + "step": 38090 + }, + { + "epoch": 1.384548295661022, + "grad_norm": 1.7657722234725952, + "learning_rate": 4.688057210035873e-05, + "loss": 0.1608, + "step": 38100 + }, + { + "epoch": 1.38491169416382, + "grad_norm": 1.027761459350586, + "learning_rate": 4.687806145258584e-05, + "loss": 0.1566, + "step": 38110 + }, + { + "epoch": 1.3852750926666182, + "grad_norm": 0.3475823998451233, + "learning_rate": 4.6875549862165126e-05, + "loss": 0.1502, + "step": 38120 + }, + { + "epoch": 1.3856384911694164, + "grad_norm": 0.7863835692405701, + "learning_rate": 4.687303732920481e-05, + "loss": 0.1761, + "step": 38130 + }, + { + "epoch": 1.3860018896722146, + "grad_norm": 2.0150928497314453, + "learning_rate": 4.687052385381313e-05, + "loss": 0.1417, + "step": 38140 + }, + { + "epoch": 1.3863652881750128, + "grad_norm": 0.6676269769668579, + "learning_rate": 4.6868009436098386e-05, + "loss": 0.1307, + "step": 38150 + }, + { + "epoch": 1.3867286866778108, + "grad_norm": 1.6957210302352905, + "learning_rate": 4.6865494076168934e-05, + "loss": 0.1385, + "step": 38160 + }, + { + "epoch": 1.387092085180609, + "grad_norm": 0.8534975051879883, + "learning_rate": 4.686297777413313e-05, + "loss": 0.1568, + "step": 38170 + }, + { + "epoch": 1.3874554836834072, + "grad_norm": 0.7309104800224304, + "learning_rate": 4.6860460530099416e-05, + "loss": 0.5466, + "step": 38180 + }, + { + "epoch": 1.3878188821862054, + "grad_norm": 1.2103863954544067, + "learning_rate": 4.6857942344176225e-05, + "loss": 0.1227, + "step": 38190 + }, + { + "epoch": 1.3881822806890036, + "grad_norm": 0.7991679906845093, + "learning_rate": 4.685542321647207e-05, + "loss": 0.1603, + "step": 38200 + }, + { + "epoch": 1.3885456791918016, + "grad_norm": 1.146906852722168, + "learning_rate": 4.685290314709549e-05, + "loss": 0.1268, + "step": 38210 + }, + { + "epoch": 1.3889090776946, + "grad_norm": 1.020175576210022, + "learning_rate": 4.685038213615508e-05, + "loss": 0.1517, + "step": 38220 + }, + { + "epoch": 1.389272476197398, + "grad_norm": 1.1214244365692139, + "learning_rate": 4.684786018375944e-05, + "loss": 0.1185, + "step": 38230 + }, + { + "epoch": 1.3896358747001962, + "grad_norm": 0.830916166305542, + "learning_rate": 4.6845337290017235e-05, + "loss": 0.1281, + "step": 38240 + }, + { + "epoch": 1.3899992732029944, + "grad_norm": 0.5939742922782898, + "learning_rate": 4.684281345503718e-05, + "loss": 0.1599, + "step": 38250 + }, + { + "epoch": 1.3903626717057926, + "grad_norm": 4.498940467834473, + "learning_rate": 4.6840288678928003e-05, + "loss": 0.1159, + "step": 38260 + }, + { + "epoch": 1.3907260702085908, + "grad_norm": 0.6612393856048584, + "learning_rate": 4.6837762961798495e-05, + "loss": 0.1842, + "step": 38270 + }, + { + "epoch": 1.3910894687113888, + "grad_norm": 2.456289529800415, + "learning_rate": 4.683523630375748e-05, + "loss": 0.1558, + "step": 38280 + }, + { + "epoch": 1.391452867214187, + "grad_norm": 0.5414180159568787, + "learning_rate": 4.683270870491383e-05, + "loss": 0.1347, + "step": 38290 + }, + { + "epoch": 1.3918162657169852, + "grad_norm": 2.1812076568603516, + "learning_rate": 4.683018016537644e-05, + "loss": 0.1558, + "step": 38300 + }, + { + "epoch": 1.3921796642197835, + "grad_norm": 1.2050772905349731, + "learning_rate": 4.682765068525425e-05, + "loss": 0.1591, + "step": 38310 + }, + { + "epoch": 1.3925430627225817, + "grad_norm": 1.050423264503479, + "learning_rate": 4.6825120264656266e-05, + "loss": 0.1578, + "step": 38320 + }, + { + "epoch": 1.3929064612253796, + "grad_norm": 2.753676652908325, + "learning_rate": 4.68225889036915e-05, + "loss": 0.1373, + "step": 38330 + }, + { + "epoch": 1.3932698597281779, + "grad_norm": 2.3123908042907715, + "learning_rate": 4.682005660246902e-05, + "loss": 0.1198, + "step": 38340 + }, + { + "epoch": 1.393633258230976, + "grad_norm": 0.6317697167396545, + "learning_rate": 4.681752336109794e-05, + "loss": 0.157, + "step": 38350 + }, + { + "epoch": 1.3939966567337743, + "grad_norm": 1.788620948791504, + "learning_rate": 4.681498917968741e-05, + "loss": 0.1424, + "step": 38360 + }, + { + "epoch": 1.3943600552365725, + "grad_norm": 1.064799189567566, + "learning_rate": 4.68124540583466e-05, + "loss": 0.1762, + "step": 38370 + }, + { + "epoch": 1.3947234537393705, + "grad_norm": 1.3951762914657593, + "learning_rate": 4.6809917997184764e-05, + "loss": 0.1198, + "step": 38380 + }, + { + "epoch": 1.3950868522421689, + "grad_norm": 1.0863114595413208, + "learning_rate": 4.6807380996311154e-05, + "loss": 0.1393, + "step": 38390 + }, + { + "epoch": 1.3954502507449669, + "grad_norm": 1.141787052154541, + "learning_rate": 4.6804843055835105e-05, + "loss": 0.1603, + "step": 38400 + }, + { + "epoch": 1.3954502507449669, + "eval_loss": 0.3523618280887604, + "eval_runtime": 180.8149, + "eval_samples_per_second": 41.003, + "eval_steps_per_second": 5.127, + "eval_wer": 0.17399749487174831, + "step": 38400 + }, + { + "epoch": 1.395813649247765, + "grad_norm": 1.5692111253738403, + "learning_rate": 4.6802304175865936e-05, + "loss": 0.1328, + "step": 38410 + }, + { + "epoch": 1.3961770477505633, + "grad_norm": 0.4182591140270233, + "learning_rate": 4.679976435651305e-05, + "loss": 0.1562, + "step": 38420 + }, + { + "epoch": 1.3965404462533615, + "grad_norm": 0.6963622570037842, + "learning_rate": 4.67972235978859e-05, + "loss": 0.1131, + "step": 38430 + }, + { + "epoch": 1.3969038447561597, + "grad_norm": 1.0345783233642578, + "learning_rate": 4.679468190009392e-05, + "loss": 0.1231, + "step": 38440 + }, + { + "epoch": 1.3972672432589577, + "grad_norm": 1.6084190607070923, + "learning_rate": 4.679213926324665e-05, + "loss": 0.133, + "step": 38450 + }, + { + "epoch": 1.3976306417617559, + "grad_norm": 1.2635602951049805, + "learning_rate": 4.678959568745364e-05, + "loss": 0.1344, + "step": 38460 + }, + { + "epoch": 1.397994040264554, + "grad_norm": 0.6128044724464417, + "learning_rate": 4.678705117282447e-05, + "loss": 0.1639, + "step": 38470 + }, + { + "epoch": 1.3983574387673523, + "grad_norm": 1.128151297569275, + "learning_rate": 4.6784505719468795e-05, + "loss": 0.1342, + "step": 38480 + }, + { + "epoch": 1.3987208372701505, + "grad_norm": 1.6067559719085693, + "learning_rate": 4.678195932749627e-05, + "loss": 0.1344, + "step": 38490 + }, + { + "epoch": 1.3990842357729485, + "grad_norm": 0.4303024113178253, + "learning_rate": 4.677941199701662e-05, + "loss": 0.1519, + "step": 38500 + }, + { + "epoch": 1.399447634275747, + "grad_norm": 3.09531307220459, + "learning_rate": 4.6776863728139596e-05, + "loss": 0.1197, + "step": 38510 + }, + { + "epoch": 1.399811032778545, + "grad_norm": 1.2062981128692627, + "learning_rate": 4.6774314520975e-05, + "loss": 0.1488, + "step": 38520 + }, + { + "epoch": 1.4001744312813431, + "grad_norm": 0.7981544733047485, + "learning_rate": 4.6771764375632664e-05, + "loss": 0.1155, + "step": 38530 + }, + { + "epoch": 1.4005378297841413, + "grad_norm": 0.6589852571487427, + "learning_rate": 4.676921329222247e-05, + "loss": 0.1463, + "step": 38540 + }, + { + "epoch": 1.4009012282869395, + "grad_norm": 1.496664047241211, + "learning_rate": 4.676666127085433e-05, + "loss": 0.1811, + "step": 38550 + }, + { + "epoch": 1.4012646267897377, + "grad_norm": 0.7335402965545654, + "learning_rate": 4.676410831163819e-05, + "loss": 0.1364, + "step": 38560 + }, + { + "epoch": 1.4016280252925357, + "grad_norm": 0.8753761053085327, + "learning_rate": 4.676155441468407e-05, + "loss": 0.1954, + "step": 38570 + }, + { + "epoch": 1.401991423795334, + "grad_norm": 1.4288660287857056, + "learning_rate": 4.6758999580101994e-05, + "loss": 0.1347, + "step": 38580 + }, + { + "epoch": 1.4023548222981321, + "grad_norm": 1.1383757591247559, + "learning_rate": 4.675644380800205e-05, + "loss": 0.1604, + "step": 38590 + }, + { + "epoch": 1.4027182208009303, + "grad_norm": 1.4642599821090698, + "learning_rate": 4.6753887098494344e-05, + "loss": 0.1878, + "step": 38600 + }, + { + "epoch": 1.4030816193037285, + "grad_norm": 0.9396153688430786, + "learning_rate": 4.675132945168905e-05, + "loss": 0.1201, + "step": 38610 + }, + { + "epoch": 1.4034450178065265, + "grad_norm": 0.835436999797821, + "learning_rate": 4.674877086769636e-05, + "loss": 0.4225, + "step": 38620 + }, + { + "epoch": 1.4038084163093247, + "grad_norm": 0.9934596419334412, + "learning_rate": 4.674621134662651e-05, + "loss": 0.1145, + "step": 38630 + }, + { + "epoch": 1.404171814812123, + "grad_norm": 1.5066030025482178, + "learning_rate": 4.674365088858979e-05, + "loss": 0.101, + "step": 38640 + }, + { + "epoch": 1.4045352133149211, + "grad_norm": 2.4759950637817383, + "learning_rate": 4.674108949369652e-05, + "loss": 0.1225, + "step": 38650 + }, + { + "epoch": 1.4048986118177194, + "grad_norm": 2.4329168796539307, + "learning_rate": 4.6738527162057054e-05, + "loss": 0.1469, + "step": 38660 + }, + { + "epoch": 1.4052620103205173, + "grad_norm": 0.7068483829498291, + "learning_rate": 4.67359638937818e-05, + "loss": 0.237, + "step": 38670 + }, + { + "epoch": 1.4056254088233158, + "grad_norm": 2.3423826694488525, + "learning_rate": 4.6733399688981207e-05, + "loss": 0.1095, + "step": 38680 + }, + { + "epoch": 1.4059888073261138, + "grad_norm": 0.7500453591346741, + "learning_rate": 4.673083454776575e-05, + "loss": 0.1337, + "step": 38690 + }, + { + "epoch": 1.406352205828912, + "grad_norm": 2.1220805644989014, + "learning_rate": 4.6728268470245937e-05, + "loss": 0.1489, + "step": 38700 + }, + { + "epoch": 1.4067156043317102, + "grad_norm": 3.195551633834839, + "learning_rate": 4.672570145653234e-05, + "loss": 0.1456, + "step": 38710 + }, + { + "epoch": 1.4070790028345084, + "grad_norm": 0.8845533728599548, + "learning_rate": 4.672313350673558e-05, + "loss": 0.1529, + "step": 38720 + }, + { + "epoch": 1.4074424013373066, + "grad_norm": 1.278830885887146, + "learning_rate": 4.6720564620966294e-05, + "loss": 0.1209, + "step": 38730 + }, + { + "epoch": 1.4078057998401046, + "grad_norm": 1.8450745344161987, + "learning_rate": 4.671799479933515e-05, + "loss": 0.0863, + "step": 38740 + }, + { + "epoch": 1.4081691983429028, + "grad_norm": 0.5718597173690796, + "learning_rate": 4.6715424041952894e-05, + "loss": 0.1597, + "step": 38750 + }, + { + "epoch": 1.408532596845701, + "grad_norm": 3.594273090362549, + "learning_rate": 4.671285234893027e-05, + "loss": 0.1373, + "step": 38760 + }, + { + "epoch": 1.4088959953484992, + "grad_norm": 1.3270690441131592, + "learning_rate": 4.671027972037809e-05, + "loss": 0.1727, + "step": 38770 + }, + { + "epoch": 1.4092593938512974, + "grad_norm": 88.81269073486328, + "learning_rate": 4.670770615640721e-05, + "loss": 1.6965, + "step": 38780 + }, + { + "epoch": 1.4096227923540954, + "grad_norm": 1.1233614683151245, + "learning_rate": 4.670513165712851e-05, + "loss": 0.1316, + "step": 38790 + }, + { + "epoch": 1.4099861908568938, + "grad_norm": 0.49995678663253784, + "learning_rate": 4.6702556222652905e-05, + "loss": 0.1492, + "step": 38800 + }, + { + "epoch": 1.4103495893596918, + "grad_norm": 0.7330392599105835, + "learning_rate": 4.669997985309138e-05, + "loss": 0.1065, + "step": 38810 + }, + { + "epoch": 1.41071298786249, + "grad_norm": 0.49762871861457825, + "learning_rate": 4.6697402548554925e-05, + "loss": 0.167, + "step": 38820 + }, + { + "epoch": 1.4110763863652882, + "grad_norm": 3.198273181915283, + "learning_rate": 4.6694824309154596e-05, + "loss": 0.5685, + "step": 38830 + }, + { + "epoch": 1.4114397848680864, + "grad_norm": 0.7750107645988464, + "learning_rate": 4.6692245135001476e-05, + "loss": 0.1291, + "step": 38840 + }, + { + "epoch": 1.4118031833708846, + "grad_norm": 0.6449529528617859, + "learning_rate": 4.66896650262067e-05, + "loss": 0.1522, + "step": 38850 + }, + { + "epoch": 1.4121665818736826, + "grad_norm": 0.7553302049636841, + "learning_rate": 4.668708398288142e-05, + "loss": 0.1089, + "step": 38860 + }, + { + "epoch": 1.4125299803764808, + "grad_norm": 0.3948783576488495, + "learning_rate": 4.6684502005136864e-05, + "loss": 0.1421, + "step": 38870 + }, + { + "epoch": 1.412893378879279, + "grad_norm": 0.7775730490684509, + "learning_rate": 4.668191909308426e-05, + "loss": 0.1014, + "step": 38880 + }, + { + "epoch": 1.4132567773820772, + "grad_norm": 0.3911081850528717, + "learning_rate": 4.667933524683492e-05, + "loss": 0.1504, + "step": 38890 + }, + { + "epoch": 1.4136201758848754, + "grad_norm": 0.48814857006073, + "learning_rate": 4.667675046650015e-05, + "loss": 0.138, + "step": 38900 + }, + { + "epoch": 1.4139835743876734, + "grad_norm": 2.613859176635742, + "learning_rate": 4.667416475219133e-05, + "loss": 0.1158, + "step": 38910 + }, + { + "epoch": 1.4143469728904716, + "grad_norm": 0.9073649048805237, + "learning_rate": 4.667157810401987e-05, + "loss": 0.1753, + "step": 38920 + }, + { + "epoch": 1.4147103713932698, + "grad_norm": 1.4347561597824097, + "learning_rate": 4.666899052209722e-05, + "loss": 0.1397, + "step": 38930 + }, + { + "epoch": 1.415073769896068, + "grad_norm": 1.014145851135254, + "learning_rate": 4.666640200653486e-05, + "loss": 0.1425, + "step": 38940 + }, + { + "epoch": 1.4154371683988662, + "grad_norm": 0.5404003858566284, + "learning_rate": 4.6663812557444334e-05, + "loss": 0.1918, + "step": 38950 + }, + { + "epoch": 1.4158005669016644, + "grad_norm": 0.7507174015045166, + "learning_rate": 4.66612221749372e-05, + "loss": 0.1492, + "step": 38960 + }, + { + "epoch": 1.4161639654044627, + "grad_norm": 0.47643178701400757, + "learning_rate": 4.665863085912508e-05, + "loss": 0.136, + "step": 38970 + }, + { + "epoch": 1.4165273639072606, + "grad_norm": 1.3153865337371826, + "learning_rate": 4.66560386101196e-05, + "loss": 0.1421, + "step": 38980 + }, + { + "epoch": 1.4168907624100588, + "grad_norm": 0.7111690640449524, + "learning_rate": 4.665344542803248e-05, + "loss": 0.1184, + "step": 38990 + }, + { + "epoch": 1.417254160912857, + "grad_norm": 5.245561599731445, + "learning_rate": 4.665085131297544e-05, + "loss": 0.1343, + "step": 39000 + }, + { + "epoch": 1.417254160912857, + "eval_loss": 0.34118154644966125, + "eval_runtime": 180.7794, + "eval_samples_per_second": 41.011, + "eval_steps_per_second": 5.128, + "eval_wer": 0.17092962041861057, + "step": 39000 + }, + { + "epoch": 1.4176175594156553, + "grad_norm": 1.7938792705535889, + "learning_rate": 4.664825626506025e-05, + "loss": 0.1703, + "step": 39010 + }, + { + "epoch": 1.4179809579184535, + "grad_norm": 0.7494391202926636, + "learning_rate": 4.664566028439873e-05, + "loss": 0.1644, + "step": 39020 + }, + { + "epoch": 1.4183443564212515, + "grad_norm": 0.7234100699424744, + "learning_rate": 4.664306337110272e-05, + "loss": 0.1115, + "step": 39030 + }, + { + "epoch": 1.4187077549240497, + "grad_norm": 0.6583457589149475, + "learning_rate": 4.6640465525284114e-05, + "loss": 0.1448, + "step": 39040 + }, + { + "epoch": 1.4190711534268479, + "grad_norm": 1.7157262563705444, + "learning_rate": 4.663786674705484e-05, + "loss": 0.176, + "step": 39050 + }, + { + "epoch": 1.419434551929646, + "grad_norm": 1.9635696411132812, + "learning_rate": 4.663526703652688e-05, + "loss": 0.1453, + "step": 39060 + }, + { + "epoch": 1.4197979504324443, + "grad_norm": 0.5495097041130066, + "learning_rate": 4.663266639381224e-05, + "loss": 0.1114, + "step": 39070 + }, + { + "epoch": 1.4201613489352423, + "grad_norm": 1.3244194984436035, + "learning_rate": 4.663006481902298e-05, + "loss": 0.146, + "step": 39080 + }, + { + "epoch": 1.4205247474380407, + "grad_norm": 1.1683903932571411, + "learning_rate": 4.662746231227119e-05, + "loss": 0.1351, + "step": 39090 + }, + { + "epoch": 1.4208881459408387, + "grad_norm": 2.392890214920044, + "learning_rate": 4.662485887366899e-05, + "loss": 0.2378, + "step": 39100 + }, + { + "epoch": 1.4212515444436369, + "grad_norm": 1.7971110343933105, + "learning_rate": 4.662225450332856e-05, + "loss": 0.1168, + "step": 39110 + }, + { + "epoch": 1.421614942946435, + "grad_norm": 1.0140278339385986, + "learning_rate": 4.6619649201362124e-05, + "loss": 0.1332, + "step": 39120 + }, + { + "epoch": 1.4219783414492333, + "grad_norm": 3.1083390712738037, + "learning_rate": 4.661704296788193e-05, + "loss": 0.1483, + "step": 39130 + }, + { + "epoch": 1.4223417399520315, + "grad_norm": 0.6209553480148315, + "learning_rate": 4.661443580300026e-05, + "loss": 0.1212, + "step": 39140 + }, + { + "epoch": 1.4227051384548295, + "grad_norm": 0.6733147501945496, + "learning_rate": 4.661182770682946e-05, + "loss": 0.1343, + "step": 39150 + }, + { + "epoch": 1.4230685369576277, + "grad_norm": 1.1502153873443604, + "learning_rate": 4.660921867948189e-05, + "loss": 0.1508, + "step": 39160 + }, + { + "epoch": 1.423431935460426, + "grad_norm": 0.33853545784950256, + "learning_rate": 4.660660872106999e-05, + "loss": 0.1661, + "step": 39170 + }, + { + "epoch": 1.423795333963224, + "grad_norm": 1.0470768213272095, + "learning_rate": 4.660399783170618e-05, + "loss": 0.1231, + "step": 39180 + }, + { + "epoch": 1.4241587324660223, + "grad_norm": 0.5467321872711182, + "learning_rate": 4.660138601150298e-05, + "loss": 0.1166, + "step": 39190 + }, + { + "epoch": 1.4245221309688203, + "grad_norm": 1.3816486597061157, + "learning_rate": 4.659877326057291e-05, + "loss": 0.1177, + "step": 39200 + }, + { + "epoch": 1.4248855294716185, + "grad_norm": 1.0260194540023804, + "learning_rate": 4.659615957902855e-05, + "loss": 0.153, + "step": 39210 + }, + { + "epoch": 1.4252489279744167, + "grad_norm": 0.7443385124206543, + "learning_rate": 4.6593544966982524e-05, + "loss": 0.1735, + "step": 39220 + }, + { + "epoch": 1.425612326477215, + "grad_norm": 1.1526659727096558, + "learning_rate": 4.659092942454746e-05, + "loss": 0.1491, + "step": 39230 + }, + { + "epoch": 1.4259757249800131, + "grad_norm": 0.8841147422790527, + "learning_rate": 4.658831295183608e-05, + "loss": 0.1611, + "step": 39240 + }, + { + "epoch": 1.4263391234828113, + "grad_norm": 1.596132516860962, + "learning_rate": 4.65856955489611e-05, + "loss": 0.1543, + "step": 39250 + }, + { + "epoch": 1.4267025219856095, + "grad_norm": 2.1355278491973877, + "learning_rate": 4.65830772160353e-05, + "loss": 0.1489, + "step": 39260 + }, + { + "epoch": 1.4270659204884075, + "grad_norm": 0.9080690145492554, + "learning_rate": 4.6580457953171496e-05, + "loss": 0.28, + "step": 39270 + }, + { + "epoch": 1.4274293189912057, + "grad_norm": 0.9187225699424744, + "learning_rate": 4.6577837760482546e-05, + "loss": 0.1243, + "step": 39280 + }, + { + "epoch": 1.427792717494004, + "grad_norm": 1.0374051332473755, + "learning_rate": 4.6575216638081335e-05, + "loss": 0.1088, + "step": 39290 + }, + { + "epoch": 1.4281561159968021, + "grad_norm": 0.5795188546180725, + "learning_rate": 4.657259458608081e-05, + "loss": 0.1288, + "step": 39300 + }, + { + "epoch": 1.4285195144996004, + "grad_norm": 1.0621544122695923, + "learning_rate": 4.656997160459394e-05, + "loss": 0.1311, + "step": 39310 + }, + { + "epoch": 1.4288829130023983, + "grad_norm": 0.35324281454086304, + "learning_rate": 4.656734769373373e-05, + "loss": 0.1635, + "step": 39320 + }, + { + "epoch": 1.4292463115051965, + "grad_norm": 1.4020544290542603, + "learning_rate": 4.656472285361326e-05, + "loss": 0.1281, + "step": 39330 + }, + { + "epoch": 1.4296097100079947, + "grad_norm": 0.9644222259521484, + "learning_rate": 4.65620970843456e-05, + "loss": 0.111, + "step": 39340 + }, + { + "epoch": 1.429973108510793, + "grad_norm": 3.3897273540496826, + "learning_rate": 4.65594703860439e-05, + "loss": 0.2219, + "step": 39350 + }, + { + "epoch": 1.4303365070135912, + "grad_norm": 1.1418486833572388, + "learning_rate": 4.655684275882132e-05, + "loss": 0.1347, + "step": 39360 + }, + { + "epoch": 1.4306999055163891, + "grad_norm": 0.7159132361412048, + "learning_rate": 4.655421420279109e-05, + "loss": 0.2765, + "step": 39370 + }, + { + "epoch": 1.4310633040191876, + "grad_norm": 1.4189454317092896, + "learning_rate": 4.655158471806647e-05, + "loss": 0.1247, + "step": 39380 + }, + { + "epoch": 1.4314267025219856, + "grad_norm": 1.0472137928009033, + "learning_rate": 4.6548954304760725e-05, + "loss": 0.1193, + "step": 39390 + }, + { + "epoch": 1.4317901010247838, + "grad_norm": 1.6883853673934937, + "learning_rate": 4.654632296298723e-05, + "loss": 0.1336, + "step": 39400 + }, + { + "epoch": 1.432153499527582, + "grad_norm": 3.4792749881744385, + "learning_rate": 4.654369069285933e-05, + "loss": 0.1224, + "step": 39410 + }, + { + "epoch": 1.4325168980303802, + "grad_norm": 0.9245648384094238, + "learning_rate": 4.654105749449046e-05, + "loss": 0.1684, + "step": 39420 + }, + { + "epoch": 1.4328802965331784, + "grad_norm": 0.7134508490562439, + "learning_rate": 4.653842336799406e-05, + "loss": 0.1268, + "step": 39430 + }, + { + "epoch": 1.4332436950359764, + "grad_norm": 0.8069209456443787, + "learning_rate": 4.6535788313483624e-05, + "loss": 0.1224, + "step": 39440 + }, + { + "epoch": 1.4336070935387746, + "grad_norm": 1.5594000816345215, + "learning_rate": 4.6533152331072706e-05, + "loss": 0.178, + "step": 39450 + }, + { + "epoch": 1.4339704920415728, + "grad_norm": 0.6994547843933105, + "learning_rate": 4.653051542087486e-05, + "loss": 0.1184, + "step": 39460 + }, + { + "epoch": 1.434333890544371, + "grad_norm": 1.0693833827972412, + "learning_rate": 4.6527877583003714e-05, + "loss": 0.1696, + "step": 39470 + }, + { + "epoch": 1.4346972890471692, + "grad_norm": 1.625401496887207, + "learning_rate": 4.652523881757292e-05, + "loss": 0.9774, + "step": 39480 + }, + { + "epoch": 1.4350606875499672, + "grad_norm": 1.0620099306106567, + "learning_rate": 4.652259912469618e-05, + "loss": 0.141, + "step": 39490 + }, + { + "epoch": 1.4354240860527654, + "grad_norm": 0.8238838315010071, + "learning_rate": 4.6519958504487206e-05, + "loss": 0.2848, + "step": 39500 + }, + { + "epoch": 1.4357874845555636, + "grad_norm": 4.339720726013184, + "learning_rate": 4.6517316957059796e-05, + "loss": 0.1307, + "step": 39510 + }, + { + "epoch": 1.4361508830583618, + "grad_norm": 1.6212254762649536, + "learning_rate": 4.6514674482527754e-05, + "loss": 0.1486, + "step": 39520 + }, + { + "epoch": 1.43651428156116, + "grad_norm": 1.0148829221725464, + "learning_rate": 4.651203108100494e-05, + "loss": 0.1216, + "step": 39530 + }, + { + "epoch": 1.4368776800639582, + "grad_norm": 0.8317530155181885, + "learning_rate": 4.650938675260525e-05, + "loss": 0.1058, + "step": 39540 + }, + { + "epoch": 1.4372410785667564, + "grad_norm": 1.1663634777069092, + "learning_rate": 4.6506741497442614e-05, + "loss": 0.2442, + "step": 39550 + }, + { + "epoch": 1.4376044770695544, + "grad_norm": 1.4678232669830322, + "learning_rate": 4.6504095315631006e-05, + "loss": 0.1353, + "step": 39560 + }, + { + "epoch": 1.4379678755723526, + "grad_norm": 0.8588351011276245, + "learning_rate": 4.6501448207284446e-05, + "loss": 0.157, + "step": 39570 + }, + { + "epoch": 1.4383312740751508, + "grad_norm": 0.7000893950462341, + "learning_rate": 4.6498800172516985e-05, + "loss": 0.1191, + "step": 39580 + }, + { + "epoch": 1.438694672577949, + "grad_norm": 1.0854928493499756, + "learning_rate": 4.649615121144271e-05, + "loss": 0.1258, + "step": 39590 + }, + { + "epoch": 1.4390580710807472, + "grad_norm": 1.003110647201538, + "learning_rate": 4.649350132417577e-05, + "loss": 0.1264, + "step": 39600 + }, + { + "epoch": 1.4390580710807472, + "eval_loss": 0.34889447689056396, + "eval_runtime": 180.6213, + "eval_samples_per_second": 41.047, + "eval_steps_per_second": 5.132, + "eval_wer": 0.17173743351425927, + "step": 39600 + }, + { + "epoch": 1.4394214695835452, + "grad_norm": 0.8560311198234558, + "learning_rate": 4.649085051083033e-05, + "loss": 0.129, + "step": 39610 + }, + { + "epoch": 1.4397848680863434, + "grad_norm": 1.6158629655838013, + "learning_rate": 4.6488198771520605e-05, + "loss": 0.1359, + "step": 39620 + }, + { + "epoch": 1.4401482665891416, + "grad_norm": 0.6946542263031006, + "learning_rate": 4.6485546106360856e-05, + "loss": 0.1195, + "step": 39630 + }, + { + "epoch": 1.4405116650919398, + "grad_norm": 0.6091057062149048, + "learning_rate": 4.648289251546536e-05, + "loss": 0.1321, + "step": 39640 + }, + { + "epoch": 1.440875063594738, + "grad_norm": 0.37464994192123413, + "learning_rate": 4.648023799894847e-05, + "loss": 0.1339, + "step": 39650 + }, + { + "epoch": 1.441238462097536, + "grad_norm": 0.5378652215003967, + "learning_rate": 4.647758255692456e-05, + "loss": 0.1127, + "step": 39660 + }, + { + "epoch": 1.4416018606003345, + "grad_norm": 1.4586265087127686, + "learning_rate": 4.647492618950802e-05, + "loss": 0.2027, + "step": 39670 + }, + { + "epoch": 1.4419652591031324, + "grad_norm": 0.6388387084007263, + "learning_rate": 4.647226889681333e-05, + "loss": 0.1197, + "step": 39680 + }, + { + "epoch": 1.4423286576059307, + "grad_norm": 0.7849758267402649, + "learning_rate": 4.646961067895496e-05, + "loss": 0.1165, + "step": 39690 + }, + { + "epoch": 1.4426920561087289, + "grad_norm": 1.16459059715271, + "learning_rate": 4.6466951536047464e-05, + "loss": 0.1799, + "step": 39700 + }, + { + "epoch": 1.443055454611527, + "grad_norm": 3.2792208194732666, + "learning_rate": 4.64642914682054e-05, + "loss": 0.1191, + "step": 39710 + }, + { + "epoch": 1.4434188531143253, + "grad_norm": 0.3408263921737671, + "learning_rate": 4.64616304755434e-05, + "loss": 0.1971, + "step": 39720 + }, + { + "epoch": 1.4437822516171233, + "grad_norm": 0.45033156871795654, + "learning_rate": 4.645896855817609e-05, + "loss": 0.1441, + "step": 39730 + }, + { + "epoch": 1.4441456501199215, + "grad_norm": 2.284130096435547, + "learning_rate": 4.645630571621817e-05, + "loss": 0.122, + "step": 39740 + }, + { + "epoch": 1.4445090486227197, + "grad_norm": 3.047889232635498, + "learning_rate": 4.645364194978439e-05, + "loss": 0.172, + "step": 39750 + }, + { + "epoch": 1.4448724471255179, + "grad_norm": 1.2850980758666992, + "learning_rate": 4.645097725898951e-05, + "loss": 0.1088, + "step": 39760 + }, + { + "epoch": 1.445235845628316, + "grad_norm": 0.8555011749267578, + "learning_rate": 4.644831164394834e-05, + "loss": 0.1516, + "step": 39770 + }, + { + "epoch": 1.445599244131114, + "grad_norm": 0.9414917230606079, + "learning_rate": 4.644564510477574e-05, + "loss": 0.1104, + "step": 39780 + }, + { + "epoch": 1.4459626426339123, + "grad_norm": 1.658109188079834, + "learning_rate": 4.644297764158659e-05, + "loss": 0.1361, + "step": 39790 + }, + { + "epoch": 1.4463260411367105, + "grad_norm": 1.9984872341156006, + "learning_rate": 4.644030925449583e-05, + "loss": 0.5671, + "step": 39800 + }, + { + "epoch": 1.4466894396395087, + "grad_norm": 2.086899995803833, + "learning_rate": 4.6437639943618424e-05, + "loss": 0.124, + "step": 39810 + }, + { + "epoch": 1.447052838142307, + "grad_norm": 0.39096391201019287, + "learning_rate": 4.64349697090694e-05, + "loss": 0.2021, + "step": 39820 + }, + { + "epoch": 1.447416236645105, + "grad_norm": 0.9561779499053955, + "learning_rate": 4.643229855096378e-05, + "loss": 0.1371, + "step": 39830 + }, + { + "epoch": 1.4477796351479033, + "grad_norm": 1.6168954372406006, + "learning_rate": 4.6429626469416685e-05, + "loss": 0.1149, + "step": 39840 + }, + { + "epoch": 1.4481430336507013, + "grad_norm": 1.4393991231918335, + "learning_rate": 4.642695346454323e-05, + "loss": 0.1472, + "step": 39850 + }, + { + "epoch": 1.4485064321534995, + "grad_norm": 1.9806978702545166, + "learning_rate": 4.642427953645859e-05, + "loss": 0.1443, + "step": 39860 + }, + { + "epoch": 1.4488698306562977, + "grad_norm": 1.012040615081787, + "learning_rate": 4.642160468527797e-05, + "loss": 0.155, + "step": 39870 + }, + { + "epoch": 1.449233229159096, + "grad_norm": 0.620448112487793, + "learning_rate": 4.641892891111662e-05, + "loss": 0.12, + "step": 39880 + }, + { + "epoch": 1.4495966276618941, + "grad_norm": 0.5192741751670837, + "learning_rate": 4.6416252214089834e-05, + "loss": 0.1096, + "step": 39890 + }, + { + "epoch": 1.449960026164692, + "grad_norm": 1.4234672784805298, + "learning_rate": 4.641357459431294e-05, + "loss": 0.1548, + "step": 39900 + }, + { + "epoch": 1.4503234246674903, + "grad_norm": 1.2218151092529297, + "learning_rate": 4.641089605190131e-05, + "loss": 0.1452, + "step": 39910 + }, + { + "epoch": 1.4506868231702885, + "grad_norm": 0.5271123051643372, + "learning_rate": 4.6408216586970344e-05, + "loss": 0.1238, + "step": 39920 + }, + { + "epoch": 1.4510502216730867, + "grad_norm": 1.0670936107635498, + "learning_rate": 4.640553619963549e-05, + "loss": 0.7119, + "step": 39930 + }, + { + "epoch": 1.451413620175885, + "grad_norm": 2.9407644271850586, + "learning_rate": 4.6402854890012256e-05, + "loss": 0.1246, + "step": 39940 + }, + { + "epoch": 1.451777018678683, + "grad_norm": 0.8619846701622009, + "learning_rate": 4.6400172658216144e-05, + "loss": 0.1524, + "step": 39950 + }, + { + "epoch": 1.4521404171814813, + "grad_norm": 1.110069751739502, + "learning_rate": 4.639748950436275e-05, + "loss": 0.1147, + "step": 39960 + }, + { + "epoch": 1.4525038156842793, + "grad_norm": 0.6605796813964844, + "learning_rate": 4.639480542856764e-05, + "loss": 0.1266, + "step": 39970 + }, + { + "epoch": 1.4528672141870775, + "grad_norm": 0.8681196570396423, + "learning_rate": 4.639212043094651e-05, + "loss": 0.1168, + "step": 39980 + }, + { + "epoch": 1.4532306126898757, + "grad_norm": 0.7025002241134644, + "learning_rate": 4.6389434511615015e-05, + "loss": 0.1117, + "step": 39990 + }, + { + "epoch": 1.453594011192674, + "grad_norm": 1.203703761100769, + "learning_rate": 4.6386747670688897e-05, + "loss": 0.1524, + "step": 40000 + }, + { + "epoch": 1.4539574096954722, + "grad_norm": 2.274060010910034, + "learning_rate": 4.638405990828391e-05, + "loss": 0.113, + "step": 40010 + }, + { + "epoch": 1.4543208081982701, + "grad_norm": 1.424842357635498, + "learning_rate": 4.638137122451587e-05, + "loss": 0.1986, + "step": 40020 + }, + { + "epoch": 1.4546842067010683, + "grad_norm": 2.1440541744232178, + "learning_rate": 4.637868161950062e-05, + "loss": 0.1406, + "step": 40030 + }, + { + "epoch": 1.4550476052038666, + "grad_norm": 0.9488077759742737, + "learning_rate": 4.6375991093354035e-05, + "loss": 0.1827, + "step": 40040 + }, + { + "epoch": 1.4554110037066648, + "grad_norm": 7.7812724113464355, + "learning_rate": 4.637329964619206e-05, + "loss": 0.2488, + "step": 40050 + }, + { + "epoch": 1.455774402209463, + "grad_norm": 1.2816716432571411, + "learning_rate": 4.6370607278130646e-05, + "loss": 0.1125, + "step": 40060 + }, + { + "epoch": 1.456137800712261, + "grad_norm": 0.6140567660331726, + "learning_rate": 4.63679139892858e-05, + "loss": 0.1526, + "step": 40070 + }, + { + "epoch": 1.4565011992150592, + "grad_norm": 1.3745895624160767, + "learning_rate": 4.636521977977357e-05, + "loss": 0.1406, + "step": 40080 + }, + { + "epoch": 1.4568645977178574, + "grad_norm": 0.934893786907196, + "learning_rate": 4.636252464971004e-05, + "loss": 0.1236, + "step": 40090 + }, + { + "epoch": 1.4572279962206556, + "grad_norm": 4.1601738929748535, + "learning_rate": 4.635982859921132e-05, + "loss": 0.1256, + "step": 40100 + }, + { + "epoch": 1.4575913947234538, + "grad_norm": 1.3297815322875977, + "learning_rate": 4.635713162839359e-05, + "loss": 0.1076, + "step": 40110 + }, + { + "epoch": 1.457954793226252, + "grad_norm": 1.6878186464309692, + "learning_rate": 4.6354433737373055e-05, + "loss": 0.2212, + "step": 40120 + }, + { + "epoch": 1.4583181917290502, + "grad_norm": 1.2743428945541382, + "learning_rate": 4.6351734926265946e-05, + "loss": 0.105, + "step": 40130 + }, + { + "epoch": 1.4586815902318482, + "grad_norm": 1.5052075386047363, + "learning_rate": 4.634903519518854e-05, + "loss": 0.234, + "step": 40140 + }, + { + "epoch": 1.4590449887346464, + "grad_norm": 1.7959517240524292, + "learning_rate": 4.634633454425718e-05, + "loss": 0.1572, + "step": 40150 + }, + { + "epoch": 1.4594083872374446, + "grad_norm": 0.578747034072876, + "learning_rate": 4.63436329735882e-05, + "loss": 0.1111, + "step": 40160 + }, + { + "epoch": 1.4597717857402428, + "grad_norm": 0.3820185959339142, + "learning_rate": 4.634093048329803e-05, + "loss": 0.1589, + "step": 40170 + }, + { + "epoch": 1.460135184243041, + "grad_norm": 1.621700406074524, + "learning_rate": 4.633822707350309e-05, + "loss": 0.1401, + "step": 40180 + }, + { + "epoch": 1.460498582745839, + "grad_norm": 1.2941464185714722, + "learning_rate": 4.633552274431987e-05, + "loss": 0.1678, + "step": 40190 + }, + { + "epoch": 1.4608619812486372, + "grad_norm": 0.9888546466827393, + "learning_rate": 4.633281749586488e-05, + "loss": 0.1649, + "step": 40200 + }, + { + "epoch": 1.4608619812486372, + "eval_loss": 0.3407399654388428, + "eval_runtime": 180.8259, + "eval_samples_per_second": 41.001, + "eval_steps_per_second": 5.126, + "eval_wer": 0.17137437144879916, + "step": 40200 + }, + { + "epoch": 1.4612253797514354, + "grad_norm": 0.6030024290084839, + "learning_rate": 4.633011132825469e-05, + "loss": 0.1086, + "step": 40210 + }, + { + "epoch": 1.4615887782542336, + "grad_norm": 2.6498842239379883, + "learning_rate": 4.63274042416059e-05, + "loss": 0.1341, + "step": 40220 + }, + { + "epoch": 1.4619521767570318, + "grad_norm": 1.0614917278289795, + "learning_rate": 4.632469623603514e-05, + "loss": 0.1197, + "step": 40230 + }, + { + "epoch": 1.4623155752598298, + "grad_norm": 1.7546344995498657, + "learning_rate": 4.63219873116591e-05, + "loss": 0.1518, + "step": 40240 + }, + { + "epoch": 1.4626789737626282, + "grad_norm": 2.726959705352783, + "learning_rate": 4.631927746859448e-05, + "loss": 0.1514, + "step": 40250 + }, + { + "epoch": 1.4630423722654262, + "grad_norm": 1.0468662977218628, + "learning_rate": 4.6316566706958055e-05, + "loss": 0.1309, + "step": 40260 + }, + { + "epoch": 1.4634057707682244, + "grad_norm": 0.7446948885917664, + "learning_rate": 4.631385502686661e-05, + "loss": 0.176, + "step": 40270 + }, + { + "epoch": 1.4637691692710226, + "grad_norm": 2.804288148880005, + "learning_rate": 4.6311142428436996e-05, + "loss": 0.103, + "step": 40280 + }, + { + "epoch": 1.4641325677738208, + "grad_norm": 1.1131904125213623, + "learning_rate": 4.630842891178607e-05, + "loss": 0.1168, + "step": 40290 + }, + { + "epoch": 1.464495966276619, + "grad_norm": 1.084128975868225, + "learning_rate": 4.6305714477030766e-05, + "loss": 0.5731, + "step": 40300 + }, + { + "epoch": 1.464859364779417, + "grad_norm": 2.9369328022003174, + "learning_rate": 4.630299912428803e-05, + "loss": 0.12, + "step": 40310 + }, + { + "epoch": 1.4652227632822152, + "grad_norm": 0.449259489774704, + "learning_rate": 4.630028285367485e-05, + "loss": 0.1396, + "step": 40320 + }, + { + "epoch": 1.4655861617850134, + "grad_norm": 0.6570121049880981, + "learning_rate": 4.6297565665308276e-05, + "loss": 0.1143, + "step": 40330 + }, + { + "epoch": 1.4659495602878116, + "grad_norm": 0.9117491841316223, + "learning_rate": 4.629484755930537e-05, + "loss": 0.0996, + "step": 40340 + }, + { + "epoch": 1.4663129587906099, + "grad_norm": 1.197102665901184, + "learning_rate": 4.629212853578325e-05, + "loss": 0.1522, + "step": 40350 + }, + { + "epoch": 1.4666763572934078, + "grad_norm": 2.8265323638916016, + "learning_rate": 4.6289408594859075e-05, + "loss": 0.1239, + "step": 40360 + }, + { + "epoch": 1.467039755796206, + "grad_norm": 0.4153755307197571, + "learning_rate": 4.628668773665002e-05, + "loss": 0.1448, + "step": 40370 + }, + { + "epoch": 1.4674031542990043, + "grad_norm": 0.9625080823898315, + "learning_rate": 4.628396596127335e-05, + "loss": 0.1249, + "step": 40380 + }, + { + "epoch": 1.4677665528018025, + "grad_norm": 3.2815330028533936, + "learning_rate": 4.62812432688463e-05, + "loss": 0.1238, + "step": 40390 + }, + { + "epoch": 1.4681299513046007, + "grad_norm": 0.5435966849327087, + "learning_rate": 4.627851965948619e-05, + "loss": 0.149, + "step": 40400 + }, + { + "epoch": 1.4684933498073989, + "grad_norm": 1.5212714672088623, + "learning_rate": 4.6275795133310383e-05, + "loss": 0.1458, + "step": 40410 + }, + { + "epoch": 1.468856748310197, + "grad_norm": 0.9295603632926941, + "learning_rate": 4.627306969043627e-05, + "loss": 0.2099, + "step": 40420 + }, + { + "epoch": 1.469220146812995, + "grad_norm": 1.0282838344573975, + "learning_rate": 4.627034333098127e-05, + "loss": 0.1408, + "step": 40430 + }, + { + "epoch": 1.4695835453157933, + "grad_norm": 1.5155305862426758, + "learning_rate": 4.6267616055062855e-05, + "loss": 0.2335, + "step": 40440 + }, + { + "epoch": 1.4699469438185915, + "grad_norm": 0.6780584454536438, + "learning_rate": 4.626488786279854e-05, + "loss": 0.1611, + "step": 40450 + }, + { + "epoch": 1.4703103423213897, + "grad_norm": 1.1376898288726807, + "learning_rate": 4.626215875430586e-05, + "loss": 0.1282, + "step": 40460 + }, + { + "epoch": 1.470673740824188, + "grad_norm": 9.535052299499512, + "learning_rate": 4.6259428729702414e-05, + "loss": 0.3186, + "step": 40470 + }, + { + "epoch": 1.4710371393269859, + "grad_norm": 1.4366358518600464, + "learning_rate": 4.625669778910582e-05, + "loss": 0.1202, + "step": 40480 + }, + { + "epoch": 1.471400537829784, + "grad_norm": 0.9380308985710144, + "learning_rate": 4.625396593263376e-05, + "loss": 0.1125, + "step": 40490 + }, + { + "epoch": 1.4717639363325823, + "grad_norm": 1.2832533121109009, + "learning_rate": 4.6251233160403916e-05, + "loss": 2.9509, + "step": 40500 + }, + { + "epoch": 1.4721273348353805, + "grad_norm": 3.405505895614624, + "learning_rate": 4.624849947253406e-05, + "loss": 0.1369, + "step": 40510 + }, + { + "epoch": 1.4724907333381787, + "grad_norm": 0.7077997326850891, + "learning_rate": 4.624576486914196e-05, + "loss": 0.1233, + "step": 40520 + }, + { + "epoch": 1.4728541318409767, + "grad_norm": 0.7341346740722656, + "learning_rate": 4.624302935034545e-05, + "loss": 0.1146, + "step": 40530 + }, + { + "epoch": 1.4732175303437751, + "grad_norm": 0.9452338218688965, + "learning_rate": 4.6240292916262376e-05, + "loss": 0.1696, + "step": 40540 + }, + { + "epoch": 1.473580928846573, + "grad_norm": 0.8162540197372437, + "learning_rate": 4.623755556701066e-05, + "loss": 0.138, + "step": 40550 + }, + { + "epoch": 1.4739443273493713, + "grad_norm": 0.551977276802063, + "learning_rate": 4.623481730270824e-05, + "loss": 0.2382, + "step": 40560 + }, + { + "epoch": 1.4743077258521695, + "grad_norm": 0.6273486018180847, + "learning_rate": 4.623207812347309e-05, + "loss": 0.1902, + "step": 40570 + }, + { + "epoch": 1.4746711243549677, + "grad_norm": 0.5531787872314453, + "learning_rate": 4.622933802942324e-05, + "loss": 0.1103, + "step": 40580 + }, + { + "epoch": 1.475034522857766, + "grad_norm": 0.4982399344444275, + "learning_rate": 4.622659702067675e-05, + "loss": 0.1081, + "step": 40590 + }, + { + "epoch": 1.475397921360564, + "grad_norm": 0.9091404676437378, + "learning_rate": 4.622385509735172e-05, + "loss": 0.3927, + "step": 40600 + }, + { + "epoch": 1.4757613198633621, + "grad_norm": 0.6481756567955017, + "learning_rate": 4.622111225956629e-05, + "loss": 0.1186, + "step": 40610 + }, + { + "epoch": 1.4761247183661603, + "grad_norm": 1.1302157640457153, + "learning_rate": 4.621836850743864e-05, + "loss": 0.1667, + "step": 40620 + }, + { + "epoch": 1.4764881168689585, + "grad_norm": 2.197112560272217, + "learning_rate": 4.6215623841086974e-05, + "loss": 0.1226, + "step": 40630 + }, + { + "epoch": 1.4768515153717567, + "grad_norm": 3.960108995437622, + "learning_rate": 4.621287826062957e-05, + "loss": 0.1343, + "step": 40640 + }, + { + "epoch": 1.4772149138745547, + "grad_norm": 1.0748779773712158, + "learning_rate": 4.6210131766184714e-05, + "loss": 0.1492, + "step": 40650 + }, + { + "epoch": 1.477578312377353, + "grad_norm": 1.8779007196426392, + "learning_rate": 4.620738435787075e-05, + "loss": 0.1446, + "step": 40660 + }, + { + "epoch": 1.4779417108801511, + "grad_norm": 0.5000749230384827, + "learning_rate": 4.620463603580605e-05, + "loss": 0.1762, + "step": 40670 + }, + { + "epoch": 1.4783051093829493, + "grad_norm": 0.6756991147994995, + "learning_rate": 4.620188680010903e-05, + "loss": 0.1375, + "step": 40680 + }, + { + "epoch": 1.4786685078857476, + "grad_norm": 0.7340139746665955, + "learning_rate": 4.619941170692398e-05, + "loss": 2.0738, + "step": 40690 + }, + { + "epoch": 1.4790319063885458, + "grad_norm": 4.0170722007751465, + "learning_rate": 4.6196660735651925e-05, + "loss": 0.157, + "step": 40700 + }, + { + "epoch": 1.479395304891344, + "grad_norm": 0.8254412412643433, + "learning_rate": 4.619390885109118e-05, + "loss": 0.1385, + "step": 40710 + }, + { + "epoch": 1.479758703394142, + "grad_norm": 0.7477695345878601, + "learning_rate": 4.619115605336031e-05, + "loss": 0.1772, + "step": 40720 + }, + { + "epoch": 1.4801221018969402, + "grad_norm": 0.9684391617774963, + "learning_rate": 4.618840234257792e-05, + "loss": 3.1891, + "step": 40730 + }, + { + "epoch": 1.4804855003997384, + "grad_norm": 0.9600037336349487, + "learning_rate": 4.6185647718862655e-05, + "loss": 0.1289, + "step": 40740 + }, + { + "epoch": 1.4808488989025366, + "grad_norm": 1.848919153213501, + "learning_rate": 4.6182892182333226e-05, + "loss": 0.1588, + "step": 40750 + }, + { + "epoch": 1.4812122974053348, + "grad_norm": 1.707576036453247, + "learning_rate": 4.6180135733108335e-05, + "loss": 0.1578, + "step": 40760 + }, + { + "epoch": 1.4815756959081328, + "grad_norm": 0.9908716678619385, + "learning_rate": 4.617737837130675e-05, + "loss": 0.1763, + "step": 40770 + }, + { + "epoch": 1.481939094410931, + "grad_norm": 1.638818383216858, + "learning_rate": 4.617462009704728e-05, + "loss": 0.1323, + "step": 40780 + }, + { + "epoch": 1.4823024929137292, + "grad_norm": 2.1605606079101562, + "learning_rate": 4.6171860910448774e-05, + "loss": 0.123, + "step": 40790 + }, + { + "epoch": 1.4826658914165274, + "grad_norm": 0.7089453935623169, + "learning_rate": 4.6169100811630106e-05, + "loss": 0.1489, + "step": 40800 + }, + { + "epoch": 1.4826658914165274, + "eval_loss": 0.3524834215641022, + "eval_runtime": 180.6874, + "eval_samples_per_second": 41.032, + "eval_steps_per_second": 5.13, + "eval_wer": 0.17247263419681594, + "step": 40800 + }, + { + "epoch": 1.4830292899193256, + "grad_norm": 0.8966375589370728, + "learning_rate": 4.616633980071021e-05, + "loss": 0.1114, + "step": 40810 + }, + { + "epoch": 1.4833926884221236, + "grad_norm": 0.8344945907592773, + "learning_rate": 4.616357787780804e-05, + "loss": 0.1765, + "step": 40820 + }, + { + "epoch": 1.483756086924922, + "grad_norm": 1.2400190830230713, + "learning_rate": 4.616081504304259e-05, + "loss": 0.1805, + "step": 40830 + }, + { + "epoch": 1.48411948542772, + "grad_norm": 0.9686151742935181, + "learning_rate": 4.615805129653292e-05, + "loss": 0.1259, + "step": 40840 + }, + { + "epoch": 1.4844828839305182, + "grad_norm": 1.00034499168396, + "learning_rate": 4.615528663839811e-05, + "loss": 0.1632, + "step": 40850 + }, + { + "epoch": 1.4848462824333164, + "grad_norm": 3.2312328815460205, + "learning_rate": 4.6152521068757256e-05, + "loss": 0.116, + "step": 40860 + }, + { + "epoch": 1.4852096809361146, + "grad_norm": 0.4837055206298828, + "learning_rate": 4.6149754587729535e-05, + "loss": 0.1534, + "step": 40870 + }, + { + "epoch": 1.4855730794389128, + "grad_norm": 8.643519401550293, + "learning_rate": 4.614698719543413e-05, + "loss": 0.1122, + "step": 40880 + }, + { + "epoch": 1.4859364779417108, + "grad_norm": 0.9113799333572388, + "learning_rate": 4.61442188919903e-05, + "loss": 0.1046, + "step": 40890 + }, + { + "epoch": 1.486299876444509, + "grad_norm": 0.7763462662696838, + "learning_rate": 4.61414496775173e-05, + "loss": 0.1515, + "step": 40900 + }, + { + "epoch": 1.4866632749473072, + "grad_norm": 1.2019357681274414, + "learning_rate": 4.6138679552134464e-05, + "loss": 0.1372, + "step": 40910 + }, + { + "epoch": 1.4870266734501054, + "grad_norm": 1.1948570013046265, + "learning_rate": 4.6135908515961136e-05, + "loss": 0.2073, + "step": 40920 + }, + { + "epoch": 1.4873900719529036, + "grad_norm": 1.3027549982070923, + "learning_rate": 4.6133136569116706e-05, + "loss": 0.12, + "step": 40930 + }, + { + "epoch": 1.4877534704557016, + "grad_norm": 1.4980496168136597, + "learning_rate": 4.613036371172062e-05, + "loss": 2.4225, + "step": 40940 + }, + { + "epoch": 1.4881168689584998, + "grad_norm": 0.7265346050262451, + "learning_rate": 4.612758994389234e-05, + "loss": 0.1631, + "step": 40950 + }, + { + "epoch": 1.488480267461298, + "grad_norm": 0.6485431790351868, + "learning_rate": 4.612481526575138e-05, + "loss": 0.1175, + "step": 40960 + }, + { + "epoch": 1.4888436659640962, + "grad_norm": 0.9532496333122253, + "learning_rate": 4.612203967741729e-05, + "loss": 0.2368, + "step": 40970 + }, + { + "epoch": 1.4892070644668944, + "grad_norm": 3.3696892261505127, + "learning_rate": 4.6119263179009676e-05, + "loss": 0.1388, + "step": 40980 + }, + { + "epoch": 1.4895704629696926, + "grad_norm": 0.7628744840621948, + "learning_rate": 4.611648577064814e-05, + "loss": 0.1475, + "step": 40990 + }, + { + "epoch": 1.4899338614724909, + "grad_norm": 1.4854507446289062, + "learning_rate": 4.611370745245237e-05, + "loss": 0.1717, + "step": 41000 + }, + { + "epoch": 1.4902972599752888, + "grad_norm": 1.2280082702636719, + "learning_rate": 4.6110928224542074e-05, + "loss": 0.1389, + "step": 41010 + }, + { + "epoch": 1.490660658478087, + "grad_norm": 0.5658448934555054, + "learning_rate": 4.6108148087036984e-05, + "loss": 0.1625, + "step": 41020 + }, + { + "epoch": 1.4910240569808852, + "grad_norm": 1.1708754301071167, + "learning_rate": 4.6105367040056903e-05, + "loss": 0.1283, + "step": 41030 + }, + { + "epoch": 1.4913874554836835, + "grad_norm": 1.175658106803894, + "learning_rate": 4.610258508372165e-05, + "loss": 0.1197, + "step": 41040 + }, + { + "epoch": 1.4917508539864817, + "grad_norm": 1.0719672441482544, + "learning_rate": 4.609980221815109e-05, + "loss": 0.1361, + "step": 41050 + }, + { + "epoch": 1.4921142524892796, + "grad_norm": 0.7982541918754578, + "learning_rate": 4.6097018443465114e-05, + "loss": 0.1302, + "step": 41060 + }, + { + "epoch": 1.4924776509920779, + "grad_norm": 0.360454797744751, + "learning_rate": 4.609423375978369e-05, + "loss": 0.2231, + "step": 41070 + }, + { + "epoch": 1.492841049494876, + "grad_norm": 0.64405757188797, + "learning_rate": 4.609144816722678e-05, + "loss": 0.1212, + "step": 41080 + }, + { + "epoch": 1.4932044479976743, + "grad_norm": 0.7874402403831482, + "learning_rate": 4.608866166591441e-05, + "loss": 3.1348, + "step": 41090 + }, + { + "epoch": 1.4935678465004725, + "grad_norm": 1.059163212776184, + "learning_rate": 4.608587425596665e-05, + "loss": 0.1464, + "step": 41100 + }, + { + "epoch": 1.4939312450032705, + "grad_norm": 1.5717148780822754, + "learning_rate": 4.608308593750359e-05, + "loss": 0.1104, + "step": 41110 + }, + { + "epoch": 1.4942946435060689, + "grad_norm": 0.6417020559310913, + "learning_rate": 4.6080296710645365e-05, + "loss": 0.1573, + "step": 41120 + }, + { + "epoch": 1.4946580420088669, + "grad_norm": 0.8871016502380371, + "learning_rate": 4.607750657551216e-05, + "loss": 0.1087, + "step": 41130 + }, + { + "epoch": 1.495021440511665, + "grad_norm": 2.3125686645507812, + "learning_rate": 4.6074715532224196e-05, + "loss": 0.1379, + "step": 41140 + }, + { + "epoch": 1.4953848390144633, + "grad_norm": 2.087214708328247, + "learning_rate": 4.607192358090172e-05, + "loss": 0.1311, + "step": 41150 + }, + { + "epoch": 1.4957482375172615, + "grad_norm": 3.1915369033813477, + "learning_rate": 4.6069130721665035e-05, + "loss": 0.1231, + "step": 41160 + }, + { + "epoch": 1.4961116360200597, + "grad_norm": 0.4626937508583069, + "learning_rate": 4.606633695463447e-05, + "loss": 0.1544, + "step": 41170 + }, + { + "epoch": 1.4964750345228577, + "grad_norm": 161.15541076660156, + "learning_rate": 4.6063542279930395e-05, + "loss": 3.413, + "step": 41180 + }, + { + "epoch": 1.496838433025656, + "grad_norm": 0.9905474185943604, + "learning_rate": 4.606074669767323e-05, + "loss": 0.1285, + "step": 41190 + }, + { + "epoch": 1.497201831528454, + "grad_norm": 0.5389920473098755, + "learning_rate": 4.6057950207983426e-05, + "loss": 0.1184, + "step": 41200 + }, + { + "epoch": 1.4975652300312523, + "grad_norm": 2.7976090908050537, + "learning_rate": 4.605515281098147e-05, + "loss": 0.2461, + "step": 41210 + }, + { + "epoch": 1.4979286285340505, + "grad_norm": 0.4971259534358978, + "learning_rate": 4.60523545067879e-05, + "loss": 0.2034, + "step": 41220 + }, + { + "epoch": 1.4982920270368485, + "grad_norm": 1.5046378374099731, + "learning_rate": 4.6049555295523274e-05, + "loss": 0.1342, + "step": 41230 + }, + { + "epoch": 1.4986554255396467, + "grad_norm": 1.337195634841919, + "learning_rate": 4.60467551773082e-05, + "loss": 0.1198, + "step": 41240 + }, + { + "epoch": 1.499018824042445, + "grad_norm": 1.2729612588882446, + "learning_rate": 4.6043954152263336e-05, + "loss": 0.1312, + "step": 41250 + }, + { + "epoch": 1.4993822225452431, + "grad_norm": 0.9693030714988708, + "learning_rate": 4.6041152220509365e-05, + "loss": 0.1554, + "step": 41260 + }, + { + "epoch": 1.4997456210480413, + "grad_norm": 0.48035889863967896, + "learning_rate": 4.6038349382167e-05, + "loss": 0.1611, + "step": 41270 + }, + { + "epoch": 1.5001090195508393, + "grad_norm": 1.015608787536621, + "learning_rate": 4.603554563735702e-05, + "loss": 0.1322, + "step": 41280 + }, + { + "epoch": 1.5004724180536377, + "grad_norm": 1.900895595550537, + "learning_rate": 4.603274098620023e-05, + "loss": 0.1819, + "step": 41290 + }, + { + "epoch": 1.5008358165564357, + "grad_norm": 1.780765414237976, + "learning_rate": 4.602993542881745e-05, + "loss": 0.1396, + "step": 41300 + }, + { + "epoch": 1.501199215059234, + "grad_norm": 3.2523162364959717, + "learning_rate": 4.602712896532959e-05, + "loss": 0.1333, + "step": 41310 + }, + { + "epoch": 1.5015626135620321, + "grad_norm": 0.344933420419693, + "learning_rate": 4.6024321595857554e-05, + "loss": 0.1417, + "step": 41320 + }, + { + "epoch": 1.5019260120648303, + "grad_norm": 0.7336893081665039, + "learning_rate": 4.6021513320522304e-05, + "loss": 0.1551, + "step": 41330 + }, + { + "epoch": 1.5022894105676285, + "grad_norm": 0.9252750873565674, + "learning_rate": 4.601870413944484e-05, + "loss": 0.1049, + "step": 41340 + }, + { + "epoch": 1.5026528090704265, + "grad_norm": 2.0064470767974854, + "learning_rate": 4.60158940527462e-05, + "loss": 0.1593, + "step": 41350 + }, + { + "epoch": 1.503016207573225, + "grad_norm": 1.2280207872390747, + "learning_rate": 4.601308306054746e-05, + "loss": 0.1276, + "step": 41360 + }, + { + "epoch": 1.503379606076023, + "grad_norm": 0.7326213717460632, + "learning_rate": 4.601027116296974e-05, + "loss": 0.1853, + "step": 41370 + }, + { + "epoch": 1.5037430045788212, + "grad_norm": 2.208380937576294, + "learning_rate": 4.600745836013418e-05, + "loss": 0.1343, + "step": 41380 + }, + { + "epoch": 1.5041064030816194, + "grad_norm": 0.7113050818443298, + "learning_rate": 4.6004644652161996e-05, + "loss": 0.198, + "step": 41390 + }, + { + "epoch": 1.5044698015844173, + "grad_norm": 1.8392037153244019, + "learning_rate": 4.60018300391744e-05, + "loss": 0.1283, + "step": 41400 + }, + { + "epoch": 1.5044698015844173, + "eval_loss": 0.35653889179229736, + "eval_runtime": 180.3372, + "eval_samples_per_second": 41.112, + "eval_steps_per_second": 5.14, + "eval_wer": 0.16764390872619675, + "step": 41400 + }, + { + "epoch": 1.5048332000872158, + "grad_norm": 1.0530060529708862, + "learning_rate": 4.5999014521292674e-05, + "loss": 0.1072, + "step": 41410 + }, + { + "epoch": 1.5051965985900138, + "grad_norm": 1.0648863315582275, + "learning_rate": 4.599619809863813e-05, + "loss": 0.1939, + "step": 41420 + }, + { + "epoch": 1.505559997092812, + "grad_norm": 1.4178556203842163, + "learning_rate": 4.599338077133212e-05, + "loss": 0.1325, + "step": 41430 + }, + { + "epoch": 1.5059233955956102, + "grad_norm": 0.6156584024429321, + "learning_rate": 4.5990562539496015e-05, + "loss": 0.1337, + "step": 41440 + }, + { + "epoch": 1.5062867940984082, + "grad_norm": 0.9399839639663696, + "learning_rate": 4.598774340325126e-05, + "loss": 0.1697, + "step": 41450 + }, + { + "epoch": 1.5066501926012066, + "grad_norm": 0.9702737927436829, + "learning_rate": 4.598492336271931e-05, + "loss": 0.0978, + "step": 41460 + }, + { + "epoch": 1.5070135911040046, + "grad_norm": 0.8199527263641357, + "learning_rate": 4.598210241802169e-05, + "loss": 0.185, + "step": 41470 + }, + { + "epoch": 1.5073769896068028, + "grad_norm": 1.122827172279358, + "learning_rate": 4.597928056927993e-05, + "loss": 0.1234, + "step": 41480 + }, + { + "epoch": 1.507740388109601, + "grad_norm": 1.9142221212387085, + "learning_rate": 4.5976457816615606e-05, + "loss": 0.1346, + "step": 41490 + }, + { + "epoch": 1.5081037866123992, + "grad_norm": 1.0756717920303345, + "learning_rate": 4.5973634160150345e-05, + "loss": 0.1431, + "step": 41500 + }, + { + "epoch": 1.5084671851151974, + "grad_norm": 1.6231876611709595, + "learning_rate": 4.5970809600005826e-05, + "loss": 0.1608, + "step": 41510 + }, + { + "epoch": 1.5088305836179954, + "grad_norm": 0.3704961836338043, + "learning_rate": 4.596798413630373e-05, + "loss": 0.1501, + "step": 41520 + }, + { + "epoch": 1.5091939821207938, + "grad_norm": 0.7752798199653625, + "learning_rate": 4.59651577691658e-05, + "loss": 0.1344, + "step": 41530 + }, + { + "epoch": 1.5095573806235918, + "grad_norm": 2.622103214263916, + "learning_rate": 4.596233049871382e-05, + "loss": 0.1232, + "step": 41540 + }, + { + "epoch": 1.50992077912639, + "grad_norm": 0.4142579436302185, + "learning_rate": 4.595950232506961e-05, + "loss": 0.1227, + "step": 41550 + }, + { + "epoch": 1.5102841776291882, + "grad_norm": 0.9995001554489136, + "learning_rate": 4.5956673248355e-05, + "loss": 0.1143, + "step": 41560 + }, + { + "epoch": 1.5106475761319862, + "grad_norm": 2.1356821060180664, + "learning_rate": 4.595384326869191e-05, + "loss": 0.1969, + "step": 41570 + }, + { + "epoch": 1.5110109746347846, + "grad_norm": 0.9950689673423767, + "learning_rate": 4.5951012386202274e-05, + "loss": 0.1362, + "step": 41580 + }, + { + "epoch": 1.5113743731375826, + "grad_norm": 0.6441085934638977, + "learning_rate": 4.5948180601008054e-05, + "loss": 0.1557, + "step": 41590 + }, + { + "epoch": 1.5117377716403808, + "grad_norm": 2.1033713817596436, + "learning_rate": 4.594534791323127e-05, + "loss": 0.1718, + "step": 41600 + }, + { + "epoch": 1.512101170143179, + "grad_norm": 1.3968003988265991, + "learning_rate": 4.5942514322993965e-05, + "loss": 0.2915, + "step": 41610 + }, + { + "epoch": 1.5124645686459772, + "grad_norm": 0.7833322882652283, + "learning_rate": 4.593967983041823e-05, + "loss": 0.1379, + "step": 41620 + }, + { + "epoch": 1.5128279671487754, + "grad_norm": 1.0050405263900757, + "learning_rate": 4.5936844435626196e-05, + "loss": 0.1307, + "step": 41630 + }, + { + "epoch": 1.5131913656515734, + "grad_norm": 1.9530189037322998, + "learning_rate": 4.593400813874003e-05, + "loss": 0.139, + "step": 41640 + }, + { + "epoch": 1.5135547641543718, + "grad_norm": 0.45743170380592346, + "learning_rate": 4.593117093988194e-05, + "loss": 0.1422, + "step": 41650 + }, + { + "epoch": 1.5139181626571698, + "grad_norm": 1.310746431350708, + "learning_rate": 4.592833283917416e-05, + "loss": 1.58, + "step": 41660 + }, + { + "epoch": 1.514281561159968, + "grad_norm": 0.6696259379386902, + "learning_rate": 4.592549383673898e-05, + "loss": 0.1466, + "step": 41670 + }, + { + "epoch": 1.5146449596627662, + "grad_norm": 1.0350476503372192, + "learning_rate": 4.5922653932698734e-05, + "loss": 0.1114, + "step": 41680 + }, + { + "epoch": 1.5150083581655642, + "grad_norm": 1.5413391590118408, + "learning_rate": 4.591981312717577e-05, + "loss": 0.1225, + "step": 41690 + }, + { + "epoch": 1.5153717566683627, + "grad_norm": 0.8129068613052368, + "learning_rate": 4.5916971420292485e-05, + "loss": 0.1951, + "step": 41700 + }, + { + "epoch": 1.5157351551711606, + "grad_norm": 1.1114506721496582, + "learning_rate": 4.591412881217133e-05, + "loss": 0.1227, + "step": 41710 + }, + { + "epoch": 1.5160985536739588, + "grad_norm": 0.5106993317604065, + "learning_rate": 4.5911285302934775e-05, + "loss": 0.1985, + "step": 41720 + }, + { + "epoch": 1.516461952176757, + "grad_norm": 1.2125110626220703, + "learning_rate": 4.590844089270534e-05, + "loss": 0.1233, + "step": 41730 + }, + { + "epoch": 1.516825350679555, + "grad_norm": 1.3580394983291626, + "learning_rate": 4.590559558160558e-05, + "loss": 0.1227, + "step": 41740 + }, + { + "epoch": 1.5171887491823535, + "grad_norm": 0.4338432252407074, + "learning_rate": 4.590274936975809e-05, + "loss": 0.1462, + "step": 41750 + }, + { + "epoch": 1.5175521476851515, + "grad_norm": 0.9010568857192993, + "learning_rate": 4.58999022572855e-05, + "loss": 0.1372, + "step": 41760 + }, + { + "epoch": 1.5179155461879497, + "grad_norm": 0.737705647945404, + "learning_rate": 4.589705424431048e-05, + "loss": 0.1538, + "step": 41770 + }, + { + "epoch": 1.5182789446907479, + "grad_norm": 1.0285004377365112, + "learning_rate": 4.589420533095575e-05, + "loss": 0.1101, + "step": 41780 + }, + { + "epoch": 1.518642343193546, + "grad_norm": 0.5717383027076721, + "learning_rate": 4.589135551734405e-05, + "loss": 0.1157, + "step": 41790 + }, + { + "epoch": 1.5190057416963443, + "grad_norm": 1.1417220830917358, + "learning_rate": 4.588850480359818e-05, + "loss": 0.1359, + "step": 41800 + }, + { + "epoch": 1.5193691401991423, + "grad_norm": 2.673459768295288, + "learning_rate": 4.588565318984095e-05, + "loss": 0.1238, + "step": 41810 + }, + { + "epoch": 1.5197325387019407, + "grad_norm": 1.1211605072021484, + "learning_rate": 4.588280067619524e-05, + "loss": 0.1642, + "step": 41820 + }, + { + "epoch": 1.5200959372047387, + "grad_norm": 2.358137369155884, + "learning_rate": 4.587994726278395e-05, + "loss": 0.1234, + "step": 41830 + }, + { + "epoch": 1.5204593357075369, + "grad_norm": 0.8301489949226379, + "learning_rate": 4.587709294973002e-05, + "loss": 0.1274, + "step": 41840 + }, + { + "epoch": 1.520822734210335, + "grad_norm": 2.1138226985931396, + "learning_rate": 4.587423773715644e-05, + "loss": 0.1326, + "step": 41850 + }, + { + "epoch": 1.521186132713133, + "grad_norm": 0.7757201194763184, + "learning_rate": 4.587138162518623e-05, + "loss": 0.1183, + "step": 41860 + }, + { + "epoch": 1.5215495312159315, + "grad_norm": 0.7807698249816895, + "learning_rate": 4.586852461394243e-05, + "loss": 0.1485, + "step": 41870 + }, + { + "epoch": 1.5219129297187295, + "grad_norm": 2.2938053607940674, + "learning_rate": 4.586566670354817e-05, + "loss": 0.1152, + "step": 41880 + }, + { + "epoch": 1.5222763282215277, + "grad_norm": 1.2340235710144043, + "learning_rate": 4.5862807894126566e-05, + "loss": 0.1766, + "step": 41890 + }, + { + "epoch": 1.522639726724326, + "grad_norm": 0.9382178783416748, + "learning_rate": 4.5859948185800806e-05, + "loss": 0.1273, + "step": 41900 + }, + { + "epoch": 1.523003125227124, + "grad_norm": 4.5072526931762695, + "learning_rate": 4.58570875786941e-05, + "loss": 0.1333, + "step": 41910 + }, + { + "epoch": 1.5233665237299223, + "grad_norm": 0.41228216886520386, + "learning_rate": 4.5854226072929696e-05, + "loss": 0.1766, + "step": 41920 + }, + { + "epoch": 1.5237299222327203, + "grad_norm": 0.869669497013092, + "learning_rate": 4.5851363668630886e-05, + "loss": 0.1271, + "step": 41930 + }, + { + "epoch": 1.5240933207355187, + "grad_norm": 1.169318675994873, + "learning_rate": 4.584850036592101e-05, + "loss": 0.083, + "step": 41940 + }, + { + "epoch": 1.5244567192383167, + "grad_norm": 3.336904287338257, + "learning_rate": 4.5845636164923426e-05, + "loss": 0.1357, + "step": 41950 + }, + { + "epoch": 1.524820117741115, + "grad_norm": 1.167758584022522, + "learning_rate": 4.584277106576156e-05, + "loss": 0.1162, + "step": 41960 + }, + { + "epoch": 1.5251835162439131, + "grad_norm": 0.9635423421859741, + "learning_rate": 4.5839905068558835e-05, + "loss": 0.2177, + "step": 41970 + }, + { + "epoch": 1.5255469147467111, + "grad_norm": 1.3818042278289795, + "learning_rate": 4.583703817343876e-05, + "loss": 0.1246, + "step": 41980 + }, + { + "epoch": 1.5259103132495095, + "grad_norm": 1.1299431324005127, + "learning_rate": 4.583417038052484e-05, + "loss": 0.1359, + "step": 41990 + }, + { + "epoch": 1.5262737117523075, + "grad_norm": 2.181351661682129, + "learning_rate": 4.583130168994065e-05, + "loss": 0.1706, + "step": 42000 + }, + { + "epoch": 1.5262737117523075, + "eval_loss": 0.3528802692890167, + "eval_runtime": 181.0519, + "eval_samples_per_second": 40.95, + "eval_steps_per_second": 5.12, + "eval_wer": 0.17613956105796286, + "step": 42000 + }, + { + "epoch": 1.5266371102551057, + "grad_norm": 1.0958346128463745, + "learning_rate": 4.582843210180979e-05, + "loss": 0.1187, + "step": 42010 + }, + { + "epoch": 1.527000508757904, + "grad_norm": 0.463438481092453, + "learning_rate": 4.58255616162559e-05, + "loss": 0.1539, + "step": 42020 + }, + { + "epoch": 1.527363907260702, + "grad_norm": 0.5655350685119629, + "learning_rate": 4.5822690233402656e-05, + "loss": 0.1503, + "step": 42030 + }, + { + "epoch": 1.5277273057635004, + "grad_norm": 1.5692224502563477, + "learning_rate": 4.5819817953373764e-05, + "loss": 0.1219, + "step": 42040 + }, + { + "epoch": 1.5280907042662983, + "grad_norm": 0.48884958028793335, + "learning_rate": 4.5816944776293016e-05, + "loss": 0.1455, + "step": 42050 + }, + { + "epoch": 1.5284541027690965, + "grad_norm": 0.8623284697532654, + "learning_rate": 4.5814070702284175e-05, + "loss": 0.1498, + "step": 42060 + }, + { + "epoch": 1.5288175012718948, + "grad_norm": 0.5985013246536255, + "learning_rate": 4.581119573147108e-05, + "loss": 0.4594, + "step": 42070 + }, + { + "epoch": 1.529180899774693, + "grad_norm": 0.9812720417976379, + "learning_rate": 4.580831986397761e-05, + "loss": 0.1234, + "step": 42080 + }, + { + "epoch": 1.5295442982774912, + "grad_norm": 0.5680709481239319, + "learning_rate": 4.5805443099927666e-05, + "loss": 0.1061, + "step": 42090 + }, + { + "epoch": 1.5299076967802892, + "grad_norm": 0.6387588977813721, + "learning_rate": 4.5802565439445225e-05, + "loss": 0.1436, + "step": 42100 + }, + { + "epoch": 1.5302710952830876, + "grad_norm": 1.1865098476409912, + "learning_rate": 4.5799686882654236e-05, + "loss": 0.1155, + "step": 42110 + }, + { + "epoch": 1.5306344937858856, + "grad_norm": 0.7588171362876892, + "learning_rate": 4.579680742967875e-05, + "loss": 0.1799, + "step": 42120 + }, + { + "epoch": 1.5309978922886838, + "grad_norm": 0.9183505773544312, + "learning_rate": 4.579392708064283e-05, + "loss": 0.1133, + "step": 42130 + }, + { + "epoch": 1.531361290791482, + "grad_norm": 1.1988872289657593, + "learning_rate": 4.5791045835670575e-05, + "loss": 0.1107, + "step": 42140 + }, + { + "epoch": 1.53172468929428, + "grad_norm": 0.6209965944290161, + "learning_rate": 4.578816369488613e-05, + "loss": 0.1518, + "step": 42150 + }, + { + "epoch": 1.5320880877970784, + "grad_norm": 1.3487142324447632, + "learning_rate": 4.5785280658413674e-05, + "loss": 0.1126, + "step": 42160 + }, + { + "epoch": 1.5324514862998764, + "grad_norm": 0.6516602039337158, + "learning_rate": 4.578239672637743e-05, + "loss": 0.1498, + "step": 42170 + }, + { + "epoch": 1.5328148848026746, + "grad_norm": 2.4193315505981445, + "learning_rate": 4.577951189890166e-05, + "loss": 0.1408, + "step": 42180 + }, + { + "epoch": 1.5331782833054728, + "grad_norm": 0.6747106313705444, + "learning_rate": 4.577662617611065e-05, + "loss": 0.1226, + "step": 42190 + }, + { + "epoch": 1.533541681808271, + "grad_norm": 3.124244451522827, + "learning_rate": 4.5773739558128744e-05, + "loss": 0.1512, + "step": 42200 + }, + { + "epoch": 1.5339050803110692, + "grad_norm": 0.8625807762145996, + "learning_rate": 4.5770852045080314e-05, + "loss": 0.1187, + "step": 42210 + }, + { + "epoch": 1.5342684788138672, + "grad_norm": 0.9007976651191711, + "learning_rate": 4.576796363708977e-05, + "loss": 0.2001, + "step": 42220 + }, + { + "epoch": 1.5346318773166656, + "grad_norm": 0.7381039261817932, + "learning_rate": 4.576507433428157e-05, + "loss": 0.1063, + "step": 42230 + }, + { + "epoch": 1.5349952758194636, + "grad_norm": 0.9550501704216003, + "learning_rate": 4.57621841367802e-05, + "loss": 0.1448, + "step": 42240 + }, + { + "epoch": 1.5353586743222618, + "grad_norm": 0.5087346434593201, + "learning_rate": 4.5759293044710175e-05, + "loss": 0.1665, + "step": 42250 + }, + { + "epoch": 1.53572207282506, + "grad_norm": 0.4684658646583557, + "learning_rate": 4.575640105819609e-05, + "loss": 0.1089, + "step": 42260 + }, + { + "epoch": 1.536085471327858, + "grad_norm": 0.6353893876075745, + "learning_rate": 4.575350817736252e-05, + "loss": 0.2437, + "step": 42270 + }, + { + "epoch": 1.5364488698306564, + "grad_norm": 0.7524349689483643, + "learning_rate": 4.575061440233414e-05, + "loss": 0.1858, + "step": 42280 + }, + { + "epoch": 1.5368122683334544, + "grad_norm": 0.9425112009048462, + "learning_rate": 4.57477197332356e-05, + "loss": 0.0948, + "step": 42290 + }, + { + "epoch": 1.5371756668362526, + "grad_norm": 1.419872522354126, + "learning_rate": 4.574482417019165e-05, + "loss": 0.1272, + "step": 42300 + }, + { + "epoch": 1.5375390653390508, + "grad_norm": 0.6511875987052917, + "learning_rate": 4.574192771332703e-05, + "loss": 0.176, + "step": 42310 + }, + { + "epoch": 1.5379024638418488, + "grad_norm": 1.2612382173538208, + "learning_rate": 4.573903036276655e-05, + "loss": 0.1681, + "step": 42320 + }, + { + "epoch": 1.5382658623446472, + "grad_norm": 0.828471839427948, + "learning_rate": 4.573613211863504e-05, + "loss": 0.1218, + "step": 42330 + }, + { + "epoch": 1.5386292608474452, + "grad_norm": 0.7098140716552734, + "learning_rate": 4.573323298105737e-05, + "loss": 0.1264, + "step": 42340 + }, + { + "epoch": 1.5389926593502434, + "grad_norm": 0.612920343875885, + "learning_rate": 4.573033295015847e-05, + "loss": 0.1457, + "step": 42350 + }, + { + "epoch": 1.5393560578530416, + "grad_norm": 2.700010299682617, + "learning_rate": 4.572743202606328e-05, + "loss": 0.1416, + "step": 42360 + }, + { + "epoch": 1.5397194563558398, + "grad_norm": 0.4544985890388489, + "learning_rate": 4.5724530208896784e-05, + "loss": 0.2174, + "step": 42370 + }, + { + "epoch": 1.540082854858638, + "grad_norm": 1.7702118158340454, + "learning_rate": 4.5721627498784025e-05, + "loss": 0.7935, + "step": 42380 + }, + { + "epoch": 1.540446253361436, + "grad_norm": 2.3855764865875244, + "learning_rate": 4.571872389585007e-05, + "loss": 0.1142, + "step": 42390 + }, + { + "epoch": 1.5408096518642345, + "grad_norm": 1.9382286071777344, + "learning_rate": 4.5715819400220004e-05, + "loss": 0.1349, + "step": 42400 + }, + { + "epoch": 1.5411730503670324, + "grad_norm": 1.8577841520309448, + "learning_rate": 4.5712914012019003e-05, + "loss": 0.1154, + "step": 42410 + }, + { + "epoch": 1.5415364488698307, + "grad_norm": 1.4880726337432861, + "learning_rate": 4.571000773137223e-05, + "loss": 0.1402, + "step": 42420 + }, + { + "epoch": 1.5418998473726289, + "grad_norm": 0.6903501152992249, + "learning_rate": 4.570710055840491e-05, + "loss": 0.1137, + "step": 42430 + }, + { + "epoch": 1.5422632458754268, + "grad_norm": 1.4438791275024414, + "learning_rate": 4.57041924932423e-05, + "loss": 0.1285, + "step": 42440 + }, + { + "epoch": 1.5426266443782253, + "grad_norm": 0.41870322823524475, + "learning_rate": 4.57012835360097e-05, + "loss": 0.1366, + "step": 42450 + }, + { + "epoch": 1.5429900428810233, + "grad_norm": 0.9365738034248352, + "learning_rate": 4.569837368683245e-05, + "loss": 0.1051, + "step": 42460 + }, + { + "epoch": 1.5433534413838215, + "grad_norm": 1.940673828125, + "learning_rate": 4.569546294583593e-05, + "loss": 0.157, + "step": 42470 + }, + { + "epoch": 1.5437168398866197, + "grad_norm": 1.1944515705108643, + "learning_rate": 4.5692551313145536e-05, + "loss": 1.5159, + "step": 42480 + }, + { + "epoch": 1.5440802383894179, + "grad_norm": 0.6140870451927185, + "learning_rate": 4.568963878888673e-05, + "loss": 0.0986, + "step": 42490 + }, + { + "epoch": 1.544443636892216, + "grad_norm": 1.8208271265029907, + "learning_rate": 4.5686725373185016e-05, + "loss": 0.1519, + "step": 42500 + }, + { + "epoch": 1.544807035395014, + "grad_norm": 1.2457455396652222, + "learning_rate": 4.56838110661659e-05, + "loss": 0.1634, + "step": 42510 + }, + { + "epoch": 1.5451704338978125, + "grad_norm": 0.5140019655227661, + "learning_rate": 4.568089586795496e-05, + "loss": 0.1628, + "step": 42520 + }, + { + "epoch": 1.5455338324006105, + "grad_norm": 0.8539334535598755, + "learning_rate": 4.5677979778677796e-05, + "loss": 0.1243, + "step": 42530 + }, + { + "epoch": 1.5458972309034087, + "grad_norm": 1.2581802606582642, + "learning_rate": 4.567506279846006e-05, + "loss": 0.1715, + "step": 42540 + }, + { + "epoch": 1.546260629406207, + "grad_norm": 1.8808507919311523, + "learning_rate": 4.567214492742743e-05, + "loss": 0.1415, + "step": 42550 + }, + { + "epoch": 1.5466240279090049, + "grad_norm": 1.948970079421997, + "learning_rate": 4.566922616570562e-05, + "loss": 0.1147, + "step": 42560 + }, + { + "epoch": 1.5469874264118033, + "grad_norm": 1.5000864267349243, + "learning_rate": 4.566630651342041e-05, + "loss": 0.1614, + "step": 42570 + }, + { + "epoch": 1.5473508249146013, + "grad_norm": 1.5625576972961426, + "learning_rate": 4.566338597069757e-05, + "loss": 0.1145, + "step": 42580 + }, + { + "epoch": 1.5477142234173995, + "grad_norm": 1.2443382740020752, + "learning_rate": 4.566046453766295e-05, + "loss": 0.1203, + "step": 42590 + }, + { + "epoch": 1.5480776219201977, + "grad_norm": 1.5014569759368896, + "learning_rate": 4.5657542214442426e-05, + "loss": 0.1459, + "step": 42600 + }, + { + "epoch": 1.5480776219201977, + "eval_loss": 0.351544588804245, + "eval_runtime": 180.3519, + "eval_samples_per_second": 41.109, + "eval_steps_per_second": 5.14, + "eval_wer": 0.17858115344818196, + "step": 42600 + }, + { + "epoch": 1.5484410204229957, + "grad_norm": 1.0584172010421753, + "learning_rate": 4.565461900116191e-05, + "loss": 0.1046, + "step": 42610 + }, + { + "epoch": 1.5488044189257941, + "grad_norm": 0.6157267689704895, + "learning_rate": 4.565169489794735e-05, + "loss": 0.135, + "step": 42620 + }, + { + "epoch": 1.549167817428592, + "grad_norm": 0.898263692855835, + "learning_rate": 4.564876990492474e-05, + "loss": 0.1157, + "step": 42630 + }, + { + "epoch": 1.5495312159313903, + "grad_norm": 0.6782193779945374, + "learning_rate": 4.5645844022220096e-05, + "loss": 0.2191, + "step": 42640 + }, + { + "epoch": 1.5498946144341885, + "grad_norm": 0.6636195182800293, + "learning_rate": 4.5642917249959493e-05, + "loss": 0.1709, + "step": 42650 + }, + { + "epoch": 1.5502580129369867, + "grad_norm": 1.3367676734924316, + "learning_rate": 4.563998958826904e-05, + "loss": 0.1197, + "step": 42660 + }, + { + "epoch": 1.550621411439785, + "grad_norm": 0.470985472202301, + "learning_rate": 4.563706103727486e-05, + "loss": 0.1395, + "step": 42670 + }, + { + "epoch": 1.550984809942583, + "grad_norm": 1.1232322454452515, + "learning_rate": 4.563413159710316e-05, + "loss": 0.1139, + "step": 42680 + }, + { + "epoch": 1.5513482084453813, + "grad_norm": 1.0105756521224976, + "learning_rate": 4.563120126788013e-05, + "loss": 0.1243, + "step": 42690 + }, + { + "epoch": 1.5517116069481793, + "grad_norm": 0.785205602645874, + "learning_rate": 4.562827004973206e-05, + "loss": 0.1588, + "step": 42700 + }, + { + "epoch": 1.5520750054509775, + "grad_norm": 1.4863699674606323, + "learning_rate": 4.5625337942785224e-05, + "loss": 0.0913, + "step": 42710 + }, + { + "epoch": 1.5524384039537757, + "grad_norm": 0.33174383640289307, + "learning_rate": 4.562240494716596e-05, + "loss": 0.158, + "step": 42720 + }, + { + "epoch": 1.5528018024565737, + "grad_norm": 0.7735195159912109, + "learning_rate": 4.5619471063000644e-05, + "loss": 0.1295, + "step": 42730 + }, + { + "epoch": 1.5531652009593722, + "grad_norm": 3.2964320182800293, + "learning_rate": 4.561653629041568e-05, + "loss": 0.1144, + "step": 42740 + }, + { + "epoch": 1.5535285994621701, + "grad_norm": 0.6756449937820435, + "learning_rate": 4.5613600629537526e-05, + "loss": 0.119, + "step": 42750 + }, + { + "epoch": 1.5538919979649684, + "grad_norm": 1.7608799934387207, + "learning_rate": 4.5610664080492655e-05, + "loss": 0.1239, + "step": 42760 + }, + { + "epoch": 1.5542553964677666, + "grad_norm": 0.8312143087387085, + "learning_rate": 4.5607726643407614e-05, + "loss": 0.1434, + "step": 42770 + }, + { + "epoch": 1.5546187949705648, + "grad_norm": 1.3083513975143433, + "learning_rate": 4.560478831840894e-05, + "loss": 0.125, + "step": 42780 + }, + { + "epoch": 1.554982193473363, + "grad_norm": 1.4495130777359009, + "learning_rate": 4.560184910562326e-05, + "loss": 0.1172, + "step": 42790 + }, + { + "epoch": 1.555345591976161, + "grad_norm": 0.5549319982528687, + "learning_rate": 4.559890900517721e-05, + "loss": 1.7985, + "step": 42800 + }, + { + "epoch": 1.5557089904789594, + "grad_norm": 1.0677647590637207, + "learning_rate": 4.5595968017197446e-05, + "loss": 0.1485, + "step": 42810 + }, + { + "epoch": 1.5560723889817574, + "grad_norm": 0.5432078242301941, + "learning_rate": 4.559302614181071e-05, + "loss": 0.1372, + "step": 42820 + }, + { + "epoch": 1.5564357874845556, + "grad_norm": 2.0982048511505127, + "learning_rate": 4.559008337914375e-05, + "loss": 0.1543, + "step": 42830 + }, + { + "epoch": 1.5567991859873538, + "grad_norm": 2.8568451404571533, + "learning_rate": 4.558713972932335e-05, + "loss": 0.1271, + "step": 42840 + }, + { + "epoch": 1.5571625844901518, + "grad_norm": 0.9933029413223267, + "learning_rate": 4.558419519247635e-05, + "loss": 1.0891, + "step": 42850 + }, + { + "epoch": 1.5575259829929502, + "grad_norm": 0.6010461449623108, + "learning_rate": 4.5581249768729614e-05, + "loss": 0.1509, + "step": 42860 + }, + { + "epoch": 1.5578893814957482, + "grad_norm": 0.6242499351501465, + "learning_rate": 4.557830345821006e-05, + "loss": 0.1527, + "step": 42870 + }, + { + "epoch": 1.5582527799985464, + "grad_norm": 0.48831334710121155, + "learning_rate": 4.557535626104463e-05, + "loss": 0.1451, + "step": 42880 + }, + { + "epoch": 1.5586161785013446, + "grad_norm": 1.1660668849945068, + "learning_rate": 4.55724081773603e-05, + "loss": 0.1558, + "step": 42890 + }, + { + "epoch": 1.5589795770041426, + "grad_norm": 1.067808747291565, + "learning_rate": 4.5569459207284106e-05, + "loss": 0.1634, + "step": 42900 + }, + { + "epoch": 1.559342975506941, + "grad_norm": 1.6434768438339233, + "learning_rate": 4.556650935094309e-05, + "loss": 0.1269, + "step": 42910 + }, + { + "epoch": 1.559706374009739, + "grad_norm": 0.4303635358810425, + "learning_rate": 4.556355860846437e-05, + "loss": 0.1536, + "step": 42920 + }, + { + "epoch": 1.5600697725125372, + "grad_norm": 3.148212194442749, + "learning_rate": 4.5560606979975075e-05, + "loss": 0.1062, + "step": 42930 + }, + { + "epoch": 1.5604331710153354, + "grad_norm": 3.3599109649658203, + "learning_rate": 4.5557654465602376e-05, + "loss": 0.1158, + "step": 42940 + }, + { + "epoch": 1.5607965695181336, + "grad_norm": 3.2170286178588867, + "learning_rate": 4.5554701065473494e-05, + "loss": 0.1491, + "step": 42950 + }, + { + "epoch": 1.5611599680209318, + "grad_norm": 1.1147798299789429, + "learning_rate": 4.555174677971567e-05, + "loss": 0.1143, + "step": 42960 + }, + { + "epoch": 1.5615233665237298, + "grad_norm": 0.4949367046356201, + "learning_rate": 4.5548791608456206e-05, + "loss": 0.1639, + "step": 42970 + }, + { + "epoch": 1.5618867650265282, + "grad_norm": 0.7166339755058289, + "learning_rate": 4.554583555182244e-05, + "loss": 0.137, + "step": 42980 + }, + { + "epoch": 1.5622501635293262, + "grad_norm": 0.48903581500053406, + "learning_rate": 4.55428786099417e-05, + "loss": 0.126, + "step": 42990 + }, + { + "epoch": 1.5626135620321244, + "grad_norm": 0.43728914856910706, + "learning_rate": 4.553992078294142e-05, + "loss": 0.1371, + "step": 43000 + }, + { + "epoch": 1.5629769605349226, + "grad_norm": 0.7486665844917297, + "learning_rate": 4.5536962070949035e-05, + "loss": 0.1233, + "step": 43010 + }, + { + "epoch": 1.5633403590377206, + "grad_norm": 0.7540434002876282, + "learning_rate": 4.5534002474092025e-05, + "loss": 0.1356, + "step": 43020 + }, + { + "epoch": 1.563703757540519, + "grad_norm": 1.2763710021972656, + "learning_rate": 4.55310419924979e-05, + "loss": 0.1234, + "step": 43030 + }, + { + "epoch": 1.564067156043317, + "grad_norm": 0.5709404945373535, + "learning_rate": 4.552808062629424e-05, + "loss": 0.1224, + "step": 43040 + }, + { + "epoch": 1.5644305545461152, + "grad_norm": 0.5243006348609924, + "learning_rate": 4.552511837560862e-05, + "loss": 0.1175, + "step": 43050 + }, + { + "epoch": 1.5647939530489134, + "grad_norm": 1.3225644826889038, + "learning_rate": 4.552215524056867e-05, + "loss": 0.1408, + "step": 43060 + }, + { + "epoch": 1.5651573515517117, + "grad_norm": 0.2830749452114105, + "learning_rate": 4.551919122130208e-05, + "loss": 0.1588, + "step": 43070 + }, + { + "epoch": 1.5655207500545099, + "grad_norm": 1.7666617631912231, + "learning_rate": 4.551622631793654e-05, + "loss": 0.109, + "step": 43080 + }, + { + "epoch": 1.5658841485573078, + "grad_norm": 0.6468254327774048, + "learning_rate": 4.551326053059981e-05, + "loss": 0.1199, + "step": 43090 + }, + { + "epoch": 1.5662475470601063, + "grad_norm": 0.7526164650917053, + "learning_rate": 4.551029385941967e-05, + "loss": 0.1648, + "step": 43100 + }, + { + "epoch": 1.5666109455629043, + "grad_norm": 3.8184330463409424, + "learning_rate": 4.550732630452394e-05, + "loss": 0.1392, + "step": 43110 + }, + { + "epoch": 1.5669743440657025, + "grad_norm": 0.9396213293075562, + "learning_rate": 4.550435786604049e-05, + "loss": 0.1659, + "step": 43120 + }, + { + "epoch": 1.5673377425685007, + "grad_norm": 1.536440372467041, + "learning_rate": 4.550168551604358e-05, + "loss": 0.1227, + "step": 43130 + }, + { + "epoch": 1.5677011410712987, + "grad_norm": 1.6777888536453247, + "learning_rate": 4.549871539909584e-05, + "loss": 0.128, + "step": 43140 + }, + { + "epoch": 1.568064539574097, + "grad_norm": 21.312944412231445, + "learning_rate": 4.5495744398931396e-05, + "loss": 0.2651, + "step": 43150 + }, + { + "epoch": 1.568427938076895, + "grad_norm": 0.8739009499549866, + "learning_rate": 4.549277251567824e-05, + "loss": 0.12, + "step": 43160 + }, + { + "epoch": 1.5687913365796933, + "grad_norm": 0.3690776526927948, + "learning_rate": 4.548979974946444e-05, + "loss": 0.1665, + "step": 43170 + }, + { + "epoch": 1.5691547350824915, + "grad_norm": 1.3902113437652588, + "learning_rate": 4.548682610041807e-05, + "loss": 0.1502, + "step": 43180 + }, + { + "epoch": 1.5695181335852895, + "grad_norm": 0.9234703779220581, + "learning_rate": 4.5483851568667244e-05, + "loss": 0.1168, + "step": 43190 + }, + { + "epoch": 1.569881532088088, + "grad_norm": 0.7674643397331238, + "learning_rate": 4.5480876154340145e-05, + "loss": 0.1404, + "step": 43200 + }, + { + "epoch": 1.569881532088088, + "eval_loss": 0.3601061701774597, + "eval_runtime": 180.5599, + "eval_samples_per_second": 41.061, + "eval_steps_per_second": 5.134, + "eval_wer": 0.16855156388984696, + "step": 43200 + }, + { + "epoch": 1.5702449305908859, + "grad_norm": 0.7668557167053223, + "learning_rate": 4.5477899857564966e-05, + "loss": 0.1842, + "step": 43210 + }, + { + "epoch": 1.570608329093684, + "grad_norm": 0.7534570693969727, + "learning_rate": 4.5474922678469936e-05, + "loss": 0.1558, + "step": 43220 + }, + { + "epoch": 1.5709717275964823, + "grad_norm": 0.9190795421600342, + "learning_rate": 4.547194461718334e-05, + "loss": 0.1808, + "step": 43230 + }, + { + "epoch": 1.5713351260992805, + "grad_norm": 0.4574483633041382, + "learning_rate": 4.54689656738335e-05, + "loss": 0.1146, + "step": 43240 + }, + { + "epoch": 1.5716985246020787, + "grad_norm": 1.1554951667785645, + "learning_rate": 4.5465985848548744e-05, + "loss": 0.8771, + "step": 43250 + }, + { + "epoch": 1.5720619231048767, + "grad_norm": 1.175336480140686, + "learning_rate": 4.546300514145748e-05, + "loss": 0.1337, + "step": 43260 + }, + { + "epoch": 1.5724253216076751, + "grad_norm": 0.4004783630371094, + "learning_rate": 4.5460023552688136e-05, + "loss": 0.1963, + "step": 43270 + }, + { + "epoch": 1.572788720110473, + "grad_norm": 0.5944772362709045, + "learning_rate": 4.5457041082369164e-05, + "loss": 0.1223, + "step": 43280 + }, + { + "epoch": 1.5731521186132713, + "grad_norm": 0.7069734334945679, + "learning_rate": 4.545405773062909e-05, + "loss": 3.2472, + "step": 43290 + }, + { + "epoch": 1.5735155171160695, + "grad_norm": 1.0471086502075195, + "learning_rate": 4.545107349759644e-05, + "loss": 0.1558, + "step": 43300 + }, + { + "epoch": 1.5738789156188675, + "grad_norm": 0.6987308263778687, + "learning_rate": 4.54480883833998e-05, + "loss": 0.4641, + "step": 43310 + }, + { + "epoch": 1.574242314121666, + "grad_norm": 0.599287211894989, + "learning_rate": 4.5445102388167785e-05, + "loss": 0.1592, + "step": 43320 + }, + { + "epoch": 1.574605712624464, + "grad_norm": 0.9643434286117554, + "learning_rate": 4.544211551202904e-05, + "loss": 0.1165, + "step": 43330 + }, + { + "epoch": 1.5749691111272621, + "grad_norm": 0.5655382871627808, + "learning_rate": 4.5439127755112285e-05, + "loss": 0.1234, + "step": 43340 + }, + { + "epoch": 1.5753325096300603, + "grad_norm": 1.7126801013946533, + "learning_rate": 4.5436139117546235e-05, + "loss": 0.1647, + "step": 43350 + }, + { + "epoch": 1.5756959081328585, + "grad_norm": 0.6298018097877502, + "learning_rate": 4.543314959945966e-05, + "loss": 0.1028, + "step": 43360 + }, + { + "epoch": 1.5760593066356567, + "grad_norm": 0.5706765651702881, + "learning_rate": 4.543015920098137e-05, + "loss": 0.5641, + "step": 43370 + }, + { + "epoch": 1.5764227051384547, + "grad_norm": 0.9098716974258423, + "learning_rate": 4.542716792224022e-05, + "loss": 0.1233, + "step": 43380 + }, + { + "epoch": 1.5767861036412532, + "grad_norm": 1.0217915773391724, + "learning_rate": 4.5424175763365075e-05, + "loss": 0.1306, + "step": 43390 + }, + { + "epoch": 1.5771495021440511, + "grad_norm": 0.651685893535614, + "learning_rate": 4.5421182724484866e-05, + "loss": 0.1433, + "step": 43400 + }, + { + "epoch": 1.5775129006468493, + "grad_norm": 0.6281771659851074, + "learning_rate": 4.541818880572856e-05, + "loss": 0.1313, + "step": 43410 + }, + { + "epoch": 1.5778762991496476, + "grad_norm": 1.7486456632614136, + "learning_rate": 4.541519400722514e-05, + "loss": 0.122, + "step": 43420 + }, + { + "epoch": 1.5782396976524455, + "grad_norm": 1.2109237909317017, + "learning_rate": 4.541219832910364e-05, + "loss": 0.1297, + "step": 43430 + }, + { + "epoch": 1.578603096155244, + "grad_norm": 1.041900634765625, + "learning_rate": 4.540920177149315e-05, + "loss": 0.1014, + "step": 43440 + }, + { + "epoch": 1.578966494658042, + "grad_norm": 0.7674359083175659, + "learning_rate": 4.540620433452277e-05, + "loss": 0.1838, + "step": 43450 + }, + { + "epoch": 1.5793298931608402, + "grad_norm": 1.9548803567886353, + "learning_rate": 4.540320601832165e-05, + "loss": 0.1345, + "step": 43460 + }, + { + "epoch": 1.5796932916636384, + "grad_norm": 0.39995163679122925, + "learning_rate": 4.540020682301898e-05, + "loss": 0.1305, + "step": 43470 + }, + { + "epoch": 1.5800566901664364, + "grad_norm": 0.9415978789329529, + "learning_rate": 4.539720674874398e-05, + "loss": 0.123, + "step": 43480 + }, + { + "epoch": 1.5804200886692348, + "grad_norm": 0.8457926511764526, + "learning_rate": 4.539420579562592e-05, + "loss": 0.1145, + "step": 43490 + }, + { + "epoch": 1.5807834871720328, + "grad_norm": 2.9950082302093506, + "learning_rate": 4.539120396379409e-05, + "loss": 0.1551, + "step": 43500 + }, + { + "epoch": 1.581146885674831, + "grad_norm": 1.8456460237503052, + "learning_rate": 4.5388201253377834e-05, + "loss": 0.0885, + "step": 43510 + }, + { + "epoch": 1.5815102841776292, + "grad_norm": 0.4476306736469269, + "learning_rate": 4.538519766450653e-05, + "loss": 0.1351, + "step": 43520 + }, + { + "epoch": 1.5818736826804274, + "grad_norm": 0.7363295555114746, + "learning_rate": 4.5382193197309584e-05, + "loss": 0.1045, + "step": 43530 + }, + { + "epoch": 1.5822370811832256, + "grad_norm": 2.1484272480010986, + "learning_rate": 4.5379187851916463e-05, + "loss": 0.1304, + "step": 43540 + }, + { + "epoch": 1.5826004796860236, + "grad_norm": 0.5627908706665039, + "learning_rate": 4.537618162845664e-05, + "loss": 0.1454, + "step": 43550 + }, + { + "epoch": 1.582963878188822, + "grad_norm": 1.4841351509094238, + "learning_rate": 4.537317452705964e-05, + "loss": 0.1301, + "step": 43560 + }, + { + "epoch": 1.58332727669162, + "grad_norm": 0.7127716541290283, + "learning_rate": 4.537016654785505e-05, + "loss": 0.1608, + "step": 43570 + }, + { + "epoch": 1.5836906751944182, + "grad_norm": 1.0103297233581543, + "learning_rate": 4.536715769097246e-05, + "loss": 0.1137, + "step": 43580 + }, + { + "epoch": 1.5840540736972164, + "grad_norm": 0.8980743288993835, + "learning_rate": 4.536414795654151e-05, + "loss": 0.1213, + "step": 43590 + }, + { + "epoch": 1.5844174722000144, + "grad_norm": 0.5678355097770691, + "learning_rate": 4.536113734469188e-05, + "loss": 0.1253, + "step": 43600 + }, + { + "epoch": 1.5847808707028128, + "grad_norm": 0.6713634729385376, + "learning_rate": 4.535812585555328e-05, + "loss": 0.1144, + "step": 43610 + }, + { + "epoch": 1.5851442692056108, + "grad_norm": 0.4925456643104553, + "learning_rate": 4.5355113489255484e-05, + "loss": 0.1448, + "step": 43620 + }, + { + "epoch": 1.585507667708409, + "grad_norm": 1.3464380502700806, + "learning_rate": 4.5352100245928267e-05, + "loss": 0.1213, + "step": 43630 + }, + { + "epoch": 1.5858710662112072, + "grad_norm": 1.3755130767822266, + "learning_rate": 4.5349086125701456e-05, + "loss": 0.1277, + "step": 43640 + }, + { + "epoch": 1.5862344647140054, + "grad_norm": 1.2649788856506348, + "learning_rate": 4.534607112870494e-05, + "loss": 0.1379, + "step": 43650 + }, + { + "epoch": 1.5865978632168036, + "grad_norm": 0.6860102415084839, + "learning_rate": 4.53430552550686e-05, + "loss": 0.1209, + "step": 43660 + }, + { + "epoch": 1.5869612617196016, + "grad_norm": 0.9149149656295776, + "learning_rate": 4.534003850492239e-05, + "loss": 0.158, + "step": 43670 + }, + { + "epoch": 1.5873246602224, + "grad_norm": 1.1880120038986206, + "learning_rate": 4.53370208783963e-05, + "loss": 0.1283, + "step": 43680 + }, + { + "epoch": 1.587688058725198, + "grad_norm": 2.6330199241638184, + "learning_rate": 4.533400237562033e-05, + "loss": 0.1414, + "step": 43690 + }, + { + "epoch": 1.5880514572279962, + "grad_norm": 0.7637589573860168, + "learning_rate": 4.533098299672455e-05, + "loss": 0.1267, + "step": 43700 + }, + { + "epoch": 1.5884148557307944, + "grad_norm": 1.7144758701324463, + "learning_rate": 4.5327962741839044e-05, + "loss": 0.1222, + "step": 43710 + }, + { + "epoch": 1.5887782542335924, + "grad_norm": 1.0269776582717896, + "learning_rate": 4.532494161109396e-05, + "loss": 0.1862, + "step": 43720 + }, + { + "epoch": 1.5891416527363909, + "grad_norm": 0.8622583746910095, + "learning_rate": 4.532191960461946e-05, + "loss": 0.1894, + "step": 43730 + }, + { + "epoch": 1.5895050512391888, + "grad_norm": 1.0310677289962769, + "learning_rate": 4.531889672254575e-05, + "loss": 0.1284, + "step": 43740 + }, + { + "epoch": 1.589868449741987, + "grad_norm": 2.753690242767334, + "learning_rate": 4.531587296500306e-05, + "loss": 0.1404, + "step": 43750 + }, + { + "epoch": 1.5902318482447853, + "grad_norm": 0.5997269749641418, + "learning_rate": 4.53128483321217e-05, + "loss": 0.1119, + "step": 43760 + }, + { + "epoch": 1.5905952467475832, + "grad_norm": 0.8589096665382385, + "learning_rate": 4.5309822824031976e-05, + "loss": 0.1319, + "step": 43770 + }, + { + "epoch": 1.5909586452503817, + "grad_norm": 0.7129044532775879, + "learning_rate": 4.530679644086425e-05, + "loss": 0.2389, + "step": 43780 + }, + { + "epoch": 1.5913220437531796, + "grad_norm": 0.6947050094604492, + "learning_rate": 4.530376918274892e-05, + "loss": 0.115, + "step": 43790 + }, + { + "epoch": 1.5916854422559779, + "grad_norm": 0.9983404278755188, + "learning_rate": 4.530074104981641e-05, + "loss": 0.1446, + "step": 43800 + }, + { + "epoch": 1.5916854422559779, + "eval_loss": 0.3569597005844116, + "eval_runtime": 180.6536, + "eval_samples_per_second": 41.04, + "eval_steps_per_second": 5.131, + "eval_wer": 0.17362535625465172, + "step": 43800 + }, + { + "epoch": 1.592048840758776, + "grad_norm": 0.712482750415802, + "learning_rate": 4.529771204219721e-05, + "loss": 0.1434, + "step": 43810 + }, + { + "epoch": 1.5924122392615743, + "grad_norm": 0.5298041105270386, + "learning_rate": 4.5294682160021806e-05, + "loss": 0.1771, + "step": 43820 + }, + { + "epoch": 1.5927756377643725, + "grad_norm": 1.337560772895813, + "learning_rate": 4.529165140342076e-05, + "loss": 0.1144, + "step": 43830 + }, + { + "epoch": 1.5931390362671705, + "grad_norm": 0.5129504203796387, + "learning_rate": 4.5288619772524654e-05, + "loss": 0.1001, + "step": 43840 + }, + { + "epoch": 1.593502434769969, + "grad_norm": 0.7407031059265137, + "learning_rate": 4.528558726746411e-05, + "loss": 0.1302, + "step": 43850 + }, + { + "epoch": 1.5938658332727669, + "grad_norm": 0.9279839992523193, + "learning_rate": 4.5282553888369785e-05, + "loss": 0.1452, + "step": 43860 + }, + { + "epoch": 1.594229231775565, + "grad_norm": 0.5245470404624939, + "learning_rate": 4.5279519635372374e-05, + "loss": 0.1756, + "step": 43870 + }, + { + "epoch": 1.5945926302783633, + "grad_norm": 0.6099745631217957, + "learning_rate": 4.527648450860262e-05, + "loss": 0.2019, + "step": 43880 + }, + { + "epoch": 1.5949560287811613, + "grad_norm": 0.9615786075592041, + "learning_rate": 4.52734485081913e-05, + "loss": 0.1252, + "step": 43890 + }, + { + "epoch": 1.5953194272839597, + "grad_norm": 1.52881920337677, + "learning_rate": 4.527041163426921e-05, + "loss": 1.8751, + "step": 43900 + }, + { + "epoch": 1.5956828257867577, + "grad_norm": 0.8344588875770569, + "learning_rate": 4.526737388696721e-05, + "loss": 0.129, + "step": 43910 + }, + { + "epoch": 1.596046224289556, + "grad_norm": 0.5732100605964661, + "learning_rate": 4.526433526641617e-05, + "loss": 0.1475, + "step": 43920 + }, + { + "epoch": 1.596409622792354, + "grad_norm": 0.8947811722755432, + "learning_rate": 4.526129577274704e-05, + "loss": 0.4153, + "step": 43930 + }, + { + "epoch": 1.5967730212951523, + "grad_norm": 1.6199461221694946, + "learning_rate": 4.5258255406090746e-05, + "loss": 0.1379, + "step": 43940 + }, + { + "epoch": 1.5971364197979505, + "grad_norm": 1.3465640544891357, + "learning_rate": 4.525521416657832e-05, + "loss": 0.1515, + "step": 43950 + }, + { + "epoch": 1.5974998183007485, + "grad_norm": 1.7875219583511353, + "learning_rate": 4.525217205434078e-05, + "loss": 0.1119, + "step": 43960 + }, + { + "epoch": 1.597863216803547, + "grad_norm": 0.5457040071487427, + "learning_rate": 4.52491290695092e-05, + "loss": 0.1499, + "step": 43970 + }, + { + "epoch": 1.598226615306345, + "grad_norm": 1.2962692975997925, + "learning_rate": 4.52460852122147e-05, + "loss": 0.1311, + "step": 43980 + }, + { + "epoch": 1.5985900138091431, + "grad_norm": 0.679913341999054, + "learning_rate": 4.5243040482588426e-05, + "loss": 0.1298, + "step": 43990 + }, + { + "epoch": 1.5989534123119413, + "grad_norm": 1.5390740633010864, + "learning_rate": 4.523999488076156e-05, + "loss": 0.1483, + "step": 44000 + }, + { + "epoch": 1.5993168108147393, + "grad_norm": 3.566751003265381, + "learning_rate": 4.523694840686532e-05, + "loss": 0.1303, + "step": 44010 + }, + { + "epoch": 1.5996802093175377, + "grad_norm": 0.7023512125015259, + "learning_rate": 4.5233901061030984e-05, + "loss": 0.1305, + "step": 44020 + }, + { + "epoch": 1.6000436078203357, + "grad_norm": 1.47295343875885, + "learning_rate": 4.523085284338985e-05, + "loss": 0.1173, + "step": 44030 + }, + { + "epoch": 1.600407006323134, + "grad_norm": 0.7622318863868713, + "learning_rate": 4.522780375407324e-05, + "loss": 0.1494, + "step": 44040 + }, + { + "epoch": 1.6007704048259321, + "grad_norm": 2.0168585777282715, + "learning_rate": 4.522475379321254e-05, + "loss": 0.1575, + "step": 44050 + }, + { + "epoch": 1.6011338033287301, + "grad_norm": 0.9191824793815613, + "learning_rate": 4.522170296093916e-05, + "loss": 0.1111, + "step": 44060 + }, + { + "epoch": 1.6014972018315285, + "grad_norm": 0.5007340908050537, + "learning_rate": 4.521865125738455e-05, + "loss": 0.193, + "step": 44070 + }, + { + "epoch": 1.6018606003343265, + "grad_norm": 0.8389549851417542, + "learning_rate": 4.5215598682680186e-05, + "loss": 0.1227, + "step": 44080 + }, + { + "epoch": 1.6022239988371247, + "grad_norm": 0.7387205362319946, + "learning_rate": 4.521254523695761e-05, + "loss": 0.2035, + "step": 44090 + }, + { + "epoch": 1.602587397339923, + "grad_norm": 1.1978685855865479, + "learning_rate": 4.520949092034837e-05, + "loss": 0.1739, + "step": 44100 + }, + { + "epoch": 1.6029507958427212, + "grad_norm": 1.9989899396896362, + "learning_rate": 4.5206435732984085e-05, + "loss": 0.1285, + "step": 44110 + }, + { + "epoch": 1.6033141943455194, + "grad_norm": 0.6451914310455322, + "learning_rate": 4.5203379674996365e-05, + "loss": 0.1466, + "step": 44120 + }, + { + "epoch": 1.6036775928483173, + "grad_norm": 0.6689841747283936, + "learning_rate": 4.5200322746516904e-05, + "loss": 0.113, + "step": 44130 + }, + { + "epoch": 1.6040409913511158, + "grad_norm": 1.1558260917663574, + "learning_rate": 4.519726494767741e-05, + "loss": 0.1005, + "step": 44140 + }, + { + "epoch": 1.6044043898539138, + "grad_norm": 13.844839096069336, + "learning_rate": 4.519420627860963e-05, + "loss": 0.1279, + "step": 44150 + }, + { + "epoch": 1.604767788356712, + "grad_norm": 0.6856222152709961, + "learning_rate": 4.519114673944536e-05, + "loss": 0.1147, + "step": 44160 + }, + { + "epoch": 1.6051311868595102, + "grad_norm": 0.7829769253730774, + "learning_rate": 4.5188086330316405e-05, + "loss": 0.1336, + "step": 44170 + }, + { + "epoch": 1.6054945853623082, + "grad_norm": 1.3698971271514893, + "learning_rate": 4.518502505135465e-05, + "loss": 0.1158, + "step": 44180 + }, + { + "epoch": 1.6058579838651066, + "grad_norm": 1.3197015523910522, + "learning_rate": 4.5181962902691975e-05, + "loss": 0.1293, + "step": 44190 + }, + { + "epoch": 1.6062213823679046, + "grad_norm": 0.8092926740646362, + "learning_rate": 4.517889988446033e-05, + "loss": 0.1466, + "step": 44200 + }, + { + "epoch": 1.6065847808707028, + "grad_norm": 2.015113115310669, + "learning_rate": 4.5175835996791684e-05, + "loss": 0.1228, + "step": 44210 + }, + { + "epoch": 1.606948179373501, + "grad_norm": 1.2220087051391602, + "learning_rate": 4.5172771239818056e-05, + "loss": 0.199, + "step": 44220 + }, + { + "epoch": 1.6073115778762992, + "grad_norm": 0.5432813167572021, + "learning_rate": 4.516970561367149e-05, + "loss": 0.1453, + "step": 44230 + }, + { + "epoch": 1.6076749763790974, + "grad_norm": 0.6337705850601196, + "learning_rate": 4.516663911848407e-05, + "loss": 0.1257, + "step": 44240 + }, + { + "epoch": 1.6080383748818954, + "grad_norm": 0.6741940379142761, + "learning_rate": 4.5163571754387915e-05, + "loss": 0.1062, + "step": 44250 + }, + { + "epoch": 1.6084017733846938, + "grad_norm": 2.3033409118652344, + "learning_rate": 4.516050352151521e-05, + "loss": 0.1452, + "step": 44260 + }, + { + "epoch": 1.6087651718874918, + "grad_norm": 0.4420888125896454, + "learning_rate": 4.515743441999814e-05, + "loss": 0.1358, + "step": 44270 + }, + { + "epoch": 1.60912857039029, + "grad_norm": 1.5571812391281128, + "learning_rate": 4.515436444996893e-05, + "loss": 0.1102, + "step": 44280 + }, + { + "epoch": 1.6094919688930882, + "grad_norm": 1.084507703781128, + "learning_rate": 4.5151293611559865e-05, + "loss": 0.1099, + "step": 44290 + }, + { + "epoch": 1.6098553673958862, + "grad_norm": 0.7025009989738464, + "learning_rate": 4.514822190490327e-05, + "loss": 0.2296, + "step": 44300 + }, + { + "epoch": 1.6102187658986846, + "grad_norm": 2.125432252883911, + "learning_rate": 4.514514933013147e-05, + "loss": 0.1189, + "step": 44310 + }, + { + "epoch": 1.6105821644014826, + "grad_norm": 0.47693368792533875, + "learning_rate": 4.5142075887376856e-05, + "loss": 0.1488, + "step": 44320 + }, + { + "epoch": 1.6109455629042808, + "grad_norm": 0.7935511469841003, + "learning_rate": 4.5139001576771865e-05, + "loss": 1.8833, + "step": 44330 + }, + { + "epoch": 1.611308961407079, + "grad_norm": 0.6441402435302734, + "learning_rate": 4.513592639844896e-05, + "loss": 0.1173, + "step": 44340 + }, + { + "epoch": 1.611672359909877, + "grad_norm": 1.3646268844604492, + "learning_rate": 4.513285035254062e-05, + "loss": 0.1171, + "step": 44350 + }, + { + "epoch": 1.6120357584126754, + "grad_norm": 1.0334749221801758, + "learning_rate": 4.512977343917939e-05, + "loss": 0.1069, + "step": 44360 + }, + { + "epoch": 1.6123991569154734, + "grad_norm": 0.3879293203353882, + "learning_rate": 4.5126695658497856e-05, + "loss": 0.1244, + "step": 44370 + }, + { + "epoch": 1.6127625554182716, + "grad_norm": 0.6635248064994812, + "learning_rate": 4.5123617010628606e-05, + "loss": 0.1102, + "step": 44380 + }, + { + "epoch": 1.6131259539210698, + "grad_norm": 0.8040985465049744, + "learning_rate": 4.51205374957043e-05, + "loss": 0.1455, + "step": 44390 + }, + { + "epoch": 1.613489352423868, + "grad_norm": 0.5279836654663086, + "learning_rate": 4.511745711385763e-05, + "loss": 0.1547, + "step": 44400 + }, + { + "epoch": 1.613489352423868, + "eval_loss": 0.34678882360458374, + "eval_runtime": 179.7828, + "eval_samples_per_second": 41.239, + "eval_steps_per_second": 5.156, + "eval_wer": 0.17216403144117487, + "step": 44400 + }, + { + "epoch": 1.6138527509266662, + "grad_norm": 1.1375586986541748, + "learning_rate": 4.51143758652213e-05, + "loss": 0.14, + "step": 44410 + }, + { + "epoch": 1.6142161494294642, + "grad_norm": 1.5960606336593628, + "learning_rate": 4.511129374992809e-05, + "loss": 0.1336, + "step": 44420 + }, + { + "epoch": 1.6145795479322627, + "grad_norm": 0.5347716808319092, + "learning_rate": 4.5108210768110785e-05, + "loss": 0.1083, + "step": 44430 + }, + { + "epoch": 1.6149429464350606, + "grad_norm": 1.816926121711731, + "learning_rate": 4.510512691990222e-05, + "loss": 0.1122, + "step": 44440 + }, + { + "epoch": 1.6153063449378589, + "grad_norm": 1.2517473697662354, + "learning_rate": 4.510204220543528e-05, + "loss": 0.144, + "step": 44450 + }, + { + "epoch": 1.615669743440657, + "grad_norm": 1.0830953121185303, + "learning_rate": 4.509895662484286e-05, + "loss": 0.1851, + "step": 44460 + }, + { + "epoch": 1.616033141943455, + "grad_norm": 0.45219525694847107, + "learning_rate": 4.50958701782579e-05, + "loss": 0.1589, + "step": 44470 + }, + { + "epoch": 1.6163965404462535, + "grad_norm": 0.940949559211731, + "learning_rate": 4.509278286581341e-05, + "loss": 0.113, + "step": 44480 + }, + { + "epoch": 1.6167599389490515, + "grad_norm": 0.7262178659439087, + "learning_rate": 4.5089694687642394e-05, + "loss": 0.1294, + "step": 44490 + }, + { + "epoch": 1.6171233374518497, + "grad_norm": 0.8851106762886047, + "learning_rate": 4.508660564387791e-05, + "loss": 0.1563, + "step": 44500 + }, + { + "epoch": 1.6174867359546479, + "grad_norm": 1.4259148836135864, + "learning_rate": 4.508351573465306e-05, + "loss": 0.1298, + "step": 44510 + }, + { + "epoch": 1.617850134457446, + "grad_norm": 1.7158180475234985, + "learning_rate": 4.508042496010098e-05, + "loss": 0.197, + "step": 44520 + }, + { + "epoch": 1.6182135329602443, + "grad_norm": 1.1961179971694946, + "learning_rate": 4.507733332035482e-05, + "loss": 2.6746, + "step": 44530 + }, + { + "epoch": 1.6185769314630423, + "grad_norm": 1.0735702514648438, + "learning_rate": 4.507424081554782e-05, + "loss": 0.1132, + "step": 44540 + }, + { + "epoch": 1.6189403299658407, + "grad_norm": 0.8479132056236267, + "learning_rate": 4.507114744581319e-05, + "loss": 0.1411, + "step": 44550 + }, + { + "epoch": 1.6193037284686387, + "grad_norm": 0.804205596446991, + "learning_rate": 4.506805321128424e-05, + "loss": 0.1301, + "step": 44560 + }, + { + "epoch": 1.6196671269714369, + "grad_norm": 0.4933542013168335, + "learning_rate": 4.506495811209428e-05, + "loss": 0.1765, + "step": 44570 + }, + { + "epoch": 1.620030525474235, + "grad_norm": 1.0244536399841309, + "learning_rate": 4.506186214837666e-05, + "loss": 0.131, + "step": 44580 + }, + { + "epoch": 1.620393923977033, + "grad_norm": 0.4374043941497803, + "learning_rate": 4.5058765320264784e-05, + "loss": 0.102, + "step": 44590 + }, + { + "epoch": 1.6207573224798315, + "grad_norm": 0.5329868197441101, + "learning_rate": 4.505566762789208e-05, + "loss": 0.1168, + "step": 44600 + }, + { + "epoch": 1.6211207209826295, + "grad_norm": 0.9576613306999207, + "learning_rate": 4.5052569071392014e-05, + "loss": 0.0948, + "step": 44610 + }, + { + "epoch": 1.6214841194854277, + "grad_norm": 0.3620557188987732, + "learning_rate": 4.50494696508981e-05, + "loss": 0.1693, + "step": 44620 + }, + { + "epoch": 1.621847517988226, + "grad_norm": 119.01215362548828, + "learning_rate": 4.504636936654387e-05, + "loss": 2.0014, + "step": 44630 + }, + { + "epoch": 1.622210916491024, + "grad_norm": 0.491005003452301, + "learning_rate": 4.504326821846291e-05, + "loss": 0.0958, + "step": 44640 + }, + { + "epoch": 1.6225743149938223, + "grad_norm": 0.8035761713981628, + "learning_rate": 4.504016620678883e-05, + "loss": 0.6652, + "step": 44650 + }, + { + "epoch": 1.6229377134966203, + "grad_norm": 1.4501937627792358, + "learning_rate": 4.5037063331655305e-05, + "loss": 0.1282, + "step": 44660 + }, + { + "epoch": 1.6233011119994185, + "grad_norm": 0.3285962641239166, + "learning_rate": 4.503395959319601e-05, + "loss": 0.6589, + "step": 44670 + }, + { + "epoch": 1.6236645105022167, + "grad_norm": 1.3429205417633057, + "learning_rate": 4.5030854991544666e-05, + "loss": 0.1224, + "step": 44680 + }, + { + "epoch": 1.624027909005015, + "grad_norm": 0.6868845224380493, + "learning_rate": 4.502774952683506e-05, + "loss": 0.1229, + "step": 44690 + }, + { + "epoch": 1.6243913075078131, + "grad_norm": 0.7645006775856018, + "learning_rate": 4.502464319920099e-05, + "loss": 0.1587, + "step": 44700 + }, + { + "epoch": 1.6247547060106111, + "grad_norm": 1.2401680946350098, + "learning_rate": 4.502153600877628e-05, + "loss": 0.1274, + "step": 44710 + }, + { + "epoch": 1.6251181045134095, + "grad_norm": 0.4394826292991638, + "learning_rate": 4.501842795569483e-05, + "loss": 0.1434, + "step": 44720 + }, + { + "epoch": 1.6254815030162075, + "grad_norm": 0.5105617046356201, + "learning_rate": 4.5015319040090545e-05, + "loss": 0.1089, + "step": 44730 + }, + { + "epoch": 1.6258449015190057, + "grad_norm": 1.5043278932571411, + "learning_rate": 4.5012209262097365e-05, + "loss": 0.1391, + "step": 44740 + }, + { + "epoch": 1.626208300021804, + "grad_norm": 0.8561335802078247, + "learning_rate": 4.5009098621849296e-05, + "loss": 0.2735, + "step": 44750 + }, + { + "epoch": 1.626571698524602, + "grad_norm": 1.71244478225708, + "learning_rate": 4.500598711948037e-05, + "loss": 0.1855, + "step": 44760 + }, + { + "epoch": 1.6269350970274004, + "grad_norm": 0.6392226815223694, + "learning_rate": 4.500287475512463e-05, + "loss": 0.1675, + "step": 44770 + }, + { + "epoch": 1.6272984955301983, + "grad_norm": 0.9670777916908264, + "learning_rate": 4.4999761528916194e-05, + "loss": 0.1201, + "step": 44780 + }, + { + "epoch": 1.6276618940329965, + "grad_norm": 0.6879392862319946, + "learning_rate": 4.4996647440989195e-05, + "loss": 0.15, + "step": 44790 + }, + { + "epoch": 1.6280252925357948, + "grad_norm": 1.038004994392395, + "learning_rate": 4.49935324914778e-05, + "loss": 0.148, + "step": 44800 + }, + { + "epoch": 1.628388691038593, + "grad_norm": 1.1731406450271606, + "learning_rate": 4.499041668051624e-05, + "loss": 0.1225, + "step": 44810 + }, + { + "epoch": 1.6287520895413912, + "grad_norm": 1.0449947118759155, + "learning_rate": 4.498730000823873e-05, + "loss": 0.1348, + "step": 44820 + }, + { + "epoch": 1.6291154880441892, + "grad_norm": 0.7107880115509033, + "learning_rate": 4.498418247477959e-05, + "loss": 0.1185, + "step": 44830 + }, + { + "epoch": 1.6294788865469876, + "grad_norm": 0.9275081157684326, + "learning_rate": 4.498106408027313e-05, + "loss": 0.1405, + "step": 44840 + }, + { + "epoch": 1.6298422850497856, + "grad_norm": 1.5348129272460938, + "learning_rate": 4.497794482485371e-05, + "loss": 0.1401, + "step": 44850 + }, + { + "epoch": 1.6302056835525838, + "grad_norm": 1.6144418716430664, + "learning_rate": 4.497482470865574e-05, + "loss": 0.1191, + "step": 44860 + }, + { + "epoch": 1.630569082055382, + "grad_norm": 1.1674468517303467, + "learning_rate": 4.497170373181363e-05, + "loss": 0.7629, + "step": 44870 + }, + { + "epoch": 1.63093248055818, + "grad_norm": 0.9818703532218933, + "learning_rate": 4.496858189446187e-05, + "loss": 0.1275, + "step": 44880 + }, + { + "epoch": 1.6312958790609784, + "grad_norm": 8.3660249710083, + "learning_rate": 4.496545919673496e-05, + "loss": 0.1166, + "step": 44890 + }, + { + "epoch": 1.6316592775637764, + "grad_norm": 0.7371792793273926, + "learning_rate": 4.496233563876746e-05, + "loss": 0.1559, + "step": 44900 + }, + { + "epoch": 1.6320226760665746, + "grad_norm": 0.9537221789360046, + "learning_rate": 4.4959211220693945e-05, + "loss": 0.125, + "step": 44910 + }, + { + "epoch": 1.6323860745693728, + "grad_norm": 0.9887855648994446, + "learning_rate": 4.495608594264902e-05, + "loss": 0.1406, + "step": 44920 + }, + { + "epoch": 1.632749473072171, + "grad_norm": 0.7480888962745667, + "learning_rate": 4.495295980476737e-05, + "loss": 0.1227, + "step": 44930 + }, + { + "epoch": 1.6331128715749692, + "grad_norm": 0.784050703048706, + "learning_rate": 4.494983280718367e-05, + "loss": 0.1068, + "step": 44940 + }, + { + "epoch": 1.6334762700777672, + "grad_norm": 2.8426759243011475, + "learning_rate": 4.494670495003265e-05, + "loss": 0.1327, + "step": 44950 + }, + { + "epoch": 1.6338396685805654, + "grad_norm": 3.448587417602539, + "learning_rate": 4.494357623344909e-05, + "loss": 0.142, + "step": 44960 + }, + { + "epoch": 1.6342030670833636, + "grad_norm": 0.669575572013855, + "learning_rate": 4.4940446657567784e-05, + "loss": 0.1398, + "step": 44970 + }, + { + "epoch": 1.6345664655861618, + "grad_norm": 1.1868761777877808, + "learning_rate": 4.493731622252358e-05, + "loss": 0.117, + "step": 44980 + }, + { + "epoch": 1.63492986408896, + "grad_norm": 0.8725171685218811, + "learning_rate": 4.4934184928451364e-05, + "loss": 0.0914, + "step": 44990 + }, + { + "epoch": 1.635293262591758, + "grad_norm": 1.300013780593872, + "learning_rate": 4.493105277548605e-05, + "loss": 0.1761, + "step": 45000 + }, + { + "epoch": 1.635293262591758, + "eval_loss": 0.3556674122810364, + "eval_runtime": 180.1231, + "eval_samples_per_second": 41.161, + "eval_steps_per_second": 5.146, + "eval_wer": 0.17496868589685408, + "step": 45000 + }, + { + "epoch": 1.6356566610945564, + "grad_norm": 2.436525583267212, + "learning_rate": 4.4927919763762574e-05, + "loss": 0.1164, + "step": 45010 + }, + { + "epoch": 1.6360200595973544, + "grad_norm": 1.3236192464828491, + "learning_rate": 4.492478589341594e-05, + "loss": 0.1496, + "step": 45020 + }, + { + "epoch": 1.6363834581001526, + "grad_norm": 0.6237584948539734, + "learning_rate": 4.4921651164581185e-05, + "loss": 0.1449, + "step": 45030 + }, + { + "epoch": 1.6367468566029508, + "grad_norm": 0.5335447192192078, + "learning_rate": 4.491851557739336e-05, + "loss": 0.1349, + "step": 45040 + }, + { + "epoch": 1.6371102551057488, + "grad_norm": 1.4458340406417847, + "learning_rate": 4.491537913198757e-05, + "loss": 0.1994, + "step": 45050 + }, + { + "epoch": 1.6374736536085472, + "grad_norm": 1.4140558242797852, + "learning_rate": 4.4912241828498944e-05, + "loss": 0.1432, + "step": 45060 + }, + { + "epoch": 1.6378370521113452, + "grad_norm": 1.127317190170288, + "learning_rate": 4.4909103667062666e-05, + "loss": 0.1932, + "step": 45070 + }, + { + "epoch": 1.6382004506141434, + "grad_norm": 3.4496073722839355, + "learning_rate": 4.490596464781395e-05, + "loss": 0.1057, + "step": 45080 + }, + { + "epoch": 1.6385638491169416, + "grad_norm": 0.663720428943634, + "learning_rate": 4.490282477088805e-05, + "loss": 0.1081, + "step": 45090 + }, + { + "epoch": 1.6389272476197398, + "grad_norm": 0.8442180156707764, + "learning_rate": 4.4899684036420244e-05, + "loss": 0.1354, + "step": 45100 + }, + { + "epoch": 1.639290646122538, + "grad_norm": 1.3163623809814453, + "learning_rate": 4.489654244454585e-05, + "loss": 0.1247, + "step": 45110 + }, + { + "epoch": 1.639654044625336, + "grad_norm": 0.639021635055542, + "learning_rate": 4.489339999540023e-05, + "loss": 0.1598, + "step": 45120 + }, + { + "epoch": 1.6400174431281345, + "grad_norm": 0.549207329750061, + "learning_rate": 4.489025668911879e-05, + "loss": 0.1157, + "step": 45130 + }, + { + "epoch": 1.6403808416309325, + "grad_norm": 3.4274439811706543, + "learning_rate": 4.488711252583696e-05, + "loss": 0.1635, + "step": 45140 + }, + { + "epoch": 1.6407442401337307, + "grad_norm": 3.244072914123535, + "learning_rate": 4.488396750569022e-05, + "loss": 0.1216, + "step": 45150 + }, + { + "epoch": 1.6411076386365289, + "grad_norm": 1.9557846784591675, + "learning_rate": 4.4880821628814054e-05, + "loss": 0.1197, + "step": 45160 + }, + { + "epoch": 1.6414710371393268, + "grad_norm": 0.8653383851051331, + "learning_rate": 4.487767489534402e-05, + "loss": 0.1929, + "step": 45170 + }, + { + "epoch": 1.6418344356421253, + "grad_norm": 0.9569295048713684, + "learning_rate": 4.4874527305415706e-05, + "loss": 0.1064, + "step": 45180 + }, + { + "epoch": 1.6421978341449233, + "grad_norm": 0.9595149159431458, + "learning_rate": 4.487137885916473e-05, + "loss": 0.1087, + "step": 45190 + }, + { + "epoch": 1.6425612326477215, + "grad_norm": 1.06610906124115, + "learning_rate": 4.486822955672673e-05, + "loss": 0.1126, + "step": 45200 + }, + { + "epoch": 1.6429246311505197, + "grad_norm": 0.5096926689147949, + "learning_rate": 4.4865079398237407e-05, + "loss": 0.1175, + "step": 45210 + }, + { + "epoch": 1.6432880296533179, + "grad_norm": 0.6575452089309692, + "learning_rate": 4.48619283838325e-05, + "loss": 0.2033, + "step": 45220 + }, + { + "epoch": 1.643651428156116, + "grad_norm": 1.5489494800567627, + "learning_rate": 4.485877651364777e-05, + "loss": 0.1283, + "step": 45230 + }, + { + "epoch": 1.644014826658914, + "grad_norm": 1.2202279567718506, + "learning_rate": 4.485562378781901e-05, + "loss": 0.129, + "step": 45240 + }, + { + "epoch": 1.6443782251617123, + "grad_norm": 2.960289716720581, + "learning_rate": 4.485247020648208e-05, + "loss": 0.1547, + "step": 45250 + }, + { + "epoch": 1.6447416236645105, + "grad_norm": 1.178314447402954, + "learning_rate": 4.4849315769772835e-05, + "loss": 0.1424, + "step": 45260 + }, + { + "epoch": 1.6451050221673087, + "grad_norm": 0.47237566113471985, + "learning_rate": 4.484616047782719e-05, + "loss": 0.1003, + "step": 45270 + }, + { + "epoch": 1.645468420670107, + "grad_norm": 0.7487808465957642, + "learning_rate": 4.484300433078112e-05, + "loss": 1.3485, + "step": 45280 + }, + { + "epoch": 1.6458318191729049, + "grad_norm": 1.5242539644241333, + "learning_rate": 4.483984732877059e-05, + "loss": 0.1301, + "step": 45290 + }, + { + "epoch": 1.6461952176757033, + "grad_norm": 1.1392406225204468, + "learning_rate": 4.4836689471931624e-05, + "loss": 0.1385, + "step": 45300 + }, + { + "epoch": 1.6465586161785013, + "grad_norm": 0.856468677520752, + "learning_rate": 4.483353076040029e-05, + "loss": 0.1029, + "step": 45310 + }, + { + "epoch": 1.6469220146812995, + "grad_norm": 0.414461225271225, + "learning_rate": 4.483037119431268e-05, + "loss": 0.1697, + "step": 45320 + }, + { + "epoch": 1.6472854131840977, + "grad_norm": 1.7020654678344727, + "learning_rate": 4.482721077380494e-05, + "loss": 0.1102, + "step": 45330 + }, + { + "epoch": 1.6476488116868957, + "grad_norm": 0.9631456136703491, + "learning_rate": 4.482404949901323e-05, + "loss": 0.1193, + "step": 45340 + }, + { + "epoch": 1.6480122101896941, + "grad_norm": 0.5286620855331421, + "learning_rate": 4.482088737007376e-05, + "loss": 0.628, + "step": 45350 + }, + { + "epoch": 1.6483756086924921, + "grad_norm": 1.0761183500289917, + "learning_rate": 4.481772438712277e-05, + "loss": 0.1131, + "step": 45360 + }, + { + "epoch": 1.6487390071952903, + "grad_norm": 0.46266233921051025, + "learning_rate": 4.481456055029656e-05, + "loss": 0.1641, + "step": 45370 + }, + { + "epoch": 1.6491024056980885, + "grad_norm": 1.1350431442260742, + "learning_rate": 4.481139585973142e-05, + "loss": 0.122, + "step": 45380 + }, + { + "epoch": 1.6494658042008867, + "grad_norm": 4.3756632804870605, + "learning_rate": 4.4808230315563735e-05, + "loss": 0.1424, + "step": 45390 + }, + { + "epoch": 1.649829202703685, + "grad_norm": 1.386616826057434, + "learning_rate": 4.480506391792988e-05, + "loss": 0.1579, + "step": 45400 + }, + { + "epoch": 1.650192601206483, + "grad_norm": 0.6531800031661987, + "learning_rate": 4.480189666696629e-05, + "loss": 0.1275, + "step": 45410 + }, + { + "epoch": 1.6505559997092814, + "grad_norm": 1.8200130462646484, + "learning_rate": 4.479872856280942e-05, + "loss": 0.1943, + "step": 45420 + }, + { + "epoch": 1.6509193982120793, + "grad_norm": 0.6366170048713684, + "learning_rate": 4.47955596055958e-05, + "loss": 0.1172, + "step": 45430 + }, + { + "epoch": 1.6512827967148775, + "grad_norm": 0.8036410808563232, + "learning_rate": 4.479238979546193e-05, + "loss": 0.1181, + "step": 45440 + }, + { + "epoch": 1.6516461952176757, + "grad_norm": 0.7934151291847229, + "learning_rate": 4.47892191325444e-05, + "loss": 0.1319, + "step": 45450 + }, + { + "epoch": 1.6520095937204737, + "grad_norm": 1.5175780057907104, + "learning_rate": 4.4786047616979845e-05, + "loss": 0.1328, + "step": 45460 + }, + { + "epoch": 1.6523729922232722, + "grad_norm": 31.52168083190918, + "learning_rate": 4.478287524890489e-05, + "loss": 0.4917, + "step": 45470 + }, + { + "epoch": 1.6527363907260701, + "grad_norm": 0.8360010981559753, + "learning_rate": 4.477970202845623e-05, + "loss": 0.1071, + "step": 45480 + }, + { + "epoch": 1.6530997892288684, + "grad_norm": 0.5976376533508301, + "learning_rate": 4.4776527955770586e-05, + "loss": 0.1098, + "step": 45490 + }, + { + "epoch": 1.6534631877316666, + "grad_norm": 0.779091477394104, + "learning_rate": 4.4773353030984715e-05, + "loss": 0.1621, + "step": 45500 + }, + { + "epoch": 1.6538265862344648, + "grad_norm": 0.7147294282913208, + "learning_rate": 4.477017725423542e-05, + "loss": 0.2085, + "step": 45510 + }, + { + "epoch": 1.654189984737263, + "grad_norm": 1.0562430620193481, + "learning_rate": 4.4767000625659525e-05, + "loss": 0.1489, + "step": 45520 + }, + { + "epoch": 1.654553383240061, + "grad_norm": 0.6119662523269653, + "learning_rate": 4.4763823145393906e-05, + "loss": 0.0975, + "step": 45530 + }, + { + "epoch": 1.6549167817428592, + "grad_norm": 2.1033360958099365, + "learning_rate": 4.476064481357547e-05, + "loss": 0.1151, + "step": 45540 + }, + { + "epoch": 1.6552801802456574, + "grad_norm": 0.5644105672836304, + "learning_rate": 4.4757465630341154e-05, + "loss": 0.1479, + "step": 45550 + }, + { + "epoch": 1.6556435787484556, + "grad_norm": 1.2466843128204346, + "learning_rate": 4.475428559582794e-05, + "loss": 0.1047, + "step": 45560 + }, + { + "epoch": 1.6560069772512538, + "grad_norm": 0.6210132241249084, + "learning_rate": 4.475110471017285e-05, + "loss": 0.1703, + "step": 45570 + }, + { + "epoch": 1.6563703757540518, + "grad_norm": 0.5731077194213867, + "learning_rate": 4.474792297351293e-05, + "loss": 0.1154, + "step": 45580 + }, + { + "epoch": 1.6567337742568502, + "grad_norm": 1.2748225927352905, + "learning_rate": 4.474474038598527e-05, + "loss": 0.1106, + "step": 45590 + }, + { + "epoch": 1.6570971727596482, + "grad_norm": 1.0353822708129883, + "learning_rate": 4.4741556947727e-05, + "loss": 0.1453, + "step": 45600 + }, + { + "epoch": 1.6570971727596482, + "eval_loss": 0.35429847240448, + "eval_runtime": 180.0899, + "eval_samples_per_second": 41.168, + "eval_steps_per_second": 5.147, + "eval_wer": 0.16677255976909253, + "step": 45600 + }, + { + "epoch": 1.6574605712624464, + "grad_norm": 0.8197756409645081, + "learning_rate": 4.4738372658875286e-05, + "loss": 0.6064, + "step": 45610 + }, + { + "epoch": 1.6578239697652446, + "grad_norm": 1.9029946327209473, + "learning_rate": 4.473518751956732e-05, + "loss": 0.122, + "step": 45620 + }, + { + "epoch": 1.6581873682680426, + "grad_norm": 0.897566020488739, + "learning_rate": 4.473200152994035e-05, + "loss": 0.5106, + "step": 45630 + }, + { + "epoch": 1.658550766770841, + "grad_norm": 0.448548823595047, + "learning_rate": 4.472881469013163e-05, + "loss": 0.1316, + "step": 45640 + }, + { + "epoch": 1.658914165273639, + "grad_norm": 0.9315693974494934, + "learning_rate": 4.472562700027849e-05, + "loss": 0.1616, + "step": 45650 + }, + { + "epoch": 1.6592775637764372, + "grad_norm": 0.6731955409049988, + "learning_rate": 4.4722438460518255e-05, + "loss": 0.1138, + "step": 45660 + }, + { + "epoch": 1.6596409622792354, + "grad_norm": 0.949320375919342, + "learning_rate": 4.4719249070988325e-05, + "loss": 0.1464, + "step": 45670 + }, + { + "epoch": 1.6600043607820336, + "grad_norm": 1.0242235660552979, + "learning_rate": 4.471605883182611e-05, + "loss": 0.1135, + "step": 45680 + }, + { + "epoch": 1.6603677592848318, + "grad_norm": 2.5394222736358643, + "learning_rate": 4.471318689025813e-05, + "loss": 1.5802, + "step": 45690 + }, + { + "epoch": 1.6607311577876298, + "grad_norm": 0.5729508996009827, + "learning_rate": 4.4709995037173305e-05, + "loss": 0.44, + "step": 45700 + }, + { + "epoch": 1.6610945562904282, + "grad_norm": 1.372788906097412, + "learning_rate": 4.470680233485492e-05, + "loss": 0.1286, + "step": 45710 + }, + { + "epoch": 1.6614579547932262, + "grad_norm": 1.5759491920471191, + "learning_rate": 4.470360878344055e-05, + "loss": 0.1221, + "step": 45720 + }, + { + "epoch": 1.6618213532960244, + "grad_norm": 1.0494245290756226, + "learning_rate": 4.470041438306778e-05, + "loss": 0.126, + "step": 45730 + }, + { + "epoch": 1.6621847517988226, + "grad_norm": 0.469928115606308, + "learning_rate": 4.469721913387424e-05, + "loss": 0.1295, + "step": 45740 + }, + { + "epoch": 1.6625481503016206, + "grad_norm": 0.9547176361083984, + "learning_rate": 4.469402303599761e-05, + "loss": 0.1724, + "step": 45750 + }, + { + "epoch": 1.662911548804419, + "grad_norm": 0.5945098400115967, + "learning_rate": 4.469082608957561e-05, + "loss": 0.1282, + "step": 45760 + }, + { + "epoch": 1.663274947307217, + "grad_norm": 0.8782799243927002, + "learning_rate": 4.468762829474597e-05, + "loss": 0.1594, + "step": 45770 + }, + { + "epoch": 1.6636383458100152, + "grad_norm": 0.8542808294296265, + "learning_rate": 4.4684429651646476e-05, + "loss": 0.1147, + "step": 45780 + }, + { + "epoch": 1.6640017443128134, + "grad_norm": 1.189684271812439, + "learning_rate": 4.4681230160414946e-05, + "loss": 0.1348, + "step": 45790 + }, + { + "epoch": 1.6643651428156117, + "grad_norm": 0.9197025895118713, + "learning_rate": 4.467802982118923e-05, + "loss": 0.1528, + "step": 45800 + }, + { + "epoch": 1.6647285413184099, + "grad_norm": 0.5935563445091248, + "learning_rate": 4.4674828634107226e-05, + "loss": 0.1356, + "step": 45810 + }, + { + "epoch": 1.6650919398212078, + "grad_norm": 0.7441408038139343, + "learning_rate": 4.467162659930686e-05, + "loss": 0.1553, + "step": 45820 + }, + { + "epoch": 1.665455338324006, + "grad_norm": 0.5700821280479431, + "learning_rate": 4.466842371692609e-05, + "loss": 0.1206, + "step": 45830 + }, + { + "epoch": 1.6658187368268043, + "grad_norm": 1.0566598176956177, + "learning_rate": 4.466521998710292e-05, + "loss": 0.1137, + "step": 45840 + }, + { + "epoch": 1.6661821353296025, + "grad_norm": 0.8243798613548279, + "learning_rate": 4.4662015409975406e-05, + "loss": 0.1531, + "step": 45850 + }, + { + "epoch": 1.6665455338324007, + "grad_norm": 1.1144201755523682, + "learning_rate": 4.465880998568159e-05, + "loss": 0.1122, + "step": 45860 + }, + { + "epoch": 1.6669089323351987, + "grad_norm": 0.8346664309501648, + "learning_rate": 4.46556037143596e-05, + "loss": 0.1365, + "step": 45870 + }, + { + "epoch": 1.667272330837997, + "grad_norm": 1.140259027481079, + "learning_rate": 4.46523965961476e-05, + "loss": 0.1105, + "step": 45880 + }, + { + "epoch": 1.667635729340795, + "grad_norm": 1.7616723775863647, + "learning_rate": 4.464918863118374e-05, + "loss": 0.1092, + "step": 45890 + }, + { + "epoch": 1.6679991278435933, + "grad_norm": 0.5135784149169922, + "learning_rate": 4.464597981960625e-05, + "loss": 0.1502, + "step": 45900 + }, + { + "epoch": 1.6683625263463915, + "grad_norm": 1.542801022529602, + "learning_rate": 4.464277016155339e-05, + "loss": 0.125, + "step": 45910 + }, + { + "epoch": 1.6687259248491895, + "grad_norm": 0.31144529581069946, + "learning_rate": 4.463955965716346e-05, + "loss": 0.21, + "step": 45920 + }, + { + "epoch": 1.669089323351988, + "grad_norm": 0.642985463142395, + "learning_rate": 4.463634830657478e-05, + "loss": 0.1213, + "step": 45930 + }, + { + "epoch": 1.6694527218547859, + "grad_norm": 2.417689800262451, + "learning_rate": 4.4633136109925716e-05, + "loss": 0.1244, + "step": 45940 + }, + { + "epoch": 1.669816120357584, + "grad_norm": 0.5426376461982727, + "learning_rate": 4.462992306735467e-05, + "loss": 0.1789, + "step": 45950 + }, + { + "epoch": 1.6701795188603823, + "grad_norm": 0.5924781560897827, + "learning_rate": 4.4626709179000094e-05, + "loss": 0.118, + "step": 45960 + }, + { + "epoch": 1.6705429173631805, + "grad_norm": 0.5799354314804077, + "learning_rate": 4.4623494445000435e-05, + "loss": 0.1714, + "step": 45970 + }, + { + "epoch": 1.6709063158659787, + "grad_norm": 0.6282142400741577, + "learning_rate": 4.462027886549423e-05, + "loss": 0.1099, + "step": 45980 + }, + { + "epoch": 1.6712697143687767, + "grad_norm": 1.1201330423355103, + "learning_rate": 4.461706244062002e-05, + "loss": 0.1281, + "step": 45990 + }, + { + "epoch": 1.6716331128715751, + "grad_norm": 1.1922492980957031, + "learning_rate": 4.461384517051638e-05, + "loss": 0.1245, + "step": 46000 + }, + { + "epoch": 1.671996511374373, + "grad_norm": 1.0188281536102295, + "learning_rate": 4.461062705532194e-05, + "loss": 0.1715, + "step": 46010 + }, + { + "epoch": 1.6723599098771713, + "grad_norm": 0.5861912369728088, + "learning_rate": 4.4607408095175364e-05, + "loss": 0.1391, + "step": 46020 + }, + { + "epoch": 1.6727233083799695, + "grad_norm": 0.6984696388244629, + "learning_rate": 4.4604188290215324e-05, + "loss": 0.116, + "step": 46030 + }, + { + "epoch": 1.6730867068827675, + "grad_norm": 0.5184624791145325, + "learning_rate": 4.460096764058057e-05, + "loss": 0.1173, + "step": 46040 + }, + { + "epoch": 1.673450105385566, + "grad_norm": 0.39695462584495544, + "learning_rate": 4.4597746146409856e-05, + "loss": 0.1325, + "step": 46050 + }, + { + "epoch": 1.673813503888364, + "grad_norm": 0.687271237373352, + "learning_rate": 4.459452380784199e-05, + "loss": 0.1123, + "step": 46060 + }, + { + "epoch": 1.6741769023911621, + "grad_norm": 0.8372097015380859, + "learning_rate": 4.459130062501582e-05, + "loss": 0.1895, + "step": 46070 + }, + { + "epoch": 1.6745403008939603, + "grad_norm": 1.8692165613174438, + "learning_rate": 4.4588076598070206e-05, + "loss": 0.1213, + "step": 46080 + }, + { + "epoch": 1.6749036993967585, + "grad_norm": 12.919623374938965, + "learning_rate": 4.458485172714406e-05, + "loss": 0.115, + "step": 46090 + }, + { + "epoch": 1.6752670978995567, + "grad_norm": 0.6733956933021545, + "learning_rate": 4.458162601237634e-05, + "loss": 0.1473, + "step": 46100 + }, + { + "epoch": 1.6756304964023547, + "grad_norm": 0.8653566241264343, + "learning_rate": 4.457839945390603e-05, + "loss": 0.0995, + "step": 46110 + }, + { + "epoch": 1.675993894905153, + "grad_norm": 0.536120593547821, + "learning_rate": 4.4575172051872145e-05, + "loss": 0.1494, + "step": 46120 + }, + { + "epoch": 1.6763572934079511, + "grad_norm": 3.844902753829956, + "learning_rate": 4.4571943806413743e-05, + "loss": 0.1086, + "step": 46130 + }, + { + "epoch": 1.6767206919107494, + "grad_norm": 2.0951857566833496, + "learning_rate": 4.4568714717669926e-05, + "loss": 0.111, + "step": 46140 + }, + { + "epoch": 1.6770840904135476, + "grad_norm": 2.954204559326172, + "learning_rate": 4.456548478577981e-05, + "loss": 0.1481, + "step": 46150 + }, + { + "epoch": 1.6774474889163455, + "grad_norm": 0.7243287563323975, + "learning_rate": 4.456225401088258e-05, + "loss": 0.1192, + "step": 46160 + }, + { + "epoch": 1.677810887419144, + "grad_norm": 1.103082299232483, + "learning_rate": 4.455902239311741e-05, + "loss": 0.156, + "step": 46170 + }, + { + "epoch": 1.678174285921942, + "grad_norm": 1.2734848260879517, + "learning_rate": 4.455578993262357e-05, + "loss": 0.1154, + "step": 46180 + }, + { + "epoch": 1.6785376844247402, + "grad_norm": 0.9912572503089905, + "learning_rate": 4.455255662954032e-05, + "loss": 0.1228, + "step": 46190 + }, + { + "epoch": 1.6789010829275384, + "grad_norm": 0.8736640214920044, + "learning_rate": 4.454932248400697e-05, + "loss": 0.12, + "step": 46200 + }, + { + "epoch": 1.6789010829275384, + "eval_loss": 0.3570244014263153, + "eval_runtime": 179.9492, + "eval_samples_per_second": 41.201, + "eval_steps_per_second": 5.151, + "eval_wer": 0.17022164939096338, + "step": 46200 + }, + { + "epoch": 1.6792644814303364, + "grad_norm": 3.907130241394043, + "learning_rate": 4.454608749616287e-05, + "loss": 0.1108, + "step": 46210 + }, + { + "epoch": 1.6796278799331348, + "grad_norm": 0.3899100720882416, + "learning_rate": 4.4542851666147404e-05, + "loss": 0.1699, + "step": 46220 + }, + { + "epoch": 1.6799912784359328, + "grad_norm": 1.6752989292144775, + "learning_rate": 4.45396149941e-05, + "loss": 0.124, + "step": 46230 + }, + { + "epoch": 1.680354676938731, + "grad_norm": 5.3016886711120605, + "learning_rate": 4.453637748016011e-05, + "loss": 0.1239, + "step": 46240 + }, + { + "epoch": 1.6807180754415292, + "grad_norm": 2.723459482192993, + "learning_rate": 4.453313912446722e-05, + "loss": 0.1245, + "step": 46250 + }, + { + "epoch": 1.6810814739443274, + "grad_norm": 2.0152530670166016, + "learning_rate": 4.4529899927160854e-05, + "loss": 0.1056, + "step": 46260 + }, + { + "epoch": 1.6814448724471256, + "grad_norm": 0.7301231622695923, + "learning_rate": 4.452665988838059e-05, + "loss": 0.1899, + "step": 46270 + }, + { + "epoch": 1.6818082709499236, + "grad_norm": 0.7544482350349426, + "learning_rate": 4.4523419008266045e-05, + "loss": 0.1233, + "step": 46280 + }, + { + "epoch": 1.682171669452722, + "grad_norm": 0.9912691712379456, + "learning_rate": 4.4520177286956824e-05, + "loss": 0.1263, + "step": 46290 + }, + { + "epoch": 1.68253506795552, + "grad_norm": 2.0335001945495605, + "learning_rate": 4.451693472459262e-05, + "loss": 0.1328, + "step": 46300 + }, + { + "epoch": 1.6828984664583182, + "grad_norm": 0.5679906606674194, + "learning_rate": 4.451369132131314e-05, + "loss": 0.0828, + "step": 46310 + }, + { + "epoch": 1.6832618649611164, + "grad_norm": 1.3262155055999756, + "learning_rate": 4.451044707725814e-05, + "loss": 0.1426, + "step": 46320 + }, + { + "epoch": 1.6836252634639144, + "grad_norm": 1.1101963520050049, + "learning_rate": 4.4507201992567386e-05, + "loss": 0.1385, + "step": 46330 + }, + { + "epoch": 1.6839886619667128, + "grad_norm": 0.8079712390899658, + "learning_rate": 4.4503956067380704e-05, + "loss": 0.1278, + "step": 46340 + }, + { + "epoch": 1.6843520604695108, + "grad_norm": 0.79506516456604, + "learning_rate": 4.450070930183795e-05, + "loss": 0.1281, + "step": 46350 + }, + { + "epoch": 1.684715458972309, + "grad_norm": 1.1767312288284302, + "learning_rate": 4.4497461696079024e-05, + "loss": 0.1299, + "step": 46360 + }, + { + "epoch": 1.6850788574751072, + "grad_norm": 0.7996610403060913, + "learning_rate": 4.449421325024384e-05, + "loss": 0.156, + "step": 46370 + }, + { + "epoch": 1.6854422559779054, + "grad_norm": 0.638761579990387, + "learning_rate": 4.449096396447237e-05, + "loss": 0.1197, + "step": 46380 + }, + { + "epoch": 1.6858056544807036, + "grad_norm": 3.2339584827423096, + "learning_rate": 4.448771383890461e-05, + "loss": 0.0992, + "step": 46390 + }, + { + "epoch": 1.6861690529835016, + "grad_norm": 1.0168710947036743, + "learning_rate": 4.448446287368059e-05, + "loss": 0.1471, + "step": 46400 + }, + { + "epoch": 1.6865324514862998, + "grad_norm": 0.7783123850822449, + "learning_rate": 4.44812110689404e-05, + "loss": 0.1012, + "step": 46410 + }, + { + "epoch": 1.686895849989098, + "grad_norm": 0.7757607698440552, + "learning_rate": 4.447795842482414e-05, + "loss": 0.1355, + "step": 46420 + }, + { + "epoch": 1.6872592484918962, + "grad_norm": 1.9442954063415527, + "learning_rate": 4.447470494147195e-05, + "loss": 0.0953, + "step": 46430 + }, + { + "epoch": 1.6876226469946944, + "grad_norm": 1.0810720920562744, + "learning_rate": 4.447145061902401e-05, + "loss": 0.1037, + "step": 46440 + }, + { + "epoch": 1.6879860454974924, + "grad_norm": 5.812492847442627, + "learning_rate": 4.4468195457620556e-05, + "loss": 0.1376, + "step": 46450 + }, + { + "epoch": 1.6883494440002909, + "grad_norm": 2.3588967323303223, + "learning_rate": 4.4464939457401825e-05, + "loss": 0.1164, + "step": 46460 + }, + { + "epoch": 1.6887128425030888, + "grad_norm": 1.6339848041534424, + "learning_rate": 4.4461682618508106e-05, + "loss": 0.1604, + "step": 46470 + }, + { + "epoch": 1.689076241005887, + "grad_norm": 1.6590059995651245, + "learning_rate": 4.445842494107973e-05, + "loss": 0.1529, + "step": 46480 + }, + { + "epoch": 1.6894396395086853, + "grad_norm": 0.8776388764381409, + "learning_rate": 4.445516642525705e-05, + "loss": 0.1165, + "step": 46490 + }, + { + "epoch": 1.6898030380114832, + "grad_norm": 2.1173806190490723, + "learning_rate": 4.4451907071180474e-05, + "loss": 0.1431, + "step": 46500 + }, + { + "epoch": 1.6901664365142817, + "grad_norm": 1.3882853984832764, + "learning_rate": 4.444864687899043e-05, + "loss": 0.134, + "step": 46510 + }, + { + "epoch": 1.6905298350170797, + "grad_norm": 0.5224485993385315, + "learning_rate": 4.4445385848827395e-05, + "loss": 0.1586, + "step": 46520 + }, + { + "epoch": 1.6908932335198779, + "grad_norm": 1.3461922407150269, + "learning_rate": 4.444212398083187e-05, + "loss": 0.119, + "step": 46530 + }, + { + "epoch": 1.691256632022676, + "grad_norm": 0.7005299925804138, + "learning_rate": 4.4438861275144395e-05, + "loss": 0.1046, + "step": 46540 + }, + { + "epoch": 1.6916200305254743, + "grad_norm": 2.497610092163086, + "learning_rate": 4.4435597731905554e-05, + "loss": 0.1164, + "step": 46550 + }, + { + "epoch": 1.6919834290282725, + "grad_norm": 4.168522357940674, + "learning_rate": 4.443233335125596e-05, + "loss": 0.1342, + "step": 46560 + }, + { + "epoch": 1.6923468275310705, + "grad_norm": 0.59686678647995, + "learning_rate": 4.442906813333626e-05, + "loss": 0.1536, + "step": 46570 + }, + { + "epoch": 1.692710226033869, + "grad_norm": 0.9300062656402588, + "learning_rate": 4.442580207828715e-05, + "loss": 0.1176, + "step": 46580 + }, + { + "epoch": 1.6930736245366669, + "grad_norm": 0.8389412760734558, + "learning_rate": 4.442253518624934e-05, + "loss": 0.1085, + "step": 46590 + }, + { + "epoch": 1.693437023039465, + "grad_norm": 1.0124256610870361, + "learning_rate": 4.441926745736359e-05, + "loss": 0.1357, + "step": 46600 + }, + { + "epoch": 1.6938004215422633, + "grad_norm": 1.5312106609344482, + "learning_rate": 4.4415998891770704e-05, + "loss": 0.0911, + "step": 46610 + }, + { + "epoch": 1.6941638200450613, + "grad_norm": 0.7724300622940063, + "learning_rate": 4.441272948961151e-05, + "loss": 0.1338, + "step": 46620 + }, + { + "epoch": 1.6945272185478597, + "grad_norm": 0.9552409052848816, + "learning_rate": 4.4409459251026864e-05, + "loss": 0.1181, + "step": 46630 + }, + { + "epoch": 1.6948906170506577, + "grad_norm": 0.7531419992446899, + "learning_rate": 4.440618817615768e-05, + "loss": 0.1301, + "step": 46640 + }, + { + "epoch": 1.695254015553456, + "grad_norm": 1.4831467866897583, + "learning_rate": 4.44029162651449e-05, + "loss": 0.1734, + "step": 46650 + }, + { + "epoch": 1.695617414056254, + "grad_norm": 1.008574366569519, + "learning_rate": 4.43996435181295e-05, + "loss": 0.1205, + "step": 46660 + }, + { + "epoch": 1.6959808125590523, + "grad_norm": 0.7653814554214478, + "learning_rate": 4.4396369935252475e-05, + "loss": 0.1886, + "step": 46670 + }, + { + "epoch": 1.6963442110618505, + "grad_norm": 0.668803334236145, + "learning_rate": 4.439309551665488e-05, + "loss": 0.1066, + "step": 46680 + }, + { + "epoch": 1.6967076095646485, + "grad_norm": 0.6227422952651978, + "learning_rate": 4.438982026247781e-05, + "loss": 0.1191, + "step": 46690 + }, + { + "epoch": 1.6970710080674467, + "grad_norm": 4.826232433319092, + "learning_rate": 4.438654417286237e-05, + "loss": 0.1594, + "step": 46700 + }, + { + "epoch": 1.697434406570245, + "grad_norm": 7.682708740234375, + "learning_rate": 4.4383267247949714e-05, + "loss": 0.1131, + "step": 46710 + }, + { + "epoch": 1.6977978050730431, + "grad_norm": 1.0136793851852417, + "learning_rate": 4.4379989487881046e-05, + "loss": 0.1575, + "step": 46720 + }, + { + "epoch": 1.6981612035758413, + "grad_norm": 2.774850606918335, + "learning_rate": 4.437671089279758e-05, + "loss": 0.1384, + "step": 46730 + }, + { + "epoch": 1.6985246020786393, + "grad_norm": 11.532723426818848, + "learning_rate": 4.4373431462840584e-05, + "loss": 0.1338, + "step": 46740 + }, + { + "epoch": 1.6988880005814377, + "grad_norm": 1.9600322246551514, + "learning_rate": 4.437015119815136e-05, + "loss": 0.1311, + "step": 46750 + }, + { + "epoch": 1.6992513990842357, + "grad_norm": 0.5886809229850769, + "learning_rate": 4.436687009887124e-05, + "loss": 0.1647, + "step": 46760 + }, + { + "epoch": 1.699614797587034, + "grad_norm": 2.5982067584991455, + "learning_rate": 4.436358816514159e-05, + "loss": 0.2078, + "step": 46770 + }, + { + "epoch": 1.6999781960898321, + "grad_norm": 0.7927113771438599, + "learning_rate": 4.436030539710383e-05, + "loss": 0.1017, + "step": 46780 + }, + { + "epoch": 1.7003415945926301, + "grad_norm": 0.5954931974411011, + "learning_rate": 4.435702179489939e-05, + "loss": 0.1354, + "step": 46790 + }, + { + "epoch": 1.7007049930954286, + "grad_norm": 1.10712468624115, + "learning_rate": 4.4353737358669755e-05, + "loss": 0.1531, + "step": 46800 + }, + { + "epoch": 1.7007049930954286, + "eval_loss": 0.3568388819694519, + "eval_runtime": 179.9769, + "eval_samples_per_second": 41.194, + "eval_steps_per_second": 5.151, + "eval_wer": 0.16614627770617388, + "step": 46800 + }, + { + "epoch": 1.7010683915982265, + "grad_norm": 2.1790926456451416, + "learning_rate": 4.435045208855644e-05, + "loss": 0.1336, + "step": 46810 + }, + { + "epoch": 1.7014317901010247, + "grad_norm": 0.3032105267047882, + "learning_rate": 4.434716598470099e-05, + "loss": 0.1234, + "step": 46820 + }, + { + "epoch": 1.701795188603823, + "grad_norm": 0.8836687207221985, + "learning_rate": 4.434387904724499e-05, + "loss": 0.1007, + "step": 46830 + }, + { + "epoch": 1.7021585871066212, + "grad_norm": 0.9671638011932373, + "learning_rate": 4.4340591276330075e-05, + "loss": 0.3498, + "step": 46840 + }, + { + "epoch": 1.7025219856094194, + "grad_norm": 3.225950002670288, + "learning_rate": 4.4337302672097894e-05, + "loss": 0.1493, + "step": 46850 + }, + { + "epoch": 1.7028853841122173, + "grad_norm": 1.0754051208496094, + "learning_rate": 4.4334013234690144e-05, + "loss": 0.11, + "step": 46860 + }, + { + "epoch": 1.7032487826150158, + "grad_norm": 0.3170652687549591, + "learning_rate": 4.433072296424855e-05, + "loss": 0.1543, + "step": 46870 + }, + { + "epoch": 1.7036121811178138, + "grad_norm": 2.504772424697876, + "learning_rate": 4.4327431860914885e-05, + "loss": 0.1221, + "step": 46880 + }, + { + "epoch": 1.703975579620612, + "grad_norm": 3.071819305419922, + "learning_rate": 4.4324139924830956e-05, + "loss": 2.0837, + "step": 46890 + }, + { + "epoch": 1.7043389781234102, + "grad_norm": 0.6085606813430786, + "learning_rate": 4.4320847156138584e-05, + "loss": 0.2395, + "step": 46900 + }, + { + "epoch": 1.7047023766262082, + "grad_norm": 1.1668941974639893, + "learning_rate": 4.431755355497965e-05, + "loss": 1.7169, + "step": 46910 + }, + { + "epoch": 1.7050657751290066, + "grad_norm": 2.0856447219848633, + "learning_rate": 4.431425912149607e-05, + "loss": 0.1911, + "step": 46920 + }, + { + "epoch": 1.7054291736318046, + "grad_norm": 2.468442916870117, + "learning_rate": 4.431096385582979e-05, + "loss": 0.1408, + "step": 46930 + }, + { + "epoch": 1.7057925721346028, + "grad_norm": 0.5875902771949768, + "learning_rate": 4.430766775812278e-05, + "loss": 0.0959, + "step": 46940 + }, + { + "epoch": 1.706155970637401, + "grad_norm": 0.7292661070823669, + "learning_rate": 4.430437082851706e-05, + "loss": 0.1416, + "step": 46950 + }, + { + "epoch": 1.7065193691401992, + "grad_norm": 0.9893856644630432, + "learning_rate": 4.43010730671547e-05, + "loss": 0.1233, + "step": 46960 + }, + { + "epoch": 1.7068827676429974, + "grad_norm": 0.5555475950241089, + "learning_rate": 4.4297774474177755e-05, + "loss": 0.1817, + "step": 46970 + }, + { + "epoch": 1.7072461661457954, + "grad_norm": 2.520148754119873, + "learning_rate": 4.429447504972838e-05, + "loss": 0.123, + "step": 46980 + }, + { + "epoch": 1.7076095646485938, + "grad_norm": 0.4278533160686493, + "learning_rate": 4.429117479394873e-05, + "loss": 0.1161, + "step": 46990 + }, + { + "epoch": 1.7079729631513918, + "grad_norm": 1.8297946453094482, + "learning_rate": 4.428787370698099e-05, + "loss": 0.1347, + "step": 47000 + }, + { + "epoch": 1.70833636165419, + "grad_norm": 1.1131938695907593, + "learning_rate": 4.42845717889674e-05, + "loss": 0.113, + "step": 47010 + }, + { + "epoch": 1.7086997601569882, + "grad_norm": 0.6185646057128906, + "learning_rate": 4.428126904005022e-05, + "loss": 0.179, + "step": 47020 + }, + { + "epoch": 1.7090631586597862, + "grad_norm": 0.5343379378318787, + "learning_rate": 4.4277965460371775e-05, + "loss": 0.1108, + "step": 47030 + }, + { + "epoch": 1.7094265571625846, + "grad_norm": 1.2087206840515137, + "learning_rate": 4.427466105007437e-05, + "loss": 0.1308, + "step": 47040 + }, + { + "epoch": 1.7097899556653826, + "grad_norm": 1.3067313432693481, + "learning_rate": 4.4271355809300416e-05, + "loss": 0.126, + "step": 47050 + }, + { + "epoch": 1.7101533541681808, + "grad_norm": 3.69439697265625, + "learning_rate": 4.42680497381923e-05, + "loss": 0.1235, + "step": 47060 + }, + { + "epoch": 1.710516752670979, + "grad_norm": 1.7959043979644775, + "learning_rate": 4.4264742836892484e-05, + "loss": 0.1596, + "step": 47070 + }, + { + "epoch": 1.710880151173777, + "grad_norm": 0.7556483149528503, + "learning_rate": 4.4261435105543434e-05, + "loss": 0.139, + "step": 47080 + }, + { + "epoch": 1.7112435496765754, + "grad_norm": 1.0691754817962646, + "learning_rate": 4.425812654428768e-05, + "loss": 0.1137, + "step": 47090 + }, + { + "epoch": 1.7116069481793734, + "grad_norm": 0.39985036849975586, + "learning_rate": 4.425481715326778e-05, + "loss": 0.1147, + "step": 47100 + }, + { + "epoch": 1.7119703466821716, + "grad_norm": 0.8595932126045227, + "learning_rate": 4.425150693262631e-05, + "loss": 0.124, + "step": 47110 + }, + { + "epoch": 1.7123337451849698, + "grad_norm": 1.0184409618377686, + "learning_rate": 4.424819588250591e-05, + "loss": 0.1196, + "step": 47120 + }, + { + "epoch": 1.712697143687768, + "grad_norm": 0.9141554236412048, + "learning_rate": 4.4244884003049234e-05, + "loss": 0.1139, + "step": 47130 + }, + { + "epoch": 1.7130605421905662, + "grad_norm": 3.891220808029175, + "learning_rate": 4.424157129439897e-05, + "loss": 0.1123, + "step": 47140 + }, + { + "epoch": 1.7134239406933642, + "grad_norm": 1.1739959716796875, + "learning_rate": 4.4238257756697875e-05, + "loss": 0.1472, + "step": 47150 + }, + { + "epoch": 1.7137873391961627, + "grad_norm": 1.062530517578125, + "learning_rate": 4.423494339008869e-05, + "loss": 0.1222, + "step": 47160 + }, + { + "epoch": 1.7141507376989606, + "grad_norm": 0.840980052947998, + "learning_rate": 4.423162819471424e-05, + "loss": 0.209, + "step": 47170 + }, + { + "epoch": 1.7145141362017589, + "grad_norm": 1.5960949659347534, + "learning_rate": 4.4228312170717353e-05, + "loss": 0.2366, + "step": 47180 + }, + { + "epoch": 1.714877534704557, + "grad_norm": 1.9222055673599243, + "learning_rate": 4.4224995318240914e-05, + "loss": 0.1195, + "step": 47190 + }, + { + "epoch": 1.715240933207355, + "grad_norm": 0.9651756286621094, + "learning_rate": 4.422167763742783e-05, + "loss": 1.1511, + "step": 47200 + }, + { + "epoch": 1.7156043317101535, + "grad_norm": 0.7599210739135742, + "learning_rate": 4.421835912842105e-05, + "loss": 0.1067, + "step": 47210 + }, + { + "epoch": 1.7159677302129515, + "grad_norm": 0.9147046804428101, + "learning_rate": 4.4215039791363546e-05, + "loss": 0.2011, + "step": 47220 + }, + { + "epoch": 1.7163311287157497, + "grad_norm": 3.473452091217041, + "learning_rate": 4.421171962639835e-05, + "loss": 0.1297, + "step": 47230 + }, + { + "epoch": 1.7166945272185479, + "grad_norm": 0.8271628618240356, + "learning_rate": 4.420839863366851e-05, + "loss": 0.1042, + "step": 47240 + }, + { + "epoch": 1.717057925721346, + "grad_norm": 1.2343850135803223, + "learning_rate": 4.4205076813317115e-05, + "loss": 0.1401, + "step": 47250 + }, + { + "epoch": 1.7174213242241443, + "grad_norm": 2.5408592224121094, + "learning_rate": 4.420175416548729e-05, + "loss": 0.1331, + "step": 47260 + }, + { + "epoch": 1.7177847227269423, + "grad_norm": 0.9061927795410156, + "learning_rate": 4.41984306903222e-05, + "loss": 0.151, + "step": 47270 + }, + { + "epoch": 1.7181481212297407, + "grad_norm": 1.4675298929214478, + "learning_rate": 4.419510638796505e-05, + "loss": 0.133, + "step": 47280 + }, + { + "epoch": 1.7185115197325387, + "grad_norm": 0.5093546509742737, + "learning_rate": 4.4191781258559044e-05, + "loss": 0.1091, + "step": 47290 + }, + { + "epoch": 1.718874918235337, + "grad_norm": 1.7886688709259033, + "learning_rate": 4.418845530224748e-05, + "loss": 0.1364, + "step": 47300 + }, + { + "epoch": 1.719238316738135, + "grad_norm": 0.9077005982398987, + "learning_rate": 4.418512851917365e-05, + "loss": 0.0886, + "step": 47310 + }, + { + "epoch": 1.719601715240933, + "grad_norm": 0.9992018938064575, + "learning_rate": 4.418180090948088e-05, + "loss": 0.204, + "step": 47320 + }, + { + "epoch": 1.7199651137437315, + "grad_norm": 3.1602091789245605, + "learning_rate": 4.417847247331257e-05, + "loss": 0.1099, + "step": 47330 + }, + { + "epoch": 1.7203285122465295, + "grad_norm": 0.867824912071228, + "learning_rate": 4.417514321081212e-05, + "loss": 0.1015, + "step": 47340 + }, + { + "epoch": 1.7206919107493277, + "grad_norm": 0.8763206005096436, + "learning_rate": 4.4171813122122966e-05, + "loss": 0.1669, + "step": 47350 + }, + { + "epoch": 1.721055309252126, + "grad_norm": 0.579663097858429, + "learning_rate": 4.4168482207388604e-05, + "loss": 0.1216, + "step": 47360 + }, + { + "epoch": 1.721418707754924, + "grad_norm": 0.9902794361114502, + "learning_rate": 4.416515046675255e-05, + "loss": 0.1507, + "step": 47370 + }, + { + "epoch": 1.7217821062577223, + "grad_norm": 1.082514762878418, + "learning_rate": 4.4161817900358334e-05, + "loss": 0.1218, + "step": 47380 + }, + { + "epoch": 1.7221455047605203, + "grad_norm": 1.6784402132034302, + "learning_rate": 4.415848450834958e-05, + "loss": 0.1112, + "step": 47390 + }, + { + "epoch": 1.7225089032633185, + "grad_norm": 1.4955846071243286, + "learning_rate": 4.415515029086989e-05, + "loss": 0.1539, + "step": 47400 + }, + { + "epoch": 1.7225089032633185, + "eval_loss": 0.3519718050956726, + "eval_runtime": 180.5197, + "eval_samples_per_second": 41.07, + "eval_steps_per_second": 5.135, + "eval_wer": 0.1726087824713635, + "step": 47400 + }, + { + "epoch": 1.7228723017661167, + "grad_norm": 1.609122395515442, + "learning_rate": 4.415181524806293e-05, + "loss": 0.1218, + "step": 47410 + }, + { + "epoch": 1.723235700268915, + "grad_norm": 0.5392347574234009, + "learning_rate": 4.4148479380072386e-05, + "loss": 0.1304, + "step": 47420 + }, + { + "epoch": 1.7235990987717131, + "grad_norm": 1.874009132385254, + "learning_rate": 4.4145142687042e-05, + "loss": 0.1251, + "step": 47430 + }, + { + "epoch": 1.7239624972745111, + "grad_norm": 0.5162834525108337, + "learning_rate": 4.4141805169115534e-05, + "loss": 0.0922, + "step": 47440 + }, + { + "epoch": 1.7243258957773095, + "grad_norm": 0.5982137322425842, + "learning_rate": 4.41384668264368e-05, + "loss": 0.1274, + "step": 47450 + }, + { + "epoch": 1.7246892942801075, + "grad_norm": 3.50940203666687, + "learning_rate": 4.413512765914961e-05, + "loss": 0.134, + "step": 47460 + }, + { + "epoch": 1.7250526927829057, + "grad_norm": 0.49185237288475037, + "learning_rate": 4.413178766739786e-05, + "loss": 0.1573, + "step": 47470 + }, + { + "epoch": 1.725416091285704, + "grad_norm": 0.7658770680427551, + "learning_rate": 4.412844685132545e-05, + "loss": 0.1231, + "step": 47480 + }, + { + "epoch": 1.725779489788502, + "grad_norm": 0.7022835612297058, + "learning_rate": 4.4125105211076324e-05, + "loss": 0.124, + "step": 47490 + }, + { + "epoch": 1.7261428882913004, + "grad_norm": 0.7009884119033813, + "learning_rate": 4.4121762746794456e-05, + "loss": 0.1207, + "step": 47500 + }, + { + "epoch": 1.7265062867940983, + "grad_norm": 1.4415069818496704, + "learning_rate": 4.4118419458623875e-05, + "loss": 0.1223, + "step": 47510 + }, + { + "epoch": 1.7268696852968966, + "grad_norm": 0.8234976530075073, + "learning_rate": 4.411507534670862e-05, + "loss": 0.1524, + "step": 47520 + }, + { + "epoch": 1.7272330837996948, + "grad_norm": 0.6677774786949158, + "learning_rate": 4.411173041119278e-05, + "loss": 0.1298, + "step": 47530 + }, + { + "epoch": 1.727596482302493, + "grad_norm": 9.5011625289917, + "learning_rate": 4.410838465222048e-05, + "loss": 0.1264, + "step": 47540 + }, + { + "epoch": 1.7279598808052912, + "grad_norm": 1.5103870630264282, + "learning_rate": 4.410503806993587e-05, + "loss": 0.1182, + "step": 47550 + }, + { + "epoch": 1.7283232793080892, + "grad_norm": 1.5079401731491089, + "learning_rate": 4.410169066448314e-05, + "loss": 0.1295, + "step": 47560 + }, + { + "epoch": 1.7286866778108876, + "grad_norm": 1.1625335216522217, + "learning_rate": 4.4098342436006536e-05, + "loss": 0.169, + "step": 47570 + }, + { + "epoch": 1.7290500763136856, + "grad_norm": 0.8692395687103271, + "learning_rate": 4.40949933846503e-05, + "loss": 0.147, + "step": 47580 + }, + { + "epoch": 1.7294134748164838, + "grad_norm": 0.49405890703201294, + "learning_rate": 4.409164351055873e-05, + "loss": 0.1297, + "step": 47590 + }, + { + "epoch": 1.729776873319282, + "grad_norm": 2.2081878185272217, + "learning_rate": 4.408829281387619e-05, + "loss": 0.161, + "step": 47600 + }, + { + "epoch": 1.73014027182208, + "grad_norm": 2.5445384979248047, + "learning_rate": 4.408494129474701e-05, + "loss": 0.105, + "step": 47610 + }, + { + "epoch": 1.7305036703248784, + "grad_norm": 0.4841686487197876, + "learning_rate": 4.408158895331562e-05, + "loss": 0.1352, + "step": 47620 + }, + { + "epoch": 1.7308670688276764, + "grad_norm": 91.45899963378906, + "learning_rate": 4.407823578972646e-05, + "loss": 1.4893, + "step": 47630 + }, + { + "epoch": 1.7312304673304746, + "grad_norm": 1.3897613286972046, + "learning_rate": 4.4074881804124e-05, + "loss": 0.1109, + "step": 47640 + }, + { + "epoch": 1.7315938658332728, + "grad_norm": 2.621211528778076, + "learning_rate": 4.407152699665275e-05, + "loss": 0.1332, + "step": 47650 + }, + { + "epoch": 1.7319572643360708, + "grad_norm": 0.8939427733421326, + "learning_rate": 4.406817136745726e-05, + "loss": 0.1101, + "step": 47660 + }, + { + "epoch": 1.7323206628388692, + "grad_norm": 0.9848506450653076, + "learning_rate": 4.4064814916682105e-05, + "loss": 0.1855, + "step": 47670 + }, + { + "epoch": 1.7326840613416672, + "grad_norm": 0.7746869921684265, + "learning_rate": 4.406145764447192e-05, + "loss": 0.1791, + "step": 47680 + }, + { + "epoch": 1.7330474598444654, + "grad_norm": 0.6325744390487671, + "learning_rate": 4.405809955097133e-05, + "loss": 0.1067, + "step": 47690 + }, + { + "epoch": 1.7334108583472636, + "grad_norm": 0.4917563498020172, + "learning_rate": 4.405474063632505e-05, + "loss": 0.1405, + "step": 47700 + }, + { + "epoch": 1.7337742568500618, + "grad_norm": 0.8248608708381653, + "learning_rate": 4.405138090067779e-05, + "loss": 0.2285, + "step": 47710 + }, + { + "epoch": 1.73413765535286, + "grad_norm": 1.1260930299758911, + "learning_rate": 4.404802034417431e-05, + "loss": 0.1606, + "step": 47720 + }, + { + "epoch": 1.734501053855658, + "grad_norm": 1.282873272895813, + "learning_rate": 4.404465896695941e-05, + "loss": 0.1251, + "step": 47730 + }, + { + "epoch": 1.7348644523584564, + "grad_norm": 1.245103120803833, + "learning_rate": 4.404129676917791e-05, + "loss": 0.1009, + "step": 47740 + }, + { + "epoch": 1.7352278508612544, + "grad_norm": 0.8262288570404053, + "learning_rate": 4.4037933750974686e-05, + "loss": 0.2019, + "step": 47750 + }, + { + "epoch": 1.7355912493640526, + "grad_norm": 0.6815000772476196, + "learning_rate": 4.403456991249464e-05, + "loss": 0.1395, + "step": 47760 + }, + { + "epoch": 1.7359546478668508, + "grad_norm": 0.3317665159702301, + "learning_rate": 4.403120525388269e-05, + "loss": 0.1484, + "step": 47770 + }, + { + "epoch": 1.7363180463696488, + "grad_norm": 3.1392834186553955, + "learning_rate": 4.402783977528383e-05, + "loss": 0.1404, + "step": 47780 + }, + { + "epoch": 1.7366814448724472, + "grad_norm": 0.8862357139587402, + "learning_rate": 4.4024473476843043e-05, + "loss": 0.1012, + "step": 47790 + }, + { + "epoch": 1.7370448433752452, + "grad_norm": 0.40148264169692993, + "learning_rate": 4.402110635870539e-05, + "loss": 0.1284, + "step": 47800 + }, + { + "epoch": 1.7374082418780434, + "grad_norm": 0.6237661838531494, + "learning_rate": 4.401773842101594e-05, + "loss": 0.1583, + "step": 47810 + }, + { + "epoch": 1.7377716403808416, + "grad_norm": 0.6144997477531433, + "learning_rate": 4.4014369663919805e-05, + "loss": 0.2531, + "step": 47820 + }, + { + "epoch": 1.7381350388836398, + "grad_norm": 0.6017129421234131, + "learning_rate": 4.4011000087562135e-05, + "loss": 0.1141, + "step": 47830 + }, + { + "epoch": 1.738498437386438, + "grad_norm": 1.1838932037353516, + "learning_rate": 4.400762969208812e-05, + "loss": 0.145, + "step": 47840 + }, + { + "epoch": 1.738861835889236, + "grad_norm": 1.8152750730514526, + "learning_rate": 4.400425847764297e-05, + "loss": 0.1485, + "step": 47850 + }, + { + "epoch": 1.7392252343920345, + "grad_norm": 1.8269041776657104, + "learning_rate": 4.400088644437193e-05, + "loss": 0.1248, + "step": 47860 + }, + { + "epoch": 1.7395886328948325, + "grad_norm": 0.36247947812080383, + "learning_rate": 4.39975135924203e-05, + "loss": 0.1293, + "step": 47870 + }, + { + "epoch": 1.7399520313976307, + "grad_norm": 0.5409418344497681, + "learning_rate": 4.399413992193341e-05, + "loss": 0.0932, + "step": 47880 + }, + { + "epoch": 1.7403154299004289, + "grad_norm": 0.8623117804527283, + "learning_rate": 4.3990765433056616e-05, + "loss": 0.1024, + "step": 47890 + }, + { + "epoch": 1.7406788284032269, + "grad_norm": 1.7151434421539307, + "learning_rate": 4.39873901259353e-05, + "loss": 0.1591, + "step": 47900 + }, + { + "epoch": 1.7410422269060253, + "grad_norm": 0.580970287322998, + "learning_rate": 4.39840140007149e-05, + "loss": 0.1124, + "step": 47910 + }, + { + "epoch": 1.7414056254088233, + "grad_norm": 0.7153110504150391, + "learning_rate": 4.3980637057540884e-05, + "loss": 0.2013, + "step": 47920 + }, + { + "epoch": 1.7417690239116215, + "grad_norm": 0.6043591499328613, + "learning_rate": 4.397725929655875e-05, + "loss": 1.2071, + "step": 47930 + }, + { + "epoch": 1.7421324224144197, + "grad_norm": 1.4633545875549316, + "learning_rate": 4.397388071791403e-05, + "loss": 0.1533, + "step": 47940 + }, + { + "epoch": 1.7424958209172177, + "grad_norm": 2.011232376098633, + "learning_rate": 4.3970501321752314e-05, + "loss": 0.1288, + "step": 47950 + }, + { + "epoch": 1.742859219420016, + "grad_norm": 1.238098382949829, + "learning_rate": 4.396712110821918e-05, + "loss": 0.1553, + "step": 47960 + }, + { + "epoch": 1.743222617922814, + "grad_norm": 0.30882275104522705, + "learning_rate": 4.3963740077460285e-05, + "loss": 0.1547, + "step": 47970 + }, + { + "epoch": 1.7435860164256123, + "grad_norm": 0.6614134311676025, + "learning_rate": 4.39603582296213e-05, + "loss": 0.0926, + "step": 47980 + }, + { + "epoch": 1.7439494149284105, + "grad_norm": 0.8839965462684631, + "learning_rate": 4.3956975564847944e-05, + "loss": 0.1228, + "step": 47990 + }, + { + "epoch": 1.7443128134312087, + "grad_norm": 1.4926025867462158, + "learning_rate": 4.395359208328597e-05, + "loss": 0.1692, + "step": 48000 + }, + { + "epoch": 1.7443128134312087, + "eval_loss": 0.3166210651397705, + "eval_runtime": 179.7213, + "eval_samples_per_second": 41.253, + "eval_steps_per_second": 5.158, + "eval_wer": 0.1702307259425999, + "step": 48000 + }, + { + "epoch": 1.744676211934007, + "grad_norm": 0.8344828486442566, + "learning_rate": 4.395020778508114e-05, + "loss": 0.115, + "step": 48010 + }, + { + "epoch": 1.7450396104368049, + "grad_norm": 0.7698808908462524, + "learning_rate": 4.394682267037928e-05, + "loss": 0.11, + "step": 48020 + }, + { + "epoch": 1.7454030089396033, + "grad_norm": 0.7840538024902344, + "learning_rate": 4.394343673932625e-05, + "loss": 0.1209, + "step": 48030 + }, + { + "epoch": 1.7457664074424013, + "grad_norm": 0.9926084876060486, + "learning_rate": 4.394004999206792e-05, + "loss": 0.1711, + "step": 48040 + }, + { + "epoch": 1.7461298059451995, + "grad_norm": 0.8097075819969177, + "learning_rate": 4.3936662428750234e-05, + "loss": 0.1564, + "step": 48050 + }, + { + "epoch": 1.7464932044479977, + "grad_norm": 1.1143872737884521, + "learning_rate": 4.393327404951915e-05, + "loss": 0.1254, + "step": 48060 + }, + { + "epoch": 1.7468566029507957, + "grad_norm": 0.8600324988365173, + "learning_rate": 4.392988485452063e-05, + "loss": 0.1512, + "step": 48070 + }, + { + "epoch": 1.7472200014535941, + "grad_norm": 1.2914844751358032, + "learning_rate": 4.3926494843900745e-05, + "loss": 0.1215, + "step": 48080 + }, + { + "epoch": 1.7475833999563921, + "grad_norm": 2.0937047004699707, + "learning_rate": 4.3923104017805524e-05, + "loss": 0.1222, + "step": 48090 + }, + { + "epoch": 1.7479467984591903, + "grad_norm": 0.5339716076850891, + "learning_rate": 4.391971237638108e-05, + "loss": 0.1561, + "step": 48100 + }, + { + "epoch": 1.7483101969619885, + "grad_norm": 2.8374857902526855, + "learning_rate": 4.391631991977356e-05, + "loss": 0.1204, + "step": 48110 + }, + { + "epoch": 1.7486735954647867, + "grad_norm": 0.6171733140945435, + "learning_rate": 4.39129266481291e-05, + "loss": 0.1742, + "step": 48120 + }, + { + "epoch": 1.749036993967585, + "grad_norm": 1.0918267965316772, + "learning_rate": 4.390953256159394e-05, + "loss": 0.1062, + "step": 48130 + }, + { + "epoch": 1.749400392470383, + "grad_norm": 1.273437738418579, + "learning_rate": 4.3906137660314296e-05, + "loss": 0.1166, + "step": 48140 + }, + { + "epoch": 1.7497637909731814, + "grad_norm": 0.9305084943771362, + "learning_rate": 4.390274194443645e-05, + "loss": 0.1264, + "step": 48150 + }, + { + "epoch": 1.7501271894759793, + "grad_norm": 1.1687837839126587, + "learning_rate": 4.389934541410671e-05, + "loss": 0.1437, + "step": 48160 + }, + { + "epoch": 1.7504905879787775, + "grad_norm": 0.5981254577636719, + "learning_rate": 4.389594806947142e-05, + "loss": 0.155, + "step": 48170 + }, + { + "epoch": 1.7508539864815758, + "grad_norm": 0.8380612730979919, + "learning_rate": 4.389254991067695e-05, + "loss": 0.118, + "step": 48180 + }, + { + "epoch": 1.7512173849843737, + "grad_norm": 0.9481167197227478, + "learning_rate": 4.388915093786973e-05, + "loss": 0.1164, + "step": 48190 + }, + { + "epoch": 1.7515807834871722, + "grad_norm": 0.4648977220058441, + "learning_rate": 4.3885751151196206e-05, + "loss": 0.139, + "step": 48200 + }, + { + "epoch": 1.7519441819899702, + "grad_norm": 1.5835154056549072, + "learning_rate": 4.388235055080287e-05, + "loss": 0.1288, + "step": 48210 + }, + { + "epoch": 1.7523075804927684, + "grad_norm": 2.212986469268799, + "learning_rate": 4.387894913683622e-05, + "loss": 0.1271, + "step": 48220 + }, + { + "epoch": 1.7526709789955666, + "grad_norm": 1.1150215864181519, + "learning_rate": 4.3875887168782186e-05, + "loss": 3.4347, + "step": 48230 + }, + { + "epoch": 1.7530343774983645, + "grad_norm": 4.514034271240234, + "learning_rate": 4.3872484209430055e-05, + "loss": 0.1084, + "step": 48240 + }, + { + "epoch": 1.753397776001163, + "grad_norm": 1.3234679698944092, + "learning_rate": 4.386908043692973e-05, + "loss": 0.1258, + "step": 48250 + }, + { + "epoch": 1.753761174503961, + "grad_norm": 0.579300045967102, + "learning_rate": 4.3865675851427856e-05, + "loss": 0.1375, + "step": 48260 + }, + { + "epoch": 1.7541245730067592, + "grad_norm": Infinity, + "learning_rate": 4.3862611029481096e-05, + "loss": 3.7136, + "step": 48270 + }, + { + "epoch": 1.7544879715095574, + "grad_norm": 0.5648366808891296, + "learning_rate": 4.3859204899680476e-05, + "loss": 0.1164, + "step": 48280 + }, + { + "epoch": 1.7548513700123556, + "grad_norm": 2.9147396087646484, + "learning_rate": 4.38557979573038e-05, + "loss": 0.1269, + "step": 48290 + }, + { + "epoch": 1.7552147685151538, + "grad_norm": 1.5536600351333618, + "learning_rate": 4.385239020249789e-05, + "loss": 0.1275, + "step": 48300 + }, + { + "epoch": 1.7555781670179518, + "grad_norm": 1.4855754375457764, + "learning_rate": 4.384898163540956e-05, + "loss": 0.1281, + "step": 48310 + }, + { + "epoch": 1.7559415655207502, + "grad_norm": 182.5458984375, + "learning_rate": 4.384557225618567e-05, + "loss": 2.7885, + "step": 48320 + }, + { + "epoch": 1.7563049640235482, + "grad_norm": 1.1808582544326782, + "learning_rate": 4.3842162064973134e-05, + "loss": 0.1378, + "step": 48330 + }, + { + "epoch": 1.7566683625263464, + "grad_norm": 2.970055341720581, + "learning_rate": 4.383875106191888e-05, + "loss": 0.1319, + "step": 48340 + }, + { + "epoch": 1.7570317610291446, + "grad_norm": 1.0342578887939453, + "learning_rate": 4.383533924716986e-05, + "loss": 0.1254, + "step": 48350 + }, + { + "epoch": 1.7573951595319426, + "grad_norm": 2.821300745010376, + "learning_rate": 4.383192662087309e-05, + "loss": 0.1098, + "step": 48360 + }, + { + "epoch": 1.757758558034741, + "grad_norm": 4.711688041687012, + "learning_rate": 4.382851318317561e-05, + "loss": 0.2667, + "step": 48370 + }, + { + "epoch": 1.758121956537539, + "grad_norm": 0.6478423476219177, + "learning_rate": 4.382509893422448e-05, + "loss": 0.1041, + "step": 48380 + }, + { + "epoch": 1.7584853550403372, + "grad_norm": 0.4265103042125702, + "learning_rate": 4.382168387416683e-05, + "loss": 0.0854, + "step": 48390 + }, + { + "epoch": 1.7588487535431354, + "grad_norm": 1.3017734289169312, + "learning_rate": 4.381826800314979e-05, + "loss": 0.1411, + "step": 48400 + }, + { + "epoch": 1.7592121520459336, + "grad_norm": 2.5378410816192627, + "learning_rate": 4.3814851321320524e-05, + "loss": 0.1361, + "step": 48410 + }, + { + "epoch": 1.7595755505487318, + "grad_norm": 1.133738398551941, + "learning_rate": 4.381143382882627e-05, + "loss": 0.1628, + "step": 48420 + }, + { + "epoch": 1.7599389490515298, + "grad_norm": 0.851696252822876, + "learning_rate": 4.3808015525814254e-05, + "loss": 0.1237, + "step": 48430 + }, + { + "epoch": 1.7603023475543282, + "grad_norm": 0.6289827823638916, + "learning_rate": 4.380459641243177e-05, + "loss": 0.1126, + "step": 48440 + }, + { + "epoch": 1.7606657460571262, + "grad_norm": 0.9123024940490723, + "learning_rate": 4.380117648882614e-05, + "loss": 0.1588, + "step": 48450 + }, + { + "epoch": 1.7610291445599244, + "grad_norm": 0.9411369562149048, + "learning_rate": 4.379775575514471e-05, + "loss": 0.1505, + "step": 48460 + }, + { + "epoch": 1.7613925430627226, + "grad_norm": 0.7461705207824707, + "learning_rate": 4.379433421153486e-05, + "loss": 0.1404, + "step": 48470 + }, + { + "epoch": 1.7617559415655206, + "grad_norm": 3.351199150085449, + "learning_rate": 4.3790911858144025e-05, + "loss": 0.1126, + "step": 48480 + }, + { + "epoch": 1.762119340068319, + "grad_norm": 0.8594498634338379, + "learning_rate": 4.378748869511965e-05, + "loss": 0.0944, + "step": 48490 + }, + { + "epoch": 1.762482738571117, + "grad_norm": 8.456245422363281, + "learning_rate": 4.378406472260924e-05, + "loss": 0.1368, + "step": 48500 + }, + { + "epoch": 1.7628461370739152, + "grad_norm": 2.1628379821777344, + "learning_rate": 4.3780639940760306e-05, + "loss": 0.1052, + "step": 48510 + }, + { + "epoch": 1.7632095355767134, + "grad_norm": 0.7315357327461243, + "learning_rate": 4.377721434972043e-05, + "loss": 0.1247, + "step": 48520 + }, + { + "epoch": 1.7635729340795114, + "grad_norm": 0.8521216511726379, + "learning_rate": 4.377378794963719e-05, + "loss": 4.0948, + "step": 48530 + }, + { + "epoch": 1.7639363325823099, + "grad_norm": 1.6623167991638184, + "learning_rate": 4.377036074065823e-05, + "loss": 0.1117, + "step": 48540 + }, + { + "epoch": 1.7642997310851078, + "grad_norm": 0.8901768326759338, + "learning_rate": 4.3766932722931206e-05, + "loss": 0.1484, + "step": 48550 + }, + { + "epoch": 1.764663129587906, + "grad_norm": 1.522369623184204, + "learning_rate": 4.3763503896603826e-05, + "loss": 0.1206, + "step": 48560 + }, + { + "epoch": 1.7650265280907043, + "grad_norm": 0.5716699957847595, + "learning_rate": 4.3760074261823824e-05, + "loss": 0.1744, + "step": 48570 + }, + { + "epoch": 1.7653899265935025, + "grad_norm": 2.5722410678863525, + "learning_rate": 4.375664381873896e-05, + "loss": 0.1224, + "step": 48580 + }, + { + "epoch": 1.7657533250963007, + "grad_norm": 2.1870505809783936, + "learning_rate": 4.3753212567497065e-05, + "loss": 0.1261, + "step": 48590 + }, + { + "epoch": 1.7661167235990987, + "grad_norm": 0.8871011137962341, + "learning_rate": 4.374978050824596e-05, + "loss": 0.1399, + "step": 48600 + }, + { + "epoch": 1.7661167235990987, + "eval_loss": 0.3405693769454956, + "eval_runtime": 179.6168, + "eval_samples_per_second": 41.277, + "eval_steps_per_second": 5.161, + "eval_wer": 0.16414035979450686, + "step": 48600 + }, + { + "epoch": 1.766480122101897, + "grad_norm": 1.1147382259368896, + "learning_rate": 4.374634764113352e-05, + "loss": 0.1198, + "step": 48610 + }, + { + "epoch": 1.766843520604695, + "grad_norm": 0.48618343472480774, + "learning_rate": 4.374291396630767e-05, + "loss": 0.199, + "step": 48620 + }, + { + "epoch": 1.7672069191074933, + "grad_norm": 0.9088806509971619, + "learning_rate": 4.373947948391633e-05, + "loss": 0.1168, + "step": 48630 + }, + { + "epoch": 1.7675703176102915, + "grad_norm": 0.4064035415649414, + "learning_rate": 4.373604419410751e-05, + "loss": 0.0935, + "step": 48640 + }, + { + "epoch": 1.7679337161130895, + "grad_norm": 0.7718721032142639, + "learning_rate": 4.373260809702921e-05, + "loss": 0.1229, + "step": 48650 + }, + { + "epoch": 1.768297114615888, + "grad_norm": 0.9764898419380188, + "learning_rate": 4.3729171192829465e-05, + "loss": 0.1043, + "step": 48660 + }, + { + "epoch": 1.7686605131186859, + "grad_norm": 1.2039941549301147, + "learning_rate": 4.372573348165638e-05, + "loss": 0.142, + "step": 48670 + }, + { + "epoch": 1.769023911621484, + "grad_norm": 0.5939382314682007, + "learning_rate": 4.3722294963658064e-05, + "loss": 0.1373, + "step": 48680 + }, + { + "epoch": 1.7693873101242823, + "grad_norm": 1.034637451171875, + "learning_rate": 4.3718855638982664e-05, + "loss": 0.114, + "step": 48690 + }, + { + "epoch": 1.7697507086270805, + "grad_norm": 1.1438782215118408, + "learning_rate": 4.371541550777838e-05, + "loss": 0.123, + "step": 48700 + }, + { + "epoch": 1.7701141071298787, + "grad_norm": 0.8836175799369812, + "learning_rate": 4.3711974570193435e-05, + "loss": 0.1159, + "step": 48710 + }, + { + "epoch": 1.7704775056326767, + "grad_norm": 0.4500117897987366, + "learning_rate": 4.370853282637609e-05, + "loss": 0.157, + "step": 48720 + }, + { + "epoch": 1.7708409041354751, + "grad_norm": 0.8643542528152466, + "learning_rate": 4.370509027647462e-05, + "loss": 0.4179, + "step": 48730 + }, + { + "epoch": 1.771204302638273, + "grad_norm": 1.6022706031799316, + "learning_rate": 4.370164692063737e-05, + "loss": 0.1222, + "step": 48740 + }, + { + "epoch": 1.7715677011410713, + "grad_norm": 2.06821870803833, + "learning_rate": 4.3698202759012685e-05, + "loss": 0.1353, + "step": 48750 + }, + { + "epoch": 1.7719310996438695, + "grad_norm": 0.6547145843505859, + "learning_rate": 4.369475779174898e-05, + "loss": 0.1053, + "step": 48760 + }, + { + "epoch": 1.7722944981466675, + "grad_norm": 1.154436707496643, + "learning_rate": 4.369131201899468e-05, + "loss": 0.1642, + "step": 48770 + }, + { + "epoch": 1.772657896649466, + "grad_norm": 1.4460755586624146, + "learning_rate": 4.3687865440898243e-05, + "loss": 0.1194, + "step": 48780 + }, + { + "epoch": 1.773021295152264, + "grad_norm": 0.8164231777191162, + "learning_rate": 4.368441805760818e-05, + "loss": 0.1205, + "step": 48790 + }, + { + "epoch": 1.7733846936550621, + "grad_norm": 3.0235085487365723, + "learning_rate": 4.3680969869273016e-05, + "loss": 0.2118, + "step": 48800 + }, + { + "epoch": 1.7737480921578603, + "grad_norm": 1.397639513015747, + "learning_rate": 4.367752087604134e-05, + "loss": 0.1163, + "step": 48810 + }, + { + "epoch": 1.7741114906606583, + "grad_norm": 0.5514954328536987, + "learning_rate": 4.3674071078061726e-05, + "loss": 0.1373, + "step": 48820 + }, + { + "epoch": 1.7744748891634567, + "grad_norm": 1.3183518648147583, + "learning_rate": 4.3670620475482836e-05, + "loss": 0.1059, + "step": 48830 + }, + { + "epoch": 1.7748382876662547, + "grad_norm": 0.6846873164176941, + "learning_rate": 4.366716906845335e-05, + "loss": 0.1139, + "step": 48840 + }, + { + "epoch": 1.775201686169053, + "grad_norm": 1.2583421468734741, + "learning_rate": 4.366371685712196e-05, + "loss": 0.1248, + "step": 48850 + }, + { + "epoch": 1.7755650846718511, + "grad_norm": 0.7057945728302002, + "learning_rate": 4.366026384163742e-05, + "loss": 0.1089, + "step": 48860 + }, + { + "epoch": 1.7759284831746494, + "grad_norm": 1.1777584552764893, + "learning_rate": 4.36568100221485e-05, + "loss": 0.1842, + "step": 48870 + }, + { + "epoch": 1.7762918816774476, + "grad_norm": 0.8768916726112366, + "learning_rate": 4.3653355398804025e-05, + "loss": 0.1095, + "step": 48880 + }, + { + "epoch": 1.7766552801802455, + "grad_norm": 1.5699349641799927, + "learning_rate": 4.364989997175283e-05, + "loss": 0.0982, + "step": 48890 + }, + { + "epoch": 1.777018678683044, + "grad_norm": 1.1270577907562256, + "learning_rate": 4.36464437411438e-05, + "loss": 0.1329, + "step": 48900 + }, + { + "epoch": 1.777382077185842, + "grad_norm": 0.545153021812439, + "learning_rate": 4.3642986707125856e-05, + "loss": 0.1169, + "step": 48910 + }, + { + "epoch": 1.7777454756886402, + "grad_norm": 1.2134042978286743, + "learning_rate": 4.363952886984795e-05, + "loss": 0.1704, + "step": 48920 + }, + { + "epoch": 1.7781088741914384, + "grad_norm": 1.079684853553772, + "learning_rate": 4.3636070229459055e-05, + "loss": 0.1077, + "step": 48930 + }, + { + "epoch": 1.7784722726942364, + "grad_norm": 0.6559361815452576, + "learning_rate": 4.3632610786108205e-05, + "loss": 0.1044, + "step": 48940 + }, + { + "epoch": 1.7788356711970348, + "grad_norm": 0.9042558670043945, + "learning_rate": 4.3629150539944454e-05, + "loss": 0.8377, + "step": 48950 + }, + { + "epoch": 1.7791990696998328, + "grad_norm": 2.102360725402832, + "learning_rate": 4.362568949111689e-05, + "loss": 0.1407, + "step": 48960 + }, + { + "epoch": 1.779562468202631, + "grad_norm": 2.581956148147583, + "learning_rate": 4.3622227639774635e-05, + "loss": 0.182, + "step": 48970 + }, + { + "epoch": 1.7799258667054292, + "grad_norm": 0.9113497138023376, + "learning_rate": 4.361876498606685e-05, + "loss": 0.0965, + "step": 48980 + }, + { + "epoch": 1.7802892652082274, + "grad_norm": 2.428302049636841, + "learning_rate": 4.361530153014273e-05, + "loss": 0.1093, + "step": 48990 + }, + { + "epoch": 1.7806526637110256, + "grad_norm": 1.9562546014785767, + "learning_rate": 4.361183727215149e-05, + "loss": 0.1437, + "step": 49000 + }, + { + "epoch": 1.7810160622138236, + "grad_norm": 0.7445639967918396, + "learning_rate": 4.360837221224241e-05, + "loss": 0.1037, + "step": 49010 + }, + { + "epoch": 1.781379460716622, + "grad_norm": 0.9966205954551697, + "learning_rate": 4.360490635056478e-05, + "loss": 0.1649, + "step": 49020 + }, + { + "epoch": 1.78174285921942, + "grad_norm": 1.8854800462722778, + "learning_rate": 4.360143968726793e-05, + "loss": 0.1139, + "step": 49030 + }, + { + "epoch": 1.7821062577222182, + "grad_norm": 0.5688827037811279, + "learning_rate": 4.3597972222501225e-05, + "loss": 0.1134, + "step": 49040 + }, + { + "epoch": 1.7824696562250164, + "grad_norm": 0.7284519076347351, + "learning_rate": 4.359450395641408e-05, + "loss": 0.1636, + "step": 49050 + }, + { + "epoch": 1.7828330547278144, + "grad_norm": 0.7459525465965271, + "learning_rate": 4.359103488915591e-05, + "loss": 0.1282, + "step": 49060 + }, + { + "epoch": 1.7831964532306128, + "grad_norm": 0.3692184090614319, + "learning_rate": 4.35875650208762e-05, + "loss": 0.1491, + "step": 49070 + }, + { + "epoch": 1.7835598517334108, + "grad_norm": 1.8872483968734741, + "learning_rate": 4.358409435172443e-05, + "loss": 0.1701, + "step": 49080 + }, + { + "epoch": 1.783923250236209, + "grad_norm": 2.4084055423736572, + "learning_rate": 4.358062288185018e-05, + "loss": 0.1038, + "step": 49090 + }, + { + "epoch": 1.7842866487390072, + "grad_norm": 1.3348972797393799, + "learning_rate": 4.357715061140299e-05, + "loss": 0.1723, + "step": 49100 + }, + { + "epoch": 1.7846500472418052, + "grad_norm": 1.2129530906677246, + "learning_rate": 4.357367754053248e-05, + "loss": 0.1127, + "step": 49110 + }, + { + "epoch": 1.7850134457446036, + "grad_norm": 0.3813287615776062, + "learning_rate": 4.3570203669388285e-05, + "loss": 0.1193, + "step": 49120 + }, + { + "epoch": 1.7853768442474016, + "grad_norm": 0.5162807703018188, + "learning_rate": 4.356672899812009e-05, + "loss": 0.1023, + "step": 49130 + }, + { + "epoch": 1.7857402427501998, + "grad_norm": 1.0435444116592407, + "learning_rate": 4.356325352687761e-05, + "loss": 0.0866, + "step": 49140 + }, + { + "epoch": 1.786103641252998, + "grad_norm": 1.3136024475097656, + "learning_rate": 4.355977725581058e-05, + "loss": 0.1726, + "step": 49150 + }, + { + "epoch": 1.7864670397557962, + "grad_norm": 3.7956295013427734, + "learning_rate": 4.355630018506878e-05, + "loss": 0.1215, + "step": 49160 + }, + { + "epoch": 1.7868304382585944, + "grad_norm": 0.8079971075057983, + "learning_rate": 4.3552822314802025e-05, + "loss": 0.1389, + "step": 49170 + }, + { + "epoch": 1.7871938367613924, + "grad_norm": 0.7602683901786804, + "learning_rate": 4.354934364516018e-05, + "loss": 0.1433, + "step": 49180 + }, + { + "epoch": 1.7875572352641909, + "grad_norm": 1.5899375677108765, + "learning_rate": 4.3545864176293104e-05, + "loss": 0.1194, + "step": 49190 + }, + { + "epoch": 1.7879206337669888, + "grad_norm": 57.75679016113281, + "learning_rate": 4.354238390835073e-05, + "loss": 0.4817, + "step": 49200 + }, + { + "epoch": 1.7879206337669888, + "eval_loss": 0.3314359784126282, + "eval_runtime": 180.9298, + "eval_samples_per_second": 40.977, + "eval_steps_per_second": 5.124, + "eval_wer": 0.17101130938333908, + "step": 49200 + }, + { + "epoch": 1.788284032269787, + "grad_norm": 0.689213216304779, + "learning_rate": 4.353890284148301e-05, + "loss": 0.1062, + "step": 49210 + }, + { + "epoch": 1.7886474307725853, + "grad_norm": 0.43430793285369873, + "learning_rate": 4.3535420975839924e-05, + "loss": 0.1184, + "step": 49220 + }, + { + "epoch": 1.7890108292753832, + "grad_norm": 0.5645721554756165, + "learning_rate": 4.353193831157151e-05, + "loss": 0.5598, + "step": 49230 + }, + { + "epoch": 1.7893742277781817, + "grad_norm": 0.9227817058563232, + "learning_rate": 4.352845484882779e-05, + "loss": 0.1123, + "step": 49240 + }, + { + "epoch": 1.7897376262809797, + "grad_norm": 1.031924843788147, + "learning_rate": 4.35249705877589e-05, + "loss": 0.1266, + "step": 49250 + }, + { + "epoch": 1.7901010247837779, + "grad_norm": 0.7044590711593628, + "learning_rate": 4.3521485528514914e-05, + "loss": 0.1203, + "step": 49260 + }, + { + "epoch": 1.790464423286576, + "grad_norm": 0.669763445854187, + "learning_rate": 4.3517999671246034e-05, + "loss": 0.1206, + "step": 49270 + }, + { + "epoch": 1.7908278217893743, + "grad_norm": 1.149156928062439, + "learning_rate": 4.351451301610243e-05, + "loss": 0.1309, + "step": 49280 + }, + { + "epoch": 1.7911912202921725, + "grad_norm": 0.42814984917640686, + "learning_rate": 4.3511025563234334e-05, + "loss": 0.6053, + "step": 49290 + }, + { + "epoch": 1.7915546187949705, + "grad_norm": 1.2521979808807373, + "learning_rate": 4.350753731279201e-05, + "loss": 0.1397, + "step": 49300 + }, + { + "epoch": 1.791918017297769, + "grad_norm": 1.2080492973327637, + "learning_rate": 4.3504048264925756e-05, + "loss": 0.1416, + "step": 49310 + }, + { + "epoch": 1.7922814158005669, + "grad_norm": 0.479490727186203, + "learning_rate": 4.3500558419785897e-05, + "loss": 0.1247, + "step": 49320 + }, + { + "epoch": 1.792644814303365, + "grad_norm": 0.6005672216415405, + "learning_rate": 4.349706777752279e-05, + "loss": 0.1049, + "step": 49330 + }, + { + "epoch": 1.7930082128061633, + "grad_norm": 0.7790218591690063, + "learning_rate": 4.349357633828687e-05, + "loss": 0.111, + "step": 49340 + }, + { + "epoch": 1.7933716113089613, + "grad_norm": 0.9393801689147949, + "learning_rate": 4.3490084102228523e-05, + "loss": 0.1228, + "step": 49350 + }, + { + "epoch": 1.7937350098117597, + "grad_norm": 6.127364158630371, + "learning_rate": 4.348659106949825e-05, + "loss": 0.1369, + "step": 49360 + }, + { + "epoch": 1.7940984083145577, + "grad_norm": 0.746756911277771, + "learning_rate": 4.3483097240246546e-05, + "loss": 0.1515, + "step": 49370 + }, + { + "epoch": 1.794461806817356, + "grad_norm": 0.8121843934059143, + "learning_rate": 4.347960261462394e-05, + "loss": 0.1007, + "step": 49380 + }, + { + "epoch": 1.794825205320154, + "grad_norm": 1.695778250694275, + "learning_rate": 4.347610719278101e-05, + "loss": 0.1302, + "step": 49390 + }, + { + "epoch": 1.795188603822952, + "grad_norm": 0.9138917922973633, + "learning_rate": 4.3472610974868354e-05, + "loss": 0.1758, + "step": 49400 + }, + { + "epoch": 1.7955520023257505, + "grad_norm": 0.6245046257972717, + "learning_rate": 4.3469113961036625e-05, + "loss": 0.1269, + "step": 49410 + }, + { + "epoch": 1.7959154008285485, + "grad_norm": 0.4156048595905304, + "learning_rate": 4.3465616151436484e-05, + "loss": 0.1782, + "step": 49420 + }, + { + "epoch": 1.7962787993313467, + "grad_norm": 0.7024033069610596, + "learning_rate": 4.346211754621865e-05, + "loss": 0.1817, + "step": 49430 + }, + { + "epoch": 1.796642197834145, + "grad_norm": 1.7370237112045288, + "learning_rate": 4.345861814553385e-05, + "loss": 0.1453, + "step": 49440 + }, + { + "epoch": 1.7970055963369431, + "grad_norm": 2.4251365661621094, + "learning_rate": 4.3455117949532875e-05, + "loss": 1.928, + "step": 49450 + }, + { + "epoch": 1.7973689948397413, + "grad_norm": 0.7227508425712585, + "learning_rate": 4.3451616958366524e-05, + "loss": 0.1225, + "step": 49460 + }, + { + "epoch": 1.7977323933425393, + "grad_norm": 0.27948006987571716, + "learning_rate": 4.344811517218566e-05, + "loss": 0.1819, + "step": 49470 + }, + { + "epoch": 1.7980957918453377, + "grad_norm": 0.583686888217926, + "learning_rate": 4.344461259114116e-05, + "loss": 0.1155, + "step": 49480 + }, + { + "epoch": 1.7984591903481357, + "grad_norm": 0.7126079797744751, + "learning_rate": 4.344110921538391e-05, + "loss": 0.1466, + "step": 49490 + }, + { + "epoch": 1.798822588850934, + "grad_norm": 1.089173674583435, + "learning_rate": 4.343760504506488e-05, + "loss": 0.142, + "step": 49500 + }, + { + "epoch": 1.7991859873537321, + "grad_norm": 0.950932502746582, + "learning_rate": 4.343410008033506e-05, + "loss": 0.1036, + "step": 49510 + }, + { + "epoch": 1.7995493858565301, + "grad_norm": 0.6006519794464111, + "learning_rate": 4.343059432134545e-05, + "loss": 0.2374, + "step": 49520 + }, + { + "epoch": 1.7999127843593286, + "grad_norm": 2.3363699913024902, + "learning_rate": 4.342708776824711e-05, + "loss": 0.1084, + "step": 49530 + }, + { + "epoch": 1.8002761828621265, + "grad_norm": 0.5308919548988342, + "learning_rate": 4.342358042119111e-05, + "loss": 0.1194, + "step": 49540 + }, + { + "epoch": 1.8006395813649247, + "grad_norm": 0.5925958752632141, + "learning_rate": 4.3420072280328594e-05, + "loss": 0.1374, + "step": 49550 + }, + { + "epoch": 1.801002979867723, + "grad_norm": 0.9156503081321716, + "learning_rate": 4.34165633458107e-05, + "loss": 0.1201, + "step": 49560 + }, + { + "epoch": 1.8013663783705212, + "grad_norm": 1.0838543176651, + "learning_rate": 4.341305361778862e-05, + "loss": 0.1442, + "step": 49570 + }, + { + "epoch": 1.8017297768733194, + "grad_norm": 0.5390272736549377, + "learning_rate": 4.340954309641357e-05, + "loss": 0.1318, + "step": 49580 + }, + { + "epoch": 1.8020931753761174, + "grad_norm": 1.546189785003662, + "learning_rate": 4.340603178183681e-05, + "loss": 0.1106, + "step": 49590 + }, + { + "epoch": 1.8024565738789158, + "grad_norm": 0.6207401752471924, + "learning_rate": 4.340251967420963e-05, + "loss": 0.1309, + "step": 49600 + }, + { + "epoch": 1.8028199723817138, + "grad_norm": 0.7735322713851929, + "learning_rate": 4.339900677368335e-05, + "loss": 0.1116, + "step": 49610 + }, + { + "epoch": 1.803183370884512, + "grad_norm": 0.30109134316444397, + "learning_rate": 4.3395493080409335e-05, + "loss": 0.1424, + "step": 49620 + }, + { + "epoch": 1.8035467693873102, + "grad_norm": 0.9261472225189209, + "learning_rate": 4.339197859453897e-05, + "loss": 0.1243, + "step": 49630 + }, + { + "epoch": 1.8039101678901082, + "grad_norm": 1.4092273712158203, + "learning_rate": 4.3388463316223696e-05, + "loss": 0.0978, + "step": 49640 + }, + { + "epoch": 1.8042735663929066, + "grad_norm": 1.4663946628570557, + "learning_rate": 4.338494724561496e-05, + "loss": 0.1159, + "step": 49650 + }, + { + "epoch": 1.8046369648957046, + "grad_norm": 0.9966018795967102, + "learning_rate": 4.338143038286425e-05, + "loss": 0.1606, + "step": 49660 + }, + { + "epoch": 1.8050003633985028, + "grad_norm": 1.0136394500732422, + "learning_rate": 4.3377912728123124e-05, + "loss": 0.147, + "step": 49670 + }, + { + "epoch": 1.805363761901301, + "grad_norm": 0.647540807723999, + "learning_rate": 4.337439428154312e-05, + "loss": 3.8271, + "step": 49680 + }, + { + "epoch": 1.805727160404099, + "grad_norm": 0.9579293131828308, + "learning_rate": 4.3370875043275835e-05, + "loss": 0.1021, + "step": 49690 + }, + { + "epoch": 1.8060905589068974, + "grad_norm": 0.9937068819999695, + "learning_rate": 4.3367355013472924e-05, + "loss": 0.1295, + "step": 49700 + }, + { + "epoch": 1.8064539574096954, + "grad_norm": 0.7757039070129395, + "learning_rate": 4.3363834192286026e-05, + "loss": 0.1347, + "step": 49710 + }, + { + "epoch": 1.8068173559124936, + "grad_norm": 0.4056714177131653, + "learning_rate": 4.336031257986685e-05, + "loss": 0.1225, + "step": 49720 + }, + { + "epoch": 1.8071807544152918, + "grad_norm": 0.7865206003189087, + "learning_rate": 4.335679017636714e-05, + "loss": 0.1149, + "step": 49730 + }, + { + "epoch": 1.80754415291809, + "grad_norm": 0.8593509793281555, + "learning_rate": 4.335326698193864e-05, + "loss": 0.2783, + "step": 49740 + }, + { + "epoch": 1.8079075514208882, + "grad_norm": 1.0015538930892944, + "learning_rate": 4.334974299673318e-05, + "loss": 0.1143, + "step": 49750 + }, + { + "epoch": 1.8082709499236862, + "grad_norm": 0.6025313138961792, + "learning_rate": 4.334621822090258e-05, + "loss": 0.0951, + "step": 49760 + }, + { + "epoch": 1.8086343484264846, + "grad_norm": 0.5842722058296204, + "learning_rate": 4.33426926545987e-05, + "loss": 0.1468, + "step": 49770 + }, + { + "epoch": 1.8089977469292826, + "grad_norm": 0.5118249654769897, + "learning_rate": 4.333916629797348e-05, + "loss": 0.1095, + "step": 49780 + }, + { + "epoch": 1.8093611454320808, + "grad_norm": 2.118723154067993, + "learning_rate": 4.333563915117882e-05, + "loss": 0.1106, + "step": 49790 + }, + { + "epoch": 1.809724543934879, + "grad_norm": 0.7443241477012634, + "learning_rate": 4.3332111214366714e-05, + "loss": 0.1103, + "step": 49800 + }, + { + "epoch": 1.809724543934879, + "eval_loss": 0.3337153196334839, + "eval_runtime": 180.2527, + "eval_samples_per_second": 41.131, + "eval_steps_per_second": 5.143, + "eval_wer": 0.1598834570769873, + "step": 49800 + }, + { + "epoch": 1.810087942437677, + "grad_norm": 1.2676368951797485, + "learning_rate": 4.332858248768916e-05, + "loss": 0.1016, + "step": 49810 + }, + { + "epoch": 1.8104513409404754, + "grad_norm": 2.088113307952881, + "learning_rate": 4.3325052971298195e-05, + "loss": 0.1931, + "step": 49820 + }, + { + "epoch": 1.8108147394432734, + "grad_norm": 0.6147329807281494, + "learning_rate": 4.332152266534591e-05, + "loss": 0.0989, + "step": 49830 + }, + { + "epoch": 1.8111781379460716, + "grad_norm": 1.3880411386489868, + "learning_rate": 4.3317991569984384e-05, + "loss": 0.1297, + "step": 49840 + }, + { + "epoch": 1.8115415364488698, + "grad_norm": 1.794396996498108, + "learning_rate": 4.331445968536579e-05, + "loss": 0.1191, + "step": 49850 + }, + { + "epoch": 1.811904934951668, + "grad_norm": 1.7588627338409424, + "learning_rate": 4.331092701164229e-05, + "loss": 0.1594, + "step": 49860 + }, + { + "epoch": 1.8122683334544663, + "grad_norm": 0.9384113550186157, + "learning_rate": 4.330739354896609e-05, + "loss": 0.1634, + "step": 49870 + }, + { + "epoch": 1.8126317319572642, + "grad_norm": 0.5903241634368896, + "learning_rate": 4.330385929748945e-05, + "loss": 0.1436, + "step": 49880 + }, + { + "epoch": 1.8129951304600627, + "grad_norm": 0.6767405271530151, + "learning_rate": 4.330032425736462e-05, + "loss": 0.1139, + "step": 49890 + }, + { + "epoch": 1.8133585289628606, + "grad_norm": 0.8860883116722107, + "learning_rate": 4.329678842874395e-05, + "loss": 0.1279, + "step": 49900 + }, + { + "epoch": 1.8137219274656589, + "grad_norm": 58.24811553955078, + "learning_rate": 4.3293251811779755e-05, + "loss": 0.5721, + "step": 49910 + }, + { + "epoch": 1.814085325968457, + "grad_norm": 0.965320348739624, + "learning_rate": 4.328971440662443e-05, + "loss": 0.1297, + "step": 49920 + }, + { + "epoch": 1.814448724471255, + "grad_norm": 0.5799686908721924, + "learning_rate": 4.328617621343039e-05, + "loss": 0.1166, + "step": 49930 + }, + { + "epoch": 1.8148121229740535, + "grad_norm": 2.6377480030059814, + "learning_rate": 4.3282637232350074e-05, + "loss": 0.0991, + "step": 49940 + }, + { + "epoch": 1.8151755214768515, + "grad_norm": 0.9475420117378235, + "learning_rate": 4.327909746353597e-05, + "loss": 0.119, + "step": 49950 + }, + { + "epoch": 1.8155389199796497, + "grad_norm": 0.36662977933883667, + "learning_rate": 4.32755569071406e-05, + "loss": 0.1224, + "step": 49960 + }, + { + "epoch": 1.8159023184824479, + "grad_norm": 1.361423373222351, + "learning_rate": 4.3272015563316506e-05, + "loss": 0.1463, + "step": 49970 + }, + { + "epoch": 1.8162657169852459, + "grad_norm": 1.3705862760543823, + "learning_rate": 4.326847343221627e-05, + "loss": 0.9571, + "step": 49980 + }, + { + "epoch": 1.8166291154880443, + "grad_norm": 1.0312581062316895, + "learning_rate": 4.326493051399251e-05, + "loss": 0.1055, + "step": 49990 + }, + { + "epoch": 1.8169925139908423, + "grad_norm": 1.8485617637634277, + "learning_rate": 4.326138680879789e-05, + "loss": 0.1521, + "step": 50000 + }, + { + "epoch": 1.8173559124936405, + "grad_norm": 0.8536475300788879, + "learning_rate": 4.325784231678508e-05, + "loss": 0.1067, + "step": 50010 + }, + { + "epoch": 1.8177193109964387, + "grad_norm": 0.8633929491043091, + "learning_rate": 4.325429703810681e-05, + "loss": 0.1326, + "step": 50020 + }, + { + "epoch": 1.818082709499237, + "grad_norm": 0.9228955507278442, + "learning_rate": 4.325075097291582e-05, + "loss": 0.1123, + "step": 50030 + }, + { + "epoch": 1.818446108002035, + "grad_norm": 0.6627784967422485, + "learning_rate": 4.324720412136491e-05, + "loss": 0.1049, + "step": 50040 + }, + { + "epoch": 1.818809506504833, + "grad_norm": 2.006086826324463, + "learning_rate": 4.324365648360691e-05, + "loss": 0.1699, + "step": 50050 + }, + { + "epoch": 1.8191729050076315, + "grad_norm": 0.8955428600311279, + "learning_rate": 4.3240108059794646e-05, + "loss": 0.1357, + "step": 50060 + }, + { + "epoch": 1.8195363035104295, + "grad_norm": 164.89707946777344, + "learning_rate": 4.3236558850081036e-05, + "loss": 3.7378, + "step": 50070 + }, + { + "epoch": 1.8198997020132277, + "grad_norm": 1.0500569343566895, + "learning_rate": 4.3233008854618994e-05, + "loss": 0.3275, + "step": 50080 + }, + { + "epoch": 1.820263100516026, + "grad_norm": 23.444902420043945, + "learning_rate": 4.3229458073561466e-05, + "loss": 0.1081, + "step": 50090 + }, + { + "epoch": 1.820626499018824, + "grad_norm": 2.446769952774048, + "learning_rate": 4.322590650706145e-05, + "loss": 0.1738, + "step": 50100 + }, + { + "epoch": 1.8209898975216223, + "grad_norm": 4.417498588562012, + "learning_rate": 4.322235415527198e-05, + "loss": 0.1196, + "step": 50110 + }, + { + "epoch": 1.8213532960244203, + "grad_norm": 0.8139522671699524, + "learning_rate": 4.321880101834609e-05, + "loss": 0.1339, + "step": 50120 + }, + { + "epoch": 1.8217166945272185, + "grad_norm": 0.6750831007957458, + "learning_rate": 4.32152470964369e-05, + "loss": 0.1169, + "step": 50130 + }, + { + "epoch": 1.8220800930300167, + "grad_norm": 0.6926230192184448, + "learning_rate": 4.3211692389697514e-05, + "loss": 0.1192, + "step": 50140 + }, + { + "epoch": 1.822443491532815, + "grad_norm": 0.5890200138092041, + "learning_rate": 4.3208136898281106e-05, + "loss": 0.1431, + "step": 50150 + }, + { + "epoch": 1.8228068900356131, + "grad_norm": 1.0174696445465088, + "learning_rate": 4.3204580622340865e-05, + "loss": 0.7309, + "step": 50160 + }, + { + "epoch": 1.8231702885384111, + "grad_norm": 0.9049250483512878, + "learning_rate": 4.320102356203001e-05, + "loss": 0.1451, + "step": 50170 + }, + { + "epoch": 1.8235336870412096, + "grad_norm": 0.7444465160369873, + "learning_rate": 4.3197465717501815e-05, + "loss": 0.1111, + "step": 50180 + }, + { + "epoch": 1.8238970855440075, + "grad_norm": 1.0209647417068481, + "learning_rate": 4.319390708890957e-05, + "loss": 0.0911, + "step": 50190 + }, + { + "epoch": 1.8242604840468057, + "grad_norm": 0.7396380305290222, + "learning_rate": 4.31903476764066e-05, + "loss": 0.1367, + "step": 50200 + }, + { + "epoch": 1.824623882549604, + "grad_norm": 0.7910483479499817, + "learning_rate": 4.318678748014626e-05, + "loss": 0.0978, + "step": 50210 + }, + { + "epoch": 1.824987281052402, + "grad_norm": 0.5519534349441528, + "learning_rate": 4.318322650028197e-05, + "loss": 0.1502, + "step": 50220 + }, + { + "epoch": 1.8253506795552004, + "grad_norm": 0.7698003053665161, + "learning_rate": 4.317966473696714e-05, + "loss": 0.1257, + "step": 50230 + }, + { + "epoch": 1.8257140780579983, + "grad_norm": 0.6308223605155945, + "learning_rate": 4.3176102190355246e-05, + "loss": 0.1048, + "step": 50240 + }, + { + "epoch": 1.8260774765607966, + "grad_norm": 0.7210573554039001, + "learning_rate": 4.317253886059978e-05, + "loss": 2.1908, + "step": 50250 + }, + { + "epoch": 1.8264408750635948, + "grad_norm": 1.0156967639923096, + "learning_rate": 4.316897474785426e-05, + "loss": 0.1612, + "step": 50260 + }, + { + "epoch": 1.8268042735663927, + "grad_norm": 1.366592288017273, + "learning_rate": 4.3165409852272276e-05, + "loss": 0.1524, + "step": 50270 + }, + { + "epoch": 1.8271676720691912, + "grad_norm": 1.1312644481658936, + "learning_rate": 4.3161844174007406e-05, + "loss": 0.1118, + "step": 50280 + }, + { + "epoch": 1.8275310705719892, + "grad_norm": 0.3259322941303253, + "learning_rate": 4.3158277713213295e-05, + "loss": 0.1043, + "step": 50290 + }, + { + "epoch": 1.8278944690747874, + "grad_norm": 0.8572397828102112, + "learning_rate": 4.31547104700436e-05, + "loss": 0.1346, + "step": 50300 + }, + { + "epoch": 1.8282578675775856, + "grad_norm": 1.2105157375335693, + "learning_rate": 4.3151142444652035e-05, + "loss": 0.1135, + "step": 50310 + }, + { + "epoch": 1.8286212660803838, + "grad_norm": 0.6126898527145386, + "learning_rate": 4.314757363719232e-05, + "loss": 0.1719, + "step": 50320 + }, + { + "epoch": 1.828984664583182, + "grad_norm": 0.7111977338790894, + "learning_rate": 4.314400404781822e-05, + "loss": 0.1089, + "step": 50330 + }, + { + "epoch": 1.82934806308598, + "grad_norm": 1.016365647315979, + "learning_rate": 4.314043367668355e-05, + "loss": 0.0869, + "step": 50340 + }, + { + "epoch": 1.8297114615887784, + "grad_norm": 1.1696245670318604, + "learning_rate": 4.3136862523942136e-05, + "loss": 0.1241, + "step": 50350 + }, + { + "epoch": 1.8300748600915764, + "grad_norm": 1.2339487075805664, + "learning_rate": 4.313329058974784e-05, + "loss": 0.1202, + "step": 50360 + }, + { + "epoch": 1.8304382585943746, + "grad_norm": 1.6147994995117188, + "learning_rate": 4.312971787425458e-05, + "loss": 0.1502, + "step": 50370 + }, + { + "epoch": 1.8308016570971728, + "grad_norm": 0.8028876781463623, + "learning_rate": 4.312614437761628e-05, + "loss": 0.1153, + "step": 50380 + }, + { + "epoch": 1.8311650555999708, + "grad_norm": 0.6366049647331238, + "learning_rate": 4.312257009998691e-05, + "loss": 0.1367, + "step": 50390 + }, + { + "epoch": 1.8315284541027692, + "grad_norm": 1.6343673467636108, + "learning_rate": 4.311899504152047e-05, + "loss": 0.1502, + "step": 50400 + }, + { + "epoch": 1.8315284541027692, + "eval_loss": 0.34016337990760803, + "eval_runtime": 180.2837, + "eval_samples_per_second": 41.124, + "eval_steps_per_second": 5.142, + "eval_wer": 0.16422204875923538, + "step": 50400 + }, + { + "epoch": 1.8318918526055672, + "grad_norm": 0.5833923816680908, + "learning_rate": 4.311541920237101e-05, + "loss": 0.1262, + "step": 50410 + }, + { + "epoch": 1.8322552511083654, + "grad_norm": 0.5683671832084656, + "learning_rate": 4.311184258269258e-05, + "loss": 0.1626, + "step": 50420 + }, + { + "epoch": 1.8326186496111636, + "grad_norm": 2.4269814491271973, + "learning_rate": 4.3108265182639304e-05, + "loss": 0.1293, + "step": 50430 + }, + { + "epoch": 1.8329820481139618, + "grad_norm": 0.8372895121574402, + "learning_rate": 4.310468700236532e-05, + "loss": 0.1004, + "step": 50440 + }, + { + "epoch": 1.83334544661676, + "grad_norm": 0.7744470238685608, + "learning_rate": 4.3101108042024776e-05, + "loss": 0.126, + "step": 50450 + }, + { + "epoch": 1.833708845119558, + "grad_norm": 1.1404407024383545, + "learning_rate": 4.3097528301771895e-05, + "loss": 0.1267, + "step": 50460 + }, + { + "epoch": 1.8340722436223564, + "grad_norm": 0.4898841381072998, + "learning_rate": 4.30939477817609e-05, + "loss": 0.1353, + "step": 50470 + }, + { + "epoch": 1.8344356421251544, + "grad_norm": 0.949220597743988, + "learning_rate": 4.3090366482146085e-05, + "loss": 0.1188, + "step": 50480 + }, + { + "epoch": 1.8347990406279526, + "grad_norm": 0.2803521156311035, + "learning_rate": 4.308678440308175e-05, + "loss": 0.0978, + "step": 50490 + }, + { + "epoch": 1.8351624391307508, + "grad_norm": 1.3130167722702026, + "learning_rate": 4.308320154472221e-05, + "loss": 0.1228, + "step": 50500 + }, + { + "epoch": 1.8355258376335488, + "grad_norm": 1.2615669965744019, + "learning_rate": 4.307961790722187e-05, + "loss": 0.1146, + "step": 50510 + }, + { + "epoch": 1.8358892361363472, + "grad_norm": 1.296720266342163, + "learning_rate": 4.307603349073512e-05, + "loss": 0.1497, + "step": 50520 + }, + { + "epoch": 1.8362526346391452, + "grad_norm": 0.7248766422271729, + "learning_rate": 4.30724482954164e-05, + "loss": 2.0, + "step": 50530 + }, + { + "epoch": 1.8366160331419434, + "grad_norm": 0.9493942260742188, + "learning_rate": 4.306886232142018e-05, + "loss": 0.1092, + "step": 50540 + }, + { + "epoch": 1.8369794316447416, + "grad_norm": 0.8784381151199341, + "learning_rate": 4.306527556890097e-05, + "loss": 0.1332, + "step": 50550 + }, + { + "epoch": 1.8373428301475396, + "grad_norm": 1.1002815961837769, + "learning_rate": 4.306168803801332e-05, + "loss": 0.1166, + "step": 50560 + }, + { + "epoch": 1.837706228650338, + "grad_norm": 1.0042141675949097, + "learning_rate": 4.3058099728911795e-05, + "loss": 0.1494, + "step": 50570 + }, + { + "epoch": 1.838069627153136, + "grad_norm": 0.8974900841712952, + "learning_rate": 4.305451064175101e-05, + "loss": 0.1281, + "step": 50580 + }, + { + "epoch": 1.8384330256559342, + "grad_norm": 6.133358001708984, + "learning_rate": 4.3050920776685587e-05, + "loss": 0.1136, + "step": 50590 + }, + { + "epoch": 1.8387964241587325, + "grad_norm": 1.828365445137024, + "learning_rate": 4.304733013387022e-05, + "loss": 0.1634, + "step": 50600 + }, + { + "epoch": 1.8391598226615307, + "grad_norm": 1.1756354570388794, + "learning_rate": 4.3043738713459605e-05, + "loss": 0.1039, + "step": 50610 + }, + { + "epoch": 1.8395232211643289, + "grad_norm": 0.3747727870941162, + "learning_rate": 4.304014651560849e-05, + "loss": 0.1966, + "step": 50620 + }, + { + "epoch": 1.8398866196671269, + "grad_norm": 2.3143324851989746, + "learning_rate": 4.303655354047166e-05, + "loss": 0.1294, + "step": 50630 + }, + { + "epoch": 1.8402500181699253, + "grad_norm": 0.408648818731308, + "learning_rate": 4.3032959788203894e-05, + "loss": 0.1097, + "step": 50640 + }, + { + "epoch": 1.8406134166727233, + "grad_norm": 2.8158206939697266, + "learning_rate": 4.3029365258960065e-05, + "loss": 0.1418, + "step": 50650 + }, + { + "epoch": 1.8409768151755215, + "grad_norm": 1.1815099716186523, + "learning_rate": 4.3025769952895036e-05, + "loss": 0.1053, + "step": 50660 + }, + { + "epoch": 1.8413402136783197, + "grad_norm": 0.5927807688713074, + "learning_rate": 4.3022173870163705e-05, + "loss": 0.1731, + "step": 50670 + }, + { + "epoch": 1.8417036121811177, + "grad_norm": 0.945854663848877, + "learning_rate": 4.301857701092103e-05, + "loss": 0.1355, + "step": 50680 + }, + { + "epoch": 1.842067010683916, + "grad_norm": 1.1901466846466064, + "learning_rate": 4.301497937532199e-05, + "loss": 0.1237, + "step": 50690 + }, + { + "epoch": 1.842430409186714, + "grad_norm": 2.328880548477173, + "learning_rate": 4.301138096352158e-05, + "loss": 0.184, + "step": 50700 + }, + { + "epoch": 1.8427938076895123, + "grad_norm": 1.652759075164795, + "learning_rate": 4.3007781775674846e-05, + "loss": 0.1085, + "step": 50710 + }, + { + "epoch": 1.8431572061923105, + "grad_norm": 0.7879970669746399, + "learning_rate": 4.3004181811936874e-05, + "loss": 0.1291, + "step": 50720 + }, + { + "epoch": 1.8435206046951087, + "grad_norm": 0.9201347827911377, + "learning_rate": 4.3000581072462765e-05, + "loss": 0.1069, + "step": 50730 + }, + { + "epoch": 1.843884003197907, + "grad_norm": 0.4384756088256836, + "learning_rate": 4.299697955740766e-05, + "loss": 0.0966, + "step": 50740 + }, + { + "epoch": 1.844247401700705, + "grad_norm": 1.022146463394165, + "learning_rate": 4.299337726692674e-05, + "loss": 0.4218, + "step": 50750 + }, + { + "epoch": 1.8446108002035033, + "grad_norm": 1.298736810684204, + "learning_rate": 4.298977420117521e-05, + "loss": 0.1266, + "step": 50760 + }, + { + "epoch": 1.8449741987063013, + "grad_norm": 0.3361910283565521, + "learning_rate": 4.2986170360308324e-05, + "loss": 0.1357, + "step": 50770 + }, + { + "epoch": 1.8453375972090995, + "grad_norm": 1.3413841724395752, + "learning_rate": 4.298256574448135e-05, + "loss": 0.1116, + "step": 50780 + }, + { + "epoch": 1.8457009957118977, + "grad_norm": 1.324555516242981, + "learning_rate": 4.2978960353849606e-05, + "loss": 0.1171, + "step": 50790 + }, + { + "epoch": 1.8460643942146957, + "grad_norm": 2.6282169818878174, + "learning_rate": 4.297535418856843e-05, + "loss": 0.1341, + "step": 50800 + }, + { + "epoch": 1.8464277927174941, + "grad_norm": 1.0654820203781128, + "learning_rate": 4.297174724879319e-05, + "loss": 0.1358, + "step": 50810 + }, + { + "epoch": 1.8467911912202921, + "grad_norm": 0.45108261704444885, + "learning_rate": 4.296813953467931e-05, + "loss": 0.21, + "step": 50820 + }, + { + "epoch": 1.8471545897230903, + "grad_norm": 0.4607618749141693, + "learning_rate": 4.296453104638222e-05, + "loss": 0.1004, + "step": 50830 + }, + { + "epoch": 1.8475179882258885, + "grad_norm": 1.834341049194336, + "learning_rate": 4.296092178405741e-05, + "loss": 0.1092, + "step": 50840 + }, + { + "epoch": 1.8478813867286865, + "grad_norm": 2.6476006507873535, + "learning_rate": 4.295731174786039e-05, + "loss": 0.1255, + "step": 50850 + }, + { + "epoch": 1.848244785231485, + "grad_norm": 0.6069791316986084, + "learning_rate": 4.2953700937946696e-05, + "loss": 0.1166, + "step": 50860 + }, + { + "epoch": 1.848608183734283, + "grad_norm": 0.9340389966964722, + "learning_rate": 4.2950089354471915e-05, + "loss": 0.1548, + "step": 50870 + }, + { + "epoch": 1.8489715822370811, + "grad_norm": 1.0438172817230225, + "learning_rate": 4.294647699759163e-05, + "loss": 0.1996, + "step": 50880 + }, + { + "epoch": 1.8493349807398793, + "grad_norm": 0.9623711109161377, + "learning_rate": 4.294286386746152e-05, + "loss": 0.11, + "step": 50890 + }, + { + "epoch": 1.8496983792426775, + "grad_norm": 0.7084048986434937, + "learning_rate": 4.2939249964237246e-05, + "loss": 0.1411, + "step": 50900 + }, + { + "epoch": 1.8500617777454758, + "grad_norm": 2.6798365116119385, + "learning_rate": 4.293563528807453e-05, + "loss": 0.1253, + "step": 50910 + }, + { + "epoch": 1.8504251762482737, + "grad_norm": 1.17062246799469, + "learning_rate": 4.2932019839129087e-05, + "loss": 0.1676, + "step": 50920 + }, + { + "epoch": 1.8507885747510722, + "grad_norm": 3.6233065128326416, + "learning_rate": 4.2928403617556714e-05, + "loss": 0.1128, + "step": 50930 + }, + { + "epoch": 1.8511519732538702, + "grad_norm": 1.1630785465240479, + "learning_rate": 4.2924786623513225e-05, + "loss": 0.084, + "step": 50940 + }, + { + "epoch": 1.8515153717566684, + "grad_norm": 1.654990553855896, + "learning_rate": 4.292116885715446e-05, + "loss": 0.1527, + "step": 50950 + }, + { + "epoch": 1.8518787702594666, + "grad_norm": 1.1175593137741089, + "learning_rate": 4.291755031863628e-05, + "loss": 0.1132, + "step": 50960 + }, + { + "epoch": 1.8522421687622646, + "grad_norm": 0.46370136737823486, + "learning_rate": 4.291393100811462e-05, + "loss": 0.1335, + "step": 50970 + }, + { + "epoch": 1.852605567265063, + "grad_norm": 1.1095346212387085, + "learning_rate": 4.2910310925745404e-05, + "loss": 0.1024, + "step": 50980 + }, + { + "epoch": 1.852968965767861, + "grad_norm": 1.7305604219436646, + "learning_rate": 4.290669007168462e-05, + "loss": 0.0964, + "step": 50990 + }, + { + "epoch": 1.8533323642706592, + "grad_norm": 0.5683947801589966, + "learning_rate": 4.290306844608827e-05, + "loss": 0.1324, + "step": 51000 + }, + { + "epoch": 1.8533323642706592, + "eval_loss": 0.35921338200569153, + "eval_runtime": 179.6912, + "eval_samples_per_second": 41.26, + "eval_steps_per_second": 5.159, + "eval_wer": 0.164648646686151, + "step": 51000 + }, + { + "epoch": 1.8536957627734574, + "grad_norm": 0.417925089597702, + "learning_rate": 4.289944604911239e-05, + "loss": 0.156, + "step": 51010 + }, + { + "epoch": 1.8540591612762556, + "grad_norm": 0.3907199501991272, + "learning_rate": 4.2895822880913076e-05, + "loss": 0.1322, + "step": 51020 + }, + { + "epoch": 1.8544225597790538, + "grad_norm": 0.8322422504425049, + "learning_rate": 4.2892198941646436e-05, + "loss": 0.1278, + "step": 51030 + }, + { + "epoch": 1.8547859582818518, + "grad_norm": 1.3560541868209839, + "learning_rate": 4.2888574231468595e-05, + "loss": 0.0933, + "step": 51040 + }, + { + "epoch": 1.8551493567846502, + "grad_norm": 1.353043556213379, + "learning_rate": 4.288494875053573e-05, + "loss": 0.1185, + "step": 51050 + }, + { + "epoch": 1.8555127552874482, + "grad_norm": 7.476738929748535, + "learning_rate": 4.2881322499004076e-05, + "loss": 0.127, + "step": 51060 + }, + { + "epoch": 1.8558761537902464, + "grad_norm": 0.5096439719200134, + "learning_rate": 4.2877695477029844e-05, + "loss": 0.1768, + "step": 51070 + }, + { + "epoch": 1.8562395522930446, + "grad_norm": 2.3075900077819824, + "learning_rate": 4.2874067684769325e-05, + "loss": 0.1288, + "step": 51080 + }, + { + "epoch": 1.8566029507958426, + "grad_norm": 0.615508496761322, + "learning_rate": 4.287043912237883e-05, + "loss": 0.0903, + "step": 51090 + }, + { + "epoch": 1.856966349298641, + "grad_norm": 2.022796154022217, + "learning_rate": 4.286680979001469e-05, + "loss": 0.1372, + "step": 51100 + }, + { + "epoch": 1.857329747801439, + "grad_norm": 0.8577511310577393, + "learning_rate": 4.28631796878333e-05, + "loss": 0.1216, + "step": 51110 + }, + { + "epoch": 1.8576931463042372, + "grad_norm": 0.5876504182815552, + "learning_rate": 4.285954881599104e-05, + "loss": 0.1672, + "step": 51120 + }, + { + "epoch": 1.8580565448070354, + "grad_norm": 1.0467904806137085, + "learning_rate": 4.2855917174644374e-05, + "loss": 0.1175, + "step": 51130 + }, + { + "epoch": 1.8584199433098334, + "grad_norm": 0.6273336410522461, + "learning_rate": 4.285228476394977e-05, + "loss": 0.1048, + "step": 51140 + }, + { + "epoch": 1.8587833418126318, + "grad_norm": 0.7880851030349731, + "learning_rate": 4.284865158406372e-05, + "loss": 0.1418, + "step": 51150 + }, + { + "epoch": 1.8591467403154298, + "grad_norm": 1.316256046295166, + "learning_rate": 4.284501763514279e-05, + "loss": 0.1373, + "step": 51160 + }, + { + "epoch": 1.859510138818228, + "grad_norm": 0.46383532881736755, + "learning_rate": 4.284138291734355e-05, + "loss": 0.167, + "step": 51170 + }, + { + "epoch": 1.8598735373210262, + "grad_norm": 0.7432321310043335, + "learning_rate": 4.283774743082259e-05, + "loss": 0.1534, + "step": 51180 + }, + { + "epoch": 1.8602369358238244, + "grad_norm": 0.673897922039032, + "learning_rate": 4.2834111175736555e-05, + "loss": 0.1122, + "step": 51190 + }, + { + "epoch": 1.8606003343266226, + "grad_norm": 0.7635305523872375, + "learning_rate": 4.2830474152242136e-05, + "loss": 0.1054, + "step": 51200 + }, + { + "epoch": 1.8609637328294206, + "grad_norm": 1.281503677368164, + "learning_rate": 4.282683636049602e-05, + "loss": 0.1175, + "step": 51210 + }, + { + "epoch": 1.861327131332219, + "grad_norm": 1.249510407447815, + "learning_rate": 4.282319780065496e-05, + "loss": 0.1624, + "step": 51220 + }, + { + "epoch": 1.861690529835017, + "grad_norm": 1.7470375299453735, + "learning_rate": 4.281955847287571e-05, + "loss": 0.1173, + "step": 51230 + }, + { + "epoch": 1.8620539283378152, + "grad_norm": 0.4860547184944153, + "learning_rate": 4.2815918377315096e-05, + "loss": 0.1109, + "step": 51240 + }, + { + "epoch": 1.8624173268406135, + "grad_norm": 1.2206679582595825, + "learning_rate": 4.281227751412995e-05, + "loss": 0.139, + "step": 51250 + }, + { + "epoch": 1.8627807253434114, + "grad_norm": 5.966470241546631, + "learning_rate": 4.2808635883477134e-05, + "loss": 0.0989, + "step": 51260 + }, + { + "epoch": 1.8631441238462099, + "grad_norm": 0.6490101218223572, + "learning_rate": 4.280499348551357e-05, + "loss": 0.1876, + "step": 51270 + }, + { + "epoch": 1.8635075223490078, + "grad_norm": 1.0723692178726196, + "learning_rate": 4.280135032039618e-05, + "loss": 0.1081, + "step": 51280 + }, + { + "epoch": 1.863870920851806, + "grad_norm": 1.9690381288528442, + "learning_rate": 4.2797706388281944e-05, + "loss": 0.1112, + "step": 51290 + }, + { + "epoch": 1.8642343193546043, + "grad_norm": 5.197057723999023, + "learning_rate": 4.279406168932787e-05, + "loss": 0.1374, + "step": 51300 + }, + { + "epoch": 1.8645977178574025, + "grad_norm": 1.063194751739502, + "learning_rate": 4.279041622369098e-05, + "loss": 0.1221, + "step": 51310 + }, + { + "epoch": 1.8649611163602007, + "grad_norm": 0.9989191889762878, + "learning_rate": 4.2786769991528356e-05, + "loss": 0.1602, + "step": 51320 + }, + { + "epoch": 1.8653245148629987, + "grad_norm": 1.2675135135650635, + "learning_rate": 4.278312299299711e-05, + "loss": 0.1038, + "step": 51330 + }, + { + "epoch": 1.865687913365797, + "grad_norm": 0.7109481692314148, + "learning_rate": 4.277947522825435e-05, + "loss": 0.1323, + "step": 51340 + }, + { + "epoch": 1.866051311868595, + "grad_norm": 0.522361695766449, + "learning_rate": 4.2775826697457265e-05, + "loss": 0.1339, + "step": 51350 + }, + { + "epoch": 1.8664147103713933, + "grad_norm": 0.6734838485717773, + "learning_rate": 4.277217740076306e-05, + "loss": 0.0991, + "step": 51360 + }, + { + "epoch": 1.8667781088741915, + "grad_norm": 0.7802498936653137, + "learning_rate": 4.276852733832897e-05, + "loss": 0.1614, + "step": 51370 + }, + { + "epoch": 1.8671415073769895, + "grad_norm": 0.7883875966072083, + "learning_rate": 4.276487651031225e-05, + "loss": 0.1093, + "step": 51380 + }, + { + "epoch": 1.867504905879788, + "grad_norm": 2.097398519515991, + "learning_rate": 4.27612249168702e-05, + "loss": 0.0925, + "step": 51390 + }, + { + "epoch": 1.8678683043825859, + "grad_norm": 2.9690377712249756, + "learning_rate": 4.2757572558160176e-05, + "loss": 0.1693, + "step": 51400 + }, + { + "epoch": 1.868231702885384, + "grad_norm": 0.681706428527832, + "learning_rate": 4.275391943433953e-05, + "loss": 0.1045, + "step": 51410 + }, + { + "epoch": 1.8685951013881823, + "grad_norm": 0.32064223289489746, + "learning_rate": 4.275026554556566e-05, + "loss": 0.1511, + "step": 51420 + }, + { + "epoch": 1.8689584998909803, + "grad_norm": 0.30609723925590515, + "learning_rate": 4.2746610891996006e-05, + "loss": 0.1089, + "step": 51430 + }, + { + "epoch": 1.8693218983937787, + "grad_norm": 0.5510174036026001, + "learning_rate": 4.274295547378803e-05, + "loss": 0.097, + "step": 51440 + }, + { + "epoch": 1.8696852968965767, + "grad_norm": 1.0738519430160522, + "learning_rate": 4.2739299291099233e-05, + "loss": 0.1309, + "step": 51450 + }, + { + "epoch": 1.870048695399375, + "grad_norm": 0.7475055456161499, + "learning_rate": 4.2735642344087144e-05, + "loss": 0.2601, + "step": 51460 + }, + { + "epoch": 1.8704120939021731, + "grad_norm": 0.5625230669975281, + "learning_rate": 4.273198463290934e-05, + "loss": 0.1464, + "step": 51470 + }, + { + "epoch": 1.8707754924049713, + "grad_norm": 1.03018057346344, + "learning_rate": 4.2728326157723396e-05, + "loss": 0.1346, + "step": 51480 + }, + { + "epoch": 1.8711388909077695, + "grad_norm": 0.7019144892692566, + "learning_rate": 4.272466691868696e-05, + "loss": 0.1281, + "step": 51490 + }, + { + "epoch": 1.8715022894105675, + "grad_norm": 1.6843324899673462, + "learning_rate": 4.2721006915957695e-05, + "loss": 0.8711, + "step": 51500 + }, + { + "epoch": 1.871865687913366, + "grad_norm": 1.4415316581726074, + "learning_rate": 4.271734614969329e-05, + "loss": 0.1193, + "step": 51510 + }, + { + "epoch": 1.872229086416164, + "grad_norm": 0.5036882162094116, + "learning_rate": 4.271368462005148e-05, + "loss": 0.1821, + "step": 51520 + }, + { + "epoch": 1.8725924849189621, + "grad_norm": 80.8698959350586, + "learning_rate": 4.2710022327190026e-05, + "loss": 1.5666, + "step": 51530 + }, + { + "epoch": 1.8729558834217603, + "grad_norm": 1.3852354288101196, + "learning_rate": 4.2706359271266716e-05, + "loss": 0.1154, + "step": 51540 + }, + { + "epoch": 1.8733192819245583, + "grad_norm": 2.0087506771087646, + "learning_rate": 4.270269545243939e-05, + "loss": 0.1289, + "step": 51550 + }, + { + "epoch": 1.8736826804273568, + "grad_norm": 18.74397087097168, + "learning_rate": 4.26990308708659e-05, + "loss": 0.166, + "step": 51560 + }, + { + "epoch": 1.8740460789301547, + "grad_norm": 0.5033942461013794, + "learning_rate": 4.2695365526704144e-05, + "loss": 0.2648, + "step": 51570 + }, + { + "epoch": 1.874409477432953, + "grad_norm": 0.7396907210350037, + "learning_rate": 4.269169942011205e-05, + "loss": 0.1235, + "step": 51580 + }, + { + "epoch": 1.8747728759357511, + "grad_norm": 1.0756418704986572, + "learning_rate": 4.2688032551247574e-05, + "loss": 0.1113, + "step": 51590 + }, + { + "epoch": 1.8751362744385494, + "grad_norm": 0.8277359008789062, + "learning_rate": 4.268436492026871e-05, + "loss": 0.1556, + "step": 51600 + }, + { + "epoch": 1.8751362744385494, + "eval_loss": 0.3514460623264313, + "eval_runtime": 180.5008, + "eval_samples_per_second": 41.075, + "eval_steps_per_second": 5.136, + "eval_wer": 0.16721731079928115, + "step": 51600 + }, + { + "epoch": 1.8754996729413476, + "grad_norm": 1.8270063400268555, + "learning_rate": 4.268069652733349e-05, + "loss": 0.1154, + "step": 51610 + }, + { + "epoch": 1.8758630714441455, + "grad_norm": 1.2547001838684082, + "learning_rate": 4.267702737259995e-05, + "loss": 0.1256, + "step": 51620 + }, + { + "epoch": 1.876226469946944, + "grad_norm": 2.664400100708008, + "learning_rate": 4.26733574562262e-05, + "loss": 0.1301, + "step": 51630 + }, + { + "epoch": 1.876589868449742, + "grad_norm": 1.9625864028930664, + "learning_rate": 4.266968677837037e-05, + "loss": 0.1347, + "step": 51640 + }, + { + "epoch": 1.8769532669525402, + "grad_norm": 1.5494035482406616, + "learning_rate": 4.266601533919059e-05, + "loss": 0.1537, + "step": 51650 + }, + { + "epoch": 1.8773166654553384, + "grad_norm": 1.1424529552459717, + "learning_rate": 4.2662343138845076e-05, + "loss": 0.1212, + "step": 51660 + }, + { + "epoch": 1.8776800639581364, + "grad_norm": 0.8773604035377502, + "learning_rate": 4.265867017749203e-05, + "loss": 0.2055, + "step": 51670 + }, + { + "epoch": 1.8780434624609348, + "grad_norm": 0.9486932158470154, + "learning_rate": 4.265499645528972e-05, + "loss": 0.114, + "step": 51680 + }, + { + "epoch": 1.8784068609637328, + "grad_norm": 0.5126560926437378, + "learning_rate": 4.265132197239643e-05, + "loss": 0.1068, + "step": 51690 + }, + { + "epoch": 1.878770259466531, + "grad_norm": 1.4426672458648682, + "learning_rate": 4.264764672897049e-05, + "loss": 0.1268, + "step": 51700 + }, + { + "epoch": 1.8791336579693292, + "grad_norm": 1.2795157432556152, + "learning_rate": 4.264397072517023e-05, + "loss": 0.1, + "step": 51710 + }, + { + "epoch": 1.8794970564721272, + "grad_norm": 0.34891799092292786, + "learning_rate": 4.2640293961154055e-05, + "loss": 0.1683, + "step": 51720 + }, + { + "epoch": 1.8798604549749256, + "grad_norm": 0.6939824223518372, + "learning_rate": 4.2636616437080366e-05, + "loss": 0.1131, + "step": 51730 + }, + { + "epoch": 1.8802238534777236, + "grad_norm": 1.4243013858795166, + "learning_rate": 4.2632938153107636e-05, + "loss": 0.1019, + "step": 51740 + }, + { + "epoch": 1.8805872519805218, + "grad_norm": 0.699863851070404, + "learning_rate": 4.2629259109394335e-05, + "loss": 0.1652, + "step": 51750 + }, + { + "epoch": 1.88095065048332, + "grad_norm": 1.4008554220199585, + "learning_rate": 4.2625579306098994e-05, + "loss": 0.1062, + "step": 51760 + }, + { + "epoch": 1.8813140489861182, + "grad_norm": 1.0460174083709717, + "learning_rate": 4.2621898743380144e-05, + "loss": 0.1216, + "step": 51770 + }, + { + "epoch": 1.8816774474889164, + "grad_norm": 1.277803897857666, + "learning_rate": 4.2618217421396375e-05, + "loss": 1.8922, + "step": 51780 + }, + { + "epoch": 1.8820408459917144, + "grad_norm": 2.2651615142822266, + "learning_rate": 4.2614535340306314e-05, + "loss": 0.1014, + "step": 51790 + }, + { + "epoch": 1.8824042444945128, + "grad_norm": 0.8989794254302979, + "learning_rate": 4.2610852500268586e-05, + "loss": 0.1089, + "step": 51800 + }, + { + "epoch": 1.8827676429973108, + "grad_norm": 0.9130983948707581, + "learning_rate": 4.2607168901441885e-05, + "loss": 0.1098, + "step": 51810 + }, + { + "epoch": 1.883131041500109, + "grad_norm": 0.5734561085700989, + "learning_rate": 4.260348454398493e-05, + "loss": 0.1674, + "step": 51820 + }, + { + "epoch": 1.8834944400029072, + "grad_norm": 0.9285537600517273, + "learning_rate": 4.259979942805645e-05, + "loss": 0.1197, + "step": 51830 + }, + { + "epoch": 1.8838578385057052, + "grad_norm": 1.951344609260559, + "learning_rate": 4.259611355381524e-05, + "loss": 0.1009, + "step": 51840 + }, + { + "epoch": 1.8842212370085036, + "grad_norm": 0.6593104004859924, + "learning_rate": 4.2592426921420106e-05, + "loss": 0.1121, + "step": 51850 + }, + { + "epoch": 1.8845846355113016, + "grad_norm": 0.35744279623031616, + "learning_rate": 4.258873953102987e-05, + "loss": 0.1029, + "step": 51860 + }, + { + "epoch": 1.8849480340140998, + "grad_norm": 0.7135227918624878, + "learning_rate": 4.2585051382803455e-05, + "loss": 0.153, + "step": 51870 + }, + { + "epoch": 1.885311432516898, + "grad_norm": 1.8943212032318115, + "learning_rate": 4.258136247689973e-05, + "loss": 0.1079, + "step": 51880 + }, + { + "epoch": 1.8856748310196962, + "grad_norm": 1.6160852909088135, + "learning_rate": 4.2577672813477656e-05, + "loss": 0.1259, + "step": 51890 + }, + { + "epoch": 1.8860382295224944, + "grad_norm": 0.8660845160484314, + "learning_rate": 4.25739823926962e-05, + "loss": 0.1151, + "step": 51900 + }, + { + "epoch": 1.8864016280252924, + "grad_norm": 1.074818730354309, + "learning_rate": 4.2570291214714365e-05, + "loss": 0.1181, + "step": 51910 + }, + { + "epoch": 1.8867650265280909, + "grad_norm": 0.4410496950149536, + "learning_rate": 4.2566599279691205e-05, + "loss": 0.1296, + "step": 51920 + }, + { + "epoch": 1.8871284250308888, + "grad_norm": 0.6509966254234314, + "learning_rate": 4.2562906587785776e-05, + "loss": 0.1096, + "step": 51930 + }, + { + "epoch": 1.887491823533687, + "grad_norm": 0.8408392667770386, + "learning_rate": 4.25592131391572e-05, + "loss": 0.0968, + "step": 51940 + }, + { + "epoch": 1.8878552220364853, + "grad_norm": 0.6419994831085205, + "learning_rate": 4.25555189339646e-05, + "loss": 0.1336, + "step": 51950 + }, + { + "epoch": 1.8882186205392832, + "grad_norm": 1.4039828777313232, + "learning_rate": 4.2551823972367156e-05, + "loss": 0.1102, + "step": 51960 + }, + { + "epoch": 1.8885820190420817, + "grad_norm": 1.1315640211105347, + "learning_rate": 4.2548128254524066e-05, + "loss": 0.1265, + "step": 51970 + }, + { + "epoch": 1.8889454175448797, + "grad_norm": 0.6739282011985779, + "learning_rate": 4.254443178059456e-05, + "loss": 0.101, + "step": 51980 + }, + { + "epoch": 1.8893088160476779, + "grad_norm": 0.9321909546852112, + "learning_rate": 4.254073455073792e-05, + "loss": 0.1093, + "step": 51990 + }, + { + "epoch": 1.889672214550476, + "grad_norm": 8.199972152709961, + "learning_rate": 4.2537036565113435e-05, + "loss": 0.2084, + "step": 52000 + }, + { + "epoch": 1.890035613053274, + "grad_norm": 0.5499328970909119, + "learning_rate": 4.253333782388044e-05, + "loss": 0.138, + "step": 52010 + }, + { + "epoch": 1.8903990115560725, + "grad_norm": 1.2447484731674194, + "learning_rate": 4.252963832719831e-05, + "loss": 0.1353, + "step": 52020 + }, + { + "epoch": 1.8907624100588705, + "grad_norm": 1.7561428546905518, + "learning_rate": 4.252593807522642e-05, + "loss": 0.1267, + "step": 52030 + }, + { + "epoch": 1.8911258085616687, + "grad_norm": 0.6353381872177124, + "learning_rate": 4.252223706812423e-05, + "loss": 0.1105, + "step": 52040 + }, + { + "epoch": 1.8914892070644669, + "grad_norm": 1.8371816873550415, + "learning_rate": 4.251853530605118e-05, + "loss": 0.1498, + "step": 52050 + }, + { + "epoch": 1.891852605567265, + "grad_norm": 2.9866833686828613, + "learning_rate": 4.251483278916678e-05, + "loss": 0.1133, + "step": 52060 + }, + { + "epoch": 1.8922160040700633, + "grad_norm": 0.9184136986732483, + "learning_rate": 4.2511129517630555e-05, + "loss": 0.114, + "step": 52070 + }, + { + "epoch": 1.8925794025728613, + "grad_norm": 1.407132863998413, + "learning_rate": 4.250742549160206e-05, + "loss": 0.1055, + "step": 52080 + }, + { + "epoch": 1.8929428010756597, + "grad_norm": 1.2703722715377808, + "learning_rate": 4.250372071124089e-05, + "loss": 0.1429, + "step": 52090 + }, + { + "epoch": 1.8933061995784577, + "grad_norm": 1.260004997253418, + "learning_rate": 4.2500385764093334e-05, + "loss": 1.4537, + "step": 52100 + }, + { + "epoch": 1.893669598081256, + "grad_norm": 0.9646703004837036, + "learning_rate": 4.249667955093988e-05, + "loss": 0.1465, + "step": 52110 + }, + { + "epoch": 1.894032996584054, + "grad_norm": 0.7287250757217407, + "learning_rate": 4.249297258391677e-05, + "loss": 0.1387, + "step": 52120 + }, + { + "epoch": 1.894396395086852, + "grad_norm": 1.042417049407959, + "learning_rate": 4.24892648631837e-05, + "loss": 0.1031, + "step": 52130 + }, + { + "epoch": 1.8947597935896505, + "grad_norm": 0.9328198432922363, + "learning_rate": 4.248555638890043e-05, + "loss": 0.1324, + "step": 52140 + }, + { + "epoch": 1.8951231920924485, + "grad_norm": 3.1417503356933594, + "learning_rate": 4.2481847161226764e-05, + "loss": 0.1798, + "step": 52150 + }, + { + "epoch": 1.8954865905952467, + "grad_norm": 0.8507013916969299, + "learning_rate": 4.247813718032249e-05, + "loss": 0.1182, + "step": 52160 + }, + { + "epoch": 1.895849989098045, + "grad_norm": 1.0147353410720825, + "learning_rate": 4.247442644634748e-05, + "loss": 0.1341, + "step": 52170 + }, + { + "epoch": 1.8962133876008431, + "grad_norm": 2.492661476135254, + "learning_rate": 4.2470714959461614e-05, + "loss": 0.1109, + "step": 52180 + }, + { + "epoch": 1.8965767861036413, + "grad_norm": 1.8121393918991089, + "learning_rate": 4.246700271982479e-05, + "loss": 0.1381, + "step": 52190 + }, + { + "epoch": 1.8969401846064393, + "grad_norm": 1.2550605535507202, + "learning_rate": 4.2463289727596965e-05, + "loss": 0.1469, + "step": 52200 + }, + { + "epoch": 1.8969401846064393, + "eval_loss": 0.3317066729068756, + "eval_runtime": 179.6114, + "eval_samples_per_second": 41.278, + "eval_steps_per_second": 5.161, + "eval_wer": 0.16958629077640822, + "step": 52200 + }, + { + "epoch": 1.8973035831092377, + "grad_norm": 0.7204797863960266, + "learning_rate": 4.245957598293813e-05, + "loss": 0.1328, + "step": 52210 + }, + { + "epoch": 1.8976669816120357, + "grad_norm": 0.5142366290092468, + "learning_rate": 4.245586148600829e-05, + "loss": 0.1457, + "step": 52220 + }, + { + "epoch": 1.898030380114834, + "grad_norm": 0.8868045210838318, + "learning_rate": 4.2452146236967474e-05, + "loss": 0.1344, + "step": 52230 + }, + { + "epoch": 1.8983937786176321, + "grad_norm": 0.7489217519760132, + "learning_rate": 4.2448430235975777e-05, + "loss": 0.1119, + "step": 52240 + }, + { + "epoch": 1.8987571771204301, + "grad_norm": 1.2009568214416504, + "learning_rate": 4.244471348319331e-05, + "loss": 0.1503, + "step": 52250 + }, + { + "epoch": 1.8991205756232286, + "grad_norm": 1.1081483364105225, + "learning_rate": 4.24409959787802e-05, + "loss": 0.1265, + "step": 52260 + }, + { + "epoch": 1.8994839741260265, + "grad_norm": 0.42917948961257935, + "learning_rate": 4.243727772289663e-05, + "loss": 0.1825, + "step": 52270 + }, + { + "epoch": 1.8998473726288247, + "grad_norm": 1.7099511623382568, + "learning_rate": 4.2433558715702804e-05, + "loss": 0.133, + "step": 52280 + }, + { + "epoch": 1.900210771131623, + "grad_norm": 1.212544560432434, + "learning_rate": 4.242983895735896e-05, + "loss": 0.1306, + "step": 52290 + }, + { + "epoch": 1.900574169634421, + "grad_norm": 0.48001641035079956, + "learning_rate": 4.242611844802538e-05, + "loss": 0.1739, + "step": 52300 + }, + { + "epoch": 1.9009375681372194, + "grad_norm": 2.3596603870391846, + "learning_rate": 4.242239718786235e-05, + "loss": 0.129, + "step": 52310 + }, + { + "epoch": 1.9013009666400174, + "grad_norm": 0.41326409578323364, + "learning_rate": 4.241867517703022e-05, + "loss": 0.1218, + "step": 52320 + }, + { + "epoch": 1.9016643651428156, + "grad_norm": 0.9740013480186462, + "learning_rate": 4.241495241568935e-05, + "loss": 0.1248, + "step": 52330 + }, + { + "epoch": 1.9020277636456138, + "grad_norm": 0.8275489807128906, + "learning_rate": 4.2411228904000136e-05, + "loss": 0.1067, + "step": 52340 + }, + { + "epoch": 1.902391162148412, + "grad_norm": 9.790162086486816, + "learning_rate": 4.240750464212303e-05, + "loss": 0.1548, + "step": 52350 + }, + { + "epoch": 1.9027545606512102, + "grad_norm": 3.4635374546051025, + "learning_rate": 4.240377963021847e-05, + "loss": 0.1157, + "step": 52360 + }, + { + "epoch": 1.9031179591540082, + "grad_norm": 0.5103577971458435, + "learning_rate": 4.2400053868446976e-05, + "loss": 0.1356, + "step": 52370 + }, + { + "epoch": 1.9034813576568066, + "grad_norm": 1.608657956123352, + "learning_rate": 4.239632735696908e-05, + "loss": 0.114, + "step": 52380 + }, + { + "epoch": 1.9038447561596046, + "grad_norm": 0.9395160675048828, + "learning_rate": 4.2392600095945324e-05, + "loss": 0.1079, + "step": 52390 + }, + { + "epoch": 1.9042081546624028, + "grad_norm": 1.1032116413116455, + "learning_rate": 4.2388872085536314e-05, + "loss": 0.1789, + "step": 52400 + }, + { + "epoch": 1.904571553165201, + "grad_norm": 0.765036940574646, + "learning_rate": 4.2385143325902675e-05, + "loss": 0.0984, + "step": 52410 + }, + { + "epoch": 1.904934951667999, + "grad_norm": 2.460920572280884, + "learning_rate": 4.238141381720507e-05, + "loss": 0.1201, + "step": 52420 + }, + { + "epoch": 1.9052983501707974, + "grad_norm": 0.6005275845527649, + "learning_rate": 4.237768355960418e-05, + "loss": 0.1145, + "step": 52430 + }, + { + "epoch": 1.9056617486735954, + "grad_norm": 0.606640636920929, + "learning_rate": 4.2373952553260745e-05, + "loss": 0.1204, + "step": 52440 + }, + { + "epoch": 1.9060251471763936, + "grad_norm": 1.0981110334396362, + "learning_rate": 4.237022079833551e-05, + "loss": 0.1237, + "step": 52450 + }, + { + "epoch": 1.9063885456791918, + "grad_norm": 1.2138440608978271, + "learning_rate": 4.236648829498926e-05, + "loss": 0.1027, + "step": 52460 + }, + { + "epoch": 1.90675194418199, + "grad_norm": 0.38126930594444275, + "learning_rate": 4.2362755043382816e-05, + "loss": 0.1787, + "step": 52470 + }, + { + "epoch": 1.9071153426847882, + "grad_norm": 1.1713272333145142, + "learning_rate": 4.235902104367704e-05, + "loss": 0.1098, + "step": 52480 + }, + { + "epoch": 1.9074787411875862, + "grad_norm": 1.0597947835922241, + "learning_rate": 4.235528629603282e-05, + "loss": 0.1085, + "step": 52490 + }, + { + "epoch": 1.9078421396903846, + "grad_norm": 0.5749408602714539, + "learning_rate": 4.235155080061105e-05, + "loss": 0.1295, + "step": 52500 + }, + { + "epoch": 1.9082055381931826, + "grad_norm": 1.4702091217041016, + "learning_rate": 4.234781455757269e-05, + "loss": 0.1281, + "step": 52510 + }, + { + "epoch": 1.9085689366959808, + "grad_norm": 0.586208164691925, + "learning_rate": 4.234407756707873e-05, + "loss": 0.1412, + "step": 52520 + }, + { + "epoch": 1.908932335198779, + "grad_norm": 0.8572281002998352, + "learning_rate": 4.2340339829290174e-05, + "loss": 0.1059, + "step": 52530 + }, + { + "epoch": 1.909295733701577, + "grad_norm": 0.7896180152893066, + "learning_rate": 4.233660134436809e-05, + "loss": 0.1144, + "step": 52540 + }, + { + "epoch": 1.9096591322043754, + "grad_norm": 0.9928715825080872, + "learning_rate": 4.233286211247351e-05, + "loss": 0.1507, + "step": 52550 + }, + { + "epoch": 1.9100225307071734, + "grad_norm": 1.1396877765655518, + "learning_rate": 4.23291221337676e-05, + "loss": 0.1306, + "step": 52560 + }, + { + "epoch": 1.9103859292099716, + "grad_norm": 0.743976891040802, + "learning_rate": 4.232538140841146e-05, + "loss": 0.1839, + "step": 52570 + }, + { + "epoch": 1.9107493277127698, + "grad_norm": 0.40765443444252014, + "learning_rate": 4.232163993656628e-05, + "loss": 0.1303, + "step": 52580 + }, + { + "epoch": 1.911112726215568, + "grad_norm": 0.6623360514640808, + "learning_rate": 4.231789771839326e-05, + "loss": 0.1202, + "step": 52590 + }, + { + "epoch": 1.9114761247183663, + "grad_norm": 0.9128944873809814, + "learning_rate": 4.2314154754053656e-05, + "loss": 0.827, + "step": 52600 + }, + { + "epoch": 1.9118395232211642, + "grad_norm": 0.9086483716964722, + "learning_rate": 4.231041104370872e-05, + "loss": 0.1147, + "step": 52610 + }, + { + "epoch": 1.9122029217239624, + "grad_norm": 1.0172945261001587, + "learning_rate": 4.2306666587519765e-05, + "loss": 0.1501, + "step": 52620 + }, + { + "epoch": 1.9125663202267607, + "grad_norm": 1.759474277496338, + "learning_rate": 4.2302921385648126e-05, + "loss": 0.1389, + "step": 52630 + }, + { + "epoch": 1.9129297187295589, + "grad_norm": 1.5807387828826904, + "learning_rate": 4.229917543825517e-05, + "loss": 0.1067, + "step": 52640 + }, + { + "epoch": 1.913293117232357, + "grad_norm": 0.8961324095726013, + "learning_rate": 4.2295428745502284e-05, + "loss": 0.1204, + "step": 52650 + }, + { + "epoch": 1.913656515735155, + "grad_norm": 1.3519996404647827, + "learning_rate": 4.229168130755092e-05, + "loss": 0.1163, + "step": 52660 + }, + { + "epoch": 1.9140199142379535, + "grad_norm": 1.4970946311950684, + "learning_rate": 4.2287933124562526e-05, + "loss": 0.1532, + "step": 52670 + }, + { + "epoch": 1.9143833127407515, + "grad_norm": 48.62047576904297, + "learning_rate": 4.2284184196698615e-05, + "loss": 0.474, + "step": 52680 + }, + { + "epoch": 1.9147467112435497, + "grad_norm": 1.5001195669174194, + "learning_rate": 4.22804345241207e-05, + "loss": 0.1108, + "step": 52690 + }, + { + "epoch": 1.9151101097463479, + "grad_norm": 0.6537098288536072, + "learning_rate": 4.227668410699034e-05, + "loss": 0.1457, + "step": 52700 + }, + { + "epoch": 1.9154735082491459, + "grad_norm": 1.2610722780227661, + "learning_rate": 4.227293294546914e-05, + "loss": 0.1223, + "step": 52710 + }, + { + "epoch": 1.9158369067519443, + "grad_norm": 1.0688477754592896, + "learning_rate": 4.226918103971871e-05, + "loss": 0.1319, + "step": 52720 + }, + { + "epoch": 1.9162003052547423, + "grad_norm": 0.532785952091217, + "learning_rate": 4.226542838990072e-05, + "loss": 0.1355, + "step": 52730 + }, + { + "epoch": 1.9165637037575405, + "grad_norm": 0.6391937136650085, + "learning_rate": 4.226167499617684e-05, + "loss": 0.1215, + "step": 52740 + }, + { + "epoch": 1.9169271022603387, + "grad_norm": 0.9662737250328064, + "learning_rate": 4.225792085870881e-05, + "loss": 0.1522, + "step": 52750 + }, + { + "epoch": 1.917290500763137, + "grad_norm": 1.3882033824920654, + "learning_rate": 4.225416597765838e-05, + "loss": 0.1234, + "step": 52760 + }, + { + "epoch": 1.917653899265935, + "grad_norm": 1.2721084356307983, + "learning_rate": 4.225041035318732e-05, + "loss": 0.1645, + "step": 52770 + }, + { + "epoch": 1.918017297768733, + "grad_norm": 1.06475830078125, + "learning_rate": 4.224665398545745e-05, + "loss": 0.1278, + "step": 52780 + }, + { + "epoch": 1.9183806962715315, + "grad_norm": 1.8718911409378052, + "learning_rate": 4.224289687463063e-05, + "loss": 0.1081, + "step": 52790 + }, + { + "epoch": 1.9187440947743295, + "grad_norm": 0.7336494326591492, + "learning_rate": 4.223913902086874e-05, + "loss": 0.1398, + "step": 52800 + }, + { + "epoch": 1.9187440947743295, + "eval_loss": 0.33621227741241455, + "eval_runtime": 180.2286, + "eval_samples_per_second": 41.137, + "eval_steps_per_second": 5.143, + "eval_wer": 0.16169876740428776, + "step": 52800 + }, + { + "epoch": 1.9191074932771277, + "grad_norm": 1.407049298286438, + "learning_rate": 4.223538042433368e-05, + "loss": 0.1219, + "step": 52810 + }, + { + "epoch": 1.919470891779926, + "grad_norm": 0.27716466784477234, + "learning_rate": 4.22316210851874e-05, + "loss": 0.1303, + "step": 52820 + }, + { + "epoch": 1.919834290282724, + "grad_norm": 1.0262128114700317, + "learning_rate": 4.222786100359188e-05, + "loss": 0.1053, + "step": 52830 + }, + { + "epoch": 1.9201976887855223, + "grad_norm": 0.6818228960037231, + "learning_rate": 4.222410017970913e-05, + "loss": 0.09, + "step": 52840 + }, + { + "epoch": 1.9205610872883203, + "grad_norm": 0.6761994361877441, + "learning_rate": 4.2220338613701185e-05, + "loss": 0.108, + "step": 52850 + }, + { + "epoch": 1.9209244857911185, + "grad_norm": 4.313242435455322, + "learning_rate": 4.2216576305730104e-05, + "loss": 0.1262, + "step": 52860 + }, + { + "epoch": 1.9212878842939167, + "grad_norm": 0.6098904609680176, + "learning_rate": 4.221281325595803e-05, + "loss": 0.2005, + "step": 52870 + }, + { + "epoch": 1.921651282796715, + "grad_norm": 1.2213470935821533, + "learning_rate": 4.2209049464547064e-05, + "loss": 0.1088, + "step": 52880 + }, + { + "epoch": 1.9220146812995131, + "grad_norm": 0.705827534198761, + "learning_rate": 4.220528493165938e-05, + "loss": 0.1207, + "step": 52890 + }, + { + "epoch": 1.9223780798023111, + "grad_norm": 0.8161284327507019, + "learning_rate": 4.22015196574572e-05, + "loss": 0.1855, + "step": 52900 + }, + { + "epoch": 1.9227414783051093, + "grad_norm": 0.7296738028526306, + "learning_rate": 4.2197753642102734e-05, + "loss": 0.1224, + "step": 52910 + }, + { + "epoch": 1.9231048768079075, + "grad_norm": 1.1311039924621582, + "learning_rate": 4.2193986885758255e-05, + "loss": 0.1331, + "step": 52920 + }, + { + "epoch": 1.9234682753107057, + "grad_norm": 1.0949995517730713, + "learning_rate": 4.219021938858605e-05, + "loss": 0.1172, + "step": 52930 + }, + { + "epoch": 1.923831673813504, + "grad_norm": 2.3175034523010254, + "learning_rate": 4.2186451150748465e-05, + "loss": 0.1061, + "step": 52940 + }, + { + "epoch": 1.924195072316302, + "grad_norm": 0.4657406806945801, + "learning_rate": 4.2182682172407853e-05, + "loss": 0.1099, + "step": 52950 + }, + { + "epoch": 1.9245584708191004, + "grad_norm": 1.0153266191482544, + "learning_rate": 4.2178912453726585e-05, + "loss": 0.1028, + "step": 52960 + }, + { + "epoch": 1.9249218693218983, + "grad_norm": 0.48774194717407227, + "learning_rate": 4.217514199486712e-05, + "loss": 0.1196, + "step": 52970 + }, + { + "epoch": 1.9252852678246966, + "grad_norm": 0.5909627079963684, + "learning_rate": 4.2171370795991886e-05, + "loss": 3.5511, + "step": 52980 + }, + { + "epoch": 1.9256486663274948, + "grad_norm": 1.0662988424301147, + "learning_rate": 4.216759885726338e-05, + "loss": 0.2095, + "step": 52990 + }, + { + "epoch": 1.9260120648302927, + "grad_norm": 1.2562239170074463, + "learning_rate": 4.2163826178844124e-05, + "loss": 0.6666, + "step": 53000 + }, + { + "epoch": 1.9263754633330912, + "grad_norm": 0.6966450214385986, + "learning_rate": 4.216005276089666e-05, + "loss": 0.1059, + "step": 53010 + }, + { + "epoch": 1.9267388618358892, + "grad_norm": 0.7130870819091797, + "learning_rate": 4.215627860358359e-05, + "loss": 0.1822, + "step": 53020 + }, + { + "epoch": 1.9271022603386874, + "grad_norm": 0.8667415380477905, + "learning_rate": 4.215250370706752e-05, + "loss": 0.1297, + "step": 53030 + }, + { + "epoch": 1.9274656588414856, + "grad_norm": 0.8106217384338379, + "learning_rate": 4.214872807151108e-05, + "loss": 0.1198, + "step": 53040 + }, + { + "epoch": 1.9278290573442838, + "grad_norm": 0.6625964045524597, + "learning_rate": 4.214495169707697e-05, + "loss": 0.1554, + "step": 53050 + }, + { + "epoch": 1.928192455847082, + "grad_norm": 1.328296422958374, + "learning_rate": 4.214117458392789e-05, + "loss": 0.1275, + "step": 53060 + }, + { + "epoch": 1.92855585434988, + "grad_norm": 0.5741416811943054, + "learning_rate": 4.213739673222659e-05, + "loss": 0.122, + "step": 53070 + }, + { + "epoch": 1.9289192528526784, + "grad_norm": 0.6884883046150208, + "learning_rate": 4.213361814213584e-05, + "loss": 0.2229, + "step": 53080 + }, + { + "epoch": 1.9292826513554764, + "grad_norm": 1.364357590675354, + "learning_rate": 4.212983881381844e-05, + "loss": 0.1169, + "step": 53090 + }, + { + "epoch": 1.9296460498582746, + "grad_norm": 1.6540427207946777, + "learning_rate": 4.2126058747437236e-05, + "loss": 0.1273, + "step": 53100 + }, + { + "epoch": 1.9300094483610728, + "grad_norm": 1.8838560581207275, + "learning_rate": 4.21222779431551e-05, + "loss": 0.1395, + "step": 53110 + }, + { + "epoch": 1.9303728468638708, + "grad_norm": 1.0048059225082397, + "learning_rate": 4.2118496401134925e-05, + "loss": 0.1516, + "step": 53120 + }, + { + "epoch": 1.9307362453666692, + "grad_norm": 1.0288422107696533, + "learning_rate": 4.211471412153965e-05, + "loss": 0.114, + "step": 53130 + }, + { + "epoch": 1.9310996438694672, + "grad_norm": 0.8214828968048096, + "learning_rate": 4.2110931104532236e-05, + "loss": 0.1154, + "step": 53140 + }, + { + "epoch": 1.9314630423722654, + "grad_norm": 1.7350075244903564, + "learning_rate": 4.210714735027568e-05, + "loss": 0.1351, + "step": 53150 + }, + { + "epoch": 1.9318264408750636, + "grad_norm": 1.1846505403518677, + "learning_rate": 4.210336285893302e-05, + "loss": 0.1213, + "step": 53160 + }, + { + "epoch": 1.9321898393778618, + "grad_norm": 0.36710694432258606, + "learning_rate": 4.2099577630667295e-05, + "loss": 0.1328, + "step": 53170 + }, + { + "epoch": 1.93255323788066, + "grad_norm": 1.5242916345596313, + "learning_rate": 4.209579166564162e-05, + "loss": 0.1068, + "step": 53180 + }, + { + "epoch": 1.932916636383458, + "grad_norm": 0.5341594219207764, + "learning_rate": 4.209200496401911e-05, + "loss": 0.1132, + "step": 53190 + }, + { + "epoch": 1.9332800348862562, + "grad_norm": 1.32260000705719, + "learning_rate": 4.2088217525962914e-05, + "loss": 0.2021, + "step": 53200 + }, + { + "epoch": 1.9336434333890544, + "grad_norm": 1.7666555643081665, + "learning_rate": 4.208442935163622e-05, + "loss": 0.1199, + "step": 53210 + }, + { + "epoch": 1.9340068318918526, + "grad_norm": 0.7060844302177429, + "learning_rate": 4.2080640441202265e-05, + "loss": 0.2058, + "step": 53220 + }, + { + "epoch": 1.9343702303946508, + "grad_norm": 0.6064701676368713, + "learning_rate": 4.207685079482428e-05, + "loss": 0.1163, + "step": 53230 + }, + { + "epoch": 1.9347336288974488, + "grad_norm": 0.8445596694946289, + "learning_rate": 4.2073060412665554e-05, + "loss": 0.1094, + "step": 53240 + }, + { + "epoch": 1.9350970274002472, + "grad_norm": 1.8160717487335205, + "learning_rate": 4.20692692948894e-05, + "loss": 0.1421, + "step": 53250 + }, + { + "epoch": 1.9354604259030452, + "grad_norm": 0.8465480208396912, + "learning_rate": 4.206547744165918e-05, + "loss": 0.1151, + "step": 53260 + }, + { + "epoch": 1.9358238244058434, + "grad_norm": 0.4361567795276642, + "learning_rate": 4.206168485313823e-05, + "loss": 0.1343, + "step": 53270 + }, + { + "epoch": 1.9361872229086416, + "grad_norm": 0.4682723581790924, + "learning_rate": 4.2057891529490004e-05, + "loss": 0.2349, + "step": 53280 + }, + { + "epoch": 1.9365506214114396, + "grad_norm": 0.7894558310508728, + "learning_rate": 4.205409747087792e-05, + "loss": 0.1211, + "step": 53290 + }, + { + "epoch": 1.936914019914238, + "grad_norm": 1.9797241687774658, + "learning_rate": 4.205030267746545e-05, + "loss": 0.1403, + "step": 53300 + }, + { + "epoch": 1.937277418417036, + "grad_norm": 0.7554487586021423, + "learning_rate": 4.20465071494161e-05, + "loss": 0.133, + "step": 53310 + }, + { + "epoch": 1.9376408169198343, + "grad_norm": 0.5056400895118713, + "learning_rate": 4.2042710886893414e-05, + "loss": 0.1429, + "step": 53320 + }, + { + "epoch": 1.9380042154226325, + "grad_norm": 4.12957763671875, + "learning_rate": 4.203891389006096e-05, + "loss": 0.1154, + "step": 53330 + }, + { + "epoch": 1.9383676139254307, + "grad_norm": 0.7138916850090027, + "learning_rate": 4.203511615908232e-05, + "loss": 0.1235, + "step": 53340 + }, + { + "epoch": 1.9387310124282289, + "grad_norm": 0.6101375818252563, + "learning_rate": 4.2031317694121144e-05, + "loss": 0.1015, + "step": 53350 + }, + { + "epoch": 1.9390944109310269, + "grad_norm": 0.9244548082351685, + "learning_rate": 4.202751849534108e-05, + "loss": 0.104, + "step": 53360 + }, + { + "epoch": 1.9394578094338253, + "grad_norm": 0.38235339522361755, + "learning_rate": 4.202371856290583e-05, + "loss": 0.2562, + "step": 53370 + }, + { + "epoch": 1.9398212079366233, + "grad_norm": 1.2204453945159912, + "learning_rate": 4.201991789697912e-05, + "loss": 0.1074, + "step": 53380 + }, + { + "epoch": 1.9401846064394215, + "grad_norm": 0.9025306105613708, + "learning_rate": 4.2016116497724715e-05, + "loss": 0.1154, + "step": 53390 + }, + { + "epoch": 1.9405480049422197, + "grad_norm": 0.6132228970527649, + "learning_rate": 4.201231436530637e-05, + "loss": 0.1332, + "step": 53400 + }, + { + "epoch": 1.9405480049422197, + "eval_loss": 0.34726399183273315, + "eval_runtime": 180.5053, + "eval_samples_per_second": 41.074, + "eval_steps_per_second": 5.136, + "eval_wer": 0.16638226804872294, + "step": 53400 + }, + { + "epoch": 1.9409114034450177, + "grad_norm": 1.0227421522140503, + "learning_rate": 4.2008511499887945e-05, + "loss": 0.1042, + "step": 53410 + }, + { + "epoch": 1.941274801947816, + "grad_norm": 1.9135148525238037, + "learning_rate": 4.2004707901633274e-05, + "loss": 0.1953, + "step": 53420 + }, + { + "epoch": 1.941638200450614, + "grad_norm": 1.0358216762542725, + "learning_rate": 4.200090357070624e-05, + "loss": 0.1029, + "step": 53430 + }, + { + "epoch": 1.9420015989534123, + "grad_norm": 0.9207081198692322, + "learning_rate": 4.199709850727076e-05, + "loss": 0.1171, + "step": 53440 + }, + { + "epoch": 1.9423649974562105, + "grad_norm": 0.558474600315094, + "learning_rate": 4.1993292711490784e-05, + "loss": 0.1185, + "step": 53450 + }, + { + "epoch": 1.9427283959590087, + "grad_norm": 1.7064687013626099, + "learning_rate": 4.198948618353029e-05, + "loss": 0.1429, + "step": 53460 + }, + { + "epoch": 1.943091794461807, + "grad_norm": 0.4840683341026306, + "learning_rate": 4.198567892355328e-05, + "loss": 0.1566, + "step": 53470 + }, + { + "epoch": 1.943455192964605, + "grad_norm": 2.152949810028076, + "learning_rate": 4.19818709317238e-05, + "loss": 0.11, + "step": 53480 + }, + { + "epoch": 1.943818591467403, + "grad_norm": 1.0784387588500977, + "learning_rate": 4.197806220820592e-05, + "loss": 0.1211, + "step": 53490 + }, + { + "epoch": 1.9441819899702013, + "grad_norm": 0.9039841890335083, + "learning_rate": 4.197425275316376e-05, + "loss": 0.1167, + "step": 53500 + }, + { + "epoch": 1.9445453884729995, + "grad_norm": 0.8237749934196472, + "learning_rate": 4.1970442566761436e-05, + "loss": 0.1443, + "step": 53510 + }, + { + "epoch": 1.9449087869757977, + "grad_norm": 0.5178882479667664, + "learning_rate": 4.196663164916313e-05, + "loss": 0.1251, + "step": 53520 + }, + { + "epoch": 1.9452721854785957, + "grad_norm": 0.883787989616394, + "learning_rate": 4.196282000053301e-05, + "loss": 0.1078, + "step": 53530 + }, + { + "epoch": 1.9456355839813941, + "grad_norm": 0.6376329064369202, + "learning_rate": 4.195900762103535e-05, + "loss": 0.1286, + "step": 53540 + }, + { + "epoch": 1.9459989824841921, + "grad_norm": 1.3312426805496216, + "learning_rate": 4.1955194510834394e-05, + "loss": 0.1421, + "step": 53550 + }, + { + "epoch": 1.9463623809869903, + "grad_norm": 0.8462713360786438, + "learning_rate": 4.1951380670094424e-05, + "loss": 0.1183, + "step": 53560 + }, + { + "epoch": 1.9467257794897885, + "grad_norm": 0.4300178587436676, + "learning_rate": 4.194756609897978e-05, + "loss": 0.11, + "step": 53570 + }, + { + "epoch": 1.9470891779925865, + "grad_norm": 0.5355455875396729, + "learning_rate": 4.1943750797654816e-05, + "loss": 0.1197, + "step": 53580 + }, + { + "epoch": 1.947452576495385, + "grad_norm": 0.8750283122062683, + "learning_rate": 4.193993476628391e-05, + "loss": 0.1024, + "step": 53590 + }, + { + "epoch": 1.947815974998183, + "grad_norm": 1.2552978992462158, + "learning_rate": 4.193611800503148e-05, + "loss": 0.152, + "step": 53600 + }, + { + "epoch": 1.9481793735009811, + "grad_norm": 0.8852622509002686, + "learning_rate": 4.1932300514062e-05, + "loss": 0.1077, + "step": 53610 + }, + { + "epoch": 1.9485427720037793, + "grad_norm": 0.6841835379600525, + "learning_rate": 4.192848229353992e-05, + "loss": 0.1281, + "step": 53620 + }, + { + "epoch": 1.9489061705065776, + "grad_norm": 1.0521607398986816, + "learning_rate": 4.192466334362978e-05, + "loss": 0.1136, + "step": 53630 + }, + { + "epoch": 1.9492695690093758, + "grad_norm": 4.119276523590088, + "learning_rate": 4.192084366449612e-05, + "loss": 0.0939, + "step": 53640 + }, + { + "epoch": 1.9496329675121737, + "grad_norm": 0.8290958404541016, + "learning_rate": 4.19170232563035e-05, + "loss": 0.1625, + "step": 53650 + }, + { + "epoch": 1.9499963660149722, + "grad_norm": 0.6359632015228271, + "learning_rate": 4.191320211921654e-05, + "loss": 0.1208, + "step": 53660 + }, + { + "epoch": 1.9503597645177702, + "grad_norm": 0.699052631855011, + "learning_rate": 4.1909380253399875e-05, + "loss": 0.1248, + "step": 53670 + }, + { + "epoch": 1.9507231630205684, + "grad_norm": 17.17115592956543, + "learning_rate": 4.190555765901819e-05, + "loss": 0.3458, + "step": 53680 + }, + { + "epoch": 1.9510865615233666, + "grad_norm": 3.899052858352661, + "learning_rate": 4.190173433623618e-05, + "loss": 0.1144, + "step": 53690 + }, + { + "epoch": 1.9514499600261646, + "grad_norm": 1.1907508373260498, + "learning_rate": 4.1897910285218556e-05, + "loss": 0.152, + "step": 53700 + }, + { + "epoch": 1.951813358528963, + "grad_norm": 0.7645424008369446, + "learning_rate": 4.189408550613011e-05, + "loss": 0.1258, + "step": 53710 + }, + { + "epoch": 1.952176757031761, + "grad_norm": 0.2523237466812134, + "learning_rate": 4.1890259999135625e-05, + "loss": 0.1213, + "step": 53720 + }, + { + "epoch": 1.9525401555345592, + "grad_norm": 1.3578497171401978, + "learning_rate": 4.188643376439993e-05, + "loss": 1.5489, + "step": 53730 + }, + { + "epoch": 1.9529035540373574, + "grad_norm": 0.6249386072158813, + "learning_rate": 4.1882606802087896e-05, + "loss": 0.1699, + "step": 53740 + }, + { + "epoch": 1.9532669525401556, + "grad_norm": 0.9699862599372864, + "learning_rate": 4.1878779112364394e-05, + "loss": 0.1724, + "step": 53750 + }, + { + "epoch": 1.9536303510429538, + "grad_norm": 1.3478792905807495, + "learning_rate": 4.187495069539437e-05, + "loss": 0.133, + "step": 53760 + }, + { + "epoch": 1.9539937495457518, + "grad_norm": 0.6324986815452576, + "learning_rate": 4.187112155134275e-05, + "loss": 0.1354, + "step": 53770 + }, + { + "epoch": 1.95435714804855, + "grad_norm": 2.009544610977173, + "learning_rate": 4.186729168037453e-05, + "loss": 0.1347, + "step": 53780 + }, + { + "epoch": 1.9547205465513482, + "grad_norm": 0.6510929465293884, + "learning_rate": 4.186346108265472e-05, + "loss": 0.1227, + "step": 53790 + }, + { + "epoch": 1.9550839450541464, + "grad_norm": 1.5079245567321777, + "learning_rate": 4.185962975834838e-05, + "loss": 0.1347, + "step": 53800 + }, + { + "epoch": 1.9554473435569446, + "grad_norm": 3.214449882507324, + "learning_rate": 4.1855797707620586e-05, + "loss": 0.1138, + "step": 53810 + }, + { + "epoch": 1.9558107420597426, + "grad_norm": 0.7995330095291138, + "learning_rate": 4.1851964930636434e-05, + "loss": 0.1522, + "step": 53820 + }, + { + "epoch": 1.956174140562541, + "grad_norm": 1.6713122129440308, + "learning_rate": 4.184813142756108e-05, + "loss": 0.125, + "step": 53830 + }, + { + "epoch": 1.956537539065339, + "grad_norm": 0.7136033177375793, + "learning_rate": 4.184429719855968e-05, + "loss": 0.1267, + "step": 53840 + }, + { + "epoch": 1.9569009375681372, + "grad_norm": 0.5580174922943115, + "learning_rate": 4.1840462243797444e-05, + "loss": 0.1126, + "step": 53850 + }, + { + "epoch": 1.9572643360709354, + "grad_norm": 0.8671419024467468, + "learning_rate": 4.183662656343961e-05, + "loss": 0.1209, + "step": 53860 + }, + { + "epoch": 1.9576277345737334, + "grad_norm": 0.6624314188957214, + "learning_rate": 4.183279015765145e-05, + "loss": 0.1397, + "step": 53870 + }, + { + "epoch": 1.9579911330765318, + "grad_norm": 1.4401901960372925, + "learning_rate": 4.182895302659825e-05, + "loss": 0.0894, + "step": 53880 + }, + { + "epoch": 1.9583545315793298, + "grad_norm": 0.9187797904014587, + "learning_rate": 4.182511517044534e-05, + "loss": 0.127, + "step": 53890 + }, + { + "epoch": 1.958717930082128, + "grad_norm": 1.2426072359085083, + "learning_rate": 4.1821276589358084e-05, + "loss": 0.1381, + "step": 53900 + }, + { + "epoch": 1.9590813285849262, + "grad_norm": 0.8035231828689575, + "learning_rate": 4.1817437283501865e-05, + "loss": 0.0953, + "step": 53910 + }, + { + "epoch": 1.9594447270877244, + "grad_norm": 0.32439205050468445, + "learning_rate": 4.1813597253042115e-05, + "loss": 0.138, + "step": 53920 + }, + { + "epoch": 1.9598081255905226, + "grad_norm": 1.0287327766418457, + "learning_rate": 4.180975649814428e-05, + "loss": 3.1039, + "step": 53930 + }, + { + "epoch": 1.9601715240933206, + "grad_norm": 1.3450182676315308, + "learning_rate": 4.180591501897384e-05, + "loss": 0.1081, + "step": 53940 + }, + { + "epoch": 1.960534922596119, + "grad_norm": 7.1403961181640625, + "learning_rate": 4.180207281569633e-05, + "loss": 0.1484, + "step": 53950 + }, + { + "epoch": 1.960898321098917, + "grad_norm": 1.2163225412368774, + "learning_rate": 4.179822988847728e-05, + "loss": 0.1041, + "step": 53960 + }, + { + "epoch": 1.9612617196017152, + "grad_norm": 0.9922796487808228, + "learning_rate": 4.179438623748228e-05, + "loss": 0.1343, + "step": 53970 + }, + { + "epoch": 1.9616251181045135, + "grad_norm": 2.245447874069214, + "learning_rate": 4.1790541862876906e-05, + "loss": 0.1015, + "step": 53980 + }, + { + "epoch": 1.9619885166073114, + "grad_norm": 2.284679651260376, + "learning_rate": 4.178669676482685e-05, + "loss": 0.0913, + "step": 53990 + }, + { + "epoch": 1.9623519151101099, + "grad_norm": 0.9692349433898926, + "learning_rate": 4.178285094349775e-05, + "loss": 0.1282, + "step": 54000 + }, + { + "epoch": 1.9623519151101099, + "eval_loss": 0.3314037322998047, + "eval_runtime": 180.606, + "eval_samples_per_second": 41.051, + "eval_steps_per_second": 5.133, + "eval_wer": 0.16505709150979359, + "step": 54000 + }, + { + "epoch": 1.9627153136129079, + "grad_norm": 0.4108816683292389, + "learning_rate": 4.177900439905531e-05, + "loss": 0.1272, + "step": 54010 + }, + { + "epoch": 1.963078712115706, + "grad_norm": 0.3358526825904846, + "learning_rate": 4.1775157131665276e-05, + "loss": 0.1453, + "step": 54020 + }, + { + "epoch": 1.9634421106185043, + "grad_norm": 1.476314663887024, + "learning_rate": 4.177130914149341e-05, + "loss": 0.1162, + "step": 54030 + }, + { + "epoch": 1.9638055091213025, + "grad_norm": 0.7912114262580872, + "learning_rate": 4.17674604287055e-05, + "loss": 0.1056, + "step": 54040 + }, + { + "epoch": 1.9641689076241007, + "grad_norm": 0.4801596403121948, + "learning_rate": 4.176361099346738e-05, + "loss": 0.1478, + "step": 54050 + }, + { + "epoch": 1.9645323061268987, + "grad_norm": 0.7710531949996948, + "learning_rate": 4.175976083594491e-05, + "loss": 0.1131, + "step": 54060 + }, + { + "epoch": 1.9648957046296969, + "grad_norm": 0.6709341406822205, + "learning_rate": 4.175590995630398e-05, + "loss": 0.1586, + "step": 54070 + }, + { + "epoch": 1.965259103132495, + "grad_norm": 1.3941307067871094, + "learning_rate": 4.17520583547105e-05, + "loss": 0.1131, + "step": 54080 + }, + { + "epoch": 1.9656225016352933, + "grad_norm": 0.759842038154602, + "learning_rate": 4.174820603133043e-05, + "loss": 0.0985, + "step": 54090 + }, + { + "epoch": 1.9659859001380915, + "grad_norm": 0.9153608679771423, + "learning_rate": 4.174435298632976e-05, + "loss": 0.1547, + "step": 54100 + }, + { + "epoch": 1.9663492986408895, + "grad_norm": 1.4363652467727661, + "learning_rate": 4.174049921987449e-05, + "loss": 0.1127, + "step": 54110 + }, + { + "epoch": 1.966712697143688, + "grad_norm": 0.7368317246437073, + "learning_rate": 4.173664473213067e-05, + "loss": 0.1302, + "step": 54120 + }, + { + "epoch": 1.967076095646486, + "grad_norm": 1.2740521430969238, + "learning_rate": 4.173278952326438e-05, + "loss": 0.1294, + "step": 54130 + }, + { + "epoch": 1.967439494149284, + "grad_norm": 2.7798774242401123, + "learning_rate": 4.1728933593441735e-05, + "loss": 0.1011, + "step": 54140 + }, + { + "epoch": 1.9678028926520823, + "grad_norm": 1.9629179239273071, + "learning_rate": 4.172507694282885e-05, + "loss": 0.3149, + "step": 54150 + }, + { + "epoch": 1.9681662911548803, + "grad_norm": 3.5863332748413086, + "learning_rate": 4.1721219571591915e-05, + "loss": 0.1323, + "step": 54160 + }, + { + "epoch": 1.9685296896576787, + "grad_norm": 0.29740679264068604, + "learning_rate": 4.1717361479897116e-05, + "loss": 0.1725, + "step": 54170 + }, + { + "epoch": 1.9688930881604767, + "grad_norm": 1.0469319820404053, + "learning_rate": 4.17135026679107e-05, + "loss": 0.1138, + "step": 54180 + }, + { + "epoch": 1.969256486663275, + "grad_norm": 0.5336177945137024, + "learning_rate": 4.170964313579891e-05, + "loss": 0.2207, + "step": 54190 + }, + { + "epoch": 1.9696198851660731, + "grad_norm": 0.973862886428833, + "learning_rate": 4.1705782883728055e-05, + "loss": 0.1328, + "step": 54200 + }, + { + "epoch": 1.9699832836688713, + "grad_norm": 0.8640954494476318, + "learning_rate": 4.170192191186446e-05, + "loss": 0.1315, + "step": 54210 + }, + { + "epoch": 1.9703466821716695, + "grad_norm": 0.47578397393226624, + "learning_rate": 4.169806022037447e-05, + "loss": 0.1823, + "step": 54220 + }, + { + "epoch": 1.9707100806744675, + "grad_norm": 1.4527409076690674, + "learning_rate": 4.169419780942448e-05, + "loss": 2.5822, + "step": 54230 + }, + { + "epoch": 1.971073479177266, + "grad_norm": 0.48623302578926086, + "learning_rate": 4.1690334679180896e-05, + "loss": 0.1093, + "step": 54240 + }, + { + "epoch": 1.971436877680064, + "grad_norm": 1.1767234802246094, + "learning_rate": 4.1686470829810185e-05, + "loss": 0.1329, + "step": 54250 + }, + { + "epoch": 1.9718002761828621, + "grad_norm": 1.128841519355774, + "learning_rate": 4.1682606261478816e-05, + "loss": 0.1102, + "step": 54260 + }, + { + "epoch": 1.9721636746856603, + "grad_norm": 1.4685746431350708, + "learning_rate": 4.16787409743533e-05, + "loss": 0.14, + "step": 54270 + }, + { + "epoch": 1.9725270731884583, + "grad_norm": 0.9918948411941528, + "learning_rate": 4.167487496860018e-05, + "loss": 0.1093, + "step": 54280 + }, + { + "epoch": 1.9728904716912568, + "grad_norm": 0.5849924683570862, + "learning_rate": 4.167100824438602e-05, + "loss": 0.3633, + "step": 54290 + }, + { + "epoch": 1.9732538701940547, + "grad_norm": 1.0083026885986328, + "learning_rate": 4.1667140801877433e-05, + "loss": 0.3471, + "step": 54300 + }, + { + "epoch": 1.973617268696853, + "grad_norm": 4.210540771484375, + "learning_rate": 4.1663272641241056e-05, + "loss": 0.111, + "step": 54310 + }, + { + "epoch": 1.9739806671996512, + "grad_norm": 0.47457021474838257, + "learning_rate": 4.165940376264354e-05, + "loss": 0.1304, + "step": 54320 + }, + { + "epoch": 1.9743440657024494, + "grad_norm": 0.6626879572868347, + "learning_rate": 4.1655534166251596e-05, + "loss": 0.1362, + "step": 54330 + }, + { + "epoch": 1.9747074642052476, + "grad_norm": 1.0823551416397095, + "learning_rate": 4.1651663852231946e-05, + "loss": 0.1009, + "step": 54340 + }, + { + "epoch": 1.9750708627080455, + "grad_norm": 1.6723361015319824, + "learning_rate": 4.164779282075134e-05, + "loss": 0.1539, + "step": 54350 + }, + { + "epoch": 1.9754342612108438, + "grad_norm": 1.5842360258102417, + "learning_rate": 4.1643921071976584e-05, + "loss": 0.1342, + "step": 54360 + }, + { + "epoch": 1.975797659713642, + "grad_norm": 1.055336594581604, + "learning_rate": 4.164004860607448e-05, + "loss": 0.176, + "step": 54370 + }, + { + "epoch": 1.9761610582164402, + "grad_norm": 0.81571364402771, + "learning_rate": 4.16361754232119e-05, + "loss": 0.1187, + "step": 54380 + }, + { + "epoch": 1.9765244567192384, + "grad_norm": 1.0346819162368774, + "learning_rate": 4.1632301523555693e-05, + "loss": 0.1255, + "step": 54390 + }, + { + "epoch": 1.9768878552220364, + "grad_norm": 1.1211163997650146, + "learning_rate": 4.162842690727281e-05, + "loss": 0.1165, + "step": 54400 + }, + { + "epoch": 1.9772512537248348, + "grad_norm": 0.5160552263259888, + "learning_rate": 4.162455157453017e-05, + "loss": 0.1393, + "step": 54410 + }, + { + "epoch": 1.9776146522276328, + "grad_norm": 0.767784833908081, + "learning_rate": 4.1620675525494746e-05, + "loss": 0.1552, + "step": 54420 + }, + { + "epoch": 1.977978050730431, + "grad_norm": 1.101317286491394, + "learning_rate": 4.1616798760333554e-05, + "loss": 0.1182, + "step": 54430 + }, + { + "epoch": 1.9783414492332292, + "grad_norm": 0.7279396653175354, + "learning_rate": 4.161292127921363e-05, + "loss": 0.12, + "step": 54440 + }, + { + "epoch": 1.9787048477360272, + "grad_norm": 1.5998153686523438, + "learning_rate": 4.1609043082302036e-05, + "loss": 0.1335, + "step": 54450 + }, + { + "epoch": 1.9790682462388256, + "grad_norm": 0.8245583772659302, + "learning_rate": 4.160516416976587e-05, + "loss": 0.1249, + "step": 54460 + }, + { + "epoch": 1.9794316447416236, + "grad_norm": 0.5749397277832031, + "learning_rate": 4.1601284541772255e-05, + "loss": 0.1939, + "step": 54470 + }, + { + "epoch": 1.9797950432444218, + "grad_norm": 0.7786006927490234, + "learning_rate": 4.159740419848837e-05, + "loss": 1.8059, + "step": 54480 + }, + { + "epoch": 1.98015844174722, + "grad_norm": 0.41233259439468384, + "learning_rate": 4.159352314008138e-05, + "loss": 0.1208, + "step": 54490 + }, + { + "epoch": 1.9805218402500182, + "grad_norm": 0.5091323256492615, + "learning_rate": 4.158964136671852e-05, + "loss": 0.1279, + "step": 54500 + }, + { + "epoch": 1.9808852387528164, + "grad_norm": 4.300207138061523, + "learning_rate": 4.158575887856704e-05, + "loss": 0.1744, + "step": 54510 + }, + { + "epoch": 1.9812486372556144, + "grad_norm": 0.7447227239608765, + "learning_rate": 4.1581875675794226e-05, + "loss": 0.1652, + "step": 54520 + }, + { + "epoch": 1.9816120357584128, + "grad_norm": 0.6846696734428406, + "learning_rate": 4.157799175856738e-05, + "loss": 0.1027, + "step": 54530 + }, + { + "epoch": 1.9819754342612108, + "grad_norm": 0.8642467260360718, + "learning_rate": 4.157410712705386e-05, + "loss": 0.1165, + "step": 54540 + }, + { + "epoch": 1.982338832764009, + "grad_norm": 0.8407902121543884, + "learning_rate": 4.157022178142104e-05, + "loss": 0.1623, + "step": 54550 + }, + { + "epoch": 1.9827022312668072, + "grad_norm": 0.8839777708053589, + "learning_rate": 4.156633572183631e-05, + "loss": 0.1131, + "step": 54560 + }, + { + "epoch": 1.9830656297696052, + "grad_norm": 1.39069402217865, + "learning_rate": 4.1562448948467126e-05, + "loss": 0.1906, + "step": 54570 + }, + { + "epoch": 1.9834290282724036, + "grad_norm": 2.1196155548095703, + "learning_rate": 4.1558561461480936e-05, + "loss": 0.1261, + "step": 54580 + }, + { + "epoch": 1.9837924267752016, + "grad_norm": 1.092934250831604, + "learning_rate": 4.155467326104525e-05, + "loss": 0.1029, + "step": 54590 + }, + { + "epoch": 1.9841558252779998, + "grad_norm": 0.7902958989143372, + "learning_rate": 4.1550784347327607e-05, + "loss": 0.1159, + "step": 54600 + }, + { + "epoch": 1.9841558252779998, + "eval_loss": 0.3433511555194855, + "eval_runtime": 180.0868, + "eval_samples_per_second": 41.169, + "eval_steps_per_second": 5.148, + "eval_wer": 0.16009221776462687, + "step": 54600 + }, + { + "epoch": 1.984519223780798, + "grad_norm": 1.3083094358444214, + "learning_rate": 4.1546894720495546e-05, + "loss": 0.1172, + "step": 54610 + }, + { + "epoch": 1.9848826222835962, + "grad_norm": 1.9061583280563354, + "learning_rate": 4.154300438071666e-05, + "loss": 0.1335, + "step": 54620 + }, + { + "epoch": 1.9852460207863944, + "grad_norm": 1.9469786882400513, + "learning_rate": 4.153911332815859e-05, + "loss": 0.1014, + "step": 54630 + }, + { + "epoch": 1.9856094192891924, + "grad_norm": 6.232102394104004, + "learning_rate": 4.153522156298896e-05, + "loss": 0.1216, + "step": 54640 + }, + { + "epoch": 1.9859728177919909, + "grad_norm": 0.6339765191078186, + "learning_rate": 4.153132908537547e-05, + "loss": 0.1236, + "step": 54650 + }, + { + "epoch": 1.9863362162947888, + "grad_norm": 0.9476169943809509, + "learning_rate": 4.152743589548582e-05, + "loss": 0.0962, + "step": 54660 + }, + { + "epoch": 1.986699614797587, + "grad_norm": 1.0691879987716675, + "learning_rate": 4.152354199348777e-05, + "loss": 0.3789, + "step": 54670 + }, + { + "epoch": 1.9870630133003853, + "grad_norm": 0.9338876605033875, + "learning_rate": 4.1519647379549084e-05, + "loss": 0.0914, + "step": 54680 + }, + { + "epoch": 1.9874264118031832, + "grad_norm": 0.6754772663116455, + "learning_rate": 4.151575205383758e-05, + "loss": 0.1044, + "step": 54690 + }, + { + "epoch": 1.9877898103059817, + "grad_norm": 0.6961863040924072, + "learning_rate": 4.151185601652107e-05, + "loss": 0.1322, + "step": 54700 + }, + { + "epoch": 1.9881532088087797, + "grad_norm": 1.1425034999847412, + "learning_rate": 4.150795926776744e-05, + "loss": 0.1381, + "step": 54710 + }, + { + "epoch": 1.9885166073115779, + "grad_norm": 1.4080971479415894, + "learning_rate": 4.150406180774458e-05, + "loss": 0.1234, + "step": 54720 + }, + { + "epoch": 1.988880005814376, + "grad_norm": 0.7941197752952576, + "learning_rate": 4.1500163636620414e-05, + "loss": 0.0903, + "step": 54730 + }, + { + "epoch": 1.989243404317174, + "grad_norm": 0.8813301920890808, + "learning_rate": 4.149626475456291e-05, + "loss": 0.0965, + "step": 54740 + }, + { + "epoch": 1.9896068028199725, + "grad_norm": 0.727293848991394, + "learning_rate": 4.1492365161740054e-05, + "loss": 0.1269, + "step": 54750 + }, + { + "epoch": 1.9899702013227705, + "grad_norm": 3.7548305988311768, + "learning_rate": 4.148846485831986e-05, + "loss": 0.0992, + "step": 54760 + }, + { + "epoch": 1.9903335998255687, + "grad_norm": 0.5141910910606384, + "learning_rate": 4.148456384447037e-05, + "loss": 0.1275, + "step": 54770 + }, + { + "epoch": 1.9906969983283669, + "grad_norm": 0.5424654483795166, + "learning_rate": 4.1480662120359696e-05, + "loss": 0.6733, + "step": 54780 + }, + { + "epoch": 1.991060396831165, + "grad_norm": 0.8342083096504211, + "learning_rate": 4.147675968615592e-05, + "loss": 0.1126, + "step": 54790 + }, + { + "epoch": 1.9914237953339633, + "grad_norm": 0.3992403745651245, + "learning_rate": 4.147285654202719e-05, + "loss": 0.1589, + "step": 54800 + }, + { + "epoch": 1.9917871938367613, + "grad_norm": 0.9092950820922852, + "learning_rate": 4.146895268814169e-05, + "loss": 0.1217, + "step": 54810 + }, + { + "epoch": 1.9921505923395597, + "grad_norm": 0.4327254295349121, + "learning_rate": 4.1465048124667605e-05, + "loss": 0.1615, + "step": 54820 + }, + { + "epoch": 1.9925139908423577, + "grad_norm": 1.1109565496444702, + "learning_rate": 4.146114285177319e-05, + "loss": 0.1965, + "step": 54830 + }, + { + "epoch": 1.992877389345156, + "grad_norm": 10.526979446411133, + "learning_rate": 4.145723686962669e-05, + "loss": 0.2047, + "step": 54840 + }, + { + "epoch": 1.993240787847954, + "grad_norm": 1.4240983724594116, + "learning_rate": 4.1453330178396415e-05, + "loss": 0.1261, + "step": 54850 + }, + { + "epoch": 1.993604186350752, + "grad_norm": 3.436688184738159, + "learning_rate": 4.144942277825068e-05, + "loss": 0.1194, + "step": 54860 + }, + { + "epoch": 1.9939675848535505, + "grad_norm": 0.3504880666732788, + "learning_rate": 4.1445514669357846e-05, + "loss": 0.1269, + "step": 54870 + }, + { + "epoch": 1.9943309833563485, + "grad_norm": 1.8600322008132935, + "learning_rate": 4.14416058518863e-05, + "loss": 0.1202, + "step": 54880 + }, + { + "epoch": 1.9946943818591467, + "grad_norm": 0.7843186259269714, + "learning_rate": 4.1437696326004456e-05, + "loss": 0.1047, + "step": 54890 + }, + { + "epoch": 1.995057780361945, + "grad_norm": 1.593837022781372, + "learning_rate": 4.1433786091880765e-05, + "loss": 0.1269, + "step": 54900 + }, + { + "epoch": 1.9954211788647431, + "grad_norm": 1.9453426599502563, + "learning_rate": 4.14298751496837e-05, + "loss": 0.1054, + "step": 54910 + }, + { + "epoch": 1.9957845773675413, + "grad_norm": 0.7861382365226746, + "learning_rate": 4.142596349958177e-05, + "loss": 0.1467, + "step": 54920 + }, + { + "epoch": 1.9961479758703393, + "grad_norm": 0.9338520169258118, + "learning_rate": 4.142205114174352e-05, + "loss": 0.1014, + "step": 54930 + }, + { + "epoch": 1.9965113743731377, + "grad_norm": 3.8717129230499268, + "learning_rate": 4.1418138076337516e-05, + "loss": 0.1426, + "step": 54940 + }, + { + "epoch": 1.9968747728759357, + "grad_norm": 0.579759418964386, + "learning_rate": 4.141422430353236e-05, + "loss": 0.1154, + "step": 54950 + }, + { + "epoch": 1.997238171378734, + "grad_norm": 1.129913091659546, + "learning_rate": 4.141030982349668e-05, + "loss": 0.1019, + "step": 54960 + }, + { + "epoch": 1.9976015698815321, + "grad_norm": 0.5852164626121521, + "learning_rate": 4.140639463639913e-05, + "loss": 0.1719, + "step": 54970 + }, + { + "epoch": 1.9979649683843301, + "grad_norm": 3.1367127895355225, + "learning_rate": 4.1402478742408415e-05, + "loss": 0.0909, + "step": 54980 + }, + { + "epoch": 1.9983283668871286, + "grad_norm": 0.5207622051239014, + "learning_rate": 4.1398562141693253e-05, + "loss": 0.1212, + "step": 54990 + }, + { + "epoch": 1.9986917653899265, + "grad_norm": 0.5118950605392456, + "learning_rate": 4.1394644834422394e-05, + "loss": 0.1217, + "step": 55000 + }, + { + "epoch": 1.9990551638927248, + "grad_norm": 0.45482707023620605, + "learning_rate": 4.1390726820764614e-05, + "loss": 0.0986, + "step": 55010 + }, + { + "epoch": 1.999418562395523, + "grad_norm": 1.9805399179458618, + "learning_rate": 4.138680810088875e-05, + "loss": 0.1356, + "step": 55020 + }, + { + "epoch": 1.999781960898321, + "grad_norm": 1.0094414949417114, + "learning_rate": 4.138288867496362e-05, + "loss": 0.1751, + "step": 55030 + }, + { + "epoch": 2.0001453594011194, + "grad_norm": 1.6492732763290405, + "learning_rate": 4.1378968543158106e-05, + "loss": 0.1792, + "step": 55040 + }, + { + "epoch": 2.0005087579039174, + "grad_norm": 6.960714340209961, + "learning_rate": 4.137504770564111e-05, + "loss": 0.1707, + "step": 55050 + }, + { + "epoch": 2.000872156406716, + "grad_norm": 0.483518123626709, + "learning_rate": 4.1371126162581576e-05, + "loss": 0.1043, + "step": 55060 + }, + { + "epoch": 2.0012355549095138, + "grad_norm": 0.5076984763145447, + "learning_rate": 4.1367203914148464e-05, + "loss": 0.1356, + "step": 55070 + }, + { + "epoch": 2.0015989534123118, + "grad_norm": 2.341773509979248, + "learning_rate": 4.136328096051077e-05, + "loss": 0.1096, + "step": 55080 + }, + { + "epoch": 2.00196235191511, + "grad_norm": 0.5860946178436279, + "learning_rate": 4.135935730183752e-05, + "loss": 0.1076, + "step": 55090 + }, + { + "epoch": 2.002325750417908, + "grad_norm": 0.4653785824775696, + "learning_rate": 4.1355432938297774e-05, + "loss": 0.1517, + "step": 55100 + }, + { + "epoch": 2.0026891489207066, + "grad_norm": 1.198096513748169, + "learning_rate": 4.135150787006061e-05, + "loss": 0.369, + "step": 55110 + }, + { + "epoch": 2.0030525474235046, + "grad_norm": 1.07427978515625, + "learning_rate": 4.134758209729516e-05, + "loss": 0.1476, + "step": 55120 + }, + { + "epoch": 2.0034159459263026, + "grad_norm": 0.7984631657600403, + "learning_rate": 4.134365562017055e-05, + "loss": 0.0972, + "step": 55130 + }, + { + "epoch": 2.003779344429101, + "grad_norm": 1.2470594644546509, + "learning_rate": 4.133972843885598e-05, + "loss": 0.0884, + "step": 55140 + }, + { + "epoch": 2.004142742931899, + "grad_norm": 0.6046581268310547, + "learning_rate": 4.133580055352064e-05, + "loss": 0.2083, + "step": 55150 + }, + { + "epoch": 2.0045061414346974, + "grad_norm": 0.8026099801063538, + "learning_rate": 4.133187196433379e-05, + "loss": 0.1278, + "step": 55160 + }, + { + "epoch": 2.0048695399374954, + "grad_norm": 0.6957481503486633, + "learning_rate": 4.132794267146467e-05, + "loss": 0.1106, + "step": 55170 + }, + { + "epoch": 2.005232938440294, + "grad_norm": 1.2208986282348633, + "learning_rate": 4.13240126750826e-05, + "loss": 0.1058, + "step": 55180 + }, + { + "epoch": 2.005596336943092, + "grad_norm": 0.9665369391441345, + "learning_rate": 4.132008197535692e-05, + "loss": 0.1195, + "step": 55190 + }, + { + "epoch": 2.00595973544589, + "grad_norm": 1.0869636535644531, + "learning_rate": 4.131615057245696e-05, + "loss": 0.1004, + "step": 55200 + }, + { + "epoch": 2.00595973544589, + "eval_loss": 0.3372127115726471, + "eval_runtime": 180.3164, + "eval_samples_per_second": 41.117, + "eval_steps_per_second": 5.141, + "eval_wer": 0.16414035979450686, + "step": 55200 + }, + { + "epoch": 2.006323133948688, + "grad_norm": 1.0461617708206177, + "learning_rate": 4.131221846655212e-05, + "loss": 0.1003, + "step": 55210 + }, + { + "epoch": 2.006686532451486, + "grad_norm": 1.1234357357025146, + "learning_rate": 4.130828565781183e-05, + "loss": 0.131, + "step": 55220 + }, + { + "epoch": 2.0070499309542846, + "grad_norm": 0.792592465877533, + "learning_rate": 4.1304352146405544e-05, + "loss": 0.1236, + "step": 55230 + }, + { + "epoch": 2.0074133294570826, + "grad_norm": 2.0296480655670166, + "learning_rate": 4.130041793250273e-05, + "loss": 0.1162, + "step": 55240 + }, + { + "epoch": 2.0077767279598806, + "grad_norm": 0.8490334153175354, + "learning_rate": 4.12964830162729e-05, + "loss": 0.0891, + "step": 55250 + }, + { + "epoch": 2.008140126462679, + "grad_norm": 2.996204376220703, + "learning_rate": 4.129254739788561e-05, + "loss": 0.088, + "step": 55260 + }, + { + "epoch": 2.008503524965477, + "grad_norm": 0.785502016544342, + "learning_rate": 4.128861107751041e-05, + "loss": 2.8838, + "step": 55270 + }, + { + "epoch": 2.0088669234682754, + "grad_norm": 0.9276618957519531, + "learning_rate": 4.128467405531693e-05, + "loss": 0.1125, + "step": 55280 + }, + { + "epoch": 2.0092303219710734, + "grad_norm": 0.6827619671821594, + "learning_rate": 4.128073633147477e-05, + "loss": 0.1538, + "step": 55290 + }, + { + "epoch": 2.009593720473872, + "grad_norm": 0.5531404614448547, + "learning_rate": 4.1276797906153614e-05, + "loss": 0.1451, + "step": 55300 + }, + { + "epoch": 2.00995711897667, + "grad_norm": 1.3195756673812866, + "learning_rate": 4.127285877952315e-05, + "loss": 0.0831, + "step": 55310 + }, + { + "epoch": 2.010320517479468, + "grad_norm": 1.291306734085083, + "learning_rate": 4.12689189517531e-05, + "loss": 0.1168, + "step": 55320 + }, + { + "epoch": 2.0106839159822663, + "grad_norm": 0.7740198373794556, + "learning_rate": 4.126497842301322e-05, + "loss": 0.1293, + "step": 55330 + }, + { + "epoch": 2.0110473144850642, + "grad_norm": 0.619372546672821, + "learning_rate": 4.126103719347329e-05, + "loss": 0.1151, + "step": 55340 + }, + { + "epoch": 2.0114107129878627, + "grad_norm": 0.6809590458869934, + "learning_rate": 4.1257095263303114e-05, + "loss": 0.0808, + "step": 55350 + }, + { + "epoch": 2.0117741114906607, + "grad_norm": 0.7653446197509766, + "learning_rate": 4.125315263267255e-05, + "loss": 0.0847, + "step": 55360 + }, + { + "epoch": 2.0121375099934586, + "grad_norm": 0.7010202407836914, + "learning_rate": 4.124920930175148e-05, + "loss": 0.1856, + "step": 55370 + }, + { + "epoch": 2.012500908496257, + "grad_norm": 0.8760896921157837, + "learning_rate": 4.1245265270709786e-05, + "loss": 0.0966, + "step": 55380 + }, + { + "epoch": 2.012864306999055, + "grad_norm": 0.8872328400611877, + "learning_rate": 4.124132053971741e-05, + "loss": 0.1225, + "step": 55390 + }, + { + "epoch": 2.0132277055018535, + "grad_norm": 0.7111076712608337, + "learning_rate": 4.123737510894433e-05, + "loss": 0.0917, + "step": 55400 + }, + { + "epoch": 2.0135911040046515, + "grad_norm": 0.2959582209587097, + "learning_rate": 4.1233428978560515e-05, + "loss": 0.084, + "step": 55410 + }, + { + "epoch": 2.0139545025074495, + "grad_norm": 0.5472272038459778, + "learning_rate": 4.122948214873602e-05, + "loss": 0.1165, + "step": 55420 + }, + { + "epoch": 2.014317901010248, + "grad_norm": 1.7232263088226318, + "learning_rate": 4.1225534619640874e-05, + "loss": 0.1483, + "step": 55430 + }, + { + "epoch": 2.014681299513046, + "grad_norm": 0.6070485711097717, + "learning_rate": 4.1221586391445164e-05, + "loss": 0.1181, + "step": 55440 + }, + { + "epoch": 2.0150446980158443, + "grad_norm": 0.42631739377975464, + "learning_rate": 4.121763746431903e-05, + "loss": 0.2435, + "step": 55450 + }, + { + "epoch": 2.0154080965186423, + "grad_norm": 0.4716903865337372, + "learning_rate": 4.1213687838432594e-05, + "loss": 0.102, + "step": 55460 + }, + { + "epoch": 2.0157714950214407, + "grad_norm": 1.0024840831756592, + "learning_rate": 4.120973751395604e-05, + "loss": 2.0817, + "step": 55470 + }, + { + "epoch": 2.0161348935242387, + "grad_norm": 0.6983594298362732, + "learning_rate": 4.1205786491059565e-05, + "loss": 0.1091, + "step": 55480 + }, + { + "epoch": 2.0164982920270367, + "grad_norm": 3.18595814704895, + "learning_rate": 4.1201834769913405e-05, + "loss": 0.1334, + "step": 55490 + }, + { + "epoch": 2.016861690529835, + "grad_norm": 1.0065993070602417, + "learning_rate": 4.119788235068785e-05, + "loss": 0.0893, + "step": 55500 + }, + { + "epoch": 2.017225089032633, + "grad_norm": 1.9013348817825317, + "learning_rate": 4.119392923355315e-05, + "loss": 0.1055, + "step": 55510 + }, + { + "epoch": 2.0175884875354315, + "grad_norm": 0.727342963218689, + "learning_rate": 4.118997541867968e-05, + "loss": 0.1577, + "step": 55520 + }, + { + "epoch": 2.0179518860382295, + "grad_norm": 1.3305946588516235, + "learning_rate": 4.118602090623777e-05, + "loss": 0.1156, + "step": 55530 + }, + { + "epoch": 2.0183152845410275, + "grad_norm": 166.5440673828125, + "learning_rate": 4.11820656963978e-05, + "loss": 3.13, + "step": 55540 + }, + { + "epoch": 2.018678683043826, + "grad_norm": 1.1718511581420898, + "learning_rate": 4.11781097893302e-05, + "loss": 1.2695, + "step": 55550 + }, + { + "epoch": 2.019042081546624, + "grad_norm": 0.7488642930984497, + "learning_rate": 4.117415318520541e-05, + "loss": 0.1179, + "step": 55560 + }, + { + "epoch": 2.0194054800494223, + "grad_norm": 0.8934155702590942, + "learning_rate": 4.117019588419391e-05, + "loss": 0.0957, + "step": 55570 + }, + { + "epoch": 2.0197688785522203, + "grad_norm": 1.2470290660858154, + "learning_rate": 4.11662378864662e-05, + "loss": 0.0974, + "step": 55580 + }, + { + "epoch": 2.0201322770550187, + "grad_norm": 2.387202501296997, + "learning_rate": 4.116227919219282e-05, + "loss": 0.2065, + "step": 55590 + }, + { + "epoch": 2.0204956755578167, + "grad_norm": 0.9765509963035583, + "learning_rate": 4.115831980154434e-05, + "loss": 0.083, + "step": 55600 + }, + { + "epoch": 2.0208590740606147, + "grad_norm": 1.544554591178894, + "learning_rate": 4.115435971469135e-05, + "loss": 0.1067, + "step": 55610 + }, + { + "epoch": 2.021222472563413, + "grad_norm": 1.8516936302185059, + "learning_rate": 4.1150398931804465e-05, + "loss": 0.1292, + "step": 55620 + }, + { + "epoch": 2.021585871066211, + "grad_norm": 1.211599349975586, + "learning_rate": 4.114643745305437e-05, + "loss": 0.098, + "step": 55630 + }, + { + "epoch": 2.0219492695690096, + "grad_norm": 0.8160383105278015, + "learning_rate": 4.114247527861173e-05, + "loss": 0.0919, + "step": 55640 + }, + { + "epoch": 2.0223126680718075, + "grad_norm": 0.8116459846496582, + "learning_rate": 4.1138512408647256e-05, + "loss": 0.0929, + "step": 55650 + }, + { + "epoch": 2.0226760665746055, + "grad_norm": 0.9536616206169128, + "learning_rate": 4.113454884333171e-05, + "loss": 0.1269, + "step": 55660 + }, + { + "epoch": 2.023039465077404, + "grad_norm": 0.6211200952529907, + "learning_rate": 4.113058458283586e-05, + "loss": 0.1285, + "step": 55670 + }, + { + "epoch": 2.023402863580202, + "grad_norm": 1.3393282890319824, + "learning_rate": 4.112661962733052e-05, + "loss": 0.1211, + "step": 55680 + }, + { + "epoch": 2.0237662620830004, + "grad_norm": 0.9137499928474426, + "learning_rate": 4.1122653976986514e-05, + "loss": 0.1492, + "step": 55690 + }, + { + "epoch": 2.0241296605857984, + "grad_norm": 8.595315933227539, + "learning_rate": 4.1118687631974705e-05, + "loss": 0.0813, + "step": 55700 + }, + { + "epoch": 2.0244930590885963, + "grad_norm": 8.519613265991211, + "learning_rate": 4.111472059246601e-05, + "loss": 0.0971, + "step": 55710 + }, + { + "epoch": 2.0248564575913948, + "grad_norm": 0.906406819820404, + "learning_rate": 4.111075285863133e-05, + "loss": 0.1068, + "step": 55720 + }, + { + "epoch": 2.0252198560941927, + "grad_norm": 0.6413214206695557, + "learning_rate": 4.1106784430641634e-05, + "loss": 0.0904, + "step": 55730 + }, + { + "epoch": 2.025583254596991, + "grad_norm": 1.054943561553955, + "learning_rate": 4.110281530866791e-05, + "loss": 0.1087, + "step": 55740 + }, + { + "epoch": 2.025946653099789, + "grad_norm": 0.686661958694458, + "learning_rate": 4.1098845492881164e-05, + "loss": 0.1022, + "step": 55750 + }, + { + "epoch": 2.0263100516025876, + "grad_norm": 1.9529190063476562, + "learning_rate": 4.109487498345245e-05, + "loss": 0.1089, + "step": 55760 + }, + { + "epoch": 2.0266734501053856, + "grad_norm": 0.5279061198234558, + "learning_rate": 4.109090378055284e-05, + "loss": 0.1115, + "step": 55770 + }, + { + "epoch": 2.0270368486081836, + "grad_norm": 1.3651883602142334, + "learning_rate": 4.108693188435343e-05, + "loss": 0.1206, + "step": 55780 + }, + { + "epoch": 2.027400247110982, + "grad_norm": 0.9911472201347351, + "learning_rate": 4.108295929502536e-05, + "loss": 0.1235, + "step": 55790 + }, + { + "epoch": 2.02776364561378, + "grad_norm": 1.1165162324905396, + "learning_rate": 4.107898601273981e-05, + "loss": 0.0944, + "step": 55800 + }, + { + "epoch": 2.02776364561378, + "eval_loss": 0.342909038066864, + "eval_runtime": 179.8346, + "eval_samples_per_second": 41.227, + "eval_steps_per_second": 5.155, + "eval_wer": 0.16486648392542705, + "step": 55800 + }, + { + "epoch": 2.0281270441165784, + "grad_norm": 0.5809179544448853, + "learning_rate": 4.107501203766795e-05, + "loss": 0.0912, + "step": 55810 + }, + { + "epoch": 2.0284904426193764, + "grad_norm": 0.5710409283638, + "learning_rate": 4.1071037369981025e-05, + "loss": 0.1165, + "step": 55820 + }, + { + "epoch": 2.0288538411221744, + "grad_norm": 1.5615267753601074, + "learning_rate": 4.1067062009850276e-05, + "loss": 0.1126, + "step": 55830 + }, + { + "epoch": 2.029217239624973, + "grad_norm": 0.5823513269424438, + "learning_rate": 4.106308595744699e-05, + "loss": 0.1649, + "step": 55840 + }, + { + "epoch": 2.029580638127771, + "grad_norm": 1.1584099531173706, + "learning_rate": 4.105910921294249e-05, + "loss": 3.2733, + "step": 55850 + }, + { + "epoch": 2.029944036630569, + "grad_norm": 3.6284244060516357, + "learning_rate": 4.105513177650811e-05, + "loss": 0.1123, + "step": 55860 + }, + { + "epoch": 2.030307435133367, + "grad_norm": 0.2692999839782715, + "learning_rate": 4.105115364831522e-05, + "loss": 0.1022, + "step": 55870 + }, + { + "epoch": 2.0306708336361656, + "grad_norm": 0.7533041834831238, + "learning_rate": 4.1047174828535236e-05, + "loss": 0.1051, + "step": 55880 + }, + { + "epoch": 2.0310342321389636, + "grad_norm": 1.988377332687378, + "learning_rate": 4.104319531733958e-05, + "loss": 0.1112, + "step": 55890 + }, + { + "epoch": 2.0313976306417616, + "grad_norm": 1.1997753381729126, + "learning_rate": 4.103921511489972e-05, + "loss": 0.1166, + "step": 55900 + }, + { + "epoch": 2.03176102914456, + "grad_norm": 0.9296682476997375, + "learning_rate": 4.1035234221387154e-05, + "loss": 0.0914, + "step": 55910 + }, + { + "epoch": 2.032124427647358, + "grad_norm": 1.5428096055984497, + "learning_rate": 4.1031252636973394e-05, + "loss": 0.1383, + "step": 55920 + }, + { + "epoch": 2.0324878261501564, + "grad_norm": 1.2502493858337402, + "learning_rate": 4.1027270361829995e-05, + "loss": 0.0876, + "step": 55930 + }, + { + "epoch": 2.0328512246529544, + "grad_norm": 0.9851539731025696, + "learning_rate": 4.102328739612855e-05, + "loss": 0.1441, + "step": 55940 + }, + { + "epoch": 2.0332146231557524, + "grad_norm": 0.6906759738922119, + "learning_rate": 4.101930374004066e-05, + "loss": 0.0894, + "step": 55950 + }, + { + "epoch": 2.033578021658551, + "grad_norm": 0.6129600405693054, + "learning_rate": 4.101531939373796e-05, + "loss": 0.0935, + "step": 55960 + }, + { + "epoch": 2.033941420161349, + "grad_norm": 0.7423244118690491, + "learning_rate": 4.101133435739214e-05, + "loss": 0.0996, + "step": 55970 + }, + { + "epoch": 2.0343048186641473, + "grad_norm": 0.9887922406196594, + "learning_rate": 4.100734863117489e-05, + "loss": 0.1159, + "step": 55980 + }, + { + "epoch": 2.0346682171669452, + "grad_norm": 0.701602041721344, + "learning_rate": 4.100336221525794e-05, + "loss": 0.1278, + "step": 55990 + }, + { + "epoch": 2.0350316156697437, + "grad_norm": 1.088302493095398, + "learning_rate": 4.099937510981304e-05, + "loss": 0.0966, + "step": 56000 + }, + { + "epoch": 2.0353950141725417, + "grad_norm": 1.022271990776062, + "learning_rate": 4.099538731501201e-05, + "loss": 0.1055, + "step": 56010 + }, + { + "epoch": 2.0357584126753396, + "grad_norm": 1.5955030918121338, + "learning_rate": 4.099139883102664e-05, + "loss": 0.1654, + "step": 56020 + }, + { + "epoch": 2.036121811178138, + "grad_norm": 1.2459834814071655, + "learning_rate": 4.0987409658028805e-05, + "loss": 0.1179, + "step": 56030 + }, + { + "epoch": 2.036485209680936, + "grad_norm": 2.4748172760009766, + "learning_rate": 4.098341979619036e-05, + "loss": 0.1511, + "step": 56040 + }, + { + "epoch": 2.0368486081837345, + "grad_norm": 1.0897467136383057, + "learning_rate": 4.097942924568323e-05, + "loss": 0.1033, + "step": 56050 + }, + { + "epoch": 2.0372120066865325, + "grad_norm": 0.9883999228477478, + "learning_rate": 4.097543800667935e-05, + "loss": 0.0879, + "step": 56060 + }, + { + "epoch": 2.0375754051893304, + "grad_norm": 0.9798412919044495, + "learning_rate": 4.097144607935068e-05, + "loss": 0.9193, + "step": 56070 + }, + { + "epoch": 2.037938803692129, + "grad_norm": 0.8659210801124573, + "learning_rate": 4.0967453463869233e-05, + "loss": 0.1121, + "step": 56080 + }, + { + "epoch": 2.038302202194927, + "grad_norm": 0.825116753578186, + "learning_rate": 4.096346016040703e-05, + "loss": 0.1276, + "step": 56090 + }, + { + "epoch": 2.0386656006977253, + "grad_norm": 0.6097813844680786, + "learning_rate": 4.0959466169136115e-05, + "loss": 0.1045, + "step": 56100 + }, + { + "epoch": 2.0390289992005233, + "grad_norm": 2.39689564704895, + "learning_rate": 4.0955471490228604e-05, + "loss": 0.1286, + "step": 56110 + }, + { + "epoch": 2.0393923977033213, + "grad_norm": 0.4547784626483917, + "learning_rate": 4.095147612385658e-05, + "loss": 0.1266, + "step": 56120 + }, + { + "epoch": 2.0397557962061197, + "grad_norm": 1.1596136093139648, + "learning_rate": 4.094748007019221e-05, + "loss": 0.1022, + "step": 56130 + }, + { + "epoch": 2.0401191947089177, + "grad_norm": 1.1768062114715576, + "learning_rate": 4.094348332940767e-05, + "loss": 0.1214, + "step": 56140 + }, + { + "epoch": 2.040482593211716, + "grad_norm": 0.6755580902099609, + "learning_rate": 4.0939485901675153e-05, + "loss": 0.1078, + "step": 56150 + }, + { + "epoch": 2.040845991714514, + "grad_norm": 0.7698992490768433, + "learning_rate": 4.0935487787166914e-05, + "loss": 0.0923, + "step": 56160 + }, + { + "epoch": 2.0412093902173125, + "grad_norm": 0.6949880123138428, + "learning_rate": 4.093148898605519e-05, + "loss": 0.1065, + "step": 56170 + }, + { + "epoch": 2.0415727887201105, + "grad_norm": 0.5093110203742981, + "learning_rate": 4.09274894985123e-05, + "loss": 0.1358, + "step": 56180 + }, + { + "epoch": 2.0419361872229085, + "grad_norm": 0.8544941544532776, + "learning_rate": 4.092348932471055e-05, + "loss": 0.1215, + "step": 56190 + }, + { + "epoch": 2.042299585725707, + "grad_norm": 0.8509432673454285, + "learning_rate": 4.091948846482231e-05, + "loss": 0.1024, + "step": 56200 + }, + { + "epoch": 2.042662984228505, + "grad_norm": 0.5665140151977539, + "learning_rate": 4.091548691901995e-05, + "loss": 0.089, + "step": 56210 + }, + { + "epoch": 2.0430263827313033, + "grad_norm": 0.9858969449996948, + "learning_rate": 4.0911484687475886e-05, + "loss": 0.1107, + "step": 56220 + }, + { + "epoch": 2.0433897812341013, + "grad_norm": 0.8955181241035461, + "learning_rate": 4.0907481770362556e-05, + "loss": 0.113, + "step": 56230 + }, + { + "epoch": 2.0437531797368993, + "grad_norm": 0.9829466938972473, + "learning_rate": 4.090347816785244e-05, + "loss": 0.1631, + "step": 56240 + }, + { + "epoch": 2.0441165782396977, + "grad_norm": 0.5513119101524353, + "learning_rate": 4.089947388011803e-05, + "loss": 0.0959, + "step": 56250 + }, + { + "epoch": 2.0444799767424957, + "grad_norm": 0.4322792887687683, + "learning_rate": 4.089546890733187e-05, + "loss": 0.0982, + "step": 56260 + }, + { + "epoch": 2.044843375245294, + "grad_norm": 1.2782713174819946, + "learning_rate": 4.0891463249666504e-05, + "loss": 0.1112, + "step": 56270 + }, + { + "epoch": 2.045206773748092, + "grad_norm": 0.9792034029960632, + "learning_rate": 4.088745690729453e-05, + "loss": 0.0975, + "step": 56280 + }, + { + "epoch": 2.0455701722508906, + "grad_norm": 2.263601064682007, + "learning_rate": 4.088344988038857e-05, + "loss": 0.1418, + "step": 56290 + }, + { + "epoch": 2.0459335707536885, + "grad_norm": 1.011856198310852, + "learning_rate": 4.087944216912126e-05, + "loss": 0.1022, + "step": 56300 + }, + { + "epoch": 2.0462969692564865, + "grad_norm": 1.0281102657318115, + "learning_rate": 4.0875433773665286e-05, + "loss": 0.0925, + "step": 56310 + }, + { + "epoch": 2.046660367759285, + "grad_norm": 0.5272021889686584, + "learning_rate": 4.087142469419336e-05, + "loss": 0.1297, + "step": 56320 + }, + { + "epoch": 2.047023766262083, + "grad_norm": 0.9789879322052002, + "learning_rate": 4.0867414930878224e-05, + "loss": 0.0946, + "step": 56330 + }, + { + "epoch": 2.0473871647648814, + "grad_norm": 0.8782264590263367, + "learning_rate": 4.086340448389262e-05, + "loss": 0.1246, + "step": 56340 + }, + { + "epoch": 2.0477505632676793, + "grad_norm": 0.66651850938797, + "learning_rate": 4.0859393353409364e-05, + "loss": 0.0898, + "step": 56350 + }, + { + "epoch": 2.0481139617704773, + "grad_norm": 1.6209585666656494, + "learning_rate": 4.085538153960128e-05, + "loss": 0.1016, + "step": 56360 + }, + { + "epoch": 2.0484773602732758, + "grad_norm": 1.2438985109329224, + "learning_rate": 4.085136904264121e-05, + "loss": 0.1419, + "step": 56370 + }, + { + "epoch": 2.0488407587760737, + "grad_norm": 0.7311316728591919, + "learning_rate": 4.0847355862702055e-05, + "loss": 0.102, + "step": 56380 + }, + { + "epoch": 2.049204157278872, + "grad_norm": 1.2164160013198853, + "learning_rate": 4.084334199995672e-05, + "loss": 0.0936, + "step": 56390 + }, + { + "epoch": 2.04956755578167, + "grad_norm": 0.746296226978302, + "learning_rate": 4.083932745457815e-05, + "loss": 0.1268, + "step": 56400 + }, + { + "epoch": 2.04956755578167, + "eval_loss": 0.31779325008392334, + "eval_runtime": 178.9992, + "eval_samples_per_second": 41.419, + "eval_steps_per_second": 5.179, + "eval_wer": 0.15986530397371432, + "step": 56400 + }, + { + "epoch": 2.049930954284468, + "grad_norm": 0.94898521900177, + "learning_rate": 4.083531222673931e-05, + "loss": 0.1344, + "step": 56410 + }, + { + "epoch": 2.0502943527872666, + "grad_norm": 0.5653538703918457, + "learning_rate": 4.083129631661322e-05, + "loss": 0.1026, + "step": 56420 + }, + { + "epoch": 2.0506577512900646, + "grad_norm": 0.6599397659301758, + "learning_rate": 4.0827279724372884e-05, + "loss": 0.0923, + "step": 56430 + }, + { + "epoch": 2.051021149792863, + "grad_norm": 2.0571577548980713, + "learning_rate": 4.082326245019139e-05, + "loss": 0.1894, + "step": 56440 + }, + { + "epoch": 2.051384548295661, + "grad_norm": 1.0707124471664429, + "learning_rate": 4.081924449424182e-05, + "loss": 0.1098, + "step": 56450 + }, + { + "epoch": 2.0517479467984594, + "grad_norm": 0.450382798910141, + "learning_rate": 4.081522585669728e-05, + "loss": 0.0928, + "step": 56460 + }, + { + "epoch": 2.0521113453012574, + "grad_norm": 0.9119880795478821, + "learning_rate": 4.081120653773093e-05, + "loss": 0.1102, + "step": 56470 + }, + { + "epoch": 2.0524747438040554, + "grad_norm": 0.39113524556159973, + "learning_rate": 4.080718653751595e-05, + "loss": 0.1048, + "step": 56480 + }, + { + "epoch": 2.052838142306854, + "grad_norm": 1.069718837738037, + "learning_rate": 4.080316585622554e-05, + "loss": 0.1165, + "step": 56490 + }, + { + "epoch": 2.053201540809652, + "grad_norm": 1.0856863260269165, + "learning_rate": 4.0799144494032936e-05, + "loss": 0.0966, + "step": 56500 + }, + { + "epoch": 2.05356493931245, + "grad_norm": 0.9092361927032471, + "learning_rate": 4.079512245111142e-05, + "loss": 0.1041, + "step": 56510 + }, + { + "epoch": 2.053928337815248, + "grad_norm": 0.6025703549385071, + "learning_rate": 4.079109972763428e-05, + "loss": 0.1167, + "step": 56520 + }, + { + "epoch": 2.054291736318046, + "grad_norm": 2.7288074493408203, + "learning_rate": 4.078707632377483e-05, + "loss": 0.1092, + "step": 56530 + }, + { + "epoch": 2.0546551348208446, + "grad_norm": 1.0255563259124756, + "learning_rate": 4.078305223970643e-05, + "loss": 0.13, + "step": 56540 + }, + { + "epoch": 2.0550185333236426, + "grad_norm": 0.556890070438385, + "learning_rate": 4.0779429982609526e-05, + "loss": 1.9074, + "step": 56550 + }, + { + "epoch": 2.055381931826441, + "grad_norm": 0.8369362950325012, + "learning_rate": 4.077540460662182e-05, + "loss": 0.0925, + "step": 56560 + }, + { + "epoch": 2.055745330329239, + "grad_norm": 0.6321738958358765, + "learning_rate": 4.0771378550928064e-05, + "loss": 0.1274, + "step": 56570 + }, + { + "epoch": 2.0561087288320374, + "grad_norm": 2.1743392944335938, + "learning_rate": 4.076735181570172e-05, + "loss": 0.1147, + "step": 56580 + }, + { + "epoch": 2.0564721273348354, + "grad_norm": 0.35284000635147095, + "learning_rate": 4.076332440111629e-05, + "loss": 0.2721, + "step": 56590 + }, + { + "epoch": 2.0568355258376334, + "grad_norm": 0.4702494740486145, + "learning_rate": 4.0759296307345285e-05, + "loss": 0.0972, + "step": 56600 + }, + { + "epoch": 2.057198924340432, + "grad_norm": 0.5263124704360962, + "learning_rate": 4.075526753456229e-05, + "loss": 0.1004, + "step": 56610 + }, + { + "epoch": 2.05756232284323, + "grad_norm": 0.5063189268112183, + "learning_rate": 4.0751238082940864e-05, + "loss": 0.1254, + "step": 56620 + }, + { + "epoch": 2.0579257213460282, + "grad_norm": 0.8294627070426941, + "learning_rate": 4.074720795265463e-05, + "loss": 0.1067, + "step": 56630 + }, + { + "epoch": 2.0582891198488262, + "grad_norm": 0.8625883460044861, + "learning_rate": 4.0743177143877244e-05, + "loss": 0.1176, + "step": 56640 + }, + { + "epoch": 2.058652518351624, + "grad_norm": 0.7036715745925903, + "learning_rate": 4.073914565678236e-05, + "loss": 0.0807, + "step": 56650 + }, + { + "epoch": 2.0590159168544226, + "grad_norm": 1.748412013053894, + "learning_rate": 4.07351134915437e-05, + "loss": 0.0877, + "step": 56660 + }, + { + "epoch": 2.0593793153572206, + "grad_norm": 1.1321426630020142, + "learning_rate": 4.0731080648334975e-05, + "loss": 0.097, + "step": 56670 + }, + { + "epoch": 2.059742713860019, + "grad_norm": 2.1829307079315186, + "learning_rate": 4.0727047127329964e-05, + "loss": 0.1159, + "step": 56680 + }, + { + "epoch": 2.060106112362817, + "grad_norm": 1.0120956897735596, + "learning_rate": 4.0723012928702443e-05, + "loss": 0.1096, + "step": 56690 + }, + { + "epoch": 2.060469510865615, + "grad_norm": 0.6774507761001587, + "learning_rate": 4.071897805262624e-05, + "loss": 0.0925, + "step": 56700 + }, + { + "epoch": 2.0608329093684135, + "grad_norm": 0.7925111651420593, + "learning_rate": 4.07149424992752e-05, + "loss": 0.0944, + "step": 56710 + }, + { + "epoch": 2.0611963078712114, + "grad_norm": 0.41430070996284485, + "learning_rate": 4.07109062688232e-05, + "loss": 0.1082, + "step": 56720 + }, + { + "epoch": 2.06155970637401, + "grad_norm": 0.5457510948181152, + "learning_rate": 4.070686936144415e-05, + "loss": 0.1724, + "step": 56730 + }, + { + "epoch": 2.061923104876808, + "grad_norm": 1.1867283582687378, + "learning_rate": 4.070283177731199e-05, + "loss": 0.121, + "step": 56740 + }, + { + "epoch": 2.0622865033796063, + "grad_norm": 0.5466375946998596, + "learning_rate": 4.0698793516600676e-05, + "loss": 0.0729, + "step": 56750 + }, + { + "epoch": 2.0626499018824043, + "grad_norm": 0.802174985408783, + "learning_rate": 4.0694754579484204e-05, + "loss": 0.0983, + "step": 56760 + }, + { + "epoch": 2.0630133003852023, + "grad_norm": 0.37869808077812195, + "learning_rate": 4.06907149661366e-05, + "loss": 0.1225, + "step": 56770 + }, + { + "epoch": 2.0633766988880007, + "grad_norm": 1.0356521606445312, + "learning_rate": 4.068667467673192e-05, + "loss": 0.1046, + "step": 56780 + }, + { + "epoch": 2.0637400973907987, + "grad_norm": 1.9794261455535889, + "learning_rate": 4.068263371144423e-05, + "loss": 0.1497, + "step": 56790 + }, + { + "epoch": 2.064103495893597, + "grad_norm": 0.7426532506942749, + "learning_rate": 4.067859207044766e-05, + "loss": 0.1153, + "step": 56800 + }, + { + "epoch": 2.064466894396395, + "grad_norm": 0.4637458622455597, + "learning_rate": 4.0674549753916344e-05, + "loss": 0.0885, + "step": 56810 + }, + { + "epoch": 2.064830292899193, + "grad_norm": 0.44504934549331665, + "learning_rate": 4.067050676202445e-05, + "loss": 0.1483, + "step": 56820 + }, + { + "epoch": 2.0651936914019915, + "grad_norm": 0.8600061535835266, + "learning_rate": 4.066646309494617e-05, + "loss": 0.113, + "step": 56830 + }, + { + "epoch": 2.0655570899047895, + "grad_norm": 1.6054418087005615, + "learning_rate": 4.0662418752855746e-05, + "loss": 0.1626, + "step": 56840 + }, + { + "epoch": 2.065920488407588, + "grad_norm": 0.9366486072540283, + "learning_rate": 4.0658373735927415e-05, + "loss": 0.1035, + "step": 56850 + }, + { + "epoch": 2.066283886910386, + "grad_norm": 0.9057123064994812, + "learning_rate": 4.065432804433548e-05, + "loss": 0.0976, + "step": 56860 + }, + { + "epoch": 2.0666472854131843, + "grad_norm": 0.7718061804771423, + "learning_rate": 4.065028167825424e-05, + "loss": 1.6687, + "step": 56870 + }, + { + "epoch": 2.0670106839159823, + "grad_norm": 1.0670592784881592, + "learning_rate": 4.064623463785805e-05, + "loss": 0.0958, + "step": 56880 + }, + { + "epoch": 2.0673740824187803, + "grad_norm": 0.6497521996498108, + "learning_rate": 4.064218692332128e-05, + "loss": 0.1301, + "step": 56890 + }, + { + "epoch": 2.0677374809215787, + "grad_norm": 0.5239264369010925, + "learning_rate": 4.063813853481833e-05, + "loss": 0.0843, + "step": 56900 + }, + { + "epoch": 2.0681008794243767, + "grad_norm": 0.7263264060020447, + "learning_rate": 4.0634089472523626e-05, + "loss": 0.0928, + "step": 56910 + }, + { + "epoch": 2.068464277927175, + "grad_norm": 0.6024682521820068, + "learning_rate": 4.063003973661164e-05, + "loss": 0.1402, + "step": 56920 + }, + { + "epoch": 2.068827676429973, + "grad_norm": 0.8949540853500366, + "learning_rate": 4.0625989327256855e-05, + "loss": 0.1171, + "step": 56930 + }, + { + "epoch": 2.069191074932771, + "grad_norm": 0.9099026322364807, + "learning_rate": 4.062193824463378e-05, + "loss": 0.1184, + "step": 56940 + }, + { + "epoch": 2.0695544734355695, + "grad_norm": 8.538558959960938, + "learning_rate": 4.0617886488916976e-05, + "loss": 0.0981, + "step": 56950 + }, + { + "epoch": 2.0699178719383675, + "grad_norm": 0.8870179653167725, + "learning_rate": 4.061383406028101e-05, + "loss": 0.0796, + "step": 56960 + }, + { + "epoch": 2.070281270441166, + "grad_norm": 0.8997694253921509, + "learning_rate": 4.060978095890049e-05, + "loss": 0.1289, + "step": 56970 + }, + { + "epoch": 2.070644668943964, + "grad_norm": 1.018744707107544, + "learning_rate": 4.060572718495004e-05, + "loss": 0.1006, + "step": 56980 + }, + { + "epoch": 2.071008067446762, + "grad_norm": 0.5158216953277588, + "learning_rate": 4.0601672738604346e-05, + "loss": 0.161, + "step": 56990 + }, + { + "epoch": 2.0713714659495603, + "grad_norm": 1.025295615196228, + "learning_rate": 4.059761762003807e-05, + "loss": 0.086, + "step": 57000 + }, + { + "epoch": 2.0713714659495603, + "eval_loss": 0.34686627984046936, + "eval_runtime": 180.463, + "eval_samples_per_second": 41.083, + "eval_steps_per_second": 5.137, + "eval_wer": 0.16041897362354093, + "step": 57000 + }, + { + "epoch": 2.0717348644523583, + "grad_norm": 3.3658320903778076, + "learning_rate": 4.0593561829425955e-05, + "loss": 0.1124, + "step": 57010 + }, + { + "epoch": 2.0720982629551568, + "grad_norm": 7.979375839233398, + "learning_rate": 4.058950536694274e-05, + "loss": 0.133, + "step": 57020 + }, + { + "epoch": 2.0724616614579547, + "grad_norm": 0.7676217555999756, + "learning_rate": 4.058544823276321e-05, + "loss": 0.1096, + "step": 57030 + }, + { + "epoch": 2.072825059960753, + "grad_norm": 0.6934232711791992, + "learning_rate": 4.058139042706216e-05, + "loss": 0.1132, + "step": 57040 + }, + { + "epoch": 2.073188458463551, + "grad_norm": 0.9430510401725769, + "learning_rate": 4.057733195001444e-05, + "loss": 0.0998, + "step": 57050 + }, + { + "epoch": 2.073551856966349, + "grad_norm": 3.497431993484497, + "learning_rate": 4.057327280179491e-05, + "loss": 0.089, + "step": 57060 + }, + { + "epoch": 2.0739152554691476, + "grad_norm": 0.7105191349983215, + "learning_rate": 4.056921298257847e-05, + "loss": 0.1243, + "step": 57070 + }, + { + "epoch": 2.0742786539719456, + "grad_norm": 7.004267692565918, + "learning_rate": 4.0565152492540034e-05, + "loss": 0.1229, + "step": 57080 + }, + { + "epoch": 2.074642052474744, + "grad_norm": 0.42751577496528625, + "learning_rate": 4.0561091331854555e-05, + "loss": 0.1073, + "step": 57090 + }, + { + "epoch": 2.075005450977542, + "grad_norm": 1.4434239864349365, + "learning_rate": 4.055702950069702e-05, + "loss": 0.1044, + "step": 57100 + }, + { + "epoch": 2.07536884948034, + "grad_norm": 0.6759265661239624, + "learning_rate": 4.055296699924244e-05, + "loss": 0.089, + "step": 57110 + }, + { + "epoch": 2.0757322479831384, + "grad_norm": 0.48018431663513184, + "learning_rate": 4.0548903827665846e-05, + "loss": 0.106, + "step": 57120 + }, + { + "epoch": 2.0760956464859364, + "grad_norm": 1.510313630104065, + "learning_rate": 4.054483998614231e-05, + "loss": 0.1295, + "step": 57130 + }, + { + "epoch": 2.076459044988735, + "grad_norm": 0.807949960231781, + "learning_rate": 4.054077547484693e-05, + "loss": 0.4319, + "step": 57140 + }, + { + "epoch": 2.0768224434915328, + "grad_norm": 1.3790713548660278, + "learning_rate": 4.0536710293954824e-05, + "loss": 0.1006, + "step": 57150 + }, + { + "epoch": 2.077185841994331, + "grad_norm": 0.507022500038147, + "learning_rate": 4.0532644443641156e-05, + "loss": 0.0751, + "step": 57160 + }, + { + "epoch": 2.077549240497129, + "grad_norm": 1.7080292701721191, + "learning_rate": 4.0528577924081104e-05, + "loss": 0.1266, + "step": 57170 + }, + { + "epoch": 2.077912638999927, + "grad_norm": 1.9344823360443115, + "learning_rate": 4.052451073544987e-05, + "loss": 0.094, + "step": 57180 + }, + { + "epoch": 2.0782760375027256, + "grad_norm": 1.0933985710144043, + "learning_rate": 4.0520442877922715e-05, + "loss": 0.1295, + "step": 57190 + }, + { + "epoch": 2.0786394360055236, + "grad_norm": 0.6466109752655029, + "learning_rate": 4.05163743516749e-05, + "loss": 0.108, + "step": 57200 + }, + { + "epoch": 2.079002834508322, + "grad_norm": 0.5679341554641724, + "learning_rate": 4.051230515688171e-05, + "loss": 0.1205, + "step": 57210 + }, + { + "epoch": 2.07936623301112, + "grad_norm": 0.5203921794891357, + "learning_rate": 4.0508235293718495e-05, + "loss": 0.1202, + "step": 57220 + }, + { + "epoch": 2.079729631513918, + "grad_norm": 4.9159393310546875, + "learning_rate": 4.050416476236059e-05, + "loss": 0.0901, + "step": 57230 + }, + { + "epoch": 2.0800930300167164, + "grad_norm": 0.7785301208496094, + "learning_rate": 4.05000935629834e-05, + "loss": 0.0885, + "step": 57240 + }, + { + "epoch": 2.0804564285195144, + "grad_norm": 1.5235596895217896, + "learning_rate": 4.049602169576232e-05, + "loss": 0.1163, + "step": 57250 + }, + { + "epoch": 2.080819827022313, + "grad_norm": 0.7558295726776123, + "learning_rate": 4.0491949160872805e-05, + "loss": 0.0969, + "step": 57260 + }, + { + "epoch": 2.081183225525111, + "grad_norm": 0.8465888500213623, + "learning_rate": 4.048787595849032e-05, + "loss": 0.1061, + "step": 57270 + }, + { + "epoch": 2.081546624027909, + "grad_norm": 1.5089519023895264, + "learning_rate": 4.048380208879037e-05, + "loss": 0.0918, + "step": 57280 + }, + { + "epoch": 2.0819100225307072, + "grad_norm": 0.5132701992988586, + "learning_rate": 4.047972755194847e-05, + "loss": 0.0971, + "step": 57290 + }, + { + "epoch": 2.082273421033505, + "grad_norm": 2.1400113105773926, + "learning_rate": 4.047565234814019e-05, + "loss": 0.0934, + "step": 57300 + }, + { + "epoch": 2.0826368195363036, + "grad_norm": 0.6013107299804688, + "learning_rate": 4.047157647754112e-05, + "loss": 0.1349, + "step": 57310 + }, + { + "epoch": 2.0830002180391016, + "grad_norm": 2.676640272140503, + "learning_rate": 4.046749994032686e-05, + "loss": 0.13, + "step": 57320 + }, + { + "epoch": 2.0833636165419, + "grad_norm": 0.9156673550605774, + "learning_rate": 4.046342273667306e-05, + "loss": 0.1068, + "step": 57330 + }, + { + "epoch": 2.083727015044698, + "grad_norm": 1.0060288906097412, + "learning_rate": 4.04593448667554e-05, + "loss": 0.1091, + "step": 57340 + }, + { + "epoch": 2.084090413547496, + "grad_norm": 2.746476650238037, + "learning_rate": 4.0455266330749567e-05, + "loss": 0.1001, + "step": 57350 + }, + { + "epoch": 2.0844538120502945, + "grad_norm": 1.1911275386810303, + "learning_rate": 4.04511871288313e-05, + "loss": 0.0981, + "step": 57360 + }, + { + "epoch": 2.0848172105530924, + "grad_norm": 10.354631423950195, + "learning_rate": 4.044710726117636e-05, + "loss": 0.1354, + "step": 57370 + }, + { + "epoch": 2.085180609055891, + "grad_norm": 1.2562741041183472, + "learning_rate": 4.044302672796053e-05, + "loss": 0.0962, + "step": 57380 + }, + { + "epoch": 2.085544007558689, + "grad_norm": 0.48360708355903625, + "learning_rate": 4.043894552935962e-05, + "loss": 0.1203, + "step": 57390 + }, + { + "epoch": 2.085907406061487, + "grad_norm": 1.9491641521453857, + "learning_rate": 4.043486366554948e-05, + "loss": 0.0984, + "step": 57400 + }, + { + "epoch": 2.0862708045642853, + "grad_norm": 0.48460692167282104, + "learning_rate": 4.0430781136705975e-05, + "loss": 0.0984, + "step": 57410 + }, + { + "epoch": 2.0866342030670832, + "grad_norm": 0.9770491719245911, + "learning_rate": 4.042669794300502e-05, + "loss": 0.1173, + "step": 57420 + }, + { + "epoch": 2.0869976015698817, + "grad_norm": 0.4919109642505646, + "learning_rate": 4.042261408462255e-05, + "loss": 0.1162, + "step": 57430 + }, + { + "epoch": 2.0873610000726797, + "grad_norm": 0.555167019367218, + "learning_rate": 4.0418529561734495e-05, + "loss": 0.1137, + "step": 57440 + }, + { + "epoch": 2.087724398575478, + "grad_norm": 0.8190045356750488, + "learning_rate": 4.041444437451687e-05, + "loss": 0.0972, + "step": 57450 + }, + { + "epoch": 2.088087797078276, + "grad_norm": 0.5673350691795349, + "learning_rate": 4.041035852314568e-05, + "loss": 0.0985, + "step": 57460 + }, + { + "epoch": 2.088451195581074, + "grad_norm": 2.584392547607422, + "learning_rate": 4.040627200779697e-05, + "loss": 0.1159, + "step": 57470 + }, + { + "epoch": 2.0888145940838725, + "grad_norm": 3.240104913711548, + "learning_rate": 4.040218482864682e-05, + "loss": 0.0886, + "step": 57480 + }, + { + "epoch": 2.0891779925866705, + "grad_norm": 1.0577195882797241, + "learning_rate": 4.039809698587132e-05, + "loss": 0.1079, + "step": 57490 + }, + { + "epoch": 2.089541391089469, + "grad_norm": 1.1150219440460205, + "learning_rate": 4.039400847964661e-05, + "loss": 0.091, + "step": 57500 + }, + { + "epoch": 2.089904789592267, + "grad_norm": 0.42998915910720825, + "learning_rate": 4.038991931014885e-05, + "loss": 0.1038, + "step": 57510 + }, + { + "epoch": 2.090268188095065, + "grad_norm": 1.2772380113601685, + "learning_rate": 4.0385829477554216e-05, + "loss": 0.1114, + "step": 57520 + }, + { + "epoch": 2.0906315865978633, + "grad_norm": 0.6975306868553162, + "learning_rate": 4.0381738982038944e-05, + "loss": 0.1107, + "step": 57530 + }, + { + "epoch": 2.0909949851006613, + "grad_norm": 0.5228861570358276, + "learning_rate": 4.0377647823779257e-05, + "loss": 0.1217, + "step": 57540 + }, + { + "epoch": 2.0913583836034597, + "grad_norm": 0.8819922208786011, + "learning_rate": 4.0373556002951444e-05, + "loss": 0.1149, + "step": 57550 + }, + { + "epoch": 2.0917217821062577, + "grad_norm": 0.47613778710365295, + "learning_rate": 4.036946351973181e-05, + "loss": 0.1037, + "step": 57560 + }, + { + "epoch": 2.0920851806090557, + "grad_norm": 1.3058334589004517, + "learning_rate": 4.0365370374296666e-05, + "loss": 0.1195, + "step": 57570 + }, + { + "epoch": 2.092448579111854, + "grad_norm": 0.9610320329666138, + "learning_rate": 4.0361276566822383e-05, + "loss": 0.1286, + "step": 57580 + }, + { + "epoch": 2.092811977614652, + "grad_norm": 0.9065276980400085, + "learning_rate": 4.035718209748536e-05, + "loss": 0.1146, + "step": 57590 + }, + { + "epoch": 2.0931753761174505, + "grad_norm": 1.189386248588562, + "learning_rate": 4.0353086966461984e-05, + "loss": 0.0853, + "step": 57600 + }, + { + "epoch": 2.0931753761174505, + "eval_loss": 0.35443732142448425, + "eval_runtime": 179.8419, + "eval_samples_per_second": 41.225, + "eval_steps_per_second": 5.155, + "eval_wer": 0.16203459981483834, + "step": 57600 + }, + { + "epoch": 2.0935387746202485, + "grad_norm": 1.9661606550216675, + "learning_rate": 4.034899117392873e-05, + "loss": 0.0915, + "step": 57610 + }, + { + "epoch": 2.093902173123047, + "grad_norm": 1.0128490924835205, + "learning_rate": 4.0344894720062055e-05, + "loss": 2.6837, + "step": 57620 + }, + { + "epoch": 2.094265571625845, + "grad_norm": 0.9373286962509155, + "learning_rate": 4.0340797605038464e-05, + "loss": 0.1149, + "step": 57630 + }, + { + "epoch": 2.094628970128643, + "grad_norm": 0.7361924052238464, + "learning_rate": 4.033669982903449e-05, + "loss": 0.1473, + "step": 57640 + }, + { + "epoch": 2.0949923686314413, + "grad_norm": 0.6584343314170837, + "learning_rate": 4.0332601392226673e-05, + "loss": 0.0983, + "step": 57650 + }, + { + "epoch": 2.0953557671342393, + "grad_norm": 3.030869960784912, + "learning_rate": 4.0328502294791634e-05, + "loss": 0.0874, + "step": 57660 + }, + { + "epoch": 2.0957191656370378, + "grad_norm": 0.4622768759727478, + "learning_rate": 4.0324402536905964e-05, + "loss": 0.122, + "step": 57670 + }, + { + "epoch": 2.0960825641398357, + "grad_norm": 0.7545061111450195, + "learning_rate": 4.0320302118746314e-05, + "loss": 0.1077, + "step": 57680 + }, + { + "epoch": 2.0964459626426337, + "grad_norm": 1.838789939880371, + "learning_rate": 4.0316201040489355e-05, + "loss": 0.1814, + "step": 57690 + }, + { + "epoch": 2.096809361145432, + "grad_norm": 0.5931621789932251, + "learning_rate": 4.031209930231179e-05, + "loss": 0.1053, + "step": 57700 + }, + { + "epoch": 2.09717275964823, + "grad_norm": 0.698026180267334, + "learning_rate": 4.0307996904390336e-05, + "loss": 0.0843, + "step": 57710 + }, + { + "epoch": 2.0975361581510286, + "grad_norm": 0.663277804851532, + "learning_rate": 4.030389384690177e-05, + "loss": 0.1109, + "step": 57720 + }, + { + "epoch": 2.0978995566538265, + "grad_norm": 0.6599337458610535, + "learning_rate": 4.0299790130022874e-05, + "loss": 0.1007, + "step": 57730 + }, + { + "epoch": 2.098262955156625, + "grad_norm": 0.5328543186187744, + "learning_rate": 4.0295685753930454e-05, + "loss": 0.1004, + "step": 57740 + }, + { + "epoch": 2.098626353659423, + "grad_norm": 0.5420628190040588, + "learning_rate": 4.029158071880136e-05, + "loss": 0.0959, + "step": 57750 + }, + { + "epoch": 2.098989752162221, + "grad_norm": 1.3125580549240112, + "learning_rate": 4.028747502481245e-05, + "loss": 0.0835, + "step": 57760 + }, + { + "epoch": 2.0993531506650194, + "grad_norm": 0.6729845404624939, + "learning_rate": 4.028336867214064e-05, + "loss": 0.1596, + "step": 57770 + }, + { + "epoch": 2.0997165491678174, + "grad_norm": 0.8784998655319214, + "learning_rate": 4.0279261660962854e-05, + "loss": 0.1261, + "step": 57780 + }, + { + "epoch": 2.100079947670616, + "grad_norm": 0.8162268996238708, + "learning_rate": 4.027515399145605e-05, + "loss": 0.0996, + "step": 57790 + }, + { + "epoch": 2.1004433461734138, + "grad_norm": 0.8188743591308594, + "learning_rate": 4.02710456637972e-05, + "loss": 0.1043, + "step": 57800 + }, + { + "epoch": 2.1008067446762118, + "grad_norm": 2.6283457279205322, + "learning_rate": 4.0266936678163333e-05, + "loss": 0.1207, + "step": 57810 + }, + { + "epoch": 2.10117014317901, + "grad_norm": 0.9076483249664307, + "learning_rate": 4.0262827034731486e-05, + "loss": 0.1283, + "step": 57820 + }, + { + "epoch": 2.101533541681808, + "grad_norm": 1.4384301900863647, + "learning_rate": 4.025871673367873e-05, + "loss": 0.0942, + "step": 57830 + }, + { + "epoch": 2.1018969401846066, + "grad_norm": 0.7651816010475159, + "learning_rate": 4.025460577518215e-05, + "loss": 0.1171, + "step": 57840 + }, + { + "epoch": 2.1022603386874046, + "grad_norm": 1.075475811958313, + "learning_rate": 4.025049415941889e-05, + "loss": 0.1002, + "step": 57850 + }, + { + "epoch": 2.1026237371902026, + "grad_norm": 0.5640189051628113, + "learning_rate": 4.02463818865661e-05, + "loss": 0.0797, + "step": 57860 + }, + { + "epoch": 2.102987135693001, + "grad_norm": 2.052508592605591, + "learning_rate": 4.024226895680097e-05, + "loss": 0.114, + "step": 57870 + }, + { + "epoch": 2.103350534195799, + "grad_norm": 0.8014973998069763, + "learning_rate": 4.023815537030068e-05, + "loss": 0.1304, + "step": 57880 + }, + { + "epoch": 2.1037139326985974, + "grad_norm": 0.9665643572807312, + "learning_rate": 4.02340411272425e-05, + "loss": 0.1191, + "step": 57890 + }, + { + "epoch": 2.1040773312013954, + "grad_norm": 1.0654706954956055, + "learning_rate": 4.02299262278037e-05, + "loss": 0.0836, + "step": 57900 + }, + { + "epoch": 2.104440729704194, + "grad_norm": 1.1803388595581055, + "learning_rate": 4.022581067216157e-05, + "loss": 0.0988, + "step": 57910 + }, + { + "epoch": 2.104804128206992, + "grad_norm": 0.5792093276977539, + "learning_rate": 4.022169446049342e-05, + "loss": 0.1177, + "step": 57920 + }, + { + "epoch": 2.10516752670979, + "grad_norm": 0.9450294375419617, + "learning_rate": 4.021757759297662e-05, + "loss": 0.094, + "step": 57930 + }, + { + "epoch": 2.1055309252125882, + "grad_norm": 0.5335323810577393, + "learning_rate": 4.021346006978854e-05, + "loss": 0.1358, + "step": 57940 + }, + { + "epoch": 2.105894323715386, + "grad_norm": 1.1108689308166504, + "learning_rate": 4.02093418911066e-05, + "loss": 0.0964, + "step": 57950 + }, + { + "epoch": 2.1062577222181846, + "grad_norm": 0.3482346534729004, + "learning_rate": 4.020522305710823e-05, + "loss": 0.0928, + "step": 57960 + }, + { + "epoch": 2.1066211207209826, + "grad_norm": 0.6527045369148254, + "learning_rate": 4.02011035679709e-05, + "loss": 0.1033, + "step": 57970 + }, + { + "epoch": 2.1069845192237806, + "grad_norm": 0.9047361612319946, + "learning_rate": 4.019698342387211e-05, + "loss": 0.0939, + "step": 57980 + }, + { + "epoch": 2.107347917726579, + "grad_norm": 1.3960262537002563, + "learning_rate": 4.019286262498937e-05, + "loss": 0.1275, + "step": 57990 + }, + { + "epoch": 2.107711316229377, + "grad_norm": 0.49838632345199585, + "learning_rate": 4.0188741171500234e-05, + "loss": 0.1133, + "step": 58000 + }, + { + "epoch": 2.1080747147321754, + "grad_norm": 0.6651538014411926, + "learning_rate": 4.0184619063582284e-05, + "loss": 0.1361, + "step": 58010 + }, + { + "epoch": 2.1084381132349734, + "grad_norm": 0.7778026461601257, + "learning_rate": 4.018049630141313e-05, + "loss": 0.117, + "step": 58020 + }, + { + "epoch": 2.108801511737772, + "grad_norm": 1.0851924419403076, + "learning_rate": 4.0176372885170396e-05, + "loss": 0.096, + "step": 58030 + }, + { + "epoch": 2.10916491024057, + "grad_norm": 0.5920321345329285, + "learning_rate": 4.017224881503176e-05, + "loss": 0.1812, + "step": 58040 + }, + { + "epoch": 2.109528308743368, + "grad_norm": 1.2104512453079224, + "learning_rate": 4.0168124091174896e-05, + "loss": 0.1002, + "step": 58050 + }, + { + "epoch": 2.1098917072461663, + "grad_norm": 0.8000385761260986, + "learning_rate": 4.016399871377754e-05, + "loss": 0.099, + "step": 58060 + }, + { + "epoch": 2.1102551057489642, + "grad_norm": 0.9628605246543884, + "learning_rate": 4.015987268301742e-05, + "loss": 0.1322, + "step": 58070 + }, + { + "epoch": 2.1106185042517627, + "grad_norm": 1.1031752824783325, + "learning_rate": 4.015574599907235e-05, + "loss": 0.1089, + "step": 58080 + }, + { + "epoch": 2.1109819027545607, + "grad_norm": 0.7440558075904846, + "learning_rate": 4.0151618662120084e-05, + "loss": 0.1255, + "step": 58090 + }, + { + "epoch": 2.1113453012573586, + "grad_norm": 0.7492482662200928, + "learning_rate": 4.0147490672338494e-05, + "loss": 0.0787, + "step": 58100 + }, + { + "epoch": 2.111708699760157, + "grad_norm": 1.2699692249298096, + "learning_rate": 4.0143362029905415e-05, + "loss": 0.0835, + "step": 58110 + }, + { + "epoch": 2.112072098262955, + "grad_norm": 0.5075403451919556, + "learning_rate": 4.013923273499876e-05, + "loss": 0.113, + "step": 58120 + }, + { + "epoch": 2.1124354967657535, + "grad_norm": 0.47074371576309204, + "learning_rate": 4.013510278779643e-05, + "loss": 0.1045, + "step": 58130 + }, + { + "epoch": 2.1127988952685515, + "grad_norm": 1.9055145978927612, + "learning_rate": 4.013097218847636e-05, + "loss": 0.1096, + "step": 58140 + }, + { + "epoch": 2.1131622937713495, + "grad_norm": 0.8922753930091858, + "learning_rate": 4.0126840937216545e-05, + "loss": 0.1129, + "step": 58150 + }, + { + "epoch": 2.113525692274148, + "grad_norm": 1.5678116083145142, + "learning_rate": 4.012270903419497e-05, + "loss": 0.1314, + "step": 58160 + }, + { + "epoch": 2.113889090776946, + "grad_norm": 1.4676604270935059, + "learning_rate": 4.0118576479589675e-05, + "loss": 0.119, + "step": 58170 + }, + { + "epoch": 2.1142524892797443, + "grad_norm": 1.0103446245193481, + "learning_rate": 4.0114443273578714e-05, + "loss": 0.1036, + "step": 58180 + }, + { + "epoch": 2.1146158877825423, + "grad_norm": 0.5744931101799011, + "learning_rate": 4.011030941634016e-05, + "loss": 0.1493, + "step": 58190 + }, + { + "epoch": 2.1149792862853407, + "grad_norm": 1.430180311203003, + "learning_rate": 4.010617490805214e-05, + "loss": 0.0928, + "step": 58200 + }, + { + "epoch": 2.1149792862853407, + "eval_loss": 0.3442366421222687, + "eval_runtime": 180.0152, + "eval_samples_per_second": 41.185, + "eval_steps_per_second": 5.15, + "eval_wer": 0.16051881569154247, + "step": 58200 + }, + { + "epoch": 2.1153426847881387, + "grad_norm": 0.7342690825462341, + "learning_rate": 4.0102039748892786e-05, + "loss": 0.0878, + "step": 58210 + }, + { + "epoch": 2.1157060832909367, + "grad_norm": 1.540487289428711, + "learning_rate": 4.0097903939040284e-05, + "loss": 0.1158, + "step": 58220 + }, + { + "epoch": 2.116069481793735, + "grad_norm": 0.9415495991706848, + "learning_rate": 4.009376747867281e-05, + "loss": 0.105, + "step": 58230 + }, + { + "epoch": 2.116432880296533, + "grad_norm": 0.8002855181694031, + "learning_rate": 4.008963036796861e-05, + "loss": 0.0855, + "step": 58240 + }, + { + "epoch": 2.1167962787993315, + "grad_norm": 0.7064021825790405, + "learning_rate": 4.008549260710591e-05, + "loss": 0.1319, + "step": 58250 + }, + { + "epoch": 2.1171596773021295, + "grad_norm": 0.5867117047309875, + "learning_rate": 4.008135419626302e-05, + "loss": 0.1232, + "step": 58260 + }, + { + "epoch": 2.1175230758049275, + "grad_norm": 0.7439972162246704, + "learning_rate": 4.007721513561824e-05, + "loss": 0.1359, + "step": 58270 + }, + { + "epoch": 2.117886474307726, + "grad_norm": 0.9335612058639526, + "learning_rate": 4.007307542534989e-05, + "loss": 0.0763, + "step": 58280 + }, + { + "epoch": 2.118249872810524, + "grad_norm": 0.6899220943450928, + "learning_rate": 4.006893506563637e-05, + "loss": 0.129, + "step": 58290 + }, + { + "epoch": 2.1186132713133223, + "grad_norm": 0.9896695613861084, + "learning_rate": 4.006479405665604e-05, + "loss": 0.0756, + "step": 58300 + }, + { + "epoch": 2.1189766698161203, + "grad_norm": 0.8844881057739258, + "learning_rate": 4.0060652398587335e-05, + "loss": 0.111, + "step": 58310 + }, + { + "epoch": 2.1193400683189187, + "grad_norm": 0.3384082615375519, + "learning_rate": 4.0056510091608706e-05, + "loss": 0.1182, + "step": 58320 + }, + { + "epoch": 2.1197034668217167, + "grad_norm": 1.8488768339157104, + "learning_rate": 4.005236713589863e-05, + "loss": 0.1045, + "step": 58330 + }, + { + "epoch": 2.1200668653245147, + "grad_norm": 4.640181064605713, + "learning_rate": 4.004822353163561e-05, + "loss": 0.1085, + "step": 58340 + }, + { + "epoch": 2.120430263827313, + "grad_norm": 2.7104008197784424, + "learning_rate": 4.004407927899817e-05, + "loss": 0.0996, + "step": 58350 + }, + { + "epoch": 2.120793662330111, + "grad_norm": 0.8320967555046082, + "learning_rate": 4.00399343781649e-05, + "loss": 0.0887, + "step": 58360 + }, + { + "epoch": 2.1211570608329096, + "grad_norm": 0.5715747475624084, + "learning_rate": 4.003578882931436e-05, + "loss": 0.0961, + "step": 58370 + }, + { + "epoch": 2.1215204593357075, + "grad_norm": 1.0619550943374634, + "learning_rate": 4.003164263262518e-05, + "loss": 0.1733, + "step": 58380 + }, + { + "epoch": 2.1218838578385055, + "grad_norm": 0.6880344748497009, + "learning_rate": 4.0027495788275995e-05, + "loss": 0.1722, + "step": 58390 + }, + { + "epoch": 2.122247256341304, + "grad_norm": 0.503822922706604, + "learning_rate": 4.0023348296445483e-05, + "loss": 0.0772, + "step": 58400 + }, + { + "epoch": 2.122610654844102, + "grad_norm": 0.4914768636226654, + "learning_rate": 4.001920015731235e-05, + "loss": 0.0752, + "step": 58410 + }, + { + "epoch": 2.1229740533469004, + "grad_norm": 0.7141969799995422, + "learning_rate": 4.001505137105532e-05, + "loss": 0.1247, + "step": 58420 + }, + { + "epoch": 2.1233374518496984, + "grad_norm": 1.7771844863891602, + "learning_rate": 4.0010901937853164e-05, + "loss": 0.123, + "step": 58430 + }, + { + "epoch": 2.1237008503524963, + "grad_norm": 0.8636963367462158, + "learning_rate": 4.0006751857884636e-05, + "loss": 0.1224, + "step": 58440 + }, + { + "epoch": 2.1240642488552948, + "grad_norm": 0.6579970121383667, + "learning_rate": 4.000260113132857e-05, + "loss": 0.0992, + "step": 58450 + }, + { + "epoch": 2.1244276473580928, + "grad_norm": 0.5212269425392151, + "learning_rate": 3.99984497583638e-05, + "loss": 0.1097, + "step": 58460 + }, + { + "epoch": 2.124791045860891, + "grad_norm": 0.44934549927711487, + "learning_rate": 3.999429773916919e-05, + "loss": 0.1304, + "step": 58470 + }, + { + "epoch": 2.125154444363689, + "grad_norm": 0.7750062942504883, + "learning_rate": 3.999014507392365e-05, + "loss": 0.1233, + "step": 58480 + }, + { + "epoch": 2.1255178428664876, + "grad_norm": 0.9064908623695374, + "learning_rate": 3.9985991762806087e-05, + "loss": 0.2681, + "step": 58490 + }, + { + "epoch": 2.1258812413692856, + "grad_norm": 0.9376353025436401, + "learning_rate": 3.998183780599546e-05, + "loss": 0.0911, + "step": 58500 + }, + { + "epoch": 2.1262446398720836, + "grad_norm": 1.2456096410751343, + "learning_rate": 3.9977683203670755e-05, + "loss": 0.1072, + "step": 58510 + }, + { + "epoch": 2.126608038374882, + "grad_norm": 1.1492791175842285, + "learning_rate": 3.997352795601096e-05, + "loss": 0.1181, + "step": 58520 + }, + { + "epoch": 2.12697143687768, + "grad_norm": 1.6713447570800781, + "learning_rate": 3.996937206319513e-05, + "loss": 0.1018, + "step": 58530 + }, + { + "epoch": 2.1273348353804784, + "grad_norm": 1.8490865230560303, + "learning_rate": 3.996521552540231e-05, + "loss": 0.1267, + "step": 58540 + }, + { + "epoch": 2.1276982338832764, + "grad_norm": 0.8250418305397034, + "learning_rate": 3.9961058342811606e-05, + "loss": 0.1118, + "step": 58550 + }, + { + "epoch": 2.1280616323860744, + "grad_norm": 1.141861915588379, + "learning_rate": 3.995690051560213e-05, + "loss": 0.0958, + "step": 58560 + }, + { + "epoch": 2.128425030888873, + "grad_norm": 0.9268454313278198, + "learning_rate": 3.995274204395303e-05, + "loss": 0.1196, + "step": 58570 + }, + { + "epoch": 2.128788429391671, + "grad_norm": 0.6160836219787598, + "learning_rate": 3.994858292804347e-05, + "loss": 0.1017, + "step": 58580 + }, + { + "epoch": 2.129151827894469, + "grad_norm": 0.9815055131912231, + "learning_rate": 3.994442316805266e-05, + "loss": 0.0977, + "step": 58590 + }, + { + "epoch": 2.129515226397267, + "grad_norm": 0.887614369392395, + "learning_rate": 3.994026276415983e-05, + "loss": 0.0924, + "step": 58600 + }, + { + "epoch": 2.1298786249000656, + "grad_norm": 1.7379142045974731, + "learning_rate": 3.993610171654424e-05, + "loss": 0.1115, + "step": 58610 + }, + { + "epoch": 2.1302420234028636, + "grad_norm": 0.9149182438850403, + "learning_rate": 3.993194002538516e-05, + "loss": 0.4902, + "step": 58620 + }, + { + "epoch": 2.1306054219056616, + "grad_norm": 0.4498516619205475, + "learning_rate": 3.992777769086192e-05, + "loss": 0.1172, + "step": 58630 + }, + { + "epoch": 2.13096882040846, + "grad_norm": 0.8547645807266235, + "learning_rate": 3.992361471315385e-05, + "loss": 0.1816, + "step": 58640 + }, + { + "epoch": 2.131332218911258, + "grad_norm": 0.6961509585380554, + "learning_rate": 3.991945109244032e-05, + "loss": 0.1024, + "step": 58650 + }, + { + "epoch": 2.1316956174140564, + "grad_norm": 0.989095151424408, + "learning_rate": 3.9915286828900725e-05, + "loss": 0.0871, + "step": 58660 + }, + { + "epoch": 2.1320590159168544, + "grad_norm": 0.6588122844696045, + "learning_rate": 3.9911121922714496e-05, + "loss": 0.2563, + "step": 58670 + }, + { + "epoch": 2.1324224144196524, + "grad_norm": 0.6134093999862671, + "learning_rate": 3.9906956374061075e-05, + "loss": 0.1274, + "step": 58680 + }, + { + "epoch": 2.132785812922451, + "grad_norm": 1.8236083984375, + "learning_rate": 3.990279018311993e-05, + "loss": 0.1083, + "step": 58690 + }, + { + "epoch": 2.133149211425249, + "grad_norm": 0.8734591007232666, + "learning_rate": 3.989862335007059e-05, + "loss": 0.0925, + "step": 58700 + }, + { + "epoch": 2.1335126099280473, + "grad_norm": 0.5155262351036072, + "learning_rate": 3.9894455875092587e-05, + "loss": 0.1428, + "step": 58710 + }, + { + "epoch": 2.1338760084308452, + "grad_norm": 1.4302911758422852, + "learning_rate": 3.989028775836546e-05, + "loss": 0.1089, + "step": 58720 + }, + { + "epoch": 2.1342394069336432, + "grad_norm": 1.1335387229919434, + "learning_rate": 3.988611900006882e-05, + "loss": 0.1031, + "step": 58730 + }, + { + "epoch": 2.1346028054364417, + "grad_norm": 0.45461785793304443, + "learning_rate": 3.988194960038228e-05, + "loss": 0.1125, + "step": 58740 + }, + { + "epoch": 2.1349662039392396, + "grad_norm": 2.3098812103271484, + "learning_rate": 3.9877779559485484e-05, + "loss": 0.6339, + "step": 58750 + }, + { + "epoch": 2.135329602442038, + "grad_norm": 2.491065502166748, + "learning_rate": 3.98736088775581e-05, + "loss": 0.1011, + "step": 58760 + }, + { + "epoch": 2.135693000944836, + "grad_norm": 2.0698654651641846, + "learning_rate": 3.986943755477983e-05, + "loss": 1.3794, + "step": 58770 + }, + { + "epoch": 2.1360563994476345, + "grad_norm": 1.4950264692306519, + "learning_rate": 3.9865265591330394e-05, + "loss": 0.108, + "step": 58780 + }, + { + "epoch": 2.1364197979504325, + "grad_norm": 0.3976856768131256, + "learning_rate": 3.986109298738957e-05, + "loss": 0.1407, + "step": 58790 + }, + { + "epoch": 2.1367831964532304, + "grad_norm": 1.319399356842041, + "learning_rate": 3.985691974313711e-05, + "loss": 0.1168, + "step": 58800 + }, + { + "epoch": 2.1367831964532304, + "eval_loss": 0.33521324396133423, + "eval_runtime": 180.0938, + "eval_samples_per_second": 41.167, + "eval_steps_per_second": 5.147, + "eval_wer": 0.16158984878464974, + "step": 58800 + }, + { + "epoch": 2.137146594956029, + "grad_norm": 3.924207925796509, + "learning_rate": 3.985274585875284e-05, + "loss": 0.0899, + "step": 58810 + }, + { + "epoch": 2.137509993458827, + "grad_norm": 0.7248135805130005, + "learning_rate": 3.984857133441661e-05, + "loss": 0.1256, + "step": 58820 + }, + { + "epoch": 2.1378733919616253, + "grad_norm": 0.5945442914962769, + "learning_rate": 3.984439617030826e-05, + "loss": 0.0891, + "step": 58830 + }, + { + "epoch": 2.1382367904644233, + "grad_norm": 2.0642237663269043, + "learning_rate": 3.98402203666077e-05, + "loss": 0.1359, + "step": 58840 + }, + { + "epoch": 2.1386001889672213, + "grad_norm": 1.0051828622817993, + "learning_rate": 3.983604392349485e-05, + "loss": 0.099, + "step": 58850 + }, + { + "epoch": 2.1389635874700197, + "grad_norm": 1.7241709232330322, + "learning_rate": 3.983186684114965e-05, + "loss": 0.1353, + "step": 58860 + }, + { + "epoch": 2.1393269859728177, + "grad_norm": 0.6430028080940247, + "learning_rate": 3.9827689119752076e-05, + "loss": 0.6511, + "step": 58870 + }, + { + "epoch": 2.139690384475616, + "grad_norm": 0.76287442445755, + "learning_rate": 3.9823510759482134e-05, + "loss": 0.1082, + "step": 58880 + }, + { + "epoch": 2.140053782978414, + "grad_norm": 0.6280699372291565, + "learning_rate": 3.981933176051986e-05, + "loss": 0.114, + "step": 58890 + }, + { + "epoch": 2.1404171814812125, + "grad_norm": 0.8308879733085632, + "learning_rate": 3.9815152123045305e-05, + "loss": 0.1072, + "step": 58900 + }, + { + "epoch": 2.1407805799840105, + "grad_norm": 0.5416497588157654, + "learning_rate": 3.981097184723856e-05, + "loss": 0.1809, + "step": 58910 + }, + { + "epoch": 2.1411439784868085, + "grad_norm": 0.5450316071510315, + "learning_rate": 3.9806790933279745e-05, + "loss": 0.1198, + "step": 58920 + }, + { + "epoch": 2.141507376989607, + "grad_norm": 0.6177099347114563, + "learning_rate": 3.980260938134898e-05, + "loss": 0.0926, + "step": 58930 + }, + { + "epoch": 2.141870775492405, + "grad_norm": 1.365262746810913, + "learning_rate": 3.9798427191626455e-05, + "loss": 0.0998, + "step": 58940 + }, + { + "epoch": 2.1422341739952033, + "grad_norm": 0.4065784513950348, + "learning_rate": 3.979424436429234e-05, + "loss": 0.0958, + "step": 58950 + }, + { + "epoch": 2.1425975724980013, + "grad_norm": 0.7803066372871399, + "learning_rate": 3.979006089952688e-05, + "loss": 0.0997, + "step": 58960 + }, + { + "epoch": 2.1429609710007993, + "grad_norm": 0.41044801473617554, + "learning_rate": 3.978587679751032e-05, + "loss": 0.1265, + "step": 58970 + }, + { + "epoch": 2.1433243695035977, + "grad_norm": 1.1145354509353638, + "learning_rate": 3.9781692058422936e-05, + "loss": 0.0965, + "step": 58980 + }, + { + "epoch": 2.1436877680063957, + "grad_norm": 0.6286850571632385, + "learning_rate": 3.977750668244504e-05, + "loss": 0.1237, + "step": 58990 + }, + { + "epoch": 2.144051166509194, + "grad_norm": 0.7701926827430725, + "learning_rate": 3.977332066975695e-05, + "loss": 0.0984, + "step": 59000 + }, + { + "epoch": 2.144414565011992, + "grad_norm": 2.205230236053467, + "learning_rate": 3.976913402053904e-05, + "loss": 0.1007, + "step": 59010 + }, + { + "epoch": 2.14477796351479, + "grad_norm": 0.7837009429931641, + "learning_rate": 3.97649467349717e-05, + "loss": 0.1412, + "step": 59020 + }, + { + "epoch": 2.1451413620175885, + "grad_norm": 1.4856473207473755, + "learning_rate": 3.9760758813235336e-05, + "loss": 0.1069, + "step": 59030 + }, + { + "epoch": 2.1455047605203865, + "grad_norm": 0.7916889190673828, + "learning_rate": 3.975657025551039e-05, + "loss": 0.1216, + "step": 59040 + }, + { + "epoch": 2.145868159023185, + "grad_norm": 2.3275558948516846, + "learning_rate": 3.975238106197734e-05, + "loss": 0.0862, + "step": 59050 + }, + { + "epoch": 2.146231557525983, + "grad_norm": 1.2247077226638794, + "learning_rate": 3.974819123281668e-05, + "loss": 0.09, + "step": 59060 + }, + { + "epoch": 2.1465949560287814, + "grad_norm": 3.578880548477173, + "learning_rate": 3.9744000768208926e-05, + "loss": 0.1694, + "step": 59070 + }, + { + "epoch": 2.1469583545315793, + "grad_norm": 0.7688897848129272, + "learning_rate": 3.973980966833465e-05, + "loss": 0.1064, + "step": 59080 + }, + { + "epoch": 2.1473217530343773, + "grad_norm": 0.7360697388648987, + "learning_rate": 3.973561793337441e-05, + "loss": 0.1038, + "step": 59090 + }, + { + "epoch": 2.1476851515371758, + "grad_norm": 1.5406807661056519, + "learning_rate": 3.9731425563508826e-05, + "loss": 0.0949, + "step": 59100 + }, + { + "epoch": 2.1480485500399737, + "grad_norm": 1.3897796869277954, + "learning_rate": 3.972723255891853e-05, + "loss": 0.1097, + "step": 59110 + }, + { + "epoch": 2.148411948542772, + "grad_norm": 0.9940290451049805, + "learning_rate": 3.9723038919784176e-05, + "loss": 0.1342, + "step": 59120 + }, + { + "epoch": 2.14877534704557, + "grad_norm": 1.5705652236938477, + "learning_rate": 3.971884464628647e-05, + "loss": 0.1225, + "step": 59130 + }, + { + "epoch": 2.149138745548368, + "grad_norm": 0.8528106212615967, + "learning_rate": 3.971464973860611e-05, + "loss": 0.1127, + "step": 59140 + }, + { + "epoch": 2.1495021440511666, + "grad_norm": 0.5715293884277344, + "learning_rate": 3.971045419692385e-05, + "loss": 0.1089, + "step": 59150 + }, + { + "epoch": 2.1498655425539646, + "grad_norm": 1.5109196901321411, + "learning_rate": 3.970625802142046e-05, + "loss": 0.0809, + "step": 59160 + }, + { + "epoch": 2.150228941056763, + "grad_norm": 0.4277292788028717, + "learning_rate": 3.9702061212276744e-05, + "loss": 0.1368, + "step": 59170 + }, + { + "epoch": 2.150592339559561, + "grad_norm": 0.692513644695282, + "learning_rate": 3.969786376967351e-05, + "loss": 0.1399, + "step": 59180 + }, + { + "epoch": 2.1509557380623594, + "grad_norm": 3.4921178817749023, + "learning_rate": 3.969366569379162e-05, + "loss": 0.1315, + "step": 59190 + }, + { + "epoch": 2.1513191365651574, + "grad_norm": 1.5540839433670044, + "learning_rate": 3.9689466984811964e-05, + "loss": 0.1015, + "step": 59200 + }, + { + "epoch": 2.1516825350679554, + "grad_norm": 0.6076385378837585, + "learning_rate": 3.9685267642915436e-05, + "loss": 0.111, + "step": 59210 + }, + { + "epoch": 2.152045933570754, + "grad_norm": 0.5078336596488953, + "learning_rate": 3.968106766828298e-05, + "loss": 0.1122, + "step": 59220 + }, + { + "epoch": 2.152409332073552, + "grad_norm": 1.294973373413086, + "learning_rate": 3.967686706109554e-05, + "loss": 0.1202, + "step": 59230 + }, + { + "epoch": 2.15277273057635, + "grad_norm": 0.5963008999824524, + "learning_rate": 3.967350612002765e-05, + "loss": 6.9715, + "step": 59240 + }, + { + "epoch": 2.153136129079148, + "grad_norm": 0.9680716395378113, + "learning_rate": 3.966930437469738e-05, + "loss": 0.761, + "step": 59250 + }, + { + "epoch": 2.153499527581946, + "grad_norm": 0.5637746453285217, + "learning_rate": 3.966510199731898e-05, + "loss": 0.127, + "step": 59260 + }, + { + "epoch": 2.1538629260847446, + "grad_norm": 0.5631716251373291, + "learning_rate": 3.9660898988073514e-05, + "loss": 0.1065, + "step": 59270 + }, + { + "epoch": 2.1542263245875426, + "grad_norm": 2.773534059524536, + "learning_rate": 3.965669534714208e-05, + "loss": 0.1039, + "step": 59280 + }, + { + "epoch": 2.154589723090341, + "grad_norm": 0.5603722333908081, + "learning_rate": 3.965249107470579e-05, + "loss": 0.1243, + "step": 59290 + }, + { + "epoch": 2.154953121593139, + "grad_norm": 0.8897901177406311, + "learning_rate": 3.964828617094579e-05, + "loss": 0.0867, + "step": 59300 + }, + { + "epoch": 2.155316520095937, + "grad_norm": 0.9018154144287109, + "learning_rate": 3.9644080636043255e-05, + "loss": 0.1066, + "step": 59310 + }, + { + "epoch": 2.1556799185987354, + "grad_norm": 1.247503399848938, + "learning_rate": 3.963987447017939e-05, + "loss": 0.1193, + "step": 59320 + }, + { + "epoch": 2.1560433171015334, + "grad_norm": 0.5965039730072021, + "learning_rate": 3.963566767353544e-05, + "loss": 0.1065, + "step": 59330 + }, + { + "epoch": 2.156406715604332, + "grad_norm": 0.6746231913566589, + "learning_rate": 3.9631460246292616e-05, + "loss": 0.1096, + "step": 59340 + }, + { + "epoch": 2.15677011410713, + "grad_norm": 0.8131401538848877, + "learning_rate": 3.9627252188632246e-05, + "loss": 0.0903, + "step": 59350 + }, + { + "epoch": 2.1571335126099282, + "grad_norm": 0.8984467387199402, + "learning_rate": 3.962304350073562e-05, + "loss": 0.1095, + "step": 59360 + }, + { + "epoch": 2.1574969111127262, + "grad_norm": 0.7640008926391602, + "learning_rate": 3.961883418278408e-05, + "loss": 0.1255, + "step": 59370 + }, + { + "epoch": 2.157860309615524, + "grad_norm": 0.522688627243042, + "learning_rate": 3.961462423495899e-05, + "loss": 0.1144, + "step": 59380 + }, + { + "epoch": 2.1582237081183226, + "grad_norm": 0.4221755266189575, + "learning_rate": 3.961041365744174e-05, + "loss": 0.1031, + "step": 59390 + }, + { + "epoch": 2.1585871066211206, + "grad_norm": 1.1756844520568848, + "learning_rate": 3.960620245041374e-05, + "loss": 0.1034, + "step": 59400 + }, + { + "epoch": 2.1585871066211206, + "eval_loss": 0.33832496404647827, + "eval_runtime": 179.7531, + "eval_samples_per_second": 41.245, + "eval_steps_per_second": 5.157, + "eval_wer": 0.16059142810463448, + "step": 59400 + }, + { + "epoch": 2.158950505123919, + "grad_norm": 0.6600112915039062, + "learning_rate": 3.960199061405646e-05, + "loss": 0.1055, + "step": 59410 + }, + { + "epoch": 2.159313903626717, + "grad_norm": 0.6152768135070801, + "learning_rate": 3.959777814855135e-05, + "loss": 0.1349, + "step": 59420 + }, + { + "epoch": 2.159677302129515, + "grad_norm": 0.9786444306373596, + "learning_rate": 3.959356505407992e-05, + "loss": 0.1021, + "step": 59430 + }, + { + "epoch": 2.1600407006323135, + "grad_norm": 1.3649888038635254, + "learning_rate": 3.9589351330823697e-05, + "loss": 0.1002, + "step": 59440 + }, + { + "epoch": 2.1604040991351114, + "grad_norm": 0.8674107789993286, + "learning_rate": 3.958513697896423e-05, + "loss": 0.0963, + "step": 59450 + }, + { + "epoch": 2.16076749763791, + "grad_norm": 0.7542990446090698, + "learning_rate": 3.9580921998683114e-05, + "loss": 0.0837, + "step": 59460 + }, + { + "epoch": 2.161130896140708, + "grad_norm": 1.032072901725769, + "learning_rate": 3.957670639016194e-05, + "loss": 0.1991, + "step": 59470 + }, + { + "epoch": 2.1614942946435063, + "grad_norm": 0.5288215279579163, + "learning_rate": 3.9572490153582354e-05, + "loss": 0.0821, + "step": 59480 + }, + { + "epoch": 2.1618576931463043, + "grad_norm": 1.010878562927246, + "learning_rate": 3.956827328912602e-05, + "loss": 0.1697, + "step": 59490 + }, + { + "epoch": 2.1622210916491023, + "grad_norm": 0.9703467488288879, + "learning_rate": 3.956405579697462e-05, + "loss": 0.135, + "step": 59500 + }, + { + "epoch": 2.1625844901519007, + "grad_norm": 0.8474395275115967, + "learning_rate": 3.9559837677309874e-05, + "loss": 0.0969, + "step": 59510 + }, + { + "epoch": 2.1629478886546987, + "grad_norm": 0.6262643933296204, + "learning_rate": 3.955561893031353e-05, + "loss": 0.6284, + "step": 59520 + }, + { + "epoch": 2.163311287157497, + "grad_norm": 1.7965657711029053, + "learning_rate": 3.955139955616735e-05, + "loss": 0.103, + "step": 59530 + }, + { + "epoch": 2.163674685660295, + "grad_norm": 1.317929744720459, + "learning_rate": 3.954717955505314e-05, + "loss": 0.1266, + "step": 59540 + }, + { + "epoch": 2.164038084163093, + "grad_norm": 2.5945920944213867, + "learning_rate": 3.954295892715272e-05, + "loss": 0.2541, + "step": 59550 + }, + { + "epoch": 2.1644014826658915, + "grad_norm": 0.8854953050613403, + "learning_rate": 3.9538737672647955e-05, + "loss": 0.0872, + "step": 59560 + }, + { + "epoch": 2.1647648811686895, + "grad_norm": 1.2449252605438232, + "learning_rate": 3.953451579172069e-05, + "loss": 0.1297, + "step": 59570 + }, + { + "epoch": 2.165128279671488, + "grad_norm": 0.9489690661430359, + "learning_rate": 3.9530293284552876e-05, + "loss": 0.1213, + "step": 59580 + }, + { + "epoch": 2.165491678174286, + "grad_norm": 1.2009365558624268, + "learning_rate": 3.952607015132642e-05, + "loss": 0.1116, + "step": 59590 + }, + { + "epoch": 2.165855076677084, + "grad_norm": 2.0308213233947754, + "learning_rate": 3.952184639222327e-05, + "loss": 0.104, + "step": 59600 + }, + { + "epoch": 2.1662184751798823, + "grad_norm": 0.9132998585700989, + "learning_rate": 3.951762200742544e-05, + "loss": 0.0821, + "step": 59610 + }, + { + "epoch": 2.1665818736826803, + "grad_norm": 0.3481888473033905, + "learning_rate": 3.951339699711493e-05, + "loss": 0.1061, + "step": 59620 + }, + { + "epoch": 2.1669452721854787, + "grad_norm": 1.2526309490203857, + "learning_rate": 3.950917136147378e-05, + "loss": 0.134, + "step": 59630 + }, + { + "epoch": 2.1673086706882767, + "grad_norm": 1.3150311708450317, + "learning_rate": 3.950494510068407e-05, + "loss": 0.1387, + "step": 59640 + }, + { + "epoch": 2.167672069191075, + "grad_norm": 0.6540773510932922, + "learning_rate": 3.950071821492787e-05, + "loss": 0.1038, + "step": 59650 + }, + { + "epoch": 2.168035467693873, + "grad_norm": 0.7014539837837219, + "learning_rate": 3.949649070438732e-05, + "loss": 0.1047, + "step": 59660 + }, + { + "epoch": 2.168398866196671, + "grad_norm": 1.7086548805236816, + "learning_rate": 3.9492262569244566e-05, + "loss": 0.1298, + "step": 59670 + }, + { + "epoch": 2.1687622646994695, + "grad_norm": 0.5339615941047668, + "learning_rate": 3.9488033809681785e-05, + "loss": 0.0818, + "step": 59680 + }, + { + "epoch": 2.1691256632022675, + "grad_norm": 1.4150161743164062, + "learning_rate": 3.9483804425881167e-05, + "loss": 0.0952, + "step": 59690 + }, + { + "epoch": 2.169489061705066, + "grad_norm": 1.182112216949463, + "learning_rate": 3.947957441802496e-05, + "loss": 0.0855, + "step": 59700 + }, + { + "epoch": 2.169852460207864, + "grad_norm": 22.265352249145508, + "learning_rate": 3.94753437862954e-05, + "loss": 0.2064, + "step": 59710 + }, + { + "epoch": 2.170215858710662, + "grad_norm": 1.3365362882614136, + "learning_rate": 3.9471112530874784e-05, + "loss": 0.1314, + "step": 59720 + }, + { + "epoch": 2.1705792572134603, + "grad_norm": 0.5914321541786194, + "learning_rate": 3.946688065194543e-05, + "loss": 0.1072, + "step": 59730 + }, + { + "epoch": 2.1709426557162583, + "grad_norm": 1.0717413425445557, + "learning_rate": 3.946264814968964e-05, + "loss": 0.1144, + "step": 59740 + }, + { + "epoch": 2.1713060542190568, + "grad_norm": 0.7842442393302917, + "learning_rate": 3.945841502428981e-05, + "loss": 0.0989, + "step": 59750 + }, + { + "epoch": 2.1716694527218547, + "grad_norm": 0.4757680594921112, + "learning_rate": 3.9454181275928315e-05, + "loss": 0.0909, + "step": 59760 + }, + { + "epoch": 2.172032851224653, + "grad_norm": 0.9192887544631958, + "learning_rate": 3.944994690478758e-05, + "loss": 2.1207, + "step": 59770 + }, + { + "epoch": 2.172396249727451, + "grad_norm": 1.9832956790924072, + "learning_rate": 3.9445711911050055e-05, + "loss": 0.1235, + "step": 59780 + }, + { + "epoch": 2.172759648230249, + "grad_norm": 12.941081047058105, + "learning_rate": 3.944147629489819e-05, + "loss": 0.3816, + "step": 59790 + }, + { + "epoch": 2.1731230467330476, + "grad_norm": 1.5549241304397583, + "learning_rate": 3.9437240056514504e-05, + "loss": 0.109, + "step": 59800 + }, + { + "epoch": 2.1734864452358456, + "grad_norm": 3.1633951663970947, + "learning_rate": 3.9433003196081495e-05, + "loss": 0.1156, + "step": 59810 + }, + { + "epoch": 2.173849843738644, + "grad_norm": 1.274003505706787, + "learning_rate": 3.9428765713781744e-05, + "loss": 0.0984, + "step": 59820 + }, + { + "epoch": 2.174213242241442, + "grad_norm": 0.5220558047294617, + "learning_rate": 3.9424527609797825e-05, + "loss": 0.1151, + "step": 59830 + }, + { + "epoch": 2.17457664074424, + "grad_norm": 1.241507887840271, + "learning_rate": 3.942028888431232e-05, + "loss": 0.1219, + "step": 59840 + }, + { + "epoch": 2.1749400392470384, + "grad_norm": 0.5816989541053772, + "learning_rate": 3.9416049537507875e-05, + "loss": 0.0976, + "step": 59850 + }, + { + "epoch": 2.1753034377498364, + "grad_norm": 0.6653616428375244, + "learning_rate": 3.941180956956715e-05, + "loss": 0.1196, + "step": 59860 + }, + { + "epoch": 2.175666836252635, + "grad_norm": 0.6018986105918884, + "learning_rate": 3.940756898067283e-05, + "loss": 0.1151, + "step": 59870 + }, + { + "epoch": 2.176030234755433, + "grad_norm": 0.5224238038063049, + "learning_rate": 3.940332777100762e-05, + "loss": 0.0892, + "step": 59880 + }, + { + "epoch": 2.1763936332582308, + "grad_norm": 0.7985048294067383, + "learning_rate": 3.939908594075427e-05, + "loss": 0.1244, + "step": 59890 + }, + { + "epoch": 2.176757031761029, + "grad_norm": 1.0602693557739258, + "learning_rate": 3.9394843490095535e-05, + "loss": 0.107, + "step": 59900 + }, + { + "epoch": 2.177120430263827, + "grad_norm": 0.789055347442627, + "learning_rate": 3.939060041921421e-05, + "loss": 0.1354, + "step": 59910 + }, + { + "epoch": 2.1774838287666256, + "grad_norm": 0.27713751792907715, + "learning_rate": 3.9386356728293123e-05, + "loss": 0.1047, + "step": 59920 + }, + { + "epoch": 2.1778472272694236, + "grad_norm": 1.9695335626602173, + "learning_rate": 3.9382112417515106e-05, + "loss": 0.0788, + "step": 59930 + }, + { + "epoch": 2.178210625772222, + "grad_norm": 1.5898009538650513, + "learning_rate": 3.937786748706304e-05, + "loss": 0.1194, + "step": 59940 + }, + { + "epoch": 2.17857402427502, + "grad_norm": 1.2933491468429565, + "learning_rate": 3.937362193711981e-05, + "loss": 0.0878, + "step": 59950 + }, + { + "epoch": 2.178937422777818, + "grad_norm": 0.5345110297203064, + "learning_rate": 3.9369375767868355e-05, + "loss": 0.1, + "step": 59960 + }, + { + "epoch": 2.1793008212806164, + "grad_norm": 0.5044030547142029, + "learning_rate": 3.936512897949163e-05, + "loss": 0.1144, + "step": 59970 + }, + { + "epoch": 2.1796642197834144, + "grad_norm": 0.5815631151199341, + "learning_rate": 3.9360881572172605e-05, + "loss": 0.0789, + "step": 59980 + }, + { + "epoch": 2.180027618286213, + "grad_norm": 0.8639971613883972, + "learning_rate": 3.9356633546094297e-05, + "loss": 0.0971, + "step": 59990 + }, + { + "epoch": 2.180391016789011, + "grad_norm": 1.318261981010437, + "learning_rate": 3.935238490143972e-05, + "loss": 0.0979, + "step": 60000 + }, + { + "epoch": 2.180391016789011, + "eval_loss": 0.339672327041626, + "eval_runtime": 179.2051, + "eval_samples_per_second": 41.372, + "eval_steps_per_second": 5.173, + "eval_wer": 0.1550910378129141, + "step": 60000 + }, + { + "epoch": 2.180754415291809, + "grad_norm": 1.4749493598937988, + "learning_rate": 3.934813563839195e-05, + "loss": 0.0857, + "step": 60010 + }, + { + "epoch": 2.1811178137946072, + "grad_norm": 0.6420970559120178, + "learning_rate": 3.934388575713407e-05, + "loss": 0.1378, + "step": 60020 + }, + { + "epoch": 2.181481212297405, + "grad_norm": 2.692276954650879, + "learning_rate": 3.9339635257849176e-05, + "loss": 0.1229, + "step": 60030 + }, + { + "epoch": 2.1818446108002036, + "grad_norm": 0.6107433438301086, + "learning_rate": 3.9335384140720435e-05, + "loss": 0.1196, + "step": 60040 + }, + { + "epoch": 2.1822080093030016, + "grad_norm": 8.781155586242676, + "learning_rate": 3.933113240593098e-05, + "loss": 0.1229, + "step": 60050 + }, + { + "epoch": 2.1825714078058, + "grad_norm": 2.4440197944641113, + "learning_rate": 3.9326880053664026e-05, + "loss": 0.1012, + "step": 60060 + }, + { + "epoch": 2.182934806308598, + "grad_norm": 0.6593974828720093, + "learning_rate": 3.932262708410279e-05, + "loss": 0.0975, + "step": 60070 + }, + { + "epoch": 2.183298204811396, + "grad_norm": 1.740123987197876, + "learning_rate": 3.931837349743051e-05, + "loss": 0.1086, + "step": 60080 + }, + { + "epoch": 2.1836616033141945, + "grad_norm": 0.8486297130584717, + "learning_rate": 3.9314119293830466e-05, + "loss": 0.1325, + "step": 60090 + }, + { + "epoch": 2.1840250018169924, + "grad_norm": 1.1630836725234985, + "learning_rate": 3.9309864473485945e-05, + "loss": 0.0936, + "step": 60100 + }, + { + "epoch": 2.184388400319791, + "grad_norm": 1.5026519298553467, + "learning_rate": 3.930560903658028e-05, + "loss": 0.1088, + "step": 60110 + }, + { + "epoch": 2.184751798822589, + "grad_norm": 0.8840125799179077, + "learning_rate": 3.9301352983296816e-05, + "loss": 0.7203, + "step": 60120 + }, + { + "epoch": 2.185115197325387, + "grad_norm": 1.5866588354110718, + "learning_rate": 3.929709631381895e-05, + "loss": 0.1023, + "step": 60130 + }, + { + "epoch": 2.1854785958281853, + "grad_norm": 1.0091042518615723, + "learning_rate": 3.9292839028330065e-05, + "loss": 0.1165, + "step": 60140 + }, + { + "epoch": 2.1858419943309833, + "grad_norm": 0.8317708969116211, + "learning_rate": 3.9288581127013603e-05, + "loss": 0.084, + "step": 60150 + }, + { + "epoch": 2.1862053928337817, + "grad_norm": 0.5231217741966248, + "learning_rate": 3.9284322610053016e-05, + "loss": 0.0832, + "step": 60160 + }, + { + "epoch": 2.1865687913365797, + "grad_norm": 1.9025609493255615, + "learning_rate": 3.928006347763179e-05, + "loss": 0.1349, + "step": 60170 + }, + { + "epoch": 2.1869321898393776, + "grad_norm": 1.5179822444915771, + "learning_rate": 3.927580372993344e-05, + "loss": 0.1029, + "step": 60180 + }, + { + "epoch": 2.187295588342176, + "grad_norm": 1.7581968307495117, + "learning_rate": 3.9271543367141494e-05, + "loss": 0.1232, + "step": 60190 + }, + { + "epoch": 2.187658986844974, + "grad_norm": 1.4503281116485596, + "learning_rate": 3.926728238943953e-05, + "loss": 0.0832, + "step": 60200 + }, + { + "epoch": 2.1880223853477725, + "grad_norm": 1.222233533859253, + "learning_rate": 3.926302079701113e-05, + "loss": 0.0918, + "step": 60210 + }, + { + "epoch": 2.1883857838505705, + "grad_norm": 2.6328423023223877, + "learning_rate": 3.9258758590039915e-05, + "loss": 0.1229, + "step": 60220 + }, + { + "epoch": 2.188749182353369, + "grad_norm": 1.2800387144088745, + "learning_rate": 3.925449576870952e-05, + "loss": 0.1132, + "step": 60230 + }, + { + "epoch": 2.189112580856167, + "grad_norm": 1.5218274593353271, + "learning_rate": 3.925023233320362e-05, + "loss": 0.1508, + "step": 60240 + }, + { + "epoch": 2.189475979358965, + "grad_norm": 0.6339848041534424, + "learning_rate": 3.9245968283705916e-05, + "loss": 0.0934, + "step": 60250 + }, + { + "epoch": 2.1898393778617633, + "grad_norm": 0.6518699526786804, + "learning_rate": 3.924170362040012e-05, + "loss": 0.0979, + "step": 60260 + }, + { + "epoch": 2.1902027763645613, + "grad_norm": 0.6267105340957642, + "learning_rate": 3.923743834346999e-05, + "loss": 0.3877, + "step": 60270 + }, + { + "epoch": 2.1905661748673597, + "grad_norm": 0.5715605616569519, + "learning_rate": 3.92331724530993e-05, + "loss": 0.1171, + "step": 60280 + }, + { + "epoch": 2.1909295733701577, + "grad_norm": 1.068161129951477, + "learning_rate": 3.922890594947185e-05, + "loss": 0.1452, + "step": 60290 + }, + { + "epoch": 2.1912929718729557, + "grad_norm": 0.9280456304550171, + "learning_rate": 3.9224638832771475e-05, + "loss": 0.0951, + "step": 60300 + }, + { + "epoch": 2.191656370375754, + "grad_norm": 1.1696865558624268, + "learning_rate": 3.922037110318201e-05, + "loss": 0.1019, + "step": 60310 + }, + { + "epoch": 2.192019768878552, + "grad_norm": 0.8494959473609924, + "learning_rate": 3.921610276088736e-05, + "loss": 0.1189, + "step": 60320 + }, + { + "epoch": 2.1923831673813505, + "grad_norm": 3.686048746109009, + "learning_rate": 3.921183380607142e-05, + "loss": 0.1161, + "step": 60330 + }, + { + "epoch": 2.1927465658841485, + "grad_norm": 1.5831258296966553, + "learning_rate": 3.920756423891814e-05, + "loss": 0.1309, + "step": 60340 + }, + { + "epoch": 2.193109964386947, + "grad_norm": 1.9985876083374023, + "learning_rate": 3.920329405961145e-05, + "loss": 0.1393, + "step": 60350 + }, + { + "epoch": 2.193473362889745, + "grad_norm": 4.160605430603027, + "learning_rate": 3.919902326833536e-05, + "loss": 0.1535, + "step": 60360 + }, + { + "epoch": 2.193836761392543, + "grad_norm": 0.43690192699432373, + "learning_rate": 3.919475186527388e-05, + "loss": 0.1186, + "step": 60370 + }, + { + "epoch": 2.1942001598953413, + "grad_norm": 0.8073493242263794, + "learning_rate": 3.9190479850611044e-05, + "loss": 0.1047, + "step": 60380 + }, + { + "epoch": 2.1945635583981393, + "grad_norm": 9.085131645202637, + "learning_rate": 3.9186207224530925e-05, + "loss": 0.1332, + "step": 60390 + }, + { + "epoch": 2.1949269569009378, + "grad_norm": 1.6787877082824707, + "learning_rate": 3.9181933987217614e-05, + "loss": 0.0998, + "step": 60400 + }, + { + "epoch": 2.1952903554037357, + "grad_norm": 0.6496911644935608, + "learning_rate": 3.917766013885522e-05, + "loss": 0.103, + "step": 60410 + }, + { + "epoch": 2.1956537539065337, + "grad_norm": 0.9650323987007141, + "learning_rate": 3.9173385679627896e-05, + "loss": 0.1055, + "step": 60420 + }, + { + "epoch": 2.196017152409332, + "grad_norm": 2.345998525619507, + "learning_rate": 3.916911060971981e-05, + "loss": 0.1, + "step": 60430 + }, + { + "epoch": 2.19638055091213, + "grad_norm": 0.6440123915672302, + "learning_rate": 3.9164834929315165e-05, + "loss": 0.1562, + "step": 60440 + }, + { + "epoch": 2.1967439494149286, + "grad_norm": 6.226611614227295, + "learning_rate": 3.916055863859818e-05, + "loss": 0.0971, + "step": 60450 + }, + { + "epoch": 2.1971073479177265, + "grad_norm": 6.518206596374512, + "learning_rate": 3.915628173775311e-05, + "loss": 0.0858, + "step": 60460 + }, + { + "epoch": 2.1974707464205245, + "grad_norm": 0.48097607493400574, + "learning_rate": 3.915200422696423e-05, + "loss": 0.1248, + "step": 60470 + }, + { + "epoch": 2.197834144923323, + "grad_norm": 1.0158125162124634, + "learning_rate": 3.914772610641584e-05, + "loss": 0.0952, + "step": 60480 + }, + { + "epoch": 2.198197543426121, + "grad_norm": 0.9592711925506592, + "learning_rate": 3.914344737629226e-05, + "loss": 0.1202, + "step": 60490 + }, + { + "epoch": 2.1985609419289194, + "grad_norm": 0.8496592044830322, + "learning_rate": 3.9139168036777864e-05, + "loss": 0.1377, + "step": 60500 + }, + { + "epoch": 2.1989243404317174, + "grad_norm": 0.9268959760665894, + "learning_rate": 3.913488808805702e-05, + "loss": 0.1058, + "step": 60510 + }, + { + "epoch": 2.199287738934516, + "grad_norm": 1.091874122619629, + "learning_rate": 3.913060753031414e-05, + "loss": 0.1232, + "step": 60520 + }, + { + "epoch": 2.1996511374373138, + "grad_norm": 45.63993835449219, + "learning_rate": 3.912632636373367e-05, + "loss": 0.4101, + "step": 60530 + }, + { + "epoch": 2.2000145359401118, + "grad_norm": 1.5052204132080078, + "learning_rate": 3.912204458850005e-05, + "loss": 0.1542, + "step": 60540 + }, + { + "epoch": 2.20037793444291, + "grad_norm": 0.9882798790931702, + "learning_rate": 3.911776220479777e-05, + "loss": 0.1096, + "step": 60550 + }, + { + "epoch": 2.200741332945708, + "grad_norm": 2.0385029315948486, + "learning_rate": 3.9113479212811356e-05, + "loss": 0.0945, + "step": 60560 + }, + { + "epoch": 2.2011047314485066, + "grad_norm": 0.5360209345817566, + "learning_rate": 3.910919561272533e-05, + "loss": 0.1064, + "step": 60570 + }, + { + "epoch": 2.2014681299513046, + "grad_norm": 2.028599739074707, + "learning_rate": 3.910491140472428e-05, + "loss": 0.1076, + "step": 60580 + }, + { + "epoch": 2.2018315284541026, + "grad_norm": 2.3928070068359375, + "learning_rate": 3.910062658899277e-05, + "loss": 0.1278, + "step": 60590 + }, + { + "epoch": 2.202194926956901, + "grad_norm": 0.851287305355072, + "learning_rate": 3.9096341165715436e-05, + "loss": 0.0905, + "step": 60600 + }, + { + "epoch": 2.202194926956901, + "eval_loss": 0.3481411039829254, + "eval_runtime": 180.2524, + "eval_samples_per_second": 41.131, + "eval_steps_per_second": 5.143, + "eval_wer": 0.16283333635885056, + "step": 60600 + }, + { + "epoch": 2.202558325459699, + "grad_norm": 2.9646081924438477, + "learning_rate": 3.9092055135076915e-05, + "loss": 0.1062, + "step": 60610 + }, + { + "epoch": 2.2029217239624974, + "grad_norm": 0.6181505918502808, + "learning_rate": 3.908776849726188e-05, + "loss": 0.102, + "step": 60620 + }, + { + "epoch": 2.2032851224652954, + "grad_norm": 0.49643078446388245, + "learning_rate": 3.908348125245502e-05, + "loss": 0.1266, + "step": 60630 + }, + { + "epoch": 2.203648520968094, + "grad_norm": 4.227423667907715, + "learning_rate": 3.907919340084106e-05, + "loss": 0.1613, + "step": 60640 + }, + { + "epoch": 2.204011919470892, + "grad_norm": 0.5859548449516296, + "learning_rate": 3.9074904942604764e-05, + "loss": 0.0863, + "step": 60650 + }, + { + "epoch": 2.20437531797369, + "grad_norm": 0.9373226761817932, + "learning_rate": 3.9070615877930886e-05, + "loss": 0.1071, + "step": 60660 + }, + { + "epoch": 2.2047387164764882, + "grad_norm": 0.8272415399551392, + "learning_rate": 3.906632620700422e-05, + "loss": 0.1139, + "step": 60670 + }, + { + "epoch": 2.205102114979286, + "grad_norm": 1.1634105443954468, + "learning_rate": 3.9062035930009625e-05, + "loss": 0.0981, + "step": 60680 + }, + { + "epoch": 2.2054655134820846, + "grad_norm": 1.0491262674331665, + "learning_rate": 3.905774504713192e-05, + "loss": 0.1312, + "step": 60690 + }, + { + "epoch": 2.2058289119848826, + "grad_norm": 0.6341159343719482, + "learning_rate": 3.905345355855601e-05, + "loss": 0.0847, + "step": 60700 + }, + { + "epoch": 2.2061923104876806, + "grad_norm": 0.8382464647293091, + "learning_rate": 3.904916146446678e-05, + "loss": 0.0945, + "step": 60710 + }, + { + "epoch": 2.206555708990479, + "grad_norm": 0.5253706574440002, + "learning_rate": 3.904486876504917e-05, + "loss": 0.1328, + "step": 60720 + }, + { + "epoch": 2.206919107493277, + "grad_norm": 3.4987101554870605, + "learning_rate": 3.904057546048815e-05, + "loss": 0.0862, + "step": 60730 + }, + { + "epoch": 2.2072825059960755, + "grad_norm": 1.807373285293579, + "learning_rate": 3.903628155096867e-05, + "loss": 0.1005, + "step": 60740 + }, + { + "epoch": 2.2076459044988734, + "grad_norm": 1.1272157430648804, + "learning_rate": 3.9031987036675774e-05, + "loss": 0.1044, + "step": 60750 + }, + { + "epoch": 2.2080093030016714, + "grad_norm": 0.47526538372039795, + "learning_rate": 3.902769191779448e-05, + "loss": 0.0947, + "step": 60760 + }, + { + "epoch": 2.20837270150447, + "grad_norm": 0.8546761274337769, + "learning_rate": 3.9023396194509846e-05, + "loss": 0.1129, + "step": 60770 + }, + { + "epoch": 2.208736100007268, + "grad_norm": 0.557783305644989, + "learning_rate": 3.901909986700697e-05, + "loss": 0.1198, + "step": 60780 + }, + { + "epoch": 2.2090994985100663, + "grad_norm": 0.5007415413856506, + "learning_rate": 3.901480293547096e-05, + "loss": 0.1154, + "step": 60790 + }, + { + "epoch": 2.2094628970128642, + "grad_norm": 1.8647228479385376, + "learning_rate": 3.901050540008696e-05, + "loss": 0.1997, + "step": 60800 + }, + { + "epoch": 2.2098262955156627, + "grad_norm": 0.7277741432189941, + "learning_rate": 3.900620726104012e-05, + "loss": 0.1032, + "step": 60810 + }, + { + "epoch": 2.2101896940184607, + "grad_norm": 0.4809872806072235, + "learning_rate": 3.9001908518515656e-05, + "loss": 0.1162, + "step": 60820 + }, + { + "epoch": 2.2105530925212586, + "grad_norm": 0.7930201888084412, + "learning_rate": 3.899760917269877e-05, + "loss": 0.1207, + "step": 60830 + }, + { + "epoch": 2.210916491024057, + "grad_norm": 1.0866421461105347, + "learning_rate": 3.89933092237747e-05, + "loss": 0.1334, + "step": 60840 + }, + { + "epoch": 2.211279889526855, + "grad_norm": 2.5568645000457764, + "learning_rate": 3.898900867192874e-05, + "loss": 0.0934, + "step": 60850 + }, + { + "epoch": 2.2116432880296535, + "grad_norm": 1.1865488290786743, + "learning_rate": 3.8984707517346154e-05, + "loss": 0.1072, + "step": 60860 + }, + { + "epoch": 2.2120066865324515, + "grad_norm": 1.0457924604415894, + "learning_rate": 3.8980405760212284e-05, + "loss": 0.1108, + "step": 60870 + }, + { + "epoch": 2.2123700850352495, + "grad_norm": 1.0669806003570557, + "learning_rate": 3.897610340071247e-05, + "loss": 0.1128, + "step": 60880 + }, + { + "epoch": 2.212733483538048, + "grad_norm": 4.467153072357178, + "learning_rate": 3.897180043903209e-05, + "loss": 0.1347, + "step": 60890 + }, + { + "epoch": 2.213096882040846, + "grad_norm": 0.48086392879486084, + "learning_rate": 3.896749687535655e-05, + "loss": 0.1018, + "step": 60900 + }, + { + "epoch": 2.2134602805436443, + "grad_norm": 0.6917502284049988, + "learning_rate": 3.8963192709871253e-05, + "loss": 0.0779, + "step": 60910 + }, + { + "epoch": 2.2138236790464423, + "grad_norm": 0.9939578771591187, + "learning_rate": 3.8958887942761665e-05, + "loss": 0.1278, + "step": 60920 + }, + { + "epoch": 2.2141870775492407, + "grad_norm": 0.8723199963569641, + "learning_rate": 3.895458257421327e-05, + "loss": 0.1034, + "step": 60930 + }, + { + "epoch": 2.2145504760520387, + "grad_norm": 2.1347460746765137, + "learning_rate": 3.8950276604411554e-05, + "loss": 0.1086, + "step": 60940 + }, + { + "epoch": 2.2149138745548367, + "grad_norm": 0.6032381653785706, + "learning_rate": 3.894597003354206e-05, + "loss": 0.1141, + "step": 60950 + }, + { + "epoch": 2.215277273057635, + "grad_norm": 0.540093719959259, + "learning_rate": 3.894166286179033e-05, + "loss": 0.0892, + "step": 60960 + }, + { + "epoch": 2.215640671560433, + "grad_norm": 0.6019798517227173, + "learning_rate": 3.893735508934197e-05, + "loss": 0.0911, + "step": 60970 + }, + { + "epoch": 2.2160040700632315, + "grad_norm": 1.290984869003296, + "learning_rate": 3.893304671638254e-05, + "loss": 0.1283, + "step": 60980 + }, + { + "epoch": 2.2163674685660295, + "grad_norm": 0.5830800533294678, + "learning_rate": 3.892873774309772e-05, + "loss": 0.1094, + "step": 60990 + }, + { + "epoch": 2.2167308670688275, + "grad_norm": 1.1006908416748047, + "learning_rate": 3.892442816967315e-05, + "loss": 0.1157, + "step": 61000 + }, + { + "epoch": 2.217094265571626, + "grad_norm": 0.42782625555992126, + "learning_rate": 3.8920117996294505e-05, + "loss": 0.0852, + "step": 61010 + }, + { + "epoch": 2.217457664074424, + "grad_norm": 1.036010503768921, + "learning_rate": 3.8915807223147506e-05, + "loss": 0.1175, + "step": 61020 + }, + { + "epoch": 2.2178210625772223, + "grad_norm": 1.0316133499145508, + "learning_rate": 3.891149585041789e-05, + "loss": 0.1007, + "step": 61030 + }, + { + "epoch": 2.2181844610800203, + "grad_norm": 1.3433195352554321, + "learning_rate": 3.890718387829141e-05, + "loss": 2.4829, + "step": 61040 + }, + { + "epoch": 2.2185478595828183, + "grad_norm": 1.0637513399124146, + "learning_rate": 3.890287130695386e-05, + "loss": 0.1012, + "step": 61050 + }, + { + "epoch": 2.2189112580856167, + "grad_norm": 0.9853934645652771, + "learning_rate": 3.8898558136591055e-05, + "loss": 0.0983, + "step": 61060 + }, + { + "epoch": 2.2192746565884147, + "grad_norm": 0.6070169806480408, + "learning_rate": 3.889424436738882e-05, + "loss": 0.0933, + "step": 61070 + }, + { + "epoch": 2.219638055091213, + "grad_norm": 0.9032323360443115, + "learning_rate": 3.8889929999533045e-05, + "loss": 0.1039, + "step": 61080 + }, + { + "epoch": 2.220001453594011, + "grad_norm": 1.1702359914779663, + "learning_rate": 3.888561503320961e-05, + "loss": 0.1674, + "step": 61090 + }, + { + "epoch": 2.2203648520968096, + "grad_norm": 1.5377318859100342, + "learning_rate": 3.888129946860442e-05, + "loss": 0.0977, + "step": 61100 + }, + { + "epoch": 2.2207282505996075, + "grad_norm": 0.8765788078308105, + "learning_rate": 3.887698330590342e-05, + "loss": 0.1133, + "step": 61110 + }, + { + "epoch": 2.2210916491024055, + "grad_norm": 1.543609857559204, + "learning_rate": 3.887266654529259e-05, + "loss": 0.1335, + "step": 61120 + }, + { + "epoch": 2.221455047605204, + "grad_norm": 2.144033908843994, + "learning_rate": 3.886834918695792e-05, + "loss": 0.1097, + "step": 61130 + }, + { + "epoch": 2.221818446108002, + "grad_norm": 0.9922833442687988, + "learning_rate": 3.886403123108542e-05, + "loss": 0.1245, + "step": 61140 + }, + { + "epoch": 2.2221818446108004, + "grad_norm": 0.7214832305908203, + "learning_rate": 3.885971267786115e-05, + "loss": 0.3578, + "step": 61150 + }, + { + "epoch": 2.2225452431135984, + "grad_norm": 0.3823475241661072, + "learning_rate": 3.8855393527471175e-05, + "loss": 0.1396, + "step": 61160 + }, + { + "epoch": 2.2229086416163963, + "grad_norm": 0.4039243459701538, + "learning_rate": 3.885107378010158e-05, + "loss": 0.0998, + "step": 61170 + }, + { + "epoch": 2.2232720401191948, + "grad_norm": 0.6202207207679749, + "learning_rate": 3.884675343593851e-05, + "loss": 0.1278, + "step": 61180 + }, + { + "epoch": 2.2236354386219928, + "grad_norm": 1.5638877153396606, + "learning_rate": 3.884243249516809e-05, + "loss": 0.1162, + "step": 61190 + }, + { + "epoch": 2.223998837124791, + "grad_norm": 2.9136829376220703, + "learning_rate": 3.8838110957976514e-05, + "loss": 0.1007, + "step": 61200 + }, + { + "epoch": 2.223998837124791, + "eval_loss": 0.3254208564758301, + "eval_runtime": 180.3346, + "eval_samples_per_second": 41.112, + "eval_steps_per_second": 5.14, + "eval_wer": 0.15656143917802748, + "step": 61200 + }, + { + "epoch": 2.224362235627589, + "grad_norm": 0.40365439653396606, + "learning_rate": 3.883378882454998e-05, + "loss": 0.1016, + "step": 61210 + }, + { + "epoch": 2.2247256341303876, + "grad_norm": 0.48598694801330566, + "learning_rate": 3.882946609507468e-05, + "loss": 0.1089, + "step": 61220 + }, + { + "epoch": 2.2250890326331856, + "grad_norm": 1.7332137823104858, + "learning_rate": 3.882514276973692e-05, + "loss": 0.1101, + "step": 61230 + }, + { + "epoch": 2.2254524311359836, + "grad_norm": 2.3783786296844482, + "learning_rate": 3.882081884872293e-05, + "loss": 0.0936, + "step": 61240 + }, + { + "epoch": 2.225815829638782, + "grad_norm": 0.684394896030426, + "learning_rate": 3.881649433221904e-05, + "loss": 0.0868, + "step": 61250 + }, + { + "epoch": 2.22617922814158, + "grad_norm": 0.43269750475883484, + "learning_rate": 3.881216922041156e-05, + "loss": 0.1026, + "step": 61260 + }, + { + "epoch": 2.2265426266443784, + "grad_norm": 0.9126709699630737, + "learning_rate": 3.8807843513486866e-05, + "loss": 0.1436, + "step": 61270 + }, + { + "epoch": 2.2269060251471764, + "grad_norm": 1.7345128059387207, + "learning_rate": 3.880351721163131e-05, + "loss": 0.0992, + "step": 61280 + }, + { + "epoch": 2.2272694236499744, + "grad_norm": 1.4722065925598145, + "learning_rate": 3.879919031503131e-05, + "loss": 0.1637, + "step": 61290 + }, + { + "epoch": 2.227632822152773, + "grad_norm": 0.6145905256271362, + "learning_rate": 3.879486282387331e-05, + "loss": 0.0881, + "step": 61300 + }, + { + "epoch": 2.227996220655571, + "grad_norm": 0.5936566591262817, + "learning_rate": 3.879053473834374e-05, + "loss": 0.0947, + "step": 61310 + }, + { + "epoch": 2.228359619158369, + "grad_norm": 2.5217325687408447, + "learning_rate": 3.87862060586291e-05, + "loss": 0.1251, + "step": 61320 + }, + { + "epoch": 2.228723017661167, + "grad_norm": 2.556070327758789, + "learning_rate": 3.878187678491589e-05, + "loss": 0.129, + "step": 61330 + }, + { + "epoch": 2.229086416163965, + "grad_norm": 1.7533297538757324, + "learning_rate": 3.877754691739065e-05, + "loss": 0.1331, + "step": 61340 + }, + { + "epoch": 2.2294498146667636, + "grad_norm": 0.6436717510223389, + "learning_rate": 3.877321645623994e-05, + "loss": 0.0836, + "step": 61350 + }, + { + "epoch": 2.2298132131695616, + "grad_norm": 0.5834245085716248, + "learning_rate": 3.8768885401650325e-05, + "loss": 0.0953, + "step": 61360 + }, + { + "epoch": 2.23017661167236, + "grad_norm": 2.3103013038635254, + "learning_rate": 3.8764553753808436e-05, + "loss": 0.1138, + "step": 61370 + }, + { + "epoch": 2.230540010175158, + "grad_norm": 1.5668505430221558, + "learning_rate": 3.87602215129009e-05, + "loss": 0.1569, + "step": 61380 + }, + { + "epoch": 2.2309034086779564, + "grad_norm": 0.719791054725647, + "learning_rate": 3.875588867911437e-05, + "loss": 0.108, + "step": 61390 + }, + { + "epoch": 2.2312668071807544, + "grad_norm": 0.729350745677948, + "learning_rate": 3.875155525263555e-05, + "loss": 0.0832, + "step": 61400 + }, + { + "epoch": 2.2316302056835524, + "grad_norm": 1.3647226095199585, + "learning_rate": 3.874722123365113e-05, + "loss": 0.0913, + "step": 61410 + }, + { + "epoch": 2.231993604186351, + "grad_norm": 0.6896275877952576, + "learning_rate": 3.8742886622347876e-05, + "loss": 0.1133, + "step": 61420 + }, + { + "epoch": 2.232357002689149, + "grad_norm": 0.8130580186843872, + "learning_rate": 3.8738551418912526e-05, + "loss": 0.0909, + "step": 61430 + }, + { + "epoch": 2.2327204011919473, + "grad_norm": 1.155916690826416, + "learning_rate": 3.873421562353188e-05, + "loss": 0.114, + "step": 61440 + }, + { + "epoch": 2.2330837996947452, + "grad_norm": 1.4737950563430786, + "learning_rate": 3.872987923639274e-05, + "loss": 0.4289, + "step": 61450 + }, + { + "epoch": 2.2334471981975432, + "grad_norm": 0.41144660115242004, + "learning_rate": 3.8725542257681966e-05, + "loss": 0.0862, + "step": 61460 + }, + { + "epoch": 2.2338105967003417, + "grad_norm": 0.5804570913314819, + "learning_rate": 3.872120468758641e-05, + "loss": 0.1067, + "step": 61470 + }, + { + "epoch": 2.2341739952031396, + "grad_norm": 0.8408393263816833, + "learning_rate": 3.871686652629296e-05, + "loss": 0.1097, + "step": 61480 + }, + { + "epoch": 2.234537393705938, + "grad_norm": 1.0146747827529907, + "learning_rate": 3.871252777398854e-05, + "loss": 0.1391, + "step": 61490 + }, + { + "epoch": 2.234900792208736, + "grad_norm": 0.8638483881950378, + "learning_rate": 3.8708188430860084e-05, + "loss": 0.5518, + "step": 61500 + }, + { + "epoch": 2.2352641907115345, + "grad_norm": 0.9493032693862915, + "learning_rate": 3.8703848497094565e-05, + "loss": 0.3308, + "step": 61510 + }, + { + "epoch": 2.2356275892143325, + "grad_norm": 2.7466158866882324, + "learning_rate": 3.8699507972878974e-05, + "loss": 0.1196, + "step": 61520 + }, + { + "epoch": 2.2359909877171305, + "grad_norm": 0.737774133682251, + "learning_rate": 3.869516685840032e-05, + "loss": 0.0988, + "step": 61530 + }, + { + "epoch": 2.236354386219929, + "grad_norm": 0.4675132632255554, + "learning_rate": 3.8690825153845667e-05, + "loss": 0.1314, + "step": 61540 + }, + { + "epoch": 2.236717784722727, + "grad_norm": 0.9533403515815735, + "learning_rate": 3.8686482859402055e-05, + "loss": 0.1024, + "step": 61550 + }, + { + "epoch": 2.2370811832255253, + "grad_norm": 0.7988652586936951, + "learning_rate": 3.8682139975256605e-05, + "loss": 0.1002, + "step": 61560 + }, + { + "epoch": 2.2374445817283233, + "grad_norm": 0.45931610465049744, + "learning_rate": 3.867779650159642e-05, + "loss": 0.1012, + "step": 61570 + }, + { + "epoch": 2.2378079802311213, + "grad_norm": 2.8576176166534424, + "learning_rate": 3.8673452438608646e-05, + "loss": 0.2177, + "step": 61580 + }, + { + "epoch": 2.2381713787339197, + "grad_norm": 1.2942947149276733, + "learning_rate": 3.8669107786480464e-05, + "loss": 0.1286, + "step": 61590 + }, + { + "epoch": 2.2385347772367177, + "grad_norm": 0.4589090049266815, + "learning_rate": 3.866476254539906e-05, + "loss": 0.0818, + "step": 61600 + }, + { + "epoch": 2.238898175739516, + "grad_norm": 0.5710172057151794, + "learning_rate": 3.866041671555166e-05, + "loss": 0.2093, + "step": 61610 + }, + { + "epoch": 2.239261574242314, + "grad_norm": 0.6458502411842346, + "learning_rate": 3.86560702971255e-05, + "loss": 0.1259, + "step": 61620 + }, + { + "epoch": 2.239624972745112, + "grad_norm": 1.265261173248291, + "learning_rate": 3.865172329030786e-05, + "loss": 0.1009, + "step": 61630 + }, + { + "epoch": 2.2399883712479105, + "grad_norm": 0.49177274107933044, + "learning_rate": 3.8647375695286036e-05, + "loss": 0.1111, + "step": 61640 + }, + { + "epoch": 2.2403517697507085, + "grad_norm": 1.8626538515090942, + "learning_rate": 3.864302751224736e-05, + "loss": 0.1194, + "step": 61650 + }, + { + "epoch": 2.240715168253507, + "grad_norm": 0.9763522148132324, + "learning_rate": 3.8638678741379166e-05, + "loss": 0.0953, + "step": 61660 + }, + { + "epoch": 2.241078566756305, + "grad_norm": 2.4940896034240723, + "learning_rate": 3.863432938286883e-05, + "loss": 0.1028, + "step": 61670 + }, + { + "epoch": 2.2414419652591033, + "grad_norm": 1.7410259246826172, + "learning_rate": 3.862997943690375e-05, + "loss": 0.1435, + "step": 61680 + }, + { + "epoch": 2.2418053637619013, + "grad_norm": 2.1346585750579834, + "learning_rate": 3.862562890367135e-05, + "loss": 0.1286, + "step": 61690 + }, + { + "epoch": 2.2421687622646993, + "grad_norm": 0.6110004782676697, + "learning_rate": 3.862127778335909e-05, + "loss": 0.1131, + "step": 61700 + }, + { + "epoch": 2.2425321607674977, + "grad_norm": 0.7446867227554321, + "learning_rate": 3.8616926076154426e-05, + "loss": 0.0888, + "step": 61710 + }, + { + "epoch": 2.2428955592702957, + "grad_norm": 0.820365846157074, + "learning_rate": 3.861257378224488e-05, + "loss": 0.104, + "step": 61720 + }, + { + "epoch": 2.243258957773094, + "grad_norm": 0.5953546166419983, + "learning_rate": 3.860822090181795e-05, + "loss": 0.0993, + "step": 61730 + }, + { + "epoch": 2.243622356275892, + "grad_norm": 1.5128546953201294, + "learning_rate": 3.86038674350612e-05, + "loss": 0.2106, + "step": 61740 + }, + { + "epoch": 2.24398575477869, + "grad_norm": 0.7002906799316406, + "learning_rate": 3.859951338216221e-05, + "loss": 0.0863, + "step": 61750 + }, + { + "epoch": 2.2443491532814885, + "grad_norm": 0.7450056076049805, + "learning_rate": 3.859515874330857e-05, + "loss": 0.0991, + "step": 61760 + }, + { + "epoch": 2.2447125517842865, + "grad_norm": 0.5604157447814941, + "learning_rate": 3.859080351868792e-05, + "loss": 0.1255, + "step": 61770 + }, + { + "epoch": 2.245075950287085, + "grad_norm": 1.1846556663513184, + "learning_rate": 3.85864477084879e-05, + "loss": 0.1329, + "step": 61780 + }, + { + "epoch": 2.245439348789883, + "grad_norm": 1.1680017709732056, + "learning_rate": 3.8582091312896186e-05, + "loss": 0.1409, + "step": 61790 + }, + { + "epoch": 2.2458027472926814, + "grad_norm": 0.34896320104599, + "learning_rate": 3.857773433210048e-05, + "loss": 1.4069, + "step": 61800 + }, + { + "epoch": 2.2458027472926814, + "eval_loss": 0.31015458703041077, + "eval_runtime": 179.4469, + "eval_samples_per_second": 41.316, + "eval_steps_per_second": 5.166, + "eval_wer": 0.15887595984533556, + "step": 61800 + }, + { + "epoch": 2.2461661457954794, + "grad_norm": 8.44802188873291, + "learning_rate": 3.8573376766288515e-05, + "loss": 0.0953, + "step": 61810 + }, + { + "epoch": 2.2465295442982773, + "grad_norm": 0.44796204566955566, + "learning_rate": 3.8569018615648034e-05, + "loss": 0.1058, + "step": 61820 + }, + { + "epoch": 2.2468929428010758, + "grad_norm": 0.7886875867843628, + "learning_rate": 3.8564659880366826e-05, + "loss": 0.163, + "step": 61830 + }, + { + "epoch": 2.2472563413038738, + "grad_norm": 0.5576759576797485, + "learning_rate": 3.856030056063269e-05, + "loss": 0.1326, + "step": 61840 + }, + { + "epoch": 2.247619739806672, + "grad_norm": 0.8255923986434937, + "learning_rate": 3.855594065663345e-05, + "loss": 0.1045, + "step": 61850 + }, + { + "epoch": 2.24798313830947, + "grad_norm": 1.2470930814743042, + "learning_rate": 3.855158016855695e-05, + "loss": 0.0788, + "step": 61860 + }, + { + "epoch": 2.248346536812268, + "grad_norm": 0.9577877521514893, + "learning_rate": 3.854721909659108e-05, + "loss": 0.1024, + "step": 61870 + }, + { + "epoch": 2.2487099353150666, + "grad_norm": 1.3195165395736694, + "learning_rate": 3.854285744092375e-05, + "loss": 0.0975, + "step": 61880 + }, + { + "epoch": 2.2490733338178646, + "grad_norm": 0.8952762484550476, + "learning_rate": 3.853849520174286e-05, + "loss": 0.1479, + "step": 61890 + }, + { + "epoch": 2.249436732320663, + "grad_norm": 0.9849411249160767, + "learning_rate": 3.85341323792364e-05, + "loss": 0.0954, + "step": 61900 + }, + { + "epoch": 2.249800130823461, + "grad_norm": 1.1869410276412964, + "learning_rate": 3.8529768973592325e-05, + "loss": 0.104, + "step": 61910 + }, + { + "epoch": 2.250163529326259, + "grad_norm": 0.7452064752578735, + "learning_rate": 3.852540498499864e-05, + "loss": 0.1, + "step": 61920 + }, + { + "epoch": 2.2505269278290574, + "grad_norm": 0.7757828831672668, + "learning_rate": 3.8521040413643385e-05, + "loss": 0.1397, + "step": 61930 + }, + { + "epoch": 2.2508903263318554, + "grad_norm": 1.0734906196594238, + "learning_rate": 3.8516675259714594e-05, + "loss": 0.1162, + "step": 61940 + }, + { + "epoch": 2.251253724834654, + "grad_norm": 1.4619065523147583, + "learning_rate": 3.851230952340037e-05, + "loss": 0.6661, + "step": 61950 + }, + { + "epoch": 2.251617123337452, + "grad_norm": 1.221156120300293, + "learning_rate": 3.850794320488881e-05, + "loss": 0.1048, + "step": 61960 + }, + { + "epoch": 2.2519805218402498, + "grad_norm": 1.1556357145309448, + "learning_rate": 3.8503576304368025e-05, + "loss": 0.1435, + "step": 61970 + }, + { + "epoch": 2.252343920343048, + "grad_norm": 0.5849198698997498, + "learning_rate": 3.849920882202619e-05, + "loss": 0.1031, + "step": 61980 + }, + { + "epoch": 2.252707318845846, + "grad_norm": 0.5589366555213928, + "learning_rate": 3.849484075805148e-05, + "loss": 0.1123, + "step": 61990 + }, + { + "epoch": 2.2530707173486446, + "grad_norm": 1.709695816040039, + "learning_rate": 3.849047211263209e-05, + "loss": 0.1071, + "step": 62000 + }, + { + "epoch": 2.2534341158514426, + "grad_norm": 5.30033016204834, + "learning_rate": 3.848610288595626e-05, + "loss": 0.1276, + "step": 62010 + }, + { + "epoch": 2.253797514354241, + "grad_norm": 1.242638349533081, + "learning_rate": 3.848173307821224e-05, + "loss": 0.1183, + "step": 62020 + }, + { + "epoch": 2.254160912857039, + "grad_norm": 0.650566816329956, + "learning_rate": 3.84773626895883e-05, + "loss": 0.1074, + "step": 62030 + }, + { + "epoch": 2.2545243113598374, + "grad_norm": 0.8243488669395447, + "learning_rate": 3.847299172027277e-05, + "loss": 0.2269, + "step": 62040 + }, + { + "epoch": 2.2548877098626354, + "grad_norm": 0.5993553996086121, + "learning_rate": 3.846862017045396e-05, + "loss": 0.1093, + "step": 62050 + }, + { + "epoch": 2.2552511083654334, + "grad_norm": 1.5640254020690918, + "learning_rate": 3.846424804032023e-05, + "loss": 0.1403, + "step": 62060 + }, + { + "epoch": 2.255614506868232, + "grad_norm": 2.9386844635009766, + "learning_rate": 3.8459875330059946e-05, + "loss": 0.1261, + "step": 62070 + }, + { + "epoch": 2.25597790537103, + "grad_norm": 0.45292994379997253, + "learning_rate": 3.845550203986154e-05, + "loss": 0.0919, + "step": 62080 + }, + { + "epoch": 2.2563413038738283, + "grad_norm": 1.122269868850708, + "learning_rate": 3.845112816991341e-05, + "loss": 0.1646, + "step": 62090 + }, + { + "epoch": 2.2567047023766262, + "grad_norm": 0.33831652998924255, + "learning_rate": 3.844675372040403e-05, + "loss": 0.0923, + "step": 62100 + }, + { + "epoch": 2.257068100879424, + "grad_norm": 0.6775882244110107, + "learning_rate": 3.844237869152188e-05, + "loss": 0.092, + "step": 62110 + }, + { + "epoch": 2.2574314993822227, + "grad_norm": 1.5221953392028809, + "learning_rate": 3.843800308345547e-05, + "loss": 0.1027, + "step": 62120 + }, + { + "epoch": 2.2577948978850206, + "grad_norm": 1.1137598752975464, + "learning_rate": 3.8433626896393306e-05, + "loss": 0.1145, + "step": 62130 + }, + { + "epoch": 2.258158296387819, + "grad_norm": 1.5517561435699463, + "learning_rate": 3.842925013052395e-05, + "loss": 0.0914, + "step": 62140 + }, + { + "epoch": 2.258521694890617, + "grad_norm": 0.9319009184837341, + "learning_rate": 3.8424872786036006e-05, + "loss": 0.0987, + "step": 62150 + }, + { + "epoch": 2.258885093393415, + "grad_norm": 1.056016206741333, + "learning_rate": 3.842049486311805e-05, + "loss": 0.0768, + "step": 62160 + }, + { + "epoch": 2.2592484918962135, + "grad_norm": 0.3143411874771118, + "learning_rate": 3.8416116361958724e-05, + "loss": 0.124, + "step": 62170 + }, + { + "epoch": 2.2596118903990114, + "grad_norm": 0.5706644058227539, + "learning_rate": 3.841173728274668e-05, + "loss": 0.0781, + "step": 62180 + }, + { + "epoch": 2.25997528890181, + "grad_norm": 0.7634672522544861, + "learning_rate": 3.840735762567058e-05, + "loss": 0.1179, + "step": 62190 + }, + { + "epoch": 2.260338687404608, + "grad_norm": 1.0519330501556396, + "learning_rate": 3.840297739091916e-05, + "loss": 0.0966, + "step": 62200 + }, + { + "epoch": 2.260702085907406, + "grad_norm": 0.7548292875289917, + "learning_rate": 3.839859657868112e-05, + "loss": 0.1004, + "step": 62210 + }, + { + "epoch": 2.2610654844102043, + "grad_norm": 0.5876504182815552, + "learning_rate": 3.8394215189145236e-05, + "loss": 0.1199, + "step": 62220 + }, + { + "epoch": 2.2614288829130023, + "grad_norm": 0.7557339668273926, + "learning_rate": 3.838983322250028e-05, + "loss": 0.1043, + "step": 62230 + }, + { + "epoch": 2.2617922814158007, + "grad_norm": 0.9950221180915833, + "learning_rate": 3.838545067893504e-05, + "loss": 0.103, + "step": 62240 + }, + { + "epoch": 2.2621556799185987, + "grad_norm": 1.2867968082427979, + "learning_rate": 3.838106755863836e-05, + "loss": 0.0987, + "step": 62250 + }, + { + "epoch": 2.2625190784213967, + "grad_norm": 0.8998819589614868, + "learning_rate": 3.837668386179909e-05, + "loss": 0.1041, + "step": 62260 + }, + { + "epoch": 2.262882476924195, + "grad_norm": 1.0797913074493408, + "learning_rate": 3.837229958860611e-05, + "loss": 0.1054, + "step": 62270 + }, + { + "epoch": 2.263245875426993, + "grad_norm": 1.4692394733428955, + "learning_rate": 3.836791473924831e-05, + "loss": 0.1027, + "step": 62280 + }, + { + "epoch": 2.2636092739297915, + "grad_norm": 1.2375293970108032, + "learning_rate": 3.836352931391464e-05, + "loss": 0.1983, + "step": 62290 + }, + { + "epoch": 2.2639726724325895, + "grad_norm": 1.2827754020690918, + "learning_rate": 3.8359143312794035e-05, + "loss": 0.0914, + "step": 62300 + }, + { + "epoch": 2.264336070935388, + "grad_norm": 0.5154075622558594, + "learning_rate": 3.835475673607547e-05, + "loss": 0.0992, + "step": 62310 + }, + { + "epoch": 2.264699469438186, + "grad_norm": 0.3848717510700226, + "learning_rate": 3.8350369583947956e-05, + "loss": 0.115, + "step": 62320 + }, + { + "epoch": 2.2650628679409843, + "grad_norm": 0.6954711675643921, + "learning_rate": 3.834598185660052e-05, + "loss": 0.1018, + "step": 62330 + }, + { + "epoch": 2.2654262664437823, + "grad_norm": 1.0320098400115967, + "learning_rate": 3.834159355422221e-05, + "loss": 0.1365, + "step": 62340 + }, + { + "epoch": 2.2657896649465803, + "grad_norm": 0.6527755856513977, + "learning_rate": 3.83372046770021e-05, + "loss": 0.0761, + "step": 62350 + }, + { + "epoch": 2.2661530634493787, + "grad_norm": 1.2087364196777344, + "learning_rate": 3.8332815225129303e-05, + "loss": 0.0941, + "step": 62360 + }, + { + "epoch": 2.2665164619521767, + "grad_norm": 1.7340302467346191, + "learning_rate": 3.8328425198792926e-05, + "loss": 0.1125, + "step": 62370 + }, + { + "epoch": 2.266879860454975, + "grad_norm": 1.7903550863265991, + "learning_rate": 3.8324034598182135e-05, + "loss": 0.1045, + "step": 62380 + }, + { + "epoch": 2.267243258957773, + "grad_norm": 0.498909592628479, + "learning_rate": 3.8319643423486105e-05, + "loss": 0.1317, + "step": 62390 + }, + { + "epoch": 2.267606657460571, + "grad_norm": 1.1796486377716064, + "learning_rate": 3.831525167489403e-05, + "loss": 0.0968, + "step": 62400 + }, + { + "epoch": 2.267606657460571, + "eval_loss": 0.3475956916809082, + "eval_runtime": 179.5468, + "eval_samples_per_second": 41.293, + "eval_steps_per_second": 5.163, + "eval_wer": 0.1566340515911195, + "step": 62400 + }, + { + "epoch": 2.2679700559633695, + "grad_norm": 0.7915635704994202, + "learning_rate": 3.831085935259513e-05, + "loss": 0.0949, + "step": 62410 + }, + { + "epoch": 2.2683334544661675, + "grad_norm": 0.5292233824729919, + "learning_rate": 3.8306466456778655e-05, + "loss": 0.1073, + "step": 62420 + }, + { + "epoch": 2.268696852968966, + "grad_norm": 0.5092893242835999, + "learning_rate": 3.8302072987633895e-05, + "loss": 0.1053, + "step": 62430 + }, + { + "epoch": 2.269060251471764, + "grad_norm": 0.5169047117233276, + "learning_rate": 3.829767894535013e-05, + "loss": 0.1986, + "step": 62440 + }, + { + "epoch": 2.269423649974562, + "grad_norm": 0.4594692587852478, + "learning_rate": 3.829328433011671e-05, + "loss": 0.1058, + "step": 62450 + }, + { + "epoch": 2.2697870484773603, + "grad_norm": 0.4674893021583557, + "learning_rate": 3.8288889142122955e-05, + "loss": 0.1126, + "step": 62460 + }, + { + "epoch": 2.2701504469801583, + "grad_norm": 1.422492504119873, + "learning_rate": 3.828449338155825e-05, + "loss": 0.1232, + "step": 62470 + }, + { + "epoch": 2.2705138454829568, + "grad_norm": 2.171562671661377, + "learning_rate": 3.828009704861199e-05, + "loss": 0.1213, + "step": 62480 + }, + { + "epoch": 2.2708772439857547, + "grad_norm": 1.4158885478973389, + "learning_rate": 3.8275700143473595e-05, + "loss": 0.1294, + "step": 62490 + }, + { + "epoch": 2.2712406424885527, + "grad_norm": 0.7011764049530029, + "learning_rate": 3.827130266633253e-05, + "loss": 0.0875, + "step": 62500 + }, + { + "epoch": 2.271604040991351, + "grad_norm": 2.2935948371887207, + "learning_rate": 3.8266904617378235e-05, + "loss": 0.2261, + "step": 62510 + }, + { + "epoch": 2.271967439494149, + "grad_norm": 0.653005063533783, + "learning_rate": 3.826250599680023e-05, + "loss": 0.1304, + "step": 62520 + }, + { + "epoch": 2.2723308379969476, + "grad_norm": 0.6509010791778564, + "learning_rate": 3.8258106804788035e-05, + "loss": 0.0971, + "step": 62530 + }, + { + "epoch": 2.2726942364997456, + "grad_norm": 1.473751425743103, + "learning_rate": 3.8253707041531186e-05, + "loss": 0.1029, + "step": 62540 + }, + { + "epoch": 2.2730576350025435, + "grad_norm": 0.31367307901382446, + "learning_rate": 3.824930670721926e-05, + "loss": 0.0996, + "step": 62550 + }, + { + "epoch": 2.273421033505342, + "grad_norm": 0.6324036121368408, + "learning_rate": 3.824490580204185e-05, + "loss": 0.0984, + "step": 62560 + }, + { + "epoch": 2.27378443200814, + "grad_norm": 1.8539944887161255, + "learning_rate": 3.824050432618858e-05, + "loss": 0.157, + "step": 62570 + }, + { + "epoch": 2.2741478305109384, + "grad_norm": 1.1299885511398315, + "learning_rate": 3.823610227984907e-05, + "loss": 0.1997, + "step": 62580 + }, + { + "epoch": 2.2745112290137364, + "grad_norm": 1.0749928951263428, + "learning_rate": 3.823169966321302e-05, + "loss": 0.092, + "step": 62590 + }, + { + "epoch": 2.274874627516535, + "grad_norm": 1.3616483211517334, + "learning_rate": 3.822729647647011e-05, + "loss": 0.1034, + "step": 62600 + }, + { + "epoch": 2.275238026019333, + "grad_norm": 1.2886927127838135, + "learning_rate": 3.8222892719810057e-05, + "loss": 0.0943, + "step": 62610 + }, + { + "epoch": 2.275601424522131, + "grad_norm": 0.5466746091842651, + "learning_rate": 3.82184883934226e-05, + "loss": 0.1075, + "step": 62620 + }, + { + "epoch": 2.275964823024929, + "grad_norm": 0.6999200582504272, + "learning_rate": 3.821408349749751e-05, + "loss": 0.1001, + "step": 62630 + }, + { + "epoch": 2.276328221527727, + "grad_norm": 0.6271117329597473, + "learning_rate": 3.820967803222458e-05, + "loss": 0.0887, + "step": 62640 + }, + { + "epoch": 2.2766916200305256, + "grad_norm": 1.088416337966919, + "learning_rate": 3.820527199779362e-05, + "loss": 0.2975, + "step": 62650 + }, + { + "epoch": 2.2770550185333236, + "grad_norm": 0.5583050847053528, + "learning_rate": 3.820086539439448e-05, + "loss": 0.0849, + "step": 62660 + }, + { + "epoch": 2.277418417036122, + "grad_norm": 0.5963543057441711, + "learning_rate": 3.819645822221701e-05, + "loss": 0.1082, + "step": 62670 + }, + { + "epoch": 2.27778181553892, + "grad_norm": 2.868208408355713, + "learning_rate": 3.819205048145113e-05, + "loss": 0.0928, + "step": 62680 + }, + { + "epoch": 2.278145214041718, + "grad_norm": 0.9108635187149048, + "learning_rate": 3.8187642172286706e-05, + "loss": 0.1155, + "step": 62690 + }, + { + "epoch": 2.2785086125445164, + "grad_norm": 0.9071031808853149, + "learning_rate": 3.8183233294913725e-05, + "loss": 0.0974, + "step": 62700 + }, + { + "epoch": 2.2788720110473144, + "grad_norm": 0.5449077486991882, + "learning_rate": 3.817882384952212e-05, + "loss": 0.0807, + "step": 62710 + }, + { + "epoch": 2.279235409550113, + "grad_norm": 0.7269715666770935, + "learning_rate": 3.817441383630187e-05, + "loss": 0.1273, + "step": 62720 + }, + { + "epoch": 2.279598808052911, + "grad_norm": 1.493605375289917, + "learning_rate": 3.817000325544302e-05, + "loss": 0.1112, + "step": 62730 + }, + { + "epoch": 2.279962206555709, + "grad_norm": 0.6935878992080688, + "learning_rate": 3.816559210713558e-05, + "loss": 0.1291, + "step": 62740 + }, + { + "epoch": 2.2803256050585072, + "grad_norm": 1.932387113571167, + "learning_rate": 3.8161180391569625e-05, + "loss": 0.0937, + "step": 62750 + }, + { + "epoch": 2.280689003561305, + "grad_norm": 1.2899200916290283, + "learning_rate": 3.8156768108935226e-05, + "loss": 0.0894, + "step": 62760 + }, + { + "epoch": 2.2810524020641036, + "grad_norm": 1.262176752090454, + "learning_rate": 3.815235525942251e-05, + "loss": 0.4695, + "step": 62770 + }, + { + "epoch": 2.2814158005669016, + "grad_norm": 0.48227742314338684, + "learning_rate": 3.8147941843221604e-05, + "loss": 0.1126, + "step": 62780 + }, + { + "epoch": 2.2817791990696996, + "grad_norm": 1.0351576805114746, + "learning_rate": 3.814352786052266e-05, + "loss": 0.1762, + "step": 62790 + }, + { + "epoch": 2.282142597572498, + "grad_norm": 1.1177520751953125, + "learning_rate": 3.813911331151586e-05, + "loss": 0.1046, + "step": 62800 + }, + { + "epoch": 2.282505996075296, + "grad_norm": 7.0832295417785645, + "learning_rate": 3.8134698196391427e-05, + "loss": 0.1436, + "step": 62810 + }, + { + "epoch": 2.2828693945780945, + "grad_norm": 0.9384248852729797, + "learning_rate": 3.8130282515339576e-05, + "loss": 0.1185, + "step": 62820 + }, + { + "epoch": 2.2832327930808924, + "grad_norm": 2.5718233585357666, + "learning_rate": 3.812586626855057e-05, + "loss": 0.1172, + "step": 62830 + }, + { + "epoch": 2.2835961915836904, + "grad_norm": 0.9541994333267212, + "learning_rate": 3.812144945621469e-05, + "loss": 0.1141, + "step": 62840 + }, + { + "epoch": 2.283959590086489, + "grad_norm": 0.7058838605880737, + "learning_rate": 3.811703207852224e-05, + "loss": 0.0813, + "step": 62850 + }, + { + "epoch": 2.284322988589287, + "grad_norm": 0.6324445605278015, + "learning_rate": 3.811261413566354e-05, + "loss": 0.1308, + "step": 62860 + }, + { + "epoch": 2.2846863870920853, + "grad_norm": 0.5424672365188599, + "learning_rate": 3.810819562782896e-05, + "loss": 0.1055, + "step": 62870 + }, + { + "epoch": 2.2850497855948833, + "grad_norm": 0.5509172677993774, + "learning_rate": 3.810377655520887e-05, + "loss": 0.1323, + "step": 62880 + }, + { + "epoch": 2.2854131840976817, + "grad_norm": 1.111088752746582, + "learning_rate": 3.8099356917993664e-05, + "loss": 0.142, + "step": 62890 + }, + { + "epoch": 2.2857765826004797, + "grad_norm": 0.44855383038520813, + "learning_rate": 3.8094936716373784e-05, + "loss": 0.0943, + "step": 62900 + }, + { + "epoch": 2.286139981103278, + "grad_norm": 3.367194890975952, + "learning_rate": 3.8090515950539674e-05, + "loss": 2.8216, + "step": 62910 + }, + { + "epoch": 2.286503379606076, + "grad_norm": 0.8625146746635437, + "learning_rate": 3.80860946206818e-05, + "loss": 0.1108, + "step": 62920 + }, + { + "epoch": 2.286866778108874, + "grad_norm": 0.6024346351623535, + "learning_rate": 3.808167272699067e-05, + "loss": 0.2589, + "step": 62930 + }, + { + "epoch": 2.2872301766116725, + "grad_norm": 0.3697529733181, + "learning_rate": 3.8077250269656813e-05, + "loss": 0.1722, + "step": 62940 + }, + { + "epoch": 2.2875935751144705, + "grad_norm": 1.8003566265106201, + "learning_rate": 3.807282724887077e-05, + "loss": 0.1144, + "step": 62950 + }, + { + "epoch": 2.287956973617269, + "grad_norm": 0.6778300404548645, + "learning_rate": 3.806840366482311e-05, + "loss": 0.0946, + "step": 62960 + }, + { + "epoch": 2.288320372120067, + "grad_norm": 0.5251741409301758, + "learning_rate": 3.806397951770444e-05, + "loss": 0.1181, + "step": 62970 + }, + { + "epoch": 2.288683770622865, + "grad_norm": 1.101876974105835, + "learning_rate": 3.805955480770537e-05, + "loss": 0.1195, + "step": 62980 + }, + { + "epoch": 2.2890471691256633, + "grad_norm": 0.5283622741699219, + "learning_rate": 3.805512953501655e-05, + "loss": 0.1299, + "step": 62990 + }, + { + "epoch": 2.2894105676284613, + "grad_norm": 0.3856213390827179, + "learning_rate": 3.8050703699828636e-05, + "loss": 0.0909, + "step": 63000 + }, + { + "epoch": 2.2894105676284613, + "eval_loss": 0.32783856987953186, + "eval_runtime": 179.5121, + "eval_samples_per_second": 41.301, + "eval_steps_per_second": 5.164, + "eval_wer": 0.15972915569916676, + "step": 63000 + }, + { + "epoch": 2.2897739661312597, + "grad_norm": 0.5770326256752014, + "learning_rate": 3.8046277302332357e-05, + "loss": 0.1017, + "step": 63010 + }, + { + "epoch": 2.2901373646340577, + "grad_norm": 0.5281986594200134, + "learning_rate": 3.804185034271839e-05, + "loss": 0.1164, + "step": 63020 + }, + { + "epoch": 2.2905007631368557, + "grad_norm": 1.011020302772522, + "learning_rate": 3.803742282117751e-05, + "loss": 0.0986, + "step": 63030 + }, + { + "epoch": 2.290864161639654, + "grad_norm": 0.9110655784606934, + "learning_rate": 3.803299473790046e-05, + "loss": 0.1498, + "step": 63040 + }, + { + "epoch": 2.291227560142452, + "grad_norm": 1.5773357152938843, + "learning_rate": 3.8028566093078036e-05, + "loss": 0.0975, + "step": 63050 + }, + { + "epoch": 2.2915909586452505, + "grad_norm": 3.258551597595215, + "learning_rate": 3.802413688690105e-05, + "loss": 0.0939, + "step": 63060 + }, + { + "epoch": 2.2919543571480485, + "grad_norm": 0.6953330039978027, + "learning_rate": 3.801970711956036e-05, + "loss": 0.1111, + "step": 63070 + }, + { + "epoch": 2.2923177556508465, + "grad_norm": 0.8726534843444824, + "learning_rate": 3.80152767912468e-05, + "loss": 0.1196, + "step": 63080 + }, + { + "epoch": 2.292681154153645, + "grad_norm": 0.5163739323616028, + "learning_rate": 3.801084590215128e-05, + "loss": 0.1048, + "step": 63090 + }, + { + "epoch": 2.293044552656443, + "grad_norm": 2.2029974460601807, + "learning_rate": 3.80064144524647e-05, + "loss": 0.099, + "step": 63100 + }, + { + "epoch": 2.2934079511592413, + "grad_norm": 0.8436546921730042, + "learning_rate": 3.8001982442378004e-05, + "loss": 0.0945, + "step": 63110 + }, + { + "epoch": 2.2937713496620393, + "grad_norm": 0.5407220721244812, + "learning_rate": 3.799754987208214e-05, + "loss": 0.1066, + "step": 63120 + }, + { + "epoch": 2.2941347481648373, + "grad_norm": 0.7019248008728027, + "learning_rate": 3.7993116741768095e-05, + "loss": 0.11, + "step": 63130 + }, + { + "epoch": 2.2944981466676357, + "grad_norm": 0.5901986956596375, + "learning_rate": 3.7988683051626886e-05, + "loss": 0.119, + "step": 63140 + }, + { + "epoch": 2.2948615451704337, + "grad_norm": 0.6483830809593201, + "learning_rate": 3.798424880184954e-05, + "loss": 0.0827, + "step": 63150 + }, + { + "epoch": 2.295224943673232, + "grad_norm": 0.41235288977622986, + "learning_rate": 3.7979813992627103e-05, + "loss": 0.0921, + "step": 63160 + }, + { + "epoch": 2.29558834217603, + "grad_norm": 3.996107339859009, + "learning_rate": 3.797537862415066e-05, + "loss": 0.0967, + "step": 63170 + }, + { + "epoch": 2.2959517406788286, + "grad_norm": 2.476738452911377, + "learning_rate": 3.7970942696611335e-05, + "loss": 0.1186, + "step": 63180 + }, + { + "epoch": 2.2963151391816266, + "grad_norm": 0.8279284238815308, + "learning_rate": 3.7966506210200224e-05, + "loss": 0.1086, + "step": 63190 + }, + { + "epoch": 2.296678537684425, + "grad_norm": 5.163793087005615, + "learning_rate": 3.79620691651085e-05, + "loss": 0.0827, + "step": 63200 + }, + { + "epoch": 2.297041936187223, + "grad_norm": 0.577820360660553, + "learning_rate": 3.795763156152734e-05, + "loss": 0.1126, + "step": 63210 + }, + { + "epoch": 2.297405334690021, + "grad_norm": 0.29767242074012756, + "learning_rate": 3.7953193399647934e-05, + "loss": 0.1169, + "step": 63220 + }, + { + "epoch": 2.2977687331928194, + "grad_norm": 1.0364243984222412, + "learning_rate": 3.794875467966152e-05, + "loss": 0.1004, + "step": 63230 + }, + { + "epoch": 2.2981321316956174, + "grad_norm": 0.5417031049728394, + "learning_rate": 3.794475935465031e-05, + "loss": 4.3853, + "step": 63240 + }, + { + "epoch": 2.298495530198416, + "grad_norm": 0.8863941431045532, + "learning_rate": 3.794031957478746e-05, + "loss": 0.0942, + "step": 63250 + }, + { + "epoch": 2.2988589287012138, + "grad_norm": 0.6005067825317383, + "learning_rate": 3.7935879237372296e-05, + "loss": 0.0879, + "step": 63260 + }, + { + "epoch": 2.2992223272040118, + "grad_norm": 0.5702997446060181, + "learning_rate": 3.793143834259612e-05, + "loss": 0.1344, + "step": 63270 + }, + { + "epoch": 2.29958572570681, + "grad_norm": 0.5447559356689453, + "learning_rate": 3.7926996890650265e-05, + "loss": 0.1249, + "step": 63280 + }, + { + "epoch": 2.299949124209608, + "grad_norm": 0.9382325410842896, + "learning_rate": 3.7922554881726125e-05, + "loss": 0.1639, + "step": 63290 + }, + { + "epoch": 2.3003125227124066, + "grad_norm": 1.128554344177246, + "learning_rate": 3.791811231601506e-05, + "loss": 0.124, + "step": 63300 + }, + { + "epoch": 2.3006759212152046, + "grad_norm": 1.2001831531524658, + "learning_rate": 3.7913669193708505e-05, + "loss": 0.1115, + "step": 63310 + }, + { + "epoch": 2.3010393197180026, + "grad_norm": 0.5141827464103699, + "learning_rate": 3.790922551499789e-05, + "loss": 0.1934, + "step": 63320 + }, + { + "epoch": 2.301402718220801, + "grad_norm": 1.1889158487319946, + "learning_rate": 3.7904781280074674e-05, + "loss": 0.112, + "step": 63330 + }, + { + "epoch": 2.301766116723599, + "grad_norm": 1.0070478916168213, + "learning_rate": 3.7900336489130355e-05, + "loss": 0.1434, + "step": 63340 + }, + { + "epoch": 2.3021295152263974, + "grad_norm": 0.6805721521377563, + "learning_rate": 3.789589114235643e-05, + "loss": 0.088, + "step": 63350 + }, + { + "epoch": 2.3024929137291954, + "grad_norm": 0.8101871013641357, + "learning_rate": 3.789144523994445e-05, + "loss": 0.0786, + "step": 63360 + }, + { + "epoch": 2.3028563122319934, + "grad_norm": 0.5728216767311096, + "learning_rate": 3.788699878208595e-05, + "loss": 0.1176, + "step": 63370 + }, + { + "epoch": 2.303219710734792, + "grad_norm": 6.430160999298096, + "learning_rate": 3.788255176897253e-05, + "loss": 0.1395, + "step": 63380 + }, + { + "epoch": 2.30358310923759, + "grad_norm": 0.8273718953132629, + "learning_rate": 3.78781042007958e-05, + "loss": 0.1157, + "step": 63390 + }, + { + "epoch": 2.3039465077403882, + "grad_norm": 0.7474293112754822, + "learning_rate": 3.787365607774736e-05, + "loss": 0.6462, + "step": 63400 + }, + { + "epoch": 2.304309906243186, + "grad_norm": 4.311099052429199, + "learning_rate": 3.7869207400018905e-05, + "loss": 0.1136, + "step": 63410 + }, + { + "epoch": 2.304673304745984, + "grad_norm": 0.8261300921440125, + "learning_rate": 3.7864758167802074e-05, + "loss": 0.1357, + "step": 63420 + }, + { + "epoch": 2.3050367032487826, + "grad_norm": 2.123488187789917, + "learning_rate": 3.78603083812886e-05, + "loss": 0.0907, + "step": 63430 + }, + { + "epoch": 2.3054001017515806, + "grad_norm": 0.7351600527763367, + "learning_rate": 3.7855858040670175e-05, + "loss": 0.1308, + "step": 63440 + }, + { + "epoch": 2.305763500254379, + "grad_norm": 3.070939064025879, + "learning_rate": 3.785140714613859e-05, + "loss": 2.963, + "step": 63450 + }, + { + "epoch": 2.306126898757177, + "grad_norm": 0.4340088963508606, + "learning_rate": 3.7846955697885586e-05, + "loss": 0.0927, + "step": 63460 + }, + { + "epoch": 2.3064902972599755, + "grad_norm": 0.6686544418334961, + "learning_rate": 3.7842503696102976e-05, + "loss": 0.1441, + "step": 63470 + }, + { + "epoch": 2.3068536957627734, + "grad_norm": 7.206737041473389, + "learning_rate": 3.7838051140982575e-05, + "loss": 0.1299, + "step": 63480 + }, + { + "epoch": 2.307217094265572, + "grad_norm": 0.6773508191108704, + "learning_rate": 3.7833598032716225e-05, + "loss": 0.1177, + "step": 63490 + }, + { + "epoch": 2.30758049276837, + "grad_norm": 0.5695934295654297, + "learning_rate": 3.78291443714958e-05, + "loss": 0.0918, + "step": 63500 + }, + { + "epoch": 2.307943891271168, + "grad_norm": 0.6884729862213135, + "learning_rate": 3.782469015751319e-05, + "loss": 0.1068, + "step": 63510 + }, + { + "epoch": 2.3083072897739663, + "grad_norm": 0.5272583365440369, + "learning_rate": 3.782023539096031e-05, + "loss": 0.1021, + "step": 63520 + }, + { + "epoch": 2.3086706882767642, + "grad_norm": 3.166252613067627, + "learning_rate": 3.7815780072029103e-05, + "loss": 0.1035, + "step": 63530 + }, + { + "epoch": 2.3090340867795627, + "grad_norm": 0.47669315338134766, + "learning_rate": 3.781132420091153e-05, + "loss": 0.1203, + "step": 63540 + }, + { + "epoch": 2.3093974852823607, + "grad_norm": 1.5463957786560059, + "learning_rate": 3.780686777779958e-05, + "loss": 0.0845, + "step": 63550 + }, + { + "epoch": 2.3097608837851586, + "grad_norm": 0.9264553785324097, + "learning_rate": 3.780241080288527e-05, + "loss": 0.0906, + "step": 63560 + }, + { + "epoch": 2.310124282287957, + "grad_norm": 0.604017436504364, + "learning_rate": 3.7797953276360624e-05, + "loss": 0.128, + "step": 63570 + }, + { + "epoch": 2.310487680790755, + "grad_norm": 0.6354121565818787, + "learning_rate": 3.779349519841771e-05, + "loss": 0.1044, + "step": 63580 + }, + { + "epoch": 2.3108510792935535, + "grad_norm": 0.6733710169792175, + "learning_rate": 3.7789036569248606e-05, + "loss": 0.1096, + "step": 63590 + }, + { + "epoch": 2.3112144777963515, + "grad_norm": 0.6780581474304199, + "learning_rate": 3.778457738904542e-05, + "loss": 0.1325, + "step": 63600 + }, + { + "epoch": 2.3112144777963515, + "eval_loss": 0.33778947591781616, + "eval_runtime": 180.1308, + "eval_samples_per_second": 41.159, + "eval_steps_per_second": 5.146, + "eval_wer": 0.15696988400167008, + "step": 63600 + }, + { + "epoch": 2.3115778762991495, + "grad_norm": 0.42744994163513184, + "learning_rate": 3.778011765800028e-05, + "loss": 0.0901, + "step": 63610 + }, + { + "epoch": 2.311941274801948, + "grad_norm": 0.5375288724899292, + "learning_rate": 3.777565737630534e-05, + "loss": 0.1126, + "step": 63620 + }, + { + "epoch": 2.312304673304746, + "grad_norm": 0.6710574626922607, + "learning_rate": 3.777119654415279e-05, + "loss": 0.4501, + "step": 63630 + }, + { + "epoch": 2.3126680718075443, + "grad_norm": 1.513808012008667, + "learning_rate": 3.77667351617348e-05, + "loss": 0.1793, + "step": 63640 + }, + { + "epoch": 2.3130314703103423, + "grad_norm": 0.4935424029827118, + "learning_rate": 3.776227322924364e-05, + "loss": 0.0848, + "step": 63650 + }, + { + "epoch": 2.3133948688131403, + "grad_norm": 1.5648393630981445, + "learning_rate": 3.775781074687152e-05, + "loss": 0.0965, + "step": 63660 + }, + { + "epoch": 2.3137582673159387, + "grad_norm": 0.3886503279209137, + "learning_rate": 3.775334771481073e-05, + "loss": 0.1081, + "step": 63670 + }, + { + "epoch": 2.3141216658187367, + "grad_norm": 1.0196889638900757, + "learning_rate": 3.7748884133253566e-05, + "loss": 0.0974, + "step": 63680 + }, + { + "epoch": 2.314485064321535, + "grad_norm": 1.0317192077636719, + "learning_rate": 3.7744420002392345e-05, + "loss": 0.1129, + "step": 63690 + }, + { + "epoch": 2.314848462824333, + "grad_norm": 1.3416907787322998, + "learning_rate": 3.773995532241941e-05, + "loss": 0.0985, + "step": 63700 + }, + { + "epoch": 2.3152118613271315, + "grad_norm": 1.2515931129455566, + "learning_rate": 3.7735490093527126e-05, + "loss": 0.1034, + "step": 63710 + }, + { + "epoch": 2.3155752598299295, + "grad_norm": 0.6103869676589966, + "learning_rate": 3.773102431590789e-05, + "loss": 0.1153, + "step": 63720 + }, + { + "epoch": 2.3159386583327275, + "grad_norm": 2.0320076942443848, + "learning_rate": 3.772655798975412e-05, + "loss": 0.1065, + "step": 63730 + }, + { + "epoch": 2.316302056835526, + "grad_norm": 0.9758360385894775, + "learning_rate": 3.772209111525824e-05, + "loss": 0.1187, + "step": 63740 + }, + { + "epoch": 2.316665455338324, + "grad_norm": 1.36004638671875, + "learning_rate": 3.771762369261272e-05, + "loss": 0.1045, + "step": 63750 + }, + { + "epoch": 2.3170288538411223, + "grad_norm": 0.45251816511154175, + "learning_rate": 3.771315572201004e-05, + "loss": 0.1054, + "step": 63760 + }, + { + "epoch": 2.3173922523439203, + "grad_norm": 3.3651912212371826, + "learning_rate": 3.7708687203642724e-05, + "loss": 0.1215, + "step": 63770 + }, + { + "epoch": 2.3177556508467188, + "grad_norm": 0.9686463475227356, + "learning_rate": 3.7704218137703284e-05, + "loss": 0.1114, + "step": 63780 + }, + { + "epoch": 2.3181190493495167, + "grad_norm": 0.7810651659965515, + "learning_rate": 3.769974852438429e-05, + "loss": 0.1284, + "step": 63790 + }, + { + "epoch": 2.3184824478523147, + "grad_norm": 0.600099503993988, + "learning_rate": 3.7695278363878325e-05, + "loss": 0.0929, + "step": 63800 + }, + { + "epoch": 2.318845846355113, + "grad_norm": 0.9034928679466248, + "learning_rate": 3.769080765637798e-05, + "loss": 0.099, + "step": 63810 + }, + { + "epoch": 2.319209244857911, + "grad_norm": 1.4133280515670776, + "learning_rate": 3.7686336402075885e-05, + "loss": 0.1076, + "step": 63820 + }, + { + "epoch": 2.3195726433607096, + "grad_norm": 0.6236594319343567, + "learning_rate": 3.768186460116469e-05, + "loss": 0.1036, + "step": 63830 + }, + { + "epoch": 2.3199360418635075, + "grad_norm": 2.116008996963501, + "learning_rate": 3.7677392253837076e-05, + "loss": 0.1521, + "step": 63840 + }, + { + "epoch": 2.3202994403663055, + "grad_norm": 1.0746735334396362, + "learning_rate": 3.767291936028574e-05, + "loss": 0.5176, + "step": 63850 + }, + { + "epoch": 2.320662838869104, + "grad_norm": 1.0380078554153442, + "learning_rate": 3.766844592070339e-05, + "loss": 0.0889, + "step": 63860 + }, + { + "epoch": 2.321026237371902, + "grad_norm": 0.835041344165802, + "learning_rate": 3.766397193528278e-05, + "loss": 0.1305, + "step": 63870 + }, + { + "epoch": 2.3213896358747004, + "grad_norm": 3.784654140472412, + "learning_rate": 3.7659497404216685e-05, + "loss": 0.1531, + "step": 63880 + }, + { + "epoch": 2.3217530343774984, + "grad_norm": 1.1239734888076782, + "learning_rate": 3.765502232769789e-05, + "loss": 0.1244, + "step": 63890 + }, + { + "epoch": 2.3221164328802963, + "grad_norm": 0.5810584425926208, + "learning_rate": 3.7650546705919204e-05, + "loss": 0.1013, + "step": 63900 + }, + { + "epoch": 2.3224798313830948, + "grad_norm": 0.6790658831596375, + "learning_rate": 3.7646070539073475e-05, + "loss": 0.1047, + "step": 63910 + }, + { + "epoch": 2.3228432298858928, + "grad_norm": 0.3619256615638733, + "learning_rate": 3.7641593827353556e-05, + "loss": 0.0927, + "step": 63920 + }, + { + "epoch": 2.323206628388691, + "grad_norm": 2.329050064086914, + "learning_rate": 3.7637116570952346e-05, + "loss": 0.12, + "step": 63930 + }, + { + "epoch": 2.323570026891489, + "grad_norm": 1.9159663915634155, + "learning_rate": 3.763263877006273e-05, + "loss": 0.1296, + "step": 63940 + }, + { + "epoch": 2.323933425394287, + "grad_norm": 1.206432819366455, + "learning_rate": 3.762816042487768e-05, + "loss": 0.0802, + "step": 63950 + }, + { + "epoch": 2.3242968238970856, + "grad_norm": 0.9730502963066101, + "learning_rate": 3.762368153559012e-05, + "loss": 0.1171, + "step": 63960 + }, + { + "epoch": 2.3246602223998836, + "grad_norm": 0.3301490247249603, + "learning_rate": 3.761920210239303e-05, + "loss": 0.1111, + "step": 63970 + }, + { + "epoch": 2.325023620902682, + "grad_norm": 0.580382227897644, + "learning_rate": 3.7614722125479425e-05, + "loss": 0.0951, + "step": 63980 + }, + { + "epoch": 2.32538701940548, + "grad_norm": 1.5714104175567627, + "learning_rate": 3.761024160504232e-05, + "loss": 0.1407, + "step": 63990 + }, + { + "epoch": 2.3257504179082784, + "grad_norm": 0.6567360162734985, + "learning_rate": 3.7605760541274784e-05, + "loss": 0.0823, + "step": 64000 + }, + { + "epoch": 2.3261138164110764, + "grad_norm": 1.1323597431182861, + "learning_rate": 3.760127893436988e-05, + "loss": 0.108, + "step": 64010 + }, + { + "epoch": 2.3264772149138744, + "grad_norm": 0.9358565807342529, + "learning_rate": 3.7596796784520684e-05, + "loss": 0.1014, + "step": 64020 + }, + { + "epoch": 2.326840613416673, + "grad_norm": 0.7020303010940552, + "learning_rate": 3.759231409192034e-05, + "loss": 0.098, + "step": 64030 + }, + { + "epoch": 2.327204011919471, + "grad_norm": 1.7113333940505981, + "learning_rate": 3.7587830856761996e-05, + "loss": 0.1149, + "step": 64040 + }, + { + "epoch": 2.3275674104222692, + "grad_norm": 2.211527109146118, + "learning_rate": 3.75833470792388e-05, + "loss": 0.085, + "step": 64050 + }, + { + "epoch": 2.327930808925067, + "grad_norm": 0.6617085933685303, + "learning_rate": 3.7578862759543954e-05, + "loss": 0.1596, + "step": 64060 + }, + { + "epoch": 2.3282942074278656, + "grad_norm": 0.6133392453193665, + "learning_rate": 3.757437789787066e-05, + "loss": 0.1044, + "step": 64070 + }, + { + "epoch": 2.3286576059306636, + "grad_norm": 5.026115894317627, + "learning_rate": 3.7569892494412175e-05, + "loss": 0.1376, + "step": 64080 + }, + { + "epoch": 2.3290210044334616, + "grad_norm": 1.0616756677627563, + "learning_rate": 3.756540654936174e-05, + "loss": 0.1029, + "step": 64090 + }, + { + "epoch": 2.32938440293626, + "grad_norm": 0.9439811706542969, + "learning_rate": 3.756092006291264e-05, + "loss": 0.0779, + "step": 64100 + }, + { + "epoch": 2.329747801439058, + "grad_norm": 1.4717971086502075, + "learning_rate": 3.755643303525819e-05, + "loss": 0.0926, + "step": 64110 + }, + { + "epoch": 2.3301111999418564, + "grad_norm": 0.3808611035346985, + "learning_rate": 3.7551945466591716e-05, + "loss": 0.1099, + "step": 64120 + }, + { + "epoch": 2.3304745984446544, + "grad_norm": 1.0548149347305298, + "learning_rate": 3.754745735710657e-05, + "loss": 0.1072, + "step": 64130 + }, + { + "epoch": 2.3308379969474524, + "grad_norm": 2.6092560291290283, + "learning_rate": 3.7542968706996136e-05, + "loss": 0.1365, + "step": 64140 + }, + { + "epoch": 2.331201395450251, + "grad_norm": 1.3453460931777954, + "learning_rate": 3.7538479516453805e-05, + "loss": 0.0904, + "step": 64150 + }, + { + "epoch": 2.331564793953049, + "grad_norm": 0.8132860660552979, + "learning_rate": 3.7533989785673e-05, + "loss": 0.0848, + "step": 64160 + }, + { + "epoch": 2.3319281924558473, + "grad_norm": 1.1206045150756836, + "learning_rate": 3.7529499514847175e-05, + "loss": 0.1255, + "step": 64170 + }, + { + "epoch": 2.3322915909586452, + "grad_norm": 1.248970866203308, + "learning_rate": 3.7525008704169795e-05, + "loss": 0.0924, + "step": 64180 + }, + { + "epoch": 2.3326549894614432, + "grad_norm": 0.9841907620429993, + "learning_rate": 3.752051735383436e-05, + "loss": 0.1479, + "step": 64190 + }, + { + "epoch": 2.3330183879642417, + "grad_norm": 2.1250979900360107, + "learning_rate": 3.7516025464034376e-05, + "loss": 0.0934, + "step": 64200 + }, + { + "epoch": 2.3330183879642417, + "eval_loss": 0.3528118431568146, + "eval_runtime": 179.4186, + "eval_samples_per_second": 41.322, + "eval_steps_per_second": 5.167, + "eval_wer": 0.15433768402708442, + "step": 64200 + }, + { + "epoch": 2.3333817864670396, + "grad_norm": 0.7961970567703247, + "learning_rate": 3.7511533034963384e-05, + "loss": 0.0977, + "step": 64210 + }, + { + "epoch": 2.333745184969838, + "grad_norm": 0.8082739114761353, + "learning_rate": 3.750704006681495e-05, + "loss": 0.1188, + "step": 64220 + }, + { + "epoch": 2.334108583472636, + "grad_norm": 1.3309545516967773, + "learning_rate": 3.7502546559782656e-05, + "loss": 0.1256, + "step": 64230 + }, + { + "epoch": 2.334471981975434, + "grad_norm": 0.8950253129005432, + "learning_rate": 3.749805251406013e-05, + "loss": 0.097, + "step": 64240 + }, + { + "epoch": 2.3348353804782325, + "grad_norm": 1.537735939025879, + "learning_rate": 3.7493557929840974e-05, + "loss": 0.1024, + "step": 64250 + }, + { + "epoch": 2.3351987789810305, + "grad_norm": 0.9132232666015625, + "learning_rate": 3.748906280731887e-05, + "loss": 0.1118, + "step": 64260 + }, + { + "epoch": 2.335562177483829, + "grad_norm": 0.540766179561615, + "learning_rate": 3.7484567146687485e-05, + "loss": 0.1203, + "step": 64270 + }, + { + "epoch": 2.335925575986627, + "grad_norm": 0.5811611413955688, + "learning_rate": 3.748007094814051e-05, + "loss": 0.1067, + "step": 64280 + }, + { + "epoch": 2.3362889744894253, + "grad_norm": 0.715090274810791, + "learning_rate": 3.747557421187169e-05, + "loss": 0.1685, + "step": 64290 + }, + { + "epoch": 2.3366523729922233, + "grad_norm": 0.6315838694572449, + "learning_rate": 3.747107693807477e-05, + "loss": 0.1196, + "step": 64300 + }, + { + "epoch": 2.3370157714950213, + "grad_norm": 1.2922756671905518, + "learning_rate": 3.7466579126943514e-05, + "loss": 0.0906, + "step": 64310 + }, + { + "epoch": 2.3373791699978197, + "grad_norm": 9.100321769714355, + "learning_rate": 3.746208077867172e-05, + "loss": 0.1205, + "step": 64320 + }, + { + "epoch": 2.3377425685006177, + "grad_norm": 0.5777522921562195, + "learning_rate": 3.74575818934532e-05, + "loss": 0.0787, + "step": 64330 + }, + { + "epoch": 2.338105967003416, + "grad_norm": 0.5407727360725403, + "learning_rate": 3.74530824714818e-05, + "loss": 0.0998, + "step": 64340 + }, + { + "epoch": 2.338469365506214, + "grad_norm": 0.6790062785148621, + "learning_rate": 3.744858251295139e-05, + "loss": 0.0855, + "step": 64350 + }, + { + "epoch": 2.3388327640090125, + "grad_norm": 0.552946925163269, + "learning_rate": 3.744408201805585e-05, + "loss": 0.0864, + "step": 64360 + }, + { + "epoch": 2.3391961625118105, + "grad_norm": 0.36451128125190735, + "learning_rate": 3.743958098698909e-05, + "loss": 0.1093, + "step": 64370 + }, + { + "epoch": 2.3395595610146085, + "grad_norm": 0.834068775177002, + "learning_rate": 3.743507941994505e-05, + "loss": 0.1181, + "step": 64380 + }, + { + "epoch": 2.339922959517407, + "grad_norm": 1.2418774366378784, + "learning_rate": 3.743057731711768e-05, + "loss": 0.1128, + "step": 64390 + }, + { + "epoch": 2.340286358020205, + "grad_norm": 1.0964419841766357, + "learning_rate": 3.7426074678700964e-05, + "loss": 0.1012, + "step": 64400 + }, + { + "epoch": 2.3406497565230033, + "grad_norm": 0.7740904092788696, + "learning_rate": 3.74215715048889e-05, + "loss": 0.0835, + "step": 64410 + }, + { + "epoch": 2.3410131550258013, + "grad_norm": 0.6739581823348999, + "learning_rate": 3.741706779587551e-05, + "loss": 0.1302, + "step": 64420 + }, + { + "epoch": 2.3413765535285993, + "grad_norm": 1.141020655632019, + "learning_rate": 3.7412563551854854e-05, + "loss": 0.1881, + "step": 64430 + }, + { + "epoch": 2.3417399520313977, + "grad_norm": 0.7994565367698669, + "learning_rate": 3.7408058773020994e-05, + "loss": 0.1099, + "step": 64440 + }, + { + "epoch": 2.3421033505341957, + "grad_norm": 0.7365929484367371, + "learning_rate": 3.740355345956804e-05, + "loss": 0.088, + "step": 64450 + }, + { + "epoch": 2.342466749036994, + "grad_norm": 0.5093470215797424, + "learning_rate": 3.7399047611690095e-05, + "loss": 0.1143, + "step": 64460 + }, + { + "epoch": 2.342830147539792, + "grad_norm": 1.3585693836212158, + "learning_rate": 3.7394541229581295e-05, + "loss": 0.12, + "step": 64470 + }, + { + "epoch": 2.34319354604259, + "grad_norm": 0.9446144700050354, + "learning_rate": 3.739003431343583e-05, + "loss": 2.1812, + "step": 64480 + }, + { + "epoch": 2.3435569445453885, + "grad_norm": 0.4501352310180664, + "learning_rate": 3.738552686344786e-05, + "loss": 0.1086, + "step": 64490 + }, + { + "epoch": 2.3439203430481865, + "grad_norm": 0.5260722637176514, + "learning_rate": 3.73810188798116e-05, + "loss": 0.1806, + "step": 64500 + }, + { + "epoch": 2.344283741550985, + "grad_norm": 2.6056125164031982, + "learning_rate": 3.73765103627213e-05, + "loss": 0.1072, + "step": 64510 + }, + { + "epoch": 2.344647140053783, + "grad_norm": 0.6981383562088013, + "learning_rate": 3.73720013123712e-05, + "loss": 0.1415, + "step": 64520 + }, + { + "epoch": 2.345010538556581, + "grad_norm": 0.5633025169372559, + "learning_rate": 3.7367491728955585e-05, + "loss": 0.0986, + "step": 64530 + }, + { + "epoch": 2.3453739370593794, + "grad_norm": 0.5558316111564636, + "learning_rate": 3.7362981612668745e-05, + "loss": 0.1791, + "step": 64540 + }, + { + "epoch": 2.3457373355621773, + "grad_norm": 0.543397068977356, + "learning_rate": 3.735847096370503e-05, + "loss": 0.1002, + "step": 64550 + }, + { + "epoch": 2.3461007340649758, + "grad_norm": 0.5885327458381653, + "learning_rate": 3.7353959782258755e-05, + "loss": 0.0944, + "step": 64560 + }, + { + "epoch": 2.3464641325677738, + "grad_norm": 0.9266073107719421, + "learning_rate": 3.7349448068524325e-05, + "loss": 1.9036, + "step": 64570 + }, + { + "epoch": 2.346827531070572, + "grad_norm": 0.4478204548358917, + "learning_rate": 3.7344935822696116e-05, + "loss": 0.1028, + "step": 64580 + }, + { + "epoch": 2.34719092957337, + "grad_norm": 1.6678454875946045, + "learning_rate": 3.7340423044968534e-05, + "loss": 0.1299, + "step": 64590 + }, + { + "epoch": 2.347554328076168, + "grad_norm": 2.0704760551452637, + "learning_rate": 3.733590973553604e-05, + "loss": 0.086, + "step": 64600 + }, + { + "epoch": 2.3479177265789666, + "grad_norm": 1.1192750930786133, + "learning_rate": 3.733139589459308e-05, + "loss": 0.0977, + "step": 64610 + }, + { + "epoch": 2.3482811250817646, + "grad_norm": 1.0189874172210693, + "learning_rate": 3.732688152233415e-05, + "loss": 0.0966, + "step": 64620 + }, + { + "epoch": 2.348644523584563, + "grad_norm": 0.8110418319702148, + "learning_rate": 3.7322366618953755e-05, + "loss": 0.1015, + "step": 64630 + }, + { + "epoch": 2.349007922087361, + "grad_norm": 0.7661551833152771, + "learning_rate": 3.731785118464642e-05, + "loss": 0.1056, + "step": 64640 + }, + { + "epoch": 2.3493713205901594, + "grad_norm": 0.6133613586425781, + "learning_rate": 3.731333521960672e-05, + "loss": 0.0862, + "step": 64650 + }, + { + "epoch": 2.3497347190929574, + "grad_norm": 1.6493825912475586, + "learning_rate": 3.73088187240292e-05, + "loss": 0.1103, + "step": 64660 + }, + { + "epoch": 2.3500981175957554, + "grad_norm": 0.7170090675354004, + "learning_rate": 3.7304301698108486e-05, + "loss": 1.3385, + "step": 64670 + }, + { + "epoch": 2.350461516098554, + "grad_norm": 0.3917316794395447, + "learning_rate": 3.7299784142039186e-05, + "loss": 0.0838, + "step": 64680 + }, + { + "epoch": 2.350824914601352, + "grad_norm": 0.85912024974823, + "learning_rate": 3.729526605601595e-05, + "loss": 0.1025, + "step": 64690 + }, + { + "epoch": 2.35118831310415, + "grad_norm": 0.3880862295627594, + "learning_rate": 3.729074744023345e-05, + "loss": 0.0867, + "step": 64700 + }, + { + "epoch": 2.351551711606948, + "grad_norm": 0.5538926124572754, + "learning_rate": 3.728622829488637e-05, + "loss": 0.1349, + "step": 64710 + }, + { + "epoch": 2.351915110109746, + "grad_norm": 0.4051951766014099, + "learning_rate": 3.7281708620169424e-05, + "loss": 0.1023, + "step": 64720 + }, + { + "epoch": 2.3522785086125446, + "grad_norm": 3.5087623596191406, + "learning_rate": 3.7277188416277354e-05, + "loss": 0.1052, + "step": 64730 + }, + { + "epoch": 2.3526419071153426, + "grad_norm": 0.7415525317192078, + "learning_rate": 3.727266768340492e-05, + "loss": 0.1682, + "step": 64740 + }, + { + "epoch": 2.353005305618141, + "grad_norm": 0.41486695408821106, + "learning_rate": 3.7268146421746895e-05, + "loss": 0.123, + "step": 64750 + }, + { + "epoch": 2.353368704120939, + "grad_norm": 1.6847058534622192, + "learning_rate": 3.726362463149811e-05, + "loss": 0.108, + "step": 64760 + }, + { + "epoch": 2.353732102623737, + "grad_norm": 0.6038152575492859, + "learning_rate": 3.7259102312853356e-05, + "loss": 0.1165, + "step": 64770 + }, + { + "epoch": 2.3540955011265354, + "grad_norm": 1.413368821144104, + "learning_rate": 3.7254579466007505e-05, + "loss": 0.0947, + "step": 64780 + }, + { + "epoch": 2.3544588996293334, + "grad_norm": 0.9277619123458862, + "learning_rate": 3.7250056091155427e-05, + "loss": 0.1269, + "step": 64790 + }, + { + "epoch": 2.354822298132132, + "grad_norm": 1.1914100646972656, + "learning_rate": 3.724553218849202e-05, + "loss": 0.0882, + "step": 64800 + }, + { + "epoch": 2.354822298132132, + "eval_loss": 0.34669631719589233, + "eval_runtime": 180.4215, + "eval_samples_per_second": 41.093, + "eval_steps_per_second": 5.138, + "eval_wer": 0.15537241091364568, + "step": 64800 + }, + { + "epoch": 2.35518569663493, + "grad_norm": 0.8063227534294128, + "learning_rate": 3.7241007758212195e-05, + "loss": 0.0882, + "step": 64810 + }, + { + "epoch": 2.355549095137728, + "grad_norm": 8.477306365966797, + "learning_rate": 3.723648280051091e-05, + "loss": 0.1379, + "step": 64820 + }, + { + "epoch": 2.3559124936405262, + "grad_norm": 1.4196289777755737, + "learning_rate": 3.723195731558311e-05, + "loss": 0.0822, + "step": 64830 + }, + { + "epoch": 2.3562758921433242, + "grad_norm": 0.8000519275665283, + "learning_rate": 3.722743130362379e-05, + "loss": 0.0988, + "step": 64840 + }, + { + "epoch": 2.3566392906461227, + "grad_norm": 5.073339939117432, + "learning_rate": 3.722290476482796e-05, + "loss": 0.0932, + "step": 64850 + }, + { + "epoch": 2.3570026891489206, + "grad_norm": 0.8329682946205139, + "learning_rate": 3.7218377699390666e-05, + "loss": 0.091, + "step": 64860 + }, + { + "epoch": 2.357366087651719, + "grad_norm": 33.570316314697266, + "learning_rate": 3.7213850107506936e-05, + "loss": 2.4337, + "step": 64870 + }, + { + "epoch": 2.357729486154517, + "grad_norm": 0.5240826606750488, + "learning_rate": 3.720932198937187e-05, + "loss": 0.118, + "step": 64880 + }, + { + "epoch": 2.358092884657315, + "grad_norm": 0.7187747955322266, + "learning_rate": 3.720479334518056e-05, + "loss": 0.1019, + "step": 64890 + }, + { + "epoch": 2.3584562831601135, + "grad_norm": 5.0828351974487305, + "learning_rate": 3.720026417512812e-05, + "loss": 0.0869, + "step": 64900 + }, + { + "epoch": 2.3588196816629114, + "grad_norm": 0.688025176525116, + "learning_rate": 3.719573447940972e-05, + "loss": 0.1029, + "step": 64910 + }, + { + "epoch": 2.35918308016571, + "grad_norm": 1.203792691230774, + "learning_rate": 3.71912042582205e-05, + "loss": 0.1196, + "step": 64920 + }, + { + "epoch": 2.359546478668508, + "grad_norm": 0.5731534361839294, + "learning_rate": 3.718667351175567e-05, + "loss": 0.0968, + "step": 64930 + }, + { + "epoch": 2.3599098771713063, + "grad_norm": 1.7429757118225098, + "learning_rate": 3.718214224021044e-05, + "loss": 0.104, + "step": 64940 + }, + { + "epoch": 2.3602732756741043, + "grad_norm": 1.9315886497497559, + "learning_rate": 3.7177610443780045e-05, + "loss": 0.1186, + "step": 64950 + }, + { + "epoch": 2.3606366741769023, + "grad_norm": 0.8713351488113403, + "learning_rate": 3.717307812265974e-05, + "loss": 0.0962, + "step": 64960 + }, + { + "epoch": 2.3610000726797007, + "grad_norm": 1.1917448043823242, + "learning_rate": 3.716854527704482e-05, + "loss": 0.1117, + "step": 64970 + }, + { + "epoch": 2.3613634711824987, + "grad_norm": 1.092644214630127, + "learning_rate": 3.716401190713057e-05, + "loss": 0.0889, + "step": 64980 + }, + { + "epoch": 2.361726869685297, + "grad_norm": 1.172472357749939, + "learning_rate": 3.715947801311233e-05, + "loss": 0.1126, + "step": 64990 + }, + { + "epoch": 2.362090268188095, + "grad_norm": 1.0360251665115356, + "learning_rate": 3.715494359518545e-05, + "loss": 0.104, + "step": 65000 + }, + { + "epoch": 2.362453666690893, + "grad_norm": 0.88475102186203, + "learning_rate": 3.715040865354529e-05, + "loss": 0.0905, + "step": 65010 + }, + { + "epoch": 2.3628170651936915, + "grad_norm": 2.172114849090576, + "learning_rate": 3.714587318838726e-05, + "loss": 0.1047, + "step": 65020 + }, + { + "epoch": 2.3631804636964895, + "grad_norm": 1.0699172019958496, + "learning_rate": 3.7141337199906766e-05, + "loss": 0.1052, + "step": 65030 + }, + { + "epoch": 2.363543862199288, + "grad_norm": 0.365556925535202, + "learning_rate": 3.713680068829925e-05, + "loss": 0.1084, + "step": 65040 + }, + { + "epoch": 2.363907260702086, + "grad_norm": 0.6626974940299988, + "learning_rate": 3.713226365376018e-05, + "loss": 0.0883, + "step": 65050 + }, + { + "epoch": 2.364270659204884, + "grad_norm": 1.7596914768218994, + "learning_rate": 3.7127726096485026e-05, + "loss": 0.0966, + "step": 65060 + }, + { + "epoch": 2.3646340577076823, + "grad_norm": 0.5741199254989624, + "learning_rate": 3.712318801666932e-05, + "loss": 0.1532, + "step": 65070 + }, + { + "epoch": 2.3649974562104803, + "grad_norm": 1.754315733909607, + "learning_rate": 3.711864941450856e-05, + "loss": 0.1066, + "step": 65080 + }, + { + "epoch": 2.3653608547132787, + "grad_norm": 0.7265182137489319, + "learning_rate": 3.711411029019833e-05, + "loss": 0.1318, + "step": 65090 + }, + { + "epoch": 2.3657242532160767, + "grad_norm": 0.5546099543571472, + "learning_rate": 3.7109570643934185e-05, + "loss": 0.1076, + "step": 65100 + }, + { + "epoch": 2.3660876517188747, + "grad_norm": 0.5260456800460815, + "learning_rate": 3.7105030475911716e-05, + "loss": 0.0945, + "step": 65110 + }, + { + "epoch": 2.366451050221673, + "grad_norm": 0.733099639415741, + "learning_rate": 3.710048978632657e-05, + "loss": 0.0959, + "step": 65120 + }, + { + "epoch": 2.366814448724471, + "grad_norm": 0.7349701523780823, + "learning_rate": 3.709594857537436e-05, + "loss": 0.0935, + "step": 65130 + }, + { + "epoch": 2.3671778472272695, + "grad_norm": 1.6328225135803223, + "learning_rate": 3.7091406843250774e-05, + "loss": 0.158, + "step": 65140 + }, + { + "epoch": 2.3675412457300675, + "grad_norm": 0.4439161717891693, + "learning_rate": 3.7086864590151484e-05, + "loss": 0.0965, + "step": 65150 + }, + { + "epoch": 2.367904644232866, + "grad_norm": 1.500626564025879, + "learning_rate": 3.70823218162722e-05, + "loss": 0.0838, + "step": 65160 + }, + { + "epoch": 2.368268042735664, + "grad_norm": 0.5546636581420898, + "learning_rate": 3.7077778521808656e-05, + "loss": 0.1117, + "step": 65170 + }, + { + "epoch": 2.368631441238462, + "grad_norm": 2.3335354328155518, + "learning_rate": 3.707323470695662e-05, + "loss": 0.5259, + "step": 65180 + }, + { + "epoch": 2.3689948397412603, + "grad_norm": 1.3475418090820312, + "learning_rate": 3.706869037191185e-05, + "loss": 0.1113, + "step": 65190 + }, + { + "epoch": 2.3693582382440583, + "grad_norm": 1.5157225131988525, + "learning_rate": 3.706414551687015e-05, + "loss": 0.1229, + "step": 65200 + }, + { + "epoch": 2.3697216367468568, + "grad_norm": 0.707976758480072, + "learning_rate": 3.7059600142027354e-05, + "loss": 0.1022, + "step": 65210 + }, + { + "epoch": 2.3700850352496547, + "grad_norm": 0.48478442430496216, + "learning_rate": 3.7055054247579285e-05, + "loss": 0.1455, + "step": 65220 + }, + { + "epoch": 2.370448433752453, + "grad_norm": 1.4668298959732056, + "learning_rate": 3.7050507833721824e-05, + "loss": 0.1318, + "step": 65230 + }, + { + "epoch": 2.370811832255251, + "grad_norm": 0.6836544275283813, + "learning_rate": 3.704596090065085e-05, + "loss": 0.1028, + "step": 65240 + }, + { + "epoch": 2.371175230758049, + "grad_norm": 0.5317667722702026, + "learning_rate": 3.70414134485623e-05, + "loss": 0.082, + "step": 65250 + }, + { + "epoch": 2.3715386292608476, + "grad_norm": 0.5413720607757568, + "learning_rate": 3.703686547765208e-05, + "loss": 0.0988, + "step": 65260 + }, + { + "epoch": 2.3719020277636456, + "grad_norm": 2.2720227241516113, + "learning_rate": 3.703231698811614e-05, + "loss": 0.1121, + "step": 65270 + }, + { + "epoch": 2.372265426266444, + "grad_norm": 1.079412579536438, + "learning_rate": 3.7027767980150485e-05, + "loss": 0.1999, + "step": 65280 + }, + { + "epoch": 2.372628824769242, + "grad_norm": 2.772294282913208, + "learning_rate": 3.70232184539511e-05, + "loss": 0.1109, + "step": 65290 + }, + { + "epoch": 2.37299222327204, + "grad_norm": 1.1685398817062378, + "learning_rate": 3.701866840971401e-05, + "loss": 0.0708, + "step": 65300 + }, + { + "epoch": 2.3733556217748384, + "grad_norm": 1.359842300415039, + "learning_rate": 3.701411784763526e-05, + "loss": 0.074, + "step": 65310 + }, + { + "epoch": 2.3737190202776364, + "grad_norm": 0.42569172382354736, + "learning_rate": 3.700956676791092e-05, + "loss": 0.1001, + "step": 65320 + }, + { + "epoch": 2.374082418780435, + "grad_norm": 0.6070738434791565, + "learning_rate": 3.700501517073707e-05, + "loss": 0.1083, + "step": 65330 + }, + { + "epoch": 2.374445817283233, + "grad_norm": 5.960649490356445, + "learning_rate": 3.700046305630984e-05, + "loss": 0.1321, + "step": 65340 + }, + { + "epoch": 2.3748092157860308, + "grad_norm": 0.613503098487854, + "learning_rate": 3.699591042482536e-05, + "loss": 0.1093, + "step": 65350 + }, + { + "epoch": 2.375172614288829, + "grad_norm": 0.5209415555000305, + "learning_rate": 3.699135727647977e-05, + "loss": 0.103, + "step": 65360 + }, + { + "epoch": 2.375536012791627, + "grad_norm": 0.7532001733779907, + "learning_rate": 3.698680361146926e-05, + "loss": 0.1061, + "step": 65370 + }, + { + "epoch": 2.3758994112944256, + "grad_norm": 0.7915641665458679, + "learning_rate": 3.6982249429990035e-05, + "loss": 0.0951, + "step": 65380 + }, + { + "epoch": 2.3762628097972236, + "grad_norm": 0.6081142425537109, + "learning_rate": 3.697769473223832e-05, + "loss": 0.115, + "step": 65390 + }, + { + "epoch": 2.3766262083000216, + "grad_norm": 6.751429080963135, + "learning_rate": 3.697313951841035e-05, + "loss": 0.1017, + "step": 65400 + }, + { + "epoch": 2.3766262083000216, + "eval_loss": 0.341545969247818, + "eval_runtime": 180.2661, + "eval_samples_per_second": 41.128, + "eval_steps_per_second": 5.142, + "eval_wer": 0.15476428195400005, + "step": 65400 + }, + { + "epoch": 2.37698960680282, + "grad_norm": 0.7073554992675781, + "learning_rate": 3.69685837887024e-05, + "loss": 0.0784, + "step": 65410 + }, + { + "epoch": 2.377353005305618, + "grad_norm": 1.2818964719772339, + "learning_rate": 3.696402754331076e-05, + "loss": 0.1119, + "step": 65420 + }, + { + "epoch": 2.3777164038084164, + "grad_norm": 1.005615234375, + "learning_rate": 3.695947078243174e-05, + "loss": 0.1153, + "step": 65430 + }, + { + "epoch": 2.3780798023112144, + "grad_norm": 0.8593710660934448, + "learning_rate": 3.695491350626168e-05, + "loss": 0.0793, + "step": 65440 + }, + { + "epoch": 2.378443200814013, + "grad_norm": 2.337388038635254, + "learning_rate": 3.695035571499692e-05, + "loss": 0.097, + "step": 65450 + }, + { + "epoch": 2.378806599316811, + "grad_norm": 0.9329900741577148, + "learning_rate": 3.694579740883387e-05, + "loss": 0.1108, + "step": 65460 + }, + { + "epoch": 2.379169997819609, + "grad_norm": 0.7032762765884399, + "learning_rate": 3.69412385879689e-05, + "loss": 0.1192, + "step": 65470 + }, + { + "epoch": 2.3795333963224072, + "grad_norm": 0.7048949599266052, + "learning_rate": 3.693667925259845e-05, + "loss": 0.0932, + "step": 65480 + }, + { + "epoch": 2.379896794825205, + "grad_norm": 0.8367437124252319, + "learning_rate": 3.693211940291896e-05, + "loss": 0.139, + "step": 65490 + }, + { + "epoch": 2.3802601933280036, + "grad_norm": 0.9763396978378296, + "learning_rate": 3.69275590391269e-05, + "loss": 0.0914, + "step": 65500 + }, + { + "epoch": 2.3806235918308016, + "grad_norm": 1.1304420232772827, + "learning_rate": 3.6922998161418764e-05, + "loss": 0.0912, + "step": 65510 + }, + { + "epoch": 2.3809869903336, + "grad_norm": 4.609717845916748, + "learning_rate": 3.691843676999105e-05, + "loss": 0.099, + "step": 65520 + }, + { + "epoch": 2.381350388836398, + "grad_norm": 1.2089684009552002, + "learning_rate": 3.6913874865040307e-05, + "loss": 0.1087, + "step": 65530 + }, + { + "epoch": 2.381713787339196, + "grad_norm": 0.7825998663902283, + "learning_rate": 3.690931244676309e-05, + "loss": 0.1141, + "step": 65540 + }, + { + "epoch": 2.3820771858419945, + "grad_norm": 0.3670007586479187, + "learning_rate": 3.690474951535597e-05, + "loss": 0.0831, + "step": 65550 + }, + { + "epoch": 2.3824405843447924, + "grad_norm": 0.8454808592796326, + "learning_rate": 3.6900186071015545e-05, + "loss": 2.7237, + "step": 65560 + }, + { + "epoch": 2.382803982847591, + "grad_norm": 0.6918748617172241, + "learning_rate": 3.689562211393845e-05, + "loss": 0.4282, + "step": 65570 + }, + { + "epoch": 2.383167381350389, + "grad_norm": 0.8183717727661133, + "learning_rate": 3.6891057644321326e-05, + "loss": 0.0964, + "step": 65580 + }, + { + "epoch": 2.383530779853187, + "grad_norm": 1.358555793762207, + "learning_rate": 3.688649266236083e-05, + "loss": 0.1215, + "step": 65590 + }, + { + "epoch": 2.3838941783559853, + "grad_norm": 0.7757040858268738, + "learning_rate": 3.688192716825366e-05, + "loss": 0.0865, + "step": 65600 + }, + { + "epoch": 2.3842575768587833, + "grad_norm": 2.875025510787964, + "learning_rate": 3.687736116219652e-05, + "loss": 0.0834, + "step": 65610 + }, + { + "epoch": 2.3846209753615817, + "grad_norm": 0.5162243247032166, + "learning_rate": 3.6872794644386156e-05, + "loss": 0.1043, + "step": 65620 + }, + { + "epoch": 2.3849843738643797, + "grad_norm": 0.7602340579032898, + "learning_rate": 3.68682276150193e-05, + "loss": 0.1051, + "step": 65630 + }, + { + "epoch": 2.3853477723671777, + "grad_norm": 0.4563780128955841, + "learning_rate": 3.686366007429276e-05, + "loss": 0.1144, + "step": 65640 + }, + { + "epoch": 2.385711170869976, + "grad_norm": 1.0391710996627808, + "learning_rate": 3.685909202240331e-05, + "loss": 0.0894, + "step": 65650 + }, + { + "epoch": 2.386074569372774, + "grad_norm": 0.5064871907234192, + "learning_rate": 3.685452345954778e-05, + "loss": 0.0882, + "step": 65660 + }, + { + "epoch": 2.3864379678755725, + "grad_norm": 1.3617416620254517, + "learning_rate": 3.684995438592301e-05, + "loss": 0.1068, + "step": 65670 + }, + { + "epoch": 2.3868013663783705, + "grad_norm": 0.7488900423049927, + "learning_rate": 3.684538480172587e-05, + "loss": 0.1205, + "step": 65680 + }, + { + "epoch": 2.3871647648811685, + "grad_norm": 0.4754915237426758, + "learning_rate": 3.684081470715325e-05, + "loss": 0.1073, + "step": 65690 + }, + { + "epoch": 2.387528163383967, + "grad_norm": 1.4789927005767822, + "learning_rate": 3.6836244102402053e-05, + "loss": 0.1681, + "step": 65700 + }, + { + "epoch": 2.387891561886765, + "grad_norm": 0.6496606469154358, + "learning_rate": 3.68316729876692e-05, + "loss": 0.0993, + "step": 65710 + }, + { + "epoch": 2.3882549603895633, + "grad_norm": 0.8786084651947021, + "learning_rate": 3.6827101363151676e-05, + "loss": 0.1326, + "step": 65720 + }, + { + "epoch": 2.3886183588923613, + "grad_norm": 0.7775259613990784, + "learning_rate": 3.682252922904641e-05, + "loss": 0.1014, + "step": 65730 + }, + { + "epoch": 2.3889817573951597, + "grad_norm": 1.226577877998352, + "learning_rate": 3.681795658555044e-05, + "loss": 0.1135, + "step": 65740 + }, + { + "epoch": 2.3893451558979577, + "grad_norm": 0.6108711957931519, + "learning_rate": 3.681338343286077e-05, + "loss": 0.1047, + "step": 65750 + }, + { + "epoch": 2.3897085544007557, + "grad_norm": 1.8602646589279175, + "learning_rate": 3.6808809771174435e-05, + "loss": 0.0888, + "step": 65760 + }, + { + "epoch": 2.390071952903554, + "grad_norm": 0.9307143092155457, + "learning_rate": 3.6804235600688503e-05, + "loss": 0.1109, + "step": 65770 + }, + { + "epoch": 2.390435351406352, + "grad_norm": 0.7531790733337402, + "learning_rate": 3.679966092160005e-05, + "loss": 0.1228, + "step": 65780 + }, + { + "epoch": 2.3907987499091505, + "grad_norm": 0.608249843120575, + "learning_rate": 3.679508573410621e-05, + "loss": 0.1062, + "step": 65790 + }, + { + "epoch": 2.3911621484119485, + "grad_norm": 1.1337485313415527, + "learning_rate": 3.679051003840408e-05, + "loss": 0.5558, + "step": 65800 + }, + { + "epoch": 2.391525546914747, + "grad_norm": 0.5861150622367859, + "learning_rate": 3.678593383469083e-05, + "loss": 0.1104, + "step": 65810 + }, + { + "epoch": 2.391888945417545, + "grad_norm": 0.3788084089756012, + "learning_rate": 3.678135712316362e-05, + "loss": 0.1413, + "step": 65820 + }, + { + "epoch": 2.392252343920343, + "grad_norm": 2.359208106994629, + "learning_rate": 3.6776779904019656e-05, + "loss": 0.1204, + "step": 65830 + }, + { + "epoch": 2.3926157424231413, + "grad_norm": 0.5361478328704834, + "learning_rate": 3.677220217745614e-05, + "loss": 0.1073, + "step": 65840 + }, + { + "epoch": 2.3929791409259393, + "grad_norm": 1.1404966115951538, + "learning_rate": 3.676762394367032e-05, + "loss": 0.0842, + "step": 65850 + }, + { + "epoch": 2.3933425394287378, + "grad_norm": 0.6131421327590942, + "learning_rate": 3.676304520285946e-05, + "loss": 0.0872, + "step": 65860 + }, + { + "epoch": 2.3937059379315357, + "grad_norm": 0.7355049848556519, + "learning_rate": 3.675846595522082e-05, + "loss": 0.1181, + "step": 65870 + }, + { + "epoch": 2.3940693364343337, + "grad_norm": 0.7013423442840576, + "learning_rate": 3.675388620095174e-05, + "loss": 0.0889, + "step": 65880 + }, + { + "epoch": 2.394432734937132, + "grad_norm": 0.5543515086174011, + "learning_rate": 3.674930594024951e-05, + "loss": 0.1213, + "step": 65890 + }, + { + "epoch": 2.39479613343993, + "grad_norm": 0.709343671798706, + "learning_rate": 3.674472517331149e-05, + "loss": 0.0912, + "step": 65900 + }, + { + "epoch": 2.3951595319427286, + "grad_norm": 0.6905022859573364, + "learning_rate": 3.674014390033506e-05, + "loss": 0.6845, + "step": 65910 + }, + { + "epoch": 2.3955229304455266, + "grad_norm": 0.6566099524497986, + "learning_rate": 3.6735562121517593e-05, + "loss": 0.1347, + "step": 65920 + }, + { + "epoch": 2.3958863289483245, + "grad_norm": 1.4284336566925049, + "learning_rate": 3.673097983705651e-05, + "loss": 0.1064, + "step": 65930 + }, + { + "epoch": 2.396249727451123, + "grad_norm": 0.9759535193443298, + "learning_rate": 3.672639704714925e-05, + "loss": 0.149, + "step": 65940 + }, + { + "epoch": 2.396613125953921, + "grad_norm": 1.247986078262329, + "learning_rate": 3.6721813751993255e-05, + "loss": 0.1248, + "step": 65950 + }, + { + "epoch": 2.3969765244567194, + "grad_norm": 0.7816616296768188, + "learning_rate": 3.671722995178603e-05, + "loss": 0.0935, + "step": 65960 + }, + { + "epoch": 2.3973399229595174, + "grad_norm": 2.142498016357422, + "learning_rate": 3.671264564672503e-05, + "loss": 0.1141, + "step": 65970 + }, + { + "epoch": 2.3977033214623154, + "grad_norm": 0.7998883724212646, + "learning_rate": 3.670806083700782e-05, + "loss": 0.0958, + "step": 65980 + }, + { + "epoch": 2.398066719965114, + "grad_norm": 1.2408504486083984, + "learning_rate": 3.6703475522831924e-05, + "loss": 0.1669, + "step": 65990 + }, + { + "epoch": 2.3984301184679118, + "grad_norm": 0.7468869686126709, + "learning_rate": 3.669888970439491e-05, + "loss": 0.0939, + "step": 66000 + }, + { + "epoch": 2.3984301184679118, + "eval_loss": 0.3319300711154938, + "eval_runtime": 179.03, + "eval_samples_per_second": 41.412, + "eval_steps_per_second": 5.178, + "eval_wer": 0.1522319240474159, + "step": 66000 + }, + { + "epoch": 2.39879351697071, + "grad_norm": 0.3617503046989441, + "learning_rate": 3.669430338189436e-05, + "loss": 0.0804, + "step": 66010 + }, + { + "epoch": 2.399156915473508, + "grad_norm": 1.2790522575378418, + "learning_rate": 3.668971655552788e-05, + "loss": 0.1073, + "step": 66020 + }, + { + "epoch": 2.3995203139763066, + "grad_norm": 0.5524618029594421, + "learning_rate": 3.668512922549312e-05, + "loss": 0.1024, + "step": 66030 + }, + { + "epoch": 2.3998837124791046, + "grad_norm": 0.6617368459701538, + "learning_rate": 3.6680541391987706e-05, + "loss": 0.1241, + "step": 66040 + }, + { + "epoch": 2.4002471109819026, + "grad_norm": 1.515463948249817, + "learning_rate": 3.667595305520933e-05, + "loss": 0.0991, + "step": 66050 + }, + { + "epoch": 2.400610509484701, + "grad_norm": 1.0713670253753662, + "learning_rate": 3.667136421535567e-05, + "loss": 0.0983, + "step": 66060 + }, + { + "epoch": 2.400973907987499, + "grad_norm": 0.4194028973579407, + "learning_rate": 3.666677487262446e-05, + "loss": 0.1354, + "step": 66070 + }, + { + "epoch": 2.4013373064902974, + "grad_norm": 1.1584357023239136, + "learning_rate": 3.6662185027213436e-05, + "loss": 0.1073, + "step": 66080 + }, + { + "epoch": 2.4017007049930954, + "grad_norm": 0.9621077179908752, + "learning_rate": 3.6657594679320346e-05, + "loss": 0.1, + "step": 66090 + }, + { + "epoch": 2.402064103495894, + "grad_norm": 0.8532549738883972, + "learning_rate": 3.665300382914298e-05, + "loss": 0.0972, + "step": 66100 + }, + { + "epoch": 2.402427501998692, + "grad_norm": 0.44833171367645264, + "learning_rate": 3.664841247687914e-05, + "loss": 0.094, + "step": 66110 + }, + { + "epoch": 2.40279090050149, + "grad_norm": 0.8976952433586121, + "learning_rate": 3.6643820622726654e-05, + "loss": 0.1537, + "step": 66120 + }, + { + "epoch": 2.4031542990042882, + "grad_norm": 1.280044674873352, + "learning_rate": 3.663922826688336e-05, + "loss": 0.1071, + "step": 66130 + }, + { + "epoch": 2.403517697507086, + "grad_norm": 1.037636160850525, + "learning_rate": 3.6634635409547144e-05, + "loss": 0.1271, + "step": 66140 + }, + { + "epoch": 2.4038810960098846, + "grad_norm": 0.6089548468589783, + "learning_rate": 3.663004205091588e-05, + "loss": 0.106, + "step": 66150 + }, + { + "epoch": 2.4042444945126826, + "grad_norm": 0.5719799995422363, + "learning_rate": 3.662544819118748e-05, + "loss": 0.1048, + "step": 66160 + }, + { + "epoch": 2.4046078930154806, + "grad_norm": 1.0621087551116943, + "learning_rate": 3.662131328915747e-05, + "loss": 0.1121, + "step": 66170 + }, + { + "epoch": 2.404971291518279, + "grad_norm": 0.9576284289360046, + "learning_rate": 3.6616718477889837e-05, + "loss": 0.1075, + "step": 66180 + }, + { + "epoch": 2.405334690021077, + "grad_norm": 0.6212823987007141, + "learning_rate": 3.661212316609915e-05, + "loss": 0.1262, + "step": 66190 + }, + { + "epoch": 2.4056980885238755, + "grad_norm": 0.9172229170799255, + "learning_rate": 3.660752735398338e-05, + "loss": 0.0759, + "step": 66200 + }, + { + "epoch": 2.4060614870266734, + "grad_norm": 0.7851585745811462, + "learning_rate": 3.660293104174057e-05, + "loss": 0.0873, + "step": 66210 + }, + { + "epoch": 2.4064248855294714, + "grad_norm": 0.6783828735351562, + "learning_rate": 3.659833422956873e-05, + "loss": 0.1069, + "step": 66220 + }, + { + "epoch": 2.40678828403227, + "grad_norm": 3.5662567615509033, + "learning_rate": 3.659373691766594e-05, + "loss": 0.0983, + "step": 66230 + }, + { + "epoch": 2.407151682535068, + "grad_norm": 0.9401397705078125, + "learning_rate": 3.658913910623028e-05, + "loss": 0.1096, + "step": 66240 + }, + { + "epoch": 2.4075150810378663, + "grad_norm": 0.5327457189559937, + "learning_rate": 3.658454079545985e-05, + "loss": 0.0867, + "step": 66250 + }, + { + "epoch": 2.4078784795406643, + "grad_norm": 0.5370202660560608, + "learning_rate": 3.657994198555278e-05, + "loss": 0.0901, + "step": 66260 + }, + { + "epoch": 2.4082418780434622, + "grad_norm": 0.2850395143032074, + "learning_rate": 3.65753426767072e-05, + "loss": 0.1159, + "step": 66270 + }, + { + "epoch": 2.4086052765462607, + "grad_norm": 3.4857585430145264, + "learning_rate": 3.65707428691213e-05, + "loss": 0.117, + "step": 66280 + }, + { + "epoch": 2.4089686750490586, + "grad_norm": 0.8752036690711975, + "learning_rate": 3.656614256299325e-05, + "loss": 0.1518, + "step": 66290 + }, + { + "epoch": 2.409332073551857, + "grad_norm": 0.7939157485961914, + "learning_rate": 3.656154175852128e-05, + "loss": 0.0889, + "step": 66300 + }, + { + "epoch": 2.409695472054655, + "grad_norm": 0.5964920520782471, + "learning_rate": 3.6556940455903603e-05, + "loss": 0.0838, + "step": 66310 + }, + { + "epoch": 2.4100588705574535, + "grad_norm": 0.3993948996067047, + "learning_rate": 3.655233865533848e-05, + "loss": 0.1318, + "step": 66320 + }, + { + "epoch": 2.4104222690602515, + "grad_norm": 0.5623260736465454, + "learning_rate": 3.65477363570242e-05, + "loss": 0.1193, + "step": 66330 + }, + { + "epoch": 2.4107856675630495, + "grad_norm": 0.5477907061576843, + "learning_rate": 3.654313356115903e-05, + "loss": 0.1197, + "step": 66340 + }, + { + "epoch": 2.411149066065848, + "grad_norm": 0.8918854594230652, + "learning_rate": 3.653853026794132e-05, + "loss": 0.1546, + "step": 66350 + }, + { + "epoch": 2.411512464568646, + "grad_norm": 0.5984349250793457, + "learning_rate": 3.6533926477569384e-05, + "loss": 0.0822, + "step": 66360 + }, + { + "epoch": 2.4118758630714443, + "grad_norm": 0.3398670554161072, + "learning_rate": 3.65293221902416e-05, + "loss": 0.1134, + "step": 66370 + }, + { + "epoch": 2.4122392615742423, + "grad_norm": 1.7309616804122925, + "learning_rate": 3.652471740615634e-05, + "loss": 0.0953, + "step": 66380 + }, + { + "epoch": 2.4126026600770407, + "grad_norm": 0.5632598996162415, + "learning_rate": 3.6520112125512016e-05, + "loss": 0.1023, + "step": 66390 + }, + { + "epoch": 2.4129660585798387, + "grad_norm": 1.3867424726486206, + "learning_rate": 3.6515506348507054e-05, + "loss": 0.0856, + "step": 66400 + }, + { + "epoch": 2.4133294570826367, + "grad_norm": 1.5078961849212646, + "learning_rate": 3.651090007533989e-05, + "loss": 0.0795, + "step": 66410 + }, + { + "epoch": 2.413692855585435, + "grad_norm": 0.596082866191864, + "learning_rate": 3.650629330620899e-05, + "loss": 0.1231, + "step": 66420 + }, + { + "epoch": 2.414056254088233, + "grad_norm": 0.9665220379829407, + "learning_rate": 3.6501686041312865e-05, + "loss": 0.1076, + "step": 66430 + }, + { + "epoch": 2.4144196525910315, + "grad_norm": 0.4780147075653076, + "learning_rate": 3.649753907919114e-05, + "loss": 3.842, + "step": 66440 + }, + { + "epoch": 2.4147830510938295, + "grad_norm": 0.7537965774536133, + "learning_rate": 3.6492930872887963e-05, + "loss": 0.0878, + "step": 66450 + }, + { + "epoch": 2.4151464495966275, + "grad_norm": 2.1421070098876953, + "learning_rate": 3.6488322171395295e-05, + "loss": 0.0812, + "step": 66460 + }, + { + "epoch": 2.415509848099426, + "grad_norm": 0.36758169531822205, + "learning_rate": 3.648371297491169e-05, + "loss": 0.1109, + "step": 66470 + }, + { + "epoch": 2.415873246602224, + "grad_norm": 1.2690719366073608, + "learning_rate": 3.647910328363577e-05, + "loss": 0.1199, + "step": 66480 + }, + { + "epoch": 2.4162366451050223, + "grad_norm": 1.2424167394638062, + "learning_rate": 3.647449309776612e-05, + "loss": 0.1087, + "step": 66490 + }, + { + "epoch": 2.4166000436078203, + "grad_norm": 0.6070811748504639, + "learning_rate": 3.6469882417501386e-05, + "loss": 0.1042, + "step": 66500 + }, + { + "epoch": 2.4169634421106183, + "grad_norm": 0.3652547597885132, + "learning_rate": 3.646527124304024e-05, + "loss": 0.13, + "step": 66510 + }, + { + "epoch": 2.4173268406134167, + "grad_norm": 0.8389589190483093, + "learning_rate": 3.646065957458134e-05, + "loss": 0.1059, + "step": 66520 + }, + { + "epoch": 2.4176902391162147, + "grad_norm": 4.236841678619385, + "learning_rate": 3.64560474123234e-05, + "loss": 0.1248, + "step": 66530 + }, + { + "epoch": 2.418053637619013, + "grad_norm": 0.4040025770664215, + "learning_rate": 3.645143475646514e-05, + "loss": 0.1224, + "step": 66540 + }, + { + "epoch": 2.418417036121811, + "grad_norm": 1.0393097400665283, + "learning_rate": 3.6446821607205294e-05, + "loss": 0.0945, + "step": 66550 + }, + { + "epoch": 2.418780434624609, + "grad_norm": 1.010204792022705, + "learning_rate": 3.644220796474264e-05, + "loss": 0.0797, + "step": 66560 + }, + { + "epoch": 2.4191438331274076, + "grad_norm": 0.8821393847465515, + "learning_rate": 3.643759382927595e-05, + "loss": 0.1042, + "step": 66570 + }, + { + "epoch": 2.4195072316302055, + "grad_norm": 0.35728177428245544, + "learning_rate": 3.643297920100404e-05, + "loss": 0.0965, + "step": 66580 + }, + { + "epoch": 2.419870630133004, + "grad_norm": 1.833901286125183, + "learning_rate": 3.642836408012573e-05, + "loss": 0.176, + "step": 66590 + }, + { + "epoch": 2.420234028635802, + "grad_norm": 1.3145054578781128, + "learning_rate": 3.6423748466839884e-05, + "loss": 0.0881, + "step": 66600 + }, + { + "epoch": 2.420234028635802, + "eval_loss": 0.3247428834438324, + "eval_runtime": 179.5083, + "eval_samples_per_second": 41.302, + "eval_steps_per_second": 5.164, + "eval_wer": 0.15563563091110424, + "step": 66600 + }, + { + "epoch": 2.4205974271386004, + "grad_norm": 0.4729728102684021, + "learning_rate": 3.6419132361345366e-05, + "loss": 0.1078, + "step": 66610 + }, + { + "epoch": 2.4209608256413984, + "grad_norm": 0.780598521232605, + "learning_rate": 3.6414515763841054e-05, + "loss": 0.104, + "step": 66620 + }, + { + "epoch": 2.4213242241441963, + "grad_norm": 0.8436282873153687, + "learning_rate": 3.6409898674525865e-05, + "loss": 0.1148, + "step": 66630 + }, + { + "epoch": 2.4216876226469948, + "grad_norm": 1.6270266771316528, + "learning_rate": 3.640528109359875e-05, + "loss": 1.2437, + "step": 66640 + }, + { + "epoch": 2.4220510211497928, + "grad_norm": 0.6060745716094971, + "learning_rate": 3.640066302125865e-05, + "loss": 0.0958, + "step": 66650 + }, + { + "epoch": 2.422414419652591, + "grad_norm": 1.076560139656067, + "learning_rate": 3.6396044457704535e-05, + "loss": 0.105, + "step": 66660 + }, + { + "epoch": 2.422777818155389, + "grad_norm": 0.4505023956298828, + "learning_rate": 3.6391425403135425e-05, + "loss": 0.1123, + "step": 66670 + }, + { + "epoch": 2.4231412166581876, + "grad_norm": 0.5208647847175598, + "learning_rate": 3.6386805857750315e-05, + "loss": 0.0909, + "step": 66680 + }, + { + "epoch": 2.4235046151609856, + "grad_norm": 0.7721276879310608, + "learning_rate": 3.638218582174826e-05, + "loss": 0.113, + "step": 66690 + }, + { + "epoch": 2.4238680136637836, + "grad_norm": 1.676924467086792, + "learning_rate": 3.6377565295328316e-05, + "loss": 0.1015, + "step": 66700 + }, + { + "epoch": 2.424231412166582, + "grad_norm": 0.7819331288337708, + "learning_rate": 3.6372944278689566e-05, + "loss": 0.1092, + "step": 66710 + }, + { + "epoch": 2.42459481066938, + "grad_norm": 0.6924077272415161, + "learning_rate": 3.636832277203111e-05, + "loss": 0.1739, + "step": 66720 + }, + { + "epoch": 2.4249582091721784, + "grad_norm": 0.48950478434562683, + "learning_rate": 3.636370077555208e-05, + "loss": 0.1288, + "step": 66730 + }, + { + "epoch": 2.4253216076749764, + "grad_norm": 1.9735438823699951, + "learning_rate": 3.6359078289451604e-05, + "loss": 0.1444, + "step": 66740 + }, + { + "epoch": 2.4256850061777744, + "grad_norm": 2.684687852859497, + "learning_rate": 3.635445531392887e-05, + "loss": 0.0867, + "step": 66750 + }, + { + "epoch": 2.426048404680573, + "grad_norm": 0.4811551570892334, + "learning_rate": 3.634983184918305e-05, + "loss": 0.1158, + "step": 66760 + }, + { + "epoch": 2.426411803183371, + "grad_norm": 1.0460630655288696, + "learning_rate": 3.6345207895413367e-05, + "loss": 0.1113, + "step": 66770 + }, + { + "epoch": 2.4267752016861692, + "grad_norm": 0.47594699263572693, + "learning_rate": 3.634058345281903e-05, + "loss": 0.119, + "step": 66780 + }, + { + "epoch": 2.427138600188967, + "grad_norm": 1.4716179370880127, + "learning_rate": 3.633595852159931e-05, + "loss": 0.1607, + "step": 66790 + }, + { + "epoch": 2.427501998691765, + "grad_norm": 2.8937737941741943, + "learning_rate": 3.6331333101953465e-05, + "loss": 0.0732, + "step": 66800 + }, + { + "epoch": 2.4278653971945636, + "grad_norm": 0.4008066654205322, + "learning_rate": 3.63267071940808e-05, + "loss": 0.108, + "step": 66810 + }, + { + "epoch": 2.4282287956973616, + "grad_norm": 0.6345723271369934, + "learning_rate": 3.632208079818062e-05, + "loss": 0.1021, + "step": 66820 + }, + { + "epoch": 2.42859219420016, + "grad_norm": 4.007993221282959, + "learning_rate": 3.631745391445226e-05, + "loss": 0.1041, + "step": 66830 + }, + { + "epoch": 2.428955592702958, + "grad_norm": 1.5959880352020264, + "learning_rate": 3.631282654309508e-05, + "loss": 0.1387, + "step": 66840 + }, + { + "epoch": 2.429318991205756, + "grad_norm": 2.597745180130005, + "learning_rate": 3.6308198684308465e-05, + "loss": 0.1163, + "step": 66850 + }, + { + "epoch": 2.4296823897085544, + "grad_norm": 0.8064637184143066, + "learning_rate": 3.630357033829179e-05, + "loss": 0.1064, + "step": 66860 + }, + { + "epoch": 2.4300457882113524, + "grad_norm": 0.9430283308029175, + "learning_rate": 3.629894150524449e-05, + "loss": 0.1267, + "step": 66870 + }, + { + "epoch": 2.430409186714151, + "grad_norm": 0.7025822997093201, + "learning_rate": 3.629431218536601e-05, + "loss": 0.0967, + "step": 66880 + }, + { + "epoch": 2.430772585216949, + "grad_norm": 1.0002391338348389, + "learning_rate": 3.628968237885579e-05, + "loss": 0.1241, + "step": 66890 + }, + { + "epoch": 2.4311359837197473, + "grad_norm": 1.6046959161758423, + "learning_rate": 3.628505208591334e-05, + "loss": 0.0894, + "step": 66900 + }, + { + "epoch": 2.4314993822225452, + "grad_norm": 0.773638129234314, + "learning_rate": 3.628042130673814e-05, + "loss": 0.0885, + "step": 66910 + }, + { + "epoch": 2.4318627807253432, + "grad_norm": 0.7153804898262024, + "learning_rate": 3.627579004152972e-05, + "loss": 0.127, + "step": 66920 + }, + { + "epoch": 2.4322261792281417, + "grad_norm": 0.8669637441635132, + "learning_rate": 3.627115829048763e-05, + "loss": 0.0891, + "step": 66930 + }, + { + "epoch": 2.4325895777309396, + "grad_norm": 2.438815116882324, + "learning_rate": 3.6266526053811434e-05, + "loss": 3.7705, + "step": 66940 + }, + { + "epoch": 2.432952976233738, + "grad_norm": 0.46661120653152466, + "learning_rate": 3.626189333170071e-05, + "loss": 0.0928, + "step": 66950 + }, + { + "epoch": 2.433316374736536, + "grad_norm": 1.2738080024719238, + "learning_rate": 3.625726012435508e-05, + "loss": 0.0838, + "step": 66960 + }, + { + "epoch": 2.4336797732393345, + "grad_norm": 0.8235649466514587, + "learning_rate": 3.6252626431974155e-05, + "loss": 0.1173, + "step": 66970 + }, + { + "epoch": 2.4340431717421325, + "grad_norm": 0.8627928495407104, + "learning_rate": 3.62479922547576e-05, + "loss": 0.0915, + "step": 66980 + }, + { + "epoch": 2.4344065702449305, + "grad_norm": 0.746405839920044, + "learning_rate": 3.624335759290509e-05, + "loss": 0.2557, + "step": 66990 + }, + { + "epoch": 2.434769968747729, + "grad_norm": 1.1601886749267578, + "learning_rate": 3.6238722446616285e-05, + "loss": 0.0987, + "step": 67000 + }, + { + "epoch": 2.435133367250527, + "grad_norm": 1.9349639415740967, + "learning_rate": 3.623408681609093e-05, + "loss": 0.0899, + "step": 67010 + }, + { + "epoch": 2.4354967657533253, + "grad_norm": 0.6410073637962341, + "learning_rate": 3.622945070152874e-05, + "loss": 0.164, + "step": 67020 + }, + { + "epoch": 2.4358601642561233, + "grad_norm": 0.46642959117889404, + "learning_rate": 3.622481410312948e-05, + "loss": 0.1156, + "step": 67030 + }, + { + "epoch": 2.4362235627589213, + "grad_norm": 1.0162826776504517, + "learning_rate": 3.6220177021092916e-05, + "loss": 0.1193, + "step": 67040 + }, + { + "epoch": 2.4365869612617197, + "grad_norm": 1.3133575916290283, + "learning_rate": 3.621553945561884e-05, + "loss": 0.0849, + "step": 67050 + }, + { + "epoch": 2.4369503597645177, + "grad_norm": 0.6921333074569702, + "learning_rate": 3.621090140690708e-05, + "loss": 0.0855, + "step": 67060 + }, + { + "epoch": 2.437313758267316, + "grad_norm": 0.8446233868598938, + "learning_rate": 3.620626287515746e-05, + "loss": 0.9796, + "step": 67070 + }, + { + "epoch": 2.437677156770114, + "grad_norm": 1.3895478248596191, + "learning_rate": 3.620162386056985e-05, + "loss": 0.1147, + "step": 67080 + }, + { + "epoch": 2.438040555272912, + "grad_norm": 0.5276104807853699, + "learning_rate": 3.619698436334412e-05, + "loss": 0.0987, + "step": 67090 + }, + { + "epoch": 2.4384039537757105, + "grad_norm": 1.7694755792617798, + "learning_rate": 3.619234438368018e-05, + "loss": 0.1291, + "step": 67100 + }, + { + "epoch": 2.4387673522785085, + "grad_norm": 0.5948963761329651, + "learning_rate": 3.618770392177794e-05, + "loss": 0.0976, + "step": 67110 + }, + { + "epoch": 2.439130750781307, + "grad_norm": 0.2391016185283661, + "learning_rate": 3.618306297783734e-05, + "loss": 0.0982, + "step": 67120 + }, + { + "epoch": 2.439494149284105, + "grad_norm": 0.9383694529533386, + "learning_rate": 3.617842155205835e-05, + "loss": 0.0995, + "step": 67130 + }, + { + "epoch": 2.439857547786903, + "grad_norm": 0.9149391055107117, + "learning_rate": 3.617377964464094e-05, + "loss": 0.1012, + "step": 67140 + }, + { + "epoch": 2.4402209462897013, + "grad_norm": 0.5762970447540283, + "learning_rate": 3.616913725578513e-05, + "loss": 0.0943, + "step": 67150 + }, + { + "epoch": 2.4405843447924993, + "grad_norm": 0.7008225321769714, + "learning_rate": 3.6164494385690936e-05, + "loss": 0.0838, + "step": 67160 + }, + { + "epoch": 2.4409477432952977, + "grad_norm": 1.0070174932479858, + "learning_rate": 3.61598510345584e-05, + "loss": 0.1395, + "step": 67170 + }, + { + "epoch": 2.4413111417980957, + "grad_norm": 0.7962942123413086, + "learning_rate": 3.6155207202587596e-05, + "loss": 0.1115, + "step": 67180 + }, + { + "epoch": 2.441674540300894, + "grad_norm": 0.4024165868759155, + "learning_rate": 3.615056288997859e-05, + "loss": 0.0848, + "step": 67190 + }, + { + "epoch": 2.442037938803692, + "grad_norm": 0.39084872603416443, + "learning_rate": 3.6145918096931515e-05, + "loss": 0.0967, + "step": 67200 + }, + { + "epoch": 2.442037938803692, + "eval_loss": 0.33589035272598267, + "eval_runtime": 179.6199, + "eval_samples_per_second": 41.276, + "eval_steps_per_second": 5.161, + "eval_wer": 0.15488227712527455, + "step": 67200 + }, + { + "epoch": 2.44240133730649, + "grad_norm": 0.6485455632209778, + "learning_rate": 3.614127282364648e-05, + "loss": 1.4456, + "step": 67210 + }, + { + "epoch": 2.4427647358092885, + "grad_norm": 0.4933464229106903, + "learning_rate": 3.613662707032364e-05, + "loss": 0.1259, + "step": 67220 + }, + { + "epoch": 2.4431281343120865, + "grad_norm": 2.199694871902466, + "learning_rate": 3.613198083716317e-05, + "loss": 0.1013, + "step": 67230 + }, + { + "epoch": 2.443491532814885, + "grad_norm": 1.2690855264663696, + "learning_rate": 3.612733412436524e-05, + "loss": 0.1256, + "step": 67240 + }, + { + "epoch": 2.443854931317683, + "grad_norm": 1.8013975620269775, + "learning_rate": 3.612268693213009e-05, + "loss": 0.4379, + "step": 67250 + }, + { + "epoch": 2.4442183298204814, + "grad_norm": 4.287527561187744, + "learning_rate": 3.611803926065792e-05, + "loss": 0.0803, + "step": 67260 + }, + { + "epoch": 2.4445817283232794, + "grad_norm": 0.6265177726745605, + "learning_rate": 3.6113391110149006e-05, + "loss": 0.1046, + "step": 67270 + }, + { + "epoch": 2.4449451268260773, + "grad_norm": 1.527327537536621, + "learning_rate": 3.6108742480803606e-05, + "loss": 0.1095, + "step": 67280 + }, + { + "epoch": 2.4453085253288758, + "grad_norm": 1.2177270650863647, + "learning_rate": 3.6104093372822026e-05, + "loss": 0.0972, + "step": 67290 + }, + { + "epoch": 2.4456719238316738, + "grad_norm": 0.7354857921600342, + "learning_rate": 3.609944378640457e-05, + "loss": 0.0893, + "step": 67300 + }, + { + "epoch": 2.446035322334472, + "grad_norm": 0.8578464984893799, + "learning_rate": 3.609479372175156e-05, + "loss": 0.0914, + "step": 67310 + }, + { + "epoch": 2.44639872083727, + "grad_norm": 0.5541604161262512, + "learning_rate": 3.6090143179063374e-05, + "loss": 0.113, + "step": 67320 + }, + { + "epoch": 2.446762119340068, + "grad_norm": 0.7503251433372498, + "learning_rate": 3.608549215854037e-05, + "loss": 0.116, + "step": 67330 + }, + { + "epoch": 2.4471255178428666, + "grad_norm": 0.7713415026664734, + "learning_rate": 3.608084066038297e-05, + "loss": 0.1122, + "step": 67340 + }, + { + "epoch": 2.4474889163456646, + "grad_norm": 2.4603497982025146, + "learning_rate": 3.607618868479156e-05, + "loss": 0.0932, + "step": 67350 + }, + { + "epoch": 2.447852314848463, + "grad_norm": 0.4980012774467468, + "learning_rate": 3.607153623196658e-05, + "loss": 0.0905, + "step": 67360 + }, + { + "epoch": 2.448215713351261, + "grad_norm": 0.5134033560752869, + "learning_rate": 3.606688330210851e-05, + "loss": 0.1666, + "step": 67370 + }, + { + "epoch": 2.448579111854059, + "grad_norm": 0.5784050822257996, + "learning_rate": 3.60622298954178e-05, + "loss": 0.1092, + "step": 67380 + }, + { + "epoch": 2.4489425103568574, + "grad_norm": 0.4290425777435303, + "learning_rate": 3.605757601209497e-05, + "loss": 0.1189, + "step": 67390 + }, + { + "epoch": 2.4493059088596554, + "grad_norm": 1.0926011800765991, + "learning_rate": 3.605292165234053e-05, + "loss": 0.0879, + "step": 67400 + }, + { + "epoch": 2.449669307362454, + "grad_norm": 1.1270503997802734, + "learning_rate": 3.604826681635504e-05, + "loss": 0.0893, + "step": 67410 + }, + { + "epoch": 2.450032705865252, + "grad_norm": 0.6691473126411438, + "learning_rate": 3.604361150433903e-05, + "loss": 0.7363, + "step": 67420 + }, + { + "epoch": 2.4503961043680498, + "grad_norm": 1.2996752262115479, + "learning_rate": 3.603895571649308e-05, + "loss": 0.0946, + "step": 67430 + }, + { + "epoch": 2.450759502870848, + "grad_norm": 1.3618733882904053, + "learning_rate": 3.603429945301783e-05, + "loss": 0.1242, + "step": 67440 + }, + { + "epoch": 2.451122901373646, + "grad_norm": 0.7978112101554871, + "learning_rate": 3.6029642714113853e-05, + "loss": 0.0783, + "step": 67450 + }, + { + "epoch": 2.4514862998764446, + "grad_norm": 1.727400302886963, + "learning_rate": 3.602498549998183e-05, + "loss": 0.106, + "step": 67460 + }, + { + "epoch": 2.4518496983792426, + "grad_norm": 0.9686618447303772, + "learning_rate": 3.602032781082241e-05, + "loss": 0.1259, + "step": 67470 + }, + { + "epoch": 2.452213096882041, + "grad_norm": 0.4624063968658447, + "learning_rate": 3.601566964683627e-05, + "loss": 0.1066, + "step": 67480 + }, + { + "epoch": 2.452576495384839, + "grad_norm": 0.38952404260635376, + "learning_rate": 3.601101100822412e-05, + "loss": 0.0993, + "step": 67490 + }, + { + "epoch": 2.452939893887637, + "grad_norm": 1.37151300907135, + "learning_rate": 3.600635189518668e-05, + "loss": 0.0988, + "step": 67500 + }, + { + "epoch": 2.4533032923904354, + "grad_norm": 0.4988241195678711, + "learning_rate": 3.60016923079247e-05, + "loss": 0.0942, + "step": 67510 + }, + { + "epoch": 2.4536666908932334, + "grad_norm": 0.8300676941871643, + "learning_rate": 3.599703224663894e-05, + "loss": 0.1087, + "step": 67520 + }, + { + "epoch": 2.454030089396032, + "grad_norm": 9.264083862304688, + "learning_rate": 3.599237171153019e-05, + "loss": 0.1155, + "step": 67530 + }, + { + "epoch": 2.45439348789883, + "grad_norm": 0.9220635294914246, + "learning_rate": 3.598771070279926e-05, + "loss": 0.1134, + "step": 67540 + }, + { + "epoch": 2.4547568864016283, + "grad_norm": 0.6584560871124268, + "learning_rate": 3.598304922064696e-05, + "loss": 0.0906, + "step": 67550 + }, + { + "epoch": 2.4551202849044262, + "grad_norm": 2.7506167888641357, + "learning_rate": 3.5978387265274157e-05, + "loss": 0.1129, + "step": 67560 + }, + { + "epoch": 2.4554836834072242, + "grad_norm": 1.5210083723068237, + "learning_rate": 3.5973724836881694e-05, + "loss": 0.1005, + "step": 67570 + }, + { + "epoch": 2.4558470819100227, + "grad_norm": 0.7032837271690369, + "learning_rate": 3.596906193567049e-05, + "loss": 0.0681, + "step": 67580 + }, + { + "epoch": 2.4562104804128206, + "grad_norm": 1.5217934846878052, + "learning_rate": 3.596439856184142e-05, + "loss": 0.1203, + "step": 67590 + }, + { + "epoch": 2.456573878915619, + "grad_norm": 0.5665151476860046, + "learning_rate": 3.595973471559544e-05, + "loss": 0.0865, + "step": 67600 + }, + { + "epoch": 2.456937277418417, + "grad_norm": 1.023913025856018, + "learning_rate": 3.595507039713348e-05, + "loss": 0.0941, + "step": 67610 + }, + { + "epoch": 2.457300675921215, + "grad_norm": 0.6718622446060181, + "learning_rate": 3.595040560665651e-05, + "loss": 0.1392, + "step": 67620 + }, + { + "epoch": 2.4576640744240135, + "grad_norm": 0.5096120238304138, + "learning_rate": 3.594574034436553e-05, + "loss": 0.1164, + "step": 67630 + }, + { + "epoch": 2.4580274729268115, + "grad_norm": 0.776214063167572, + "learning_rate": 3.594107461046154e-05, + "loss": 0.1106, + "step": 67640 + }, + { + "epoch": 2.45839087142961, + "grad_norm": 1.91248619556427, + "learning_rate": 3.5936408405145575e-05, + "loss": 4.1324, + "step": 67650 + }, + { + "epoch": 2.458754269932408, + "grad_norm": 1.217971920967102, + "learning_rate": 3.593174172861868e-05, + "loss": 0.0972, + "step": 67660 + }, + { + "epoch": 2.459117668435206, + "grad_norm": 6.793942451477051, + "learning_rate": 3.5927074581081935e-05, + "loss": 0.7676, + "step": 67670 + }, + { + "epoch": 2.4594810669380043, + "grad_norm": 0.5515997409820557, + "learning_rate": 3.592240696273643e-05, + "loss": 0.0907, + "step": 67680 + }, + { + "epoch": 2.4598444654408023, + "grad_norm": 0.4186965227127075, + "learning_rate": 3.591773887378326e-05, + "loss": 0.0876, + "step": 67690 + }, + { + "epoch": 2.4602078639436007, + "grad_norm": 0.4198078215122223, + "learning_rate": 3.5913070314423575e-05, + "loss": 0.0872, + "step": 67700 + }, + { + "epoch": 2.4605712624463987, + "grad_norm": 0.7509788870811462, + "learning_rate": 3.5908401284858514e-05, + "loss": 0.0912, + "step": 67710 + }, + { + "epoch": 2.4609346609491967, + "grad_norm": 0.8919647336006165, + "learning_rate": 3.590373178528926e-05, + "loss": 0.1003, + "step": 67720 + }, + { + "epoch": 2.461298059451995, + "grad_norm": 1.2128369808197021, + "learning_rate": 3.5899061815917e-05, + "loss": 0.1129, + "step": 67730 + }, + { + "epoch": 2.461661457954793, + "grad_norm": 0.5779681205749512, + "learning_rate": 3.589439137694293e-05, + "loss": 0.1169, + "step": 67740 + }, + { + "epoch": 2.4620248564575915, + "grad_norm": 0.6092358827590942, + "learning_rate": 3.588972046856831e-05, + "loss": 0.0884, + "step": 67750 + }, + { + "epoch": 2.4623882549603895, + "grad_norm": 1.222869873046875, + "learning_rate": 3.588504909099438e-05, + "loss": 0.0993, + "step": 67760 + }, + { + "epoch": 2.462751653463188, + "grad_norm": 0.26627829670906067, + "learning_rate": 3.5880377244422416e-05, + "loss": 0.1261, + "step": 67770 + }, + { + "epoch": 2.463115051965986, + "grad_norm": 1.2034231424331665, + "learning_rate": 3.58757049290537e-05, + "loss": 0.0899, + "step": 67780 + }, + { + "epoch": 2.463478450468784, + "grad_norm": 0.3671499192714691, + "learning_rate": 3.5871032145089565e-05, + "loss": 0.1387, + "step": 67790 + }, + { + "epoch": 2.4638418489715823, + "grad_norm": 0.5502142310142517, + "learning_rate": 3.586635889273133e-05, + "loss": 0.1053, + "step": 67800 + }, + { + "epoch": 2.4638418489715823, + "eval_loss": 0.32282206416130066, + "eval_runtime": 179.8955, + "eval_samples_per_second": 41.213, + "eval_steps_per_second": 5.153, + "eval_wer": 0.1538112440321673, + "step": 67800 + }, + { + "epoch": 2.4642052474743803, + "grad_norm": 1.090920329093933, + "learning_rate": 3.5861685172180346e-05, + "loss": 0.1039, + "step": 67810 + }, + { + "epoch": 2.4645686459771787, + "grad_norm": 0.406110018491745, + "learning_rate": 3.5857010983638e-05, + "loss": 0.1042, + "step": 67820 + }, + { + "epoch": 2.4649320444799767, + "grad_norm": 1.2592461109161377, + "learning_rate": 3.585233632730568e-05, + "loss": 0.0835, + "step": 67830 + }, + { + "epoch": 2.465295442982775, + "grad_norm": 0.5883360505104065, + "learning_rate": 3.58476612033848e-05, + "loss": 0.1341, + "step": 67840 + }, + { + "epoch": 2.465658841485573, + "grad_norm": 1.322466492652893, + "learning_rate": 3.58429856120768e-05, + "loss": 0.0797, + "step": 67850 + }, + { + "epoch": 2.466022239988371, + "grad_norm": 0.4922407567501068, + "learning_rate": 3.583830955358312e-05, + "loss": 0.0859, + "step": 67860 + }, + { + "epoch": 2.4663856384911695, + "grad_norm": 0.7841882705688477, + "learning_rate": 3.583363302810525e-05, + "loss": 0.1096, + "step": 67870 + }, + { + "epoch": 2.4667490369939675, + "grad_norm": 0.7191815376281738, + "learning_rate": 3.582895603584467e-05, + "loss": 0.0956, + "step": 67880 + }, + { + "epoch": 2.467112435496766, + "grad_norm": 0.43222716450691223, + "learning_rate": 3.5824278577002925e-05, + "loss": 0.139, + "step": 67890 + }, + { + "epoch": 2.467475833999564, + "grad_norm": 1.4954817295074463, + "learning_rate": 3.581960065178151e-05, + "loss": 0.0903, + "step": 67900 + }, + { + "epoch": 2.467839232502362, + "grad_norm": 0.6472924947738647, + "learning_rate": 3.5814922260382e-05, + "loss": 0.0989, + "step": 67910 + }, + { + "epoch": 2.4682026310051604, + "grad_norm": 1.0343185663223267, + "learning_rate": 3.581024340300598e-05, + "loss": 0.0951, + "step": 67920 + }, + { + "epoch": 2.4685660295079583, + "grad_norm": 0.6948789358139038, + "learning_rate": 3.580556407985503e-05, + "loss": 0.3052, + "step": 67930 + }, + { + "epoch": 2.4689294280107568, + "grad_norm": 0.5896201729774475, + "learning_rate": 3.580088429113077e-05, + "loss": 0.0787, + "step": 67940 + }, + { + "epoch": 2.4692928265135548, + "grad_norm": 0.7022304534912109, + "learning_rate": 3.5796204037034834e-05, + "loss": 0.086, + "step": 67950 + }, + { + "epoch": 2.4696562250163527, + "grad_norm": 0.6120296120643616, + "learning_rate": 3.579152331776888e-05, + "loss": 0.101, + "step": 67960 + }, + { + "epoch": 2.470019623519151, + "grad_norm": 0.7050819993019104, + "learning_rate": 3.5786842133534584e-05, + "loss": 0.1042, + "step": 67970 + }, + { + "epoch": 2.470383022021949, + "grad_norm": 0.728625476360321, + "learning_rate": 3.578216048453364e-05, + "loss": 0.1194, + "step": 67980 + }, + { + "epoch": 2.4707464205247476, + "grad_norm": 5.270279884338379, + "learning_rate": 3.577747837096776e-05, + "loss": 0.1007, + "step": 67990 + }, + { + "epoch": 2.4711098190275456, + "grad_norm": 1.098525047302246, + "learning_rate": 3.577279579303868e-05, + "loss": 0.1017, + "step": 68000 + }, + { + "epoch": 2.4714732175303435, + "grad_norm": 2.74465012550354, + "learning_rate": 3.576811275094817e-05, + "loss": 0.3871, + "step": 68010 + }, + { + "epoch": 2.471836616033142, + "grad_norm": 0.6227459907531738, + "learning_rate": 3.576342924489799e-05, + "loss": 0.1103, + "step": 68020 + }, + { + "epoch": 2.47220001453594, + "grad_norm": 2.293656349182129, + "learning_rate": 3.5758745275089945e-05, + "loss": 0.0953, + "step": 68030 + }, + { + "epoch": 2.4725634130387384, + "grad_norm": 1.2598451375961304, + "learning_rate": 3.575406084172584e-05, + "loss": 0.1743, + "step": 68040 + }, + { + "epoch": 2.4729268115415364, + "grad_norm": 1.4611924886703491, + "learning_rate": 3.574937594500751e-05, + "loss": 0.0955, + "step": 68050 + }, + { + "epoch": 2.473290210044335, + "grad_norm": 0.6100664138793945, + "learning_rate": 3.5744690585136834e-05, + "loss": 0.0935, + "step": 68060 + }, + { + "epoch": 2.473653608547133, + "grad_norm": 1.22284996509552, + "learning_rate": 3.574000476231566e-05, + "loss": 0.1435, + "step": 68070 + }, + { + "epoch": 2.4740170070499308, + "grad_norm": 0.8457713723182678, + "learning_rate": 3.5735318476745887e-05, + "loss": 0.0832, + "step": 68080 + }, + { + "epoch": 2.474380405552729, + "grad_norm": 1.3872827291488647, + "learning_rate": 3.573063172862944e-05, + "loss": 0.1453, + "step": 68090 + }, + { + "epoch": 2.474743804055527, + "grad_norm": 1.066683292388916, + "learning_rate": 3.572594451816826e-05, + "loss": 0.0809, + "step": 68100 + }, + { + "epoch": 2.4751072025583256, + "grad_norm": 1.5101946592330933, + "learning_rate": 3.5721256845564286e-05, + "loss": 0.0854, + "step": 68110 + }, + { + "epoch": 2.4754706010611236, + "grad_norm": 0.6682563424110413, + "learning_rate": 3.571656871101951e-05, + "loss": 0.1077, + "step": 68120 + }, + { + "epoch": 2.475833999563922, + "grad_norm": 1.0795047283172607, + "learning_rate": 3.5711880114735917e-05, + "loss": 0.0855, + "step": 68130 + }, + { + "epoch": 2.47619739806672, + "grad_norm": 4.4557671546936035, + "learning_rate": 3.570719105691551e-05, + "loss": 0.1676, + "step": 68140 + }, + { + "epoch": 2.476560796569518, + "grad_norm": 0.7962543368339539, + "learning_rate": 3.570250153776035e-05, + "loss": 0.0869, + "step": 68150 + }, + { + "epoch": 2.4769241950723164, + "grad_norm": 12.166545867919922, + "learning_rate": 3.569781155747247e-05, + "loss": 0.2161, + "step": 68160 + }, + { + "epoch": 2.4772875935751144, + "grad_norm": 0.4934634864330292, + "learning_rate": 3.569312111625396e-05, + "loss": 0.1146, + "step": 68170 + }, + { + "epoch": 2.477650992077913, + "grad_norm": 1.008591651916504, + "learning_rate": 3.56884302143069e-05, + "loss": 0.1029, + "step": 68180 + }, + { + "epoch": 2.478014390580711, + "grad_norm": 1.2141749858856201, + "learning_rate": 3.568373885183342e-05, + "loss": 0.1215, + "step": 68190 + }, + { + "epoch": 2.478377789083509, + "grad_norm": 1.004011631011963, + "learning_rate": 3.567904702903564e-05, + "loss": 0.0831, + "step": 68200 + }, + { + "epoch": 2.4787411875863072, + "grad_norm": 31.751787185668945, + "learning_rate": 3.567435474611572e-05, + "loss": 0.4307, + "step": 68210 + }, + { + "epoch": 2.4791045860891052, + "grad_norm": 0.7640292048454285, + "learning_rate": 3.566966200327584e-05, + "loss": 0.1086, + "step": 68220 + }, + { + "epoch": 2.4794679845919037, + "grad_norm": 0.5559817552566528, + "learning_rate": 3.566496880071817e-05, + "loss": 0.1082, + "step": 68230 + }, + { + "epoch": 2.4798313830947016, + "grad_norm": 2.7342145442962646, + "learning_rate": 3.566027513864496e-05, + "loss": 0.1049, + "step": 68240 + }, + { + "epoch": 2.4801947815974996, + "grad_norm": 1.2804802656173706, + "learning_rate": 3.565558101725841e-05, + "loss": 0.0957, + "step": 68250 + }, + { + "epoch": 2.480558180100298, + "grad_norm": 6.4595770835876465, + "learning_rate": 3.565088643676079e-05, + "loss": 0.0967, + "step": 68260 + }, + { + "epoch": 2.480921578603096, + "grad_norm": 0.7362810373306274, + "learning_rate": 3.564619139735437e-05, + "loss": 0.1271, + "step": 68270 + }, + { + "epoch": 2.4812849771058945, + "grad_norm": 2.1541872024536133, + "learning_rate": 3.564149589924145e-05, + "loss": 0.1168, + "step": 68280 + }, + { + "epoch": 2.4816483756086924, + "grad_norm": 1.1019583940505981, + "learning_rate": 3.563679994262433e-05, + "loss": 0.1151, + "step": 68290 + }, + { + "epoch": 2.4820117741114904, + "grad_norm": 0.7224584817886353, + "learning_rate": 3.563210352770534e-05, + "loss": 0.2149, + "step": 68300 + }, + { + "epoch": 2.482375172614289, + "grad_norm": 0.6910248398780823, + "learning_rate": 3.562740665468684e-05, + "loss": 0.0971, + "step": 68310 + }, + { + "epoch": 2.482738571117087, + "grad_norm": 1.294913411140442, + "learning_rate": 3.56227093237712e-05, + "loss": 0.1336, + "step": 68320 + }, + { + "epoch": 2.4831019696198853, + "grad_norm": 0.5386795401573181, + "learning_rate": 3.561801153516082e-05, + "loss": 0.1147, + "step": 68330 + }, + { + "epoch": 2.4834653681226833, + "grad_norm": 0.5479850769042969, + "learning_rate": 3.561331328905809e-05, + "loss": 0.0878, + "step": 68340 + }, + { + "epoch": 2.4838287666254817, + "grad_norm": 0.24666792154312134, + "learning_rate": 3.560861458566546e-05, + "loss": 0.9362, + "step": 68350 + }, + { + "epoch": 2.4841921651282797, + "grad_norm": 0.776744544506073, + "learning_rate": 3.560391542518537e-05, + "loss": 0.1084, + "step": 68360 + }, + { + "epoch": 2.4845555636310777, + "grad_norm": 0.7053751945495605, + "learning_rate": 3.55992158078203e-05, + "loss": 0.1096, + "step": 68370 + }, + { + "epoch": 2.484918962133876, + "grad_norm": 0.5632005929946899, + "learning_rate": 3.559451573377272e-05, + "loss": 0.1125, + "step": 68380 + }, + { + "epoch": 2.485282360636674, + "grad_norm": 0.5601955652236938, + "learning_rate": 3.558981520324516e-05, + "loss": 0.1011, + "step": 68390 + }, + { + "epoch": 2.4856457591394725, + "grad_norm": 0.9751861691474915, + "learning_rate": 3.558511421644014e-05, + "loss": 0.1193, + "step": 68400 + }, + { + "epoch": 2.4856457591394725, + "eval_loss": 0.3299192190170288, + "eval_runtime": 180.0784, + "eval_samples_per_second": 41.171, + "eval_steps_per_second": 5.148, + "eval_wer": 0.15812260605950587, + "step": 68400 + }, + { + "epoch": 2.4860091576422705, + "grad_norm": 3.060753107070923, + "learning_rate": 3.5580412773560214e-05, + "loss": 0.1417, + "step": 68410 + }, + { + "epoch": 2.486372556145069, + "grad_norm": 0.9213599562644958, + "learning_rate": 3.557571087480794e-05, + "loss": 0.1066, + "step": 68420 + }, + { + "epoch": 2.486735954647867, + "grad_norm": 0.6596553921699524, + "learning_rate": 3.557100852038592e-05, + "loss": 0.0984, + "step": 68430 + }, + { + "epoch": 2.487099353150665, + "grad_norm": 0.7937065362930298, + "learning_rate": 3.556630571049675e-05, + "loss": 0.1673, + "step": 68440 + }, + { + "epoch": 2.4874627516534633, + "grad_norm": 1.1487483978271484, + "learning_rate": 3.556160244534307e-05, + "loss": 0.0982, + "step": 68450 + }, + { + "epoch": 2.4878261501562613, + "grad_norm": 0.7516663074493408, + "learning_rate": 3.5556898725127504e-05, + "loss": 0.0879, + "step": 68460 + }, + { + "epoch": 2.4881895486590597, + "grad_norm": 3.729604721069336, + "learning_rate": 3.5552194550052745e-05, + "loss": 0.1866, + "step": 68470 + }, + { + "epoch": 2.4885529471618577, + "grad_norm": 0.6454250812530518, + "learning_rate": 3.554748992032146e-05, + "loss": 0.1261, + "step": 68480 + }, + { + "epoch": 2.4889163456646557, + "grad_norm": 1.3000408411026, + "learning_rate": 3.554278483613637e-05, + "loss": 0.1297, + "step": 68490 + }, + { + "epoch": 2.489279744167454, + "grad_norm": 1.060686707496643, + "learning_rate": 3.5538079297700185e-05, + "loss": 0.0863, + "step": 68500 + }, + { + "epoch": 2.489643142670252, + "grad_norm": 1.2778925895690918, + "learning_rate": 3.5533373305215665e-05, + "loss": 0.0819, + "step": 68510 + }, + { + "epoch": 2.4900065411730505, + "grad_norm": 0.9975671172142029, + "learning_rate": 3.5528666858885565e-05, + "loss": 0.101, + "step": 68520 + }, + { + "epoch": 2.4903699396758485, + "grad_norm": 0.8623627424240112, + "learning_rate": 3.5523959958912666e-05, + "loss": 0.1161, + "step": 68530 + }, + { + "epoch": 2.4907333381786465, + "grad_norm": 0.5452187061309814, + "learning_rate": 3.551925260549979e-05, + "loss": 0.0967, + "step": 68540 + }, + { + "epoch": 2.491096736681445, + "grad_norm": 0.7726628184318542, + "learning_rate": 3.5514544798849736e-05, + "loss": 0.1111, + "step": 68550 + }, + { + "epoch": 2.491460135184243, + "grad_norm": 2.074589490890503, + "learning_rate": 3.550983653916536e-05, + "loss": 0.0911, + "step": 68560 + }, + { + "epoch": 2.4918235336870413, + "grad_norm": 0.777515709400177, + "learning_rate": 3.550512782664952e-05, + "loss": 0.1118, + "step": 68570 + }, + { + "epoch": 2.4921869321898393, + "grad_norm": 0.7411642074584961, + "learning_rate": 3.55004186615051e-05, + "loss": 0.0832, + "step": 68580 + }, + { + "epoch": 2.4925503306926373, + "grad_norm": 1.0494729280471802, + "learning_rate": 3.5495709043935e-05, + "loss": 0.1126, + "step": 68590 + }, + { + "epoch": 2.4929137291954357, + "grad_norm": 0.825706422328949, + "learning_rate": 3.5490998974142144e-05, + "loss": 2.8725, + "step": 68600 + }, + { + "epoch": 2.4932771276982337, + "grad_norm": 0.7414544820785522, + "learning_rate": 3.548628845232947e-05, + "loss": 0.1034, + "step": 68610 + }, + { + "epoch": 2.493640526201032, + "grad_norm": 1.752670168876648, + "learning_rate": 3.548157747869993e-05, + "loss": 0.4002, + "step": 68620 + }, + { + "epoch": 2.49400392470383, + "grad_norm": 0.9184174537658691, + "learning_rate": 3.547686605345651e-05, + "loss": 0.101, + "step": 68630 + }, + { + "epoch": 2.4943673232066286, + "grad_norm": 0.540532112121582, + "learning_rate": 3.547215417680222e-05, + "loss": 0.1, + "step": 68640 + }, + { + "epoch": 2.4947307217094266, + "grad_norm": 0.7241819500923157, + "learning_rate": 3.5467441848940056e-05, + "loss": 0.0812, + "step": 68650 + }, + { + "epoch": 2.4950941202122245, + "grad_norm": 0.5261086225509644, + "learning_rate": 3.546272907007307e-05, + "loss": 0.1093, + "step": 68660 + }, + { + "epoch": 2.495457518715023, + "grad_norm": 0.5485601425170898, + "learning_rate": 3.545801584040431e-05, + "loss": 0.7212, + "step": 68670 + }, + { + "epoch": 2.495820917217821, + "grad_norm": 0.5442925691604614, + "learning_rate": 3.545330216013687e-05, + "loss": 0.1235, + "step": 68680 + }, + { + "epoch": 2.4961843157206194, + "grad_norm": 0.6182003021240234, + "learning_rate": 3.5448588029473825e-05, + "loss": 0.1382, + "step": 68690 + }, + { + "epoch": 2.4965477142234174, + "grad_norm": 0.8053919076919556, + "learning_rate": 3.5443873448618296e-05, + "loss": 0.1266, + "step": 68700 + }, + { + "epoch": 2.496911112726216, + "grad_norm": 2.04055118560791, + "learning_rate": 3.5439158417773424e-05, + "loss": 0.1026, + "step": 68710 + }, + { + "epoch": 2.497274511229014, + "grad_norm": 0.5255793929100037, + "learning_rate": 3.5434442937142354e-05, + "loss": 0.1031, + "step": 68720 + }, + { + "epoch": 2.4976379097318118, + "grad_norm": 1.7394444942474365, + "learning_rate": 3.5429727006928266e-05, + "loss": 0.081, + "step": 68730 + }, + { + "epoch": 2.49800130823461, + "grad_norm": 1.1095107793807983, + "learning_rate": 3.542501062733435e-05, + "loss": 0.1198, + "step": 68740 + }, + { + "epoch": 2.498364706737408, + "grad_norm": 1.0827983617782593, + "learning_rate": 3.542029379856382e-05, + "loss": 0.0985, + "step": 68750 + }, + { + "epoch": 2.4987281052402066, + "grad_norm": 0.5815703868865967, + "learning_rate": 3.54155765208199e-05, + "loss": 0.0946, + "step": 68760 + }, + { + "epoch": 2.4990915037430046, + "grad_norm": 1.133452296257019, + "learning_rate": 3.541085879430585e-05, + "loss": 0.0897, + "step": 68770 + }, + { + "epoch": 2.4994549022458026, + "grad_norm": 1.6809009313583374, + "learning_rate": 3.5406140619224936e-05, + "loss": 0.1182, + "step": 68780 + }, + { + "epoch": 2.499818300748601, + "grad_norm": 0.6066719889640808, + "learning_rate": 3.540142199578045e-05, + "loss": 0.1223, + "step": 68790 + }, + { + "epoch": 2.500181699251399, + "grad_norm": 0.45101696252822876, + "learning_rate": 3.53967029241757e-05, + "loss": 0.0951, + "step": 68800 + }, + { + "epoch": 2.5005450977541974, + "grad_norm": 2.0316238403320312, + "learning_rate": 3.5391983404614e-05, + "loss": 0.0941, + "step": 68810 + }, + { + "epoch": 2.5009084962569954, + "grad_norm": 0.8582636117935181, + "learning_rate": 3.538726343729873e-05, + "loss": 0.1308, + "step": 68820 + }, + { + "epoch": 2.5012718947597934, + "grad_norm": 1.0573068857192993, + "learning_rate": 3.538254302243322e-05, + "loss": 0.1064, + "step": 68830 + }, + { + "epoch": 2.501635293262592, + "grad_norm": 1.7201263904571533, + "learning_rate": 3.537782216022088e-05, + "loss": 0.1303, + "step": 68840 + }, + { + "epoch": 2.50199869176539, + "grad_norm": 0.8848857879638672, + "learning_rate": 3.53731008508651e-05, + "loss": 0.0885, + "step": 68850 + }, + { + "epoch": 2.5023620902681882, + "grad_norm": 0.6936333775520325, + "learning_rate": 3.5368379094569325e-05, + "loss": 0.0989, + "step": 68860 + }, + { + "epoch": 2.502725488770986, + "grad_norm": 0.7901983261108398, + "learning_rate": 3.536365689153698e-05, + "loss": 0.1984, + "step": 68870 + }, + { + "epoch": 2.503088887273784, + "grad_norm": 0.5054183602333069, + "learning_rate": 3.5358934241971534e-05, + "loss": 0.0928, + "step": 68880 + }, + { + "epoch": 2.5034522857765826, + "grad_norm": 1.7566126585006714, + "learning_rate": 3.535421114607647e-05, + "loss": 0.1212, + "step": 68890 + }, + { + "epoch": 2.5038156842793806, + "grad_norm": 0.5128380656242371, + "learning_rate": 3.5349487604055274e-05, + "loss": 0.0774, + "step": 68900 + }, + { + "epoch": 2.504179082782179, + "grad_norm": 0.994647741317749, + "learning_rate": 3.53447636161115e-05, + "loss": 0.1288, + "step": 68910 + }, + { + "epoch": 2.504542481284977, + "grad_norm": 0.35602259635925293, + "learning_rate": 3.534003918244866e-05, + "loss": 0.1006, + "step": 68920 + }, + { + "epoch": 2.504905879787775, + "grad_norm": 0.9458356499671936, + "learning_rate": 3.533531430327032e-05, + "loss": 0.1199, + "step": 68930 + }, + { + "epoch": 2.5052692782905734, + "grad_norm": 1.100160837173462, + "learning_rate": 3.533058897878006e-05, + "loss": 0.0892, + "step": 68940 + }, + { + "epoch": 2.505632676793372, + "grad_norm": 0.695726215839386, + "learning_rate": 3.532586320918147e-05, + "loss": 0.0928, + "step": 68950 + }, + { + "epoch": 2.50599607529617, + "grad_norm": 1.826897382736206, + "learning_rate": 3.532113699467819e-05, + "loss": 0.105, + "step": 68960 + }, + { + "epoch": 2.506359473798968, + "grad_norm": 1.4014049768447876, + "learning_rate": 3.531641033547383e-05, + "loss": 0.2298, + "step": 68970 + }, + { + "epoch": 2.5067228723017663, + "grad_norm": 1.4749367237091064, + "learning_rate": 3.531168323177206e-05, + "loss": 0.0966, + "step": 68980 + }, + { + "epoch": 2.5070862708045643, + "grad_norm": 4.613848686218262, + "learning_rate": 3.530695568377655e-05, + "loss": 0.1281, + "step": 68990 + }, + { + "epoch": 2.5074496693073627, + "grad_norm": 0.9928845167160034, + "learning_rate": 3.5302227691690984e-05, + "loss": 0.1213, + "step": 69000 + }, + { + "epoch": 2.5074496693073627, + "eval_loss": 0.30671653151512146, + "eval_runtime": 179.555, + "eval_samples_per_second": 41.291, + "eval_steps_per_second": 5.163, + "eval_wer": 0.1598017681122588, + "step": 69000 + }, + { + "epoch": 2.5078130678101607, + "grad_norm": 0.24582041800022125, + "learning_rate": 3.5297499255719094e-05, + "loss": 0.0949, + "step": 69010 + }, + { + "epoch": 2.5081764663129587, + "grad_norm": 0.4762285053730011, + "learning_rate": 3.529277037606458e-05, + "loss": 0.0983, + "step": 69020 + }, + { + "epoch": 2.508539864815757, + "grad_norm": 0.6749287843704224, + "learning_rate": 3.528804105293123e-05, + "loss": 0.0911, + "step": 69030 + }, + { + "epoch": 2.508903263318555, + "grad_norm": 0.4179406762123108, + "learning_rate": 3.528331128652279e-05, + "loss": 0.1979, + "step": 69040 + }, + { + "epoch": 2.5092666618213535, + "grad_norm": 1.1406326293945312, + "learning_rate": 3.5278581077043047e-05, + "loss": 0.0918, + "step": 69050 + }, + { + "epoch": 2.5096300603241515, + "grad_norm": 0.8093327879905701, + "learning_rate": 3.527385042469583e-05, + "loss": 0.0978, + "step": 69060 + }, + { + "epoch": 2.5099934588269495, + "grad_norm": 1.7931946516036987, + "learning_rate": 3.5269119329684945e-05, + "loss": 0.109, + "step": 69070 + }, + { + "epoch": 2.510356857329748, + "grad_norm": 0.6986146569252014, + "learning_rate": 3.526438779221425e-05, + "loss": 0.0993, + "step": 69080 + }, + { + "epoch": 2.510720255832546, + "grad_norm": 1.2395824193954468, + "learning_rate": 3.5259655812487604e-05, + "loss": 0.1468, + "step": 69090 + }, + { + "epoch": 2.5110836543353443, + "grad_norm": 3.537288188934326, + "learning_rate": 3.525492339070889e-05, + "loss": 0.0997, + "step": 69100 + }, + { + "epoch": 2.5114470528381423, + "grad_norm": 0.8501663208007812, + "learning_rate": 3.525019052708202e-05, + "loss": 0.0933, + "step": 69110 + }, + { + "epoch": 2.5118104513409403, + "grad_norm": 1.3228484392166138, + "learning_rate": 3.524545722181091e-05, + "loss": 0.1387, + "step": 69120 + }, + { + "epoch": 2.5121738498437387, + "grad_norm": 1.2074254751205444, + "learning_rate": 3.52407234750995e-05, + "loss": 0.1062, + "step": 69130 + }, + { + "epoch": 2.5125372483465367, + "grad_norm": 0.6108558177947998, + "learning_rate": 3.523598928715174e-05, + "loss": 0.1207, + "step": 69140 + }, + { + "epoch": 2.512900646849335, + "grad_norm": 0.6959209442138672, + "learning_rate": 3.523125465817164e-05, + "loss": 0.0823, + "step": 69150 + }, + { + "epoch": 2.513264045352133, + "grad_norm": 0.5447746515274048, + "learning_rate": 3.5226519588363164e-05, + "loss": 0.1009, + "step": 69160 + }, + { + "epoch": 2.513627443854931, + "grad_norm": 6.87611198425293, + "learning_rate": 3.522178407793036e-05, + "loss": 0.1082, + "step": 69170 + }, + { + "epoch": 2.5139908423577295, + "grad_norm": 1.2013996839523315, + "learning_rate": 3.5217048127077246e-05, + "loss": 0.1041, + "step": 69180 + }, + { + "epoch": 2.5143542408605275, + "grad_norm": 2.1484246253967285, + "learning_rate": 3.521231173600787e-05, + "loss": 0.1174, + "step": 69190 + }, + { + "epoch": 2.514717639363326, + "grad_norm": 0.6024388670921326, + "learning_rate": 3.520757490492633e-05, + "loss": 0.0968, + "step": 69200 + }, + { + "epoch": 2.515081037866124, + "grad_norm": 0.621998131275177, + "learning_rate": 3.5202837634036696e-05, + "loss": 0.1441, + "step": 69210 + }, + { + "epoch": 2.515444436368922, + "grad_norm": 0.7772573828697205, + "learning_rate": 3.519809992354309e-05, + "loss": 0.1199, + "step": 69220 + }, + { + "epoch": 2.5158078348717203, + "grad_norm": 0.8994972109794617, + "learning_rate": 3.519336177364966e-05, + "loss": 0.1099, + "step": 69230 + }, + { + "epoch": 2.5161712333745188, + "grad_norm": 0.7937003970146179, + "learning_rate": 3.5188623184560524e-05, + "loss": 0.1091, + "step": 69240 + }, + { + "epoch": 2.5165346318773167, + "grad_norm": 1.3785254955291748, + "learning_rate": 3.518388415647986e-05, + "loss": 0.1035, + "step": 69250 + }, + { + "epoch": 2.5168980303801147, + "grad_norm": 0.6472801566123962, + "learning_rate": 3.517914468961188e-05, + "loss": 0.1054, + "step": 69260 + }, + { + "epoch": 2.517261428882913, + "grad_norm": 2.0437135696411133, + "learning_rate": 3.517440478416076e-05, + "loss": 0.1224, + "step": 69270 + }, + { + "epoch": 2.517624827385711, + "grad_norm": 0.9029390811920166, + "learning_rate": 3.516966444033074e-05, + "loss": 0.0865, + "step": 69280 + }, + { + "epoch": 2.5179882258885096, + "grad_norm": 0.801255464553833, + "learning_rate": 3.5164923658326064e-05, + "loss": 0.0891, + "step": 69290 + }, + { + "epoch": 2.5183516243913076, + "grad_norm": 1.0700057744979858, + "learning_rate": 3.5160182438350995e-05, + "loss": 0.0928, + "step": 69300 + }, + { + "epoch": 2.5187150228941055, + "grad_norm": 0.6255751848220825, + "learning_rate": 3.515544078060982e-05, + "loss": 0.1071, + "step": 69310 + }, + { + "epoch": 2.519078421396904, + "grad_norm": 0.784589409828186, + "learning_rate": 3.515069868530683e-05, + "loss": 0.0892, + "step": 69320 + }, + { + "epoch": 2.519441819899702, + "grad_norm": 0.8623689413070679, + "learning_rate": 3.514595615264635e-05, + "loss": 0.1024, + "step": 69330 + }, + { + "epoch": 2.5198052184025004, + "grad_norm": 1.3670728206634521, + "learning_rate": 3.514121318283272e-05, + "loss": 0.1021, + "step": 69340 + }, + { + "epoch": 2.5201686169052984, + "grad_norm": 1.2742701768875122, + "learning_rate": 3.513646977607029e-05, + "loss": 0.0916, + "step": 69350 + }, + { + "epoch": 2.5205320154080963, + "grad_norm": 2.6667962074279785, + "learning_rate": 3.513172593256345e-05, + "loss": 0.0921, + "step": 69360 + }, + { + "epoch": 2.5208954139108948, + "grad_norm": 0.8958526849746704, + "learning_rate": 3.512698165251659e-05, + "loss": 0.0989, + "step": 69370 + }, + { + "epoch": 2.5212588124136928, + "grad_norm": 1.1172994375228882, + "learning_rate": 3.512223693613412e-05, + "loss": 0.1104, + "step": 69380 + }, + { + "epoch": 2.521622210916491, + "grad_norm": 0.5839262008666992, + "learning_rate": 3.5117491783620475e-05, + "loss": 0.128, + "step": 69390 + }, + { + "epoch": 2.521985609419289, + "grad_norm": 0.9729129672050476, + "learning_rate": 3.51127461951801e-05, + "loss": 0.1229, + "step": 69400 + }, + { + "epoch": 2.522349007922087, + "grad_norm": 3.964264154434204, + "learning_rate": 3.510800017101749e-05, + "loss": 0.096, + "step": 69410 + }, + { + "epoch": 2.5227124064248856, + "grad_norm": 0.4221835732460022, + "learning_rate": 3.51032537113371e-05, + "loss": 0.1109, + "step": 69420 + }, + { + "epoch": 2.5230758049276836, + "grad_norm": 0.6467729806900024, + "learning_rate": 3.5098506816343466e-05, + "loss": 0.116, + "step": 69430 + }, + { + "epoch": 2.523439203430482, + "grad_norm": 3.9705393314361572, + "learning_rate": 3.50937594862411e-05, + "loss": 0.1349, + "step": 69440 + }, + { + "epoch": 2.52380260193328, + "grad_norm": 1.3955297470092773, + "learning_rate": 3.508901172123455e-05, + "loss": 0.1116, + "step": 69450 + }, + { + "epoch": 2.524166000436078, + "grad_norm": 0.8039283156394958, + "learning_rate": 3.508426352152838e-05, + "loss": 0.0905, + "step": 69460 + }, + { + "epoch": 2.5245293989388764, + "grad_norm": 1.1199578046798706, + "learning_rate": 3.507951488732718e-05, + "loss": 0.1136, + "step": 69470 + }, + { + "epoch": 2.5248927974416744, + "grad_norm": 0.7925732731819153, + "learning_rate": 3.507476581883555e-05, + "loss": 0.1058, + "step": 69480 + }, + { + "epoch": 2.525256195944473, + "grad_norm": 0.8125994205474854, + "learning_rate": 3.5070016316258106e-05, + "loss": 0.1033, + "step": 69490 + }, + { + "epoch": 2.525619594447271, + "grad_norm": 0.4621226489543915, + "learning_rate": 3.5065266379799475e-05, + "loss": 1.4773, + "step": 69500 + }, + { + "epoch": 2.525982992950069, + "grad_norm": 1.0948034524917603, + "learning_rate": 3.506051600966434e-05, + "loss": 0.0797, + "step": 69510 + }, + { + "epoch": 2.526346391452867, + "grad_norm": 1.1567878723144531, + "learning_rate": 3.5055765206057354e-05, + "loss": 0.1143, + "step": 69520 + }, + { + "epoch": 2.5267097899556656, + "grad_norm": 0.95686936378479, + "learning_rate": 3.505101396918324e-05, + "loss": 0.1188, + "step": 69530 + }, + { + "epoch": 2.5270731884584636, + "grad_norm": 0.34038084745407104, + "learning_rate": 3.504626229924669e-05, + "loss": 0.1076, + "step": 69540 + }, + { + "epoch": 2.5274365869612616, + "grad_norm": 4.851949214935303, + "learning_rate": 3.504151019645243e-05, + "loss": 0.0955, + "step": 69550 + }, + { + "epoch": 2.52779998546406, + "grad_norm": 0.8883131742477417, + "learning_rate": 3.503675766100524e-05, + "loss": 0.1427, + "step": 69560 + }, + { + "epoch": 2.528163383966858, + "grad_norm": 0.7588313221931458, + "learning_rate": 3.5032004693109866e-05, + "loss": 0.1198, + "step": 69570 + }, + { + "epoch": 2.5285267824696565, + "grad_norm": 0.5408293604850769, + "learning_rate": 3.50272512929711e-05, + "loss": 0.1115, + "step": 69580 + }, + { + "epoch": 2.5288901809724544, + "grad_norm": 1.0919950008392334, + "learning_rate": 3.5022497460793754e-05, + "loss": 0.7792, + "step": 69590 + }, + { + "epoch": 2.5292535794752524, + "grad_norm": 0.9922258853912354, + "learning_rate": 3.501774319678266e-05, + "loss": 0.079, + "step": 69600 + }, + { + "epoch": 2.5292535794752524, + "eval_loss": 0.3091621398925781, + "eval_runtime": 180.6731, + "eval_samples_per_second": 41.035, + "eval_steps_per_second": 5.131, + "eval_wer": 0.15795015157841233, + "step": 69600 + }, + { + "epoch": 2.529616977978051, + "grad_norm": 1.5794726610183716, + "learning_rate": 3.501298850114266e-05, + "loss": 0.1154, + "step": 69610 + }, + { + "epoch": 2.529980376480849, + "grad_norm": 3.069139003753662, + "learning_rate": 3.5008233374078594e-05, + "loss": 0.1161, + "step": 69620 + }, + { + "epoch": 2.5303437749836473, + "grad_norm": 0.8879293203353882, + "learning_rate": 3.500347781579537e-05, + "loss": 0.0929, + "step": 69630 + }, + { + "epoch": 2.5307071734864452, + "grad_norm": 2.097984552383423, + "learning_rate": 3.4998721826497885e-05, + "loss": 0.0873, + "step": 69640 + }, + { + "epoch": 2.5310705719892432, + "grad_norm": 0.8583676218986511, + "learning_rate": 3.499396540639104e-05, + "loss": 0.6541, + "step": 69650 + }, + { + "epoch": 2.5314339704920417, + "grad_norm": 0.44445595145225525, + "learning_rate": 3.498920855567979e-05, + "loss": 0.0748, + "step": 69660 + }, + { + "epoch": 2.5317973689948396, + "grad_norm": 0.9186582565307617, + "learning_rate": 3.4984451274569094e-05, + "loss": 0.1022, + "step": 69670 + }, + { + "epoch": 2.532160767497638, + "grad_norm": 1.34561288356781, + "learning_rate": 3.497969356326391e-05, + "loss": 0.0962, + "step": 69680 + }, + { + "epoch": 2.532524166000436, + "grad_norm": 1.5889935493469238, + "learning_rate": 3.497493542196923e-05, + "loss": 0.1013, + "step": 69690 + }, + { + "epoch": 2.532887564503234, + "grad_norm": 1.0599699020385742, + "learning_rate": 3.4970176850890085e-05, + "loss": 0.1048, + "step": 69700 + }, + { + "epoch": 2.5332509630060325, + "grad_norm": 0.7291392087936401, + "learning_rate": 3.496541785023149e-05, + "loss": 0.1002, + "step": 69710 + }, + { + "epoch": 2.5336143615088305, + "grad_norm": 0.5541179180145264, + "learning_rate": 3.4960658420198494e-05, + "loss": 0.1062, + "step": 69720 + }, + { + "epoch": 2.533977760011629, + "grad_norm": 1.0008395910263062, + "learning_rate": 3.495589856099617e-05, + "loss": 0.2525, + "step": 69730 + }, + { + "epoch": 2.534341158514427, + "grad_norm": 0.7523865699768066, + "learning_rate": 3.49511382728296e-05, + "loss": 0.117, + "step": 69740 + }, + { + "epoch": 2.534704557017225, + "grad_norm": 1.8582743406295776, + "learning_rate": 3.4946377555903886e-05, + "loss": 0.0834, + "step": 69750 + }, + { + "epoch": 2.5350679555200233, + "grad_norm": 0.44991886615753174, + "learning_rate": 3.494161641042415e-05, + "loss": 0.0895, + "step": 69760 + }, + { + "epoch": 2.5354313540228213, + "grad_norm": 0.46044957637786865, + "learning_rate": 3.4936854836595545e-05, + "loss": 0.1333, + "step": 69770 + }, + { + "epoch": 2.5357947525256197, + "grad_norm": 2.098876476287842, + "learning_rate": 3.493209283462321e-05, + "loss": 0.1073, + "step": 69780 + }, + { + "epoch": 2.5361581510284177, + "grad_norm": 0.5006657838821411, + "learning_rate": 3.492733040471234e-05, + "loss": 0.1205, + "step": 69790 + }, + { + "epoch": 2.5365215495312157, + "grad_norm": 1.2363359928131104, + "learning_rate": 3.492256754706813e-05, + "loss": 0.0865, + "step": 69800 + }, + { + "epoch": 2.536884948034014, + "grad_norm": 0.5873517394065857, + "learning_rate": 3.491780426189577e-05, + "loss": 0.0842, + "step": 69810 + }, + { + "epoch": 2.5372483465368125, + "grad_norm": 0.5149590373039246, + "learning_rate": 3.491304054940053e-05, + "loss": 0.302, + "step": 69820 + }, + { + "epoch": 2.5376117450396105, + "grad_norm": 0.613667368888855, + "learning_rate": 3.4908276409787635e-05, + "loss": 0.1106, + "step": 69830 + }, + { + "epoch": 2.5379751435424085, + "grad_norm": 1.8323549032211304, + "learning_rate": 3.490351184326236e-05, + "loss": 0.1301, + "step": 69840 + }, + { + "epoch": 2.538338542045207, + "grad_norm": 1.859044075012207, + "learning_rate": 3.4898746850030005e-05, + "loss": 0.0863, + "step": 69850 + }, + { + "epoch": 2.538701940548005, + "grad_norm": 1.0749214887619019, + "learning_rate": 3.4893981430295864e-05, + "loss": 0.0798, + "step": 69860 + }, + { + "epoch": 2.5390653390508033, + "grad_norm": 0.9566397070884705, + "learning_rate": 3.488921558426527e-05, + "loss": 0.1183, + "step": 69870 + }, + { + "epoch": 2.5394287375536013, + "grad_norm": 1.2835750579833984, + "learning_rate": 3.4884449312143555e-05, + "loss": 0.104, + "step": 69880 + }, + { + "epoch": 2.5397921360563993, + "grad_norm": 0.6767297387123108, + "learning_rate": 3.48796826141361e-05, + "loss": 0.1889, + "step": 69890 + }, + { + "epoch": 2.5401555345591977, + "grad_norm": 1.499045729637146, + "learning_rate": 3.487491549044826e-05, + "loss": 0.1031, + "step": 69900 + }, + { + "epoch": 2.5405189330619957, + "grad_norm": 0.3522442877292633, + "learning_rate": 3.487014794128545e-05, + "loss": 0.1065, + "step": 69910 + }, + { + "epoch": 2.540882331564794, + "grad_norm": 0.6056109070777893, + "learning_rate": 3.486537996685309e-05, + "loss": 0.1181, + "step": 69920 + }, + { + "epoch": 2.541245730067592, + "grad_norm": 2.347325563430786, + "learning_rate": 3.48606115673566e-05, + "loss": 0.3176, + "step": 69930 + }, + { + "epoch": 2.54160912857039, + "grad_norm": 2.3445467948913574, + "learning_rate": 3.4855842743001446e-05, + "loss": 0.1717, + "step": 69940 + }, + { + "epoch": 2.5419725270731885, + "grad_norm": 0.9979462027549744, + "learning_rate": 3.485107349399309e-05, + "loss": 0.0845, + "step": 69950 + }, + { + "epoch": 2.5423359255759865, + "grad_norm": 3.576714038848877, + "learning_rate": 3.484630382053704e-05, + "loss": 0.1516, + "step": 69960 + }, + { + "epoch": 2.542699324078785, + "grad_norm": 0.4525027573108673, + "learning_rate": 3.484153372283878e-05, + "loss": 0.1062, + "step": 69970 + }, + { + "epoch": 2.543062722581583, + "grad_norm": 1.1381046772003174, + "learning_rate": 3.4836763201103854e-05, + "loss": 0.1246, + "step": 69980 + }, + { + "epoch": 2.543426121084381, + "grad_norm": 0.6374491453170776, + "learning_rate": 3.48319922555378e-05, + "loss": 0.1775, + "step": 69990 + }, + { + "epoch": 2.5437895195871794, + "grad_norm": 1.7682280540466309, + "learning_rate": 3.482722088634618e-05, + "loss": 0.1135, + "step": 70000 + }, + { + "epoch": 2.5441529180899773, + "grad_norm": 1.1015331745147705, + "learning_rate": 3.482244909373458e-05, + "loss": 0.0904, + "step": 70010 + }, + { + "epoch": 2.5445163165927758, + "grad_norm": 4.6638689041137695, + "learning_rate": 3.481767687790859e-05, + "loss": 0.2748, + "step": 70020 + }, + { + "epoch": 2.5448797150955738, + "grad_norm": 0.8912318348884583, + "learning_rate": 3.481290423907384e-05, + "loss": 0.094, + "step": 70030 + }, + { + "epoch": 2.5452431135983717, + "grad_norm": 2.43723726272583, + "learning_rate": 3.480813117743596e-05, + "loss": 0.1276, + "step": 70040 + }, + { + "epoch": 2.54560651210117, + "grad_norm": 3.3461971282958984, + "learning_rate": 3.480335769320061e-05, + "loss": 0.0971, + "step": 70050 + }, + { + "epoch": 2.545969910603968, + "grad_norm": 3.6578071117401123, + "learning_rate": 3.479858378657346e-05, + "loss": 0.1575, + "step": 70060 + }, + { + "epoch": 2.5463333091067666, + "grad_norm": 1.831850290298462, + "learning_rate": 3.479380945776018e-05, + "loss": 0.0947, + "step": 70070 + }, + { + "epoch": 2.5466967076095646, + "grad_norm": 0.8772917985916138, + "learning_rate": 3.478903470696651e-05, + "loss": 0.1189, + "step": 70080 + }, + { + "epoch": 2.5470601061123626, + "grad_norm": 7.842989921569824, + "learning_rate": 3.478425953439816e-05, + "loss": 0.1245, + "step": 70090 + }, + { + "epoch": 2.547423504615161, + "grad_norm": 1.8557602167129517, + "learning_rate": 3.4779483940260885e-05, + "loss": 0.1099, + "step": 70100 + }, + { + "epoch": 2.5477869031179594, + "grad_norm": 1.2630740404129028, + "learning_rate": 3.477470792476044e-05, + "loss": 0.076, + "step": 70110 + }, + { + "epoch": 2.5481503016207574, + "grad_norm": 0.5257185697555542, + "learning_rate": 3.4769931488102606e-05, + "loss": 0.1972, + "step": 70120 + }, + { + "epoch": 2.5485137001235554, + "grad_norm": 0.5321794748306274, + "learning_rate": 3.4765154630493194e-05, + "loss": 0.087, + "step": 70130 + }, + { + "epoch": 2.548877098626354, + "grad_norm": 0.7569301128387451, + "learning_rate": 3.4760377352138e-05, + "loss": 0.1349, + "step": 70140 + }, + { + "epoch": 2.549240497129152, + "grad_norm": 3.5890607833862305, + "learning_rate": 3.475559965324289e-05, + "loss": 1.1846, + "step": 70150 + }, + { + "epoch": 2.5496038956319502, + "grad_norm": 0.8748692870140076, + "learning_rate": 3.475082153401368e-05, + "loss": 0.0899, + "step": 70160 + }, + { + "epoch": 2.549967294134748, + "grad_norm": 0.45375722646713257, + "learning_rate": 3.474604299465628e-05, + "loss": 0.1166, + "step": 70170 + }, + { + "epoch": 2.550330692637546, + "grad_norm": 4.401093006134033, + "learning_rate": 3.474126403537656e-05, + "loss": 0.1247, + "step": 70180 + }, + { + "epoch": 2.5506940911403446, + "grad_norm": 0.7887241244316101, + "learning_rate": 3.473648465638043e-05, + "loss": 0.1344, + "step": 70190 + }, + { + "epoch": 2.5510574896431426, + "grad_norm": 1.8106690645217896, + "learning_rate": 3.4731704857873826e-05, + "loss": 0.095, + "step": 70200 + }, + { + "epoch": 2.5510574896431426, + "eval_loss": 0.32158222794532776, + "eval_runtime": 180.3244, + "eval_samples_per_second": 41.115, + "eval_steps_per_second": 5.141, + "eval_wer": 0.15185070887868282, + "step": 70200 + }, + { + "epoch": 2.551420888145941, + "grad_norm": 0.9934507012367249, + "learning_rate": 3.4726924640062676e-05, + "loss": 0.0928, + "step": 70210 + }, + { + "epoch": 2.551784286648739, + "grad_norm": 2.932734966278076, + "learning_rate": 3.472214400315296e-05, + "loss": 0.1131, + "step": 70220 + }, + { + "epoch": 2.552147685151537, + "grad_norm": 0.6811621189117432, + "learning_rate": 3.471736294735065e-05, + "loss": 0.1616, + "step": 70230 + }, + { + "epoch": 2.5525110836543354, + "grad_norm": 3.0019402503967285, + "learning_rate": 3.471258147286173e-05, + "loss": 0.1317, + "step": 70240 + }, + { + "epoch": 2.5528744821571334, + "grad_norm": 0.5437862873077393, + "learning_rate": 3.470779957989225e-05, + "loss": 0.0941, + "step": 70250 + }, + { + "epoch": 2.553237880659932, + "grad_norm": 1.201907992362976, + "learning_rate": 3.470301726864822e-05, + "loss": 0.0959, + "step": 70260 + }, + { + "epoch": 2.55360127916273, + "grad_norm": 0.8288230299949646, + "learning_rate": 3.469823453933569e-05, + "loss": 0.1101, + "step": 70270 + }, + { + "epoch": 2.553964677665528, + "grad_norm": 0.6374495625495911, + "learning_rate": 3.469345139216075e-05, + "loss": 0.0854, + "step": 70280 + }, + { + "epoch": 2.5543280761683262, + "grad_norm": 0.5856258273124695, + "learning_rate": 3.468866782732948e-05, + "loss": 0.1128, + "step": 70290 + }, + { + "epoch": 2.5546914746711242, + "grad_norm": 0.42517444491386414, + "learning_rate": 3.4683883845047985e-05, + "loss": 0.1721, + "step": 70300 + }, + { + "epoch": 2.5550548731739227, + "grad_norm": 3.486084222793579, + "learning_rate": 3.467909944552239e-05, + "loss": 0.0961, + "step": 70310 + }, + { + "epoch": 2.5554182716767206, + "grad_norm": 0.40620315074920654, + "learning_rate": 3.467431462895884e-05, + "loss": 0.1109, + "step": 70320 + }, + { + "epoch": 2.5557816701795186, + "grad_norm": 0.5691574811935425, + "learning_rate": 3.466952939556349e-05, + "loss": 0.1062, + "step": 70330 + }, + { + "epoch": 2.556145068682317, + "grad_norm": 1.281260371208191, + "learning_rate": 3.466474374554252e-05, + "loss": 0.096, + "step": 70340 + }, + { + "epoch": 2.556508467185115, + "grad_norm": 0.9632150530815125, + "learning_rate": 3.465995767910213e-05, + "loss": 0.0934, + "step": 70350 + }, + { + "epoch": 2.5568718656879135, + "grad_norm": 1.603409767150879, + "learning_rate": 3.4655171196448544e-05, + "loss": 0.0931, + "step": 70360 + }, + { + "epoch": 2.5572352641907115, + "grad_norm": 0.9560374021530151, + "learning_rate": 3.465038429778798e-05, + "loss": 0.0984, + "step": 70370 + }, + { + "epoch": 2.5575986626935094, + "grad_norm": 0.6290355920791626, + "learning_rate": 3.464559698332669e-05, + "loss": 0.113, + "step": 70380 + }, + { + "epoch": 2.557962061196308, + "grad_norm": 156.17115783691406, + "learning_rate": 3.464080925327094e-05, + "loss": 0.4685, + "step": 70390 + }, + { + "epoch": 2.5583254596991063, + "grad_norm": 1.0737193822860718, + "learning_rate": 3.4636021107827026e-05, + "loss": 0.0767, + "step": 70400 + }, + { + "epoch": 2.5586888582019043, + "grad_norm": 0.8538148403167725, + "learning_rate": 3.463123254720125e-05, + "loss": 0.087, + "step": 70410 + }, + { + "epoch": 2.5590522567047023, + "grad_norm": 0.826351523399353, + "learning_rate": 3.462644357159993e-05, + "loss": 0.1008, + "step": 70420 + }, + { + "epoch": 2.5594156552075007, + "grad_norm": 0.4948084056377411, + "learning_rate": 3.462165418122941e-05, + "loss": 0.0961, + "step": 70430 + }, + { + "epoch": 2.5597790537102987, + "grad_norm": 0.9462293982505798, + "learning_rate": 3.4616864376296046e-05, + "loss": 0.135, + "step": 70440 + }, + { + "epoch": 2.560142452213097, + "grad_norm": 0.5528499484062195, + "learning_rate": 3.4612074157006206e-05, + "loss": 0.101, + "step": 70450 + }, + { + "epoch": 2.560505850715895, + "grad_norm": 0.822938859462738, + "learning_rate": 3.4607283523566294e-05, + "loss": 0.095, + "step": 70460 + }, + { + "epoch": 2.560869249218693, + "grad_norm": 0.6554206013679504, + "learning_rate": 3.460249247618271e-05, + "loss": 0.0966, + "step": 70470 + }, + { + "epoch": 2.5612326477214915, + "grad_norm": 0.8792755007743835, + "learning_rate": 3.4597701015061904e-05, + "loss": 0.1041, + "step": 70480 + }, + { + "epoch": 2.5615960462242895, + "grad_norm": 0.8316457867622375, + "learning_rate": 3.4592909140410304e-05, + "loss": 0.1166, + "step": 70490 + }, + { + "epoch": 2.561959444727088, + "grad_norm": 1.183933138847351, + "learning_rate": 3.458811685243438e-05, + "loss": 0.0911, + "step": 70500 + }, + { + "epoch": 2.562322843229886, + "grad_norm": 0.6310432553291321, + "learning_rate": 3.458332415134062e-05, + "loss": 0.0833, + "step": 70510 + }, + { + "epoch": 2.562686241732684, + "grad_norm": 0.5768032670021057, + "learning_rate": 3.457853103733552e-05, + "loss": 0.1941, + "step": 70520 + }, + { + "epoch": 2.5630496402354823, + "grad_norm": 0.5388504266738892, + "learning_rate": 3.457373751062559e-05, + "loss": 0.1083, + "step": 70530 + }, + { + "epoch": 2.5634130387382803, + "grad_norm": 9.278057098388672, + "learning_rate": 3.4568943571417376e-05, + "loss": 0.102, + "step": 70540 + }, + { + "epoch": 2.5637764372410787, + "grad_norm": 0.8533729910850525, + "learning_rate": 3.456414921991744e-05, + "loss": 0.0916, + "step": 70550 + }, + { + "epoch": 2.5641398357438767, + "grad_norm": 0.8473436832427979, + "learning_rate": 3.455935445633234e-05, + "loss": 0.0953, + "step": 70560 + }, + { + "epoch": 2.5645032342466747, + "grad_norm": 0.8911932706832886, + "learning_rate": 3.455455928086866e-05, + "loss": 0.0992, + "step": 70570 + }, + { + "epoch": 2.564866632749473, + "grad_norm": 0.9488405585289001, + "learning_rate": 3.4549763693733026e-05, + "loss": 0.1191, + "step": 70580 + }, + { + "epoch": 2.565230031252271, + "grad_norm": 0.6498254537582397, + "learning_rate": 3.454496769513204e-05, + "loss": 0.1442, + "step": 70590 + }, + { + "epoch": 2.5655934297550695, + "grad_norm": 0.5127254724502563, + "learning_rate": 3.4540171285272374e-05, + "loss": 0.0843, + "step": 70600 + }, + { + "epoch": 2.5659568282578675, + "grad_norm": 2.8321163654327393, + "learning_rate": 3.453537446436066e-05, + "loss": 0.076, + "step": 70610 + }, + { + "epoch": 2.5663202267606655, + "grad_norm": 0.8829347491264343, + "learning_rate": 3.4530577232603584e-05, + "loss": 0.1044, + "step": 70620 + }, + { + "epoch": 2.566683625263464, + "grad_norm": 1.8622163534164429, + "learning_rate": 3.452577959020785e-05, + "loss": 0.1057, + "step": 70630 + }, + { + "epoch": 2.567047023766262, + "grad_norm": 0.5306766629219055, + "learning_rate": 3.452098153738017e-05, + "loss": 0.1118, + "step": 70640 + }, + { + "epoch": 2.5674104222690604, + "grad_norm": 0.5810162425041199, + "learning_rate": 3.451618307432727e-05, + "loss": 0.0862, + "step": 70650 + }, + { + "epoch": 2.5677738207718583, + "grad_norm": 0.790539026260376, + "learning_rate": 3.4511384201255895e-05, + "loss": 0.1227, + "step": 70660 + }, + { + "epoch": 2.5681372192746563, + "grad_norm": 3.3890788555145264, + "learning_rate": 3.450658491837282e-05, + "loss": 0.0883, + "step": 70670 + }, + { + "epoch": 2.5685006177774548, + "grad_norm": 1.1996808052062988, + "learning_rate": 3.4501785225884816e-05, + "loss": 0.0989, + "step": 70680 + }, + { + "epoch": 2.568864016280253, + "grad_norm": 0.81224524974823, + "learning_rate": 3.449698512399871e-05, + "loss": 0.1149, + "step": 70690 + }, + { + "epoch": 2.569227414783051, + "grad_norm": 1.3377439975738525, + "learning_rate": 3.4492184612921305e-05, + "loss": 0.1048, + "step": 70700 + }, + { + "epoch": 2.569590813285849, + "grad_norm": 0.9538800716400146, + "learning_rate": 3.4487383692859423e-05, + "loss": 0.0946, + "step": 70710 + }, + { + "epoch": 2.5699542117886476, + "grad_norm": 0.49254775047302246, + "learning_rate": 3.448258236401994e-05, + "loss": 0.1008, + "step": 70720 + }, + { + "epoch": 2.5703176102914456, + "grad_norm": 0.44506704807281494, + "learning_rate": 3.447778062660973e-05, + "loss": 0.1111, + "step": 70730 + }, + { + "epoch": 2.570681008794244, + "grad_norm": 0.8836443424224854, + "learning_rate": 3.4472978480835674e-05, + "loss": 0.1064, + "step": 70740 + }, + { + "epoch": 2.571044407297042, + "grad_norm": 0.8320255279541016, + "learning_rate": 3.4468175926904666e-05, + "loss": 0.0926, + "step": 70750 + }, + { + "epoch": 2.57140780579984, + "grad_norm": 0.6895723342895508, + "learning_rate": 3.446337296502366e-05, + "loss": 0.0766, + "step": 70760 + }, + { + "epoch": 2.5717712043026384, + "grad_norm": 0.6943153738975525, + "learning_rate": 3.445856959539958e-05, + "loss": 0.1134, + "step": 70770 + }, + { + "epoch": 2.5721346028054364, + "grad_norm": 0.7596734166145325, + "learning_rate": 3.4453765818239387e-05, + "loss": 0.0969, + "step": 70780 + }, + { + "epoch": 2.572498001308235, + "grad_norm": 0.42216864228248596, + "learning_rate": 3.4448961633750066e-05, + "loss": 0.1094, + "step": 70790 + }, + { + "epoch": 2.572861399811033, + "grad_norm": 0.8295478224754333, + "learning_rate": 3.44441570421386e-05, + "loss": 0.0825, + "step": 70800 + }, + { + "epoch": 2.572861399811033, + "eval_loss": 0.3259897530078888, + "eval_runtime": 180.0538, + "eval_samples_per_second": 41.177, + "eval_steps_per_second": 5.148, + "eval_wer": 0.15045291992666146, + "step": 70800 + }, + { + "epoch": 2.5732247983138308, + "grad_norm": 0.8128442168235779, + "learning_rate": 3.4439352043612015e-05, + "loss": 0.093, + "step": 70810 + }, + { + "epoch": 2.573588196816629, + "grad_norm": 0.5261029601097107, + "learning_rate": 3.4434546638377334e-05, + "loss": 0.1067, + "step": 70820 + }, + { + "epoch": 2.573951595319427, + "grad_norm": 2.5018603801727295, + "learning_rate": 3.442974082664161e-05, + "loss": 0.114, + "step": 70830 + }, + { + "epoch": 2.5743149938222256, + "grad_norm": 0.37377244234085083, + "learning_rate": 3.44249346086119e-05, + "loss": 0.1822, + "step": 70840 + }, + { + "epoch": 2.5746783923250236, + "grad_norm": 6.293512344360352, + "learning_rate": 3.4420127984495295e-05, + "loss": 0.1651, + "step": 70850 + }, + { + "epoch": 2.5750417908278216, + "grad_norm": 1.2653559446334839, + "learning_rate": 3.4415320954498894e-05, + "loss": 0.0936, + "step": 70860 + }, + { + "epoch": 2.57540518933062, + "grad_norm": 0.4816114008426666, + "learning_rate": 3.4410513518829806e-05, + "loss": 0.1242, + "step": 70870 + }, + { + "epoch": 2.575768587833418, + "grad_norm": 0.6479201316833496, + "learning_rate": 3.440570567769518e-05, + "loss": 0.0924, + "step": 70880 + }, + { + "epoch": 2.5761319863362164, + "grad_norm": 1.3513591289520264, + "learning_rate": 3.440089743130216e-05, + "loss": 0.1385, + "step": 70890 + }, + { + "epoch": 2.5764953848390144, + "grad_norm": 0.4289826452732086, + "learning_rate": 3.4396088779857917e-05, + "loss": 0.0769, + "step": 70900 + }, + { + "epoch": 2.5768587833418124, + "grad_norm": 1.5458887815475464, + "learning_rate": 3.4391279723569635e-05, + "loss": 0.1007, + "step": 70910 + }, + { + "epoch": 2.577222181844611, + "grad_norm": 0.5470010638237, + "learning_rate": 3.438647026264453e-05, + "loss": 0.1041, + "step": 70920 + }, + { + "epoch": 2.577585580347409, + "grad_norm": 0.7723416090011597, + "learning_rate": 3.438166039728982e-05, + "loss": 0.128, + "step": 70930 + }, + { + "epoch": 2.5779489788502072, + "grad_norm": 0.7723271250724792, + "learning_rate": 3.437685012771274e-05, + "loss": 0.1332, + "step": 70940 + }, + { + "epoch": 2.5783123773530052, + "grad_norm": 0.6610028147697449, + "learning_rate": 3.4372039454120556e-05, + "loss": 0.093, + "step": 70950 + }, + { + "epoch": 2.578675775855803, + "grad_norm": 0.8244014978408813, + "learning_rate": 3.436722837672053e-05, + "loss": 0.0913, + "step": 70960 + }, + { + "epoch": 2.5790391743586016, + "grad_norm": 3.8544437885284424, + "learning_rate": 3.4362416895719966e-05, + "loss": 0.1292, + "step": 70970 + }, + { + "epoch": 2.5794025728614, + "grad_norm": 0.570715069770813, + "learning_rate": 3.4357605011326164e-05, + "loss": 0.1142, + "step": 70980 + }, + { + "epoch": 2.579765971364198, + "grad_norm": 0.6846952438354492, + "learning_rate": 3.435279272374647e-05, + "loss": 0.1157, + "step": 70990 + }, + { + "epoch": 2.580129369866996, + "grad_norm": 0.8145487904548645, + "learning_rate": 3.4347980033188203e-05, + "loss": 0.0972, + "step": 71000 + }, + { + "epoch": 2.5804927683697945, + "grad_norm": 0.3357942998409271, + "learning_rate": 3.434316693985874e-05, + "loss": 0.0952, + "step": 71010 + }, + { + "epoch": 2.5808561668725924, + "grad_norm": 0.6499632000923157, + "learning_rate": 3.433835344396546e-05, + "loss": 0.2527, + "step": 71020 + }, + { + "epoch": 2.581219565375391, + "grad_norm": 1.1719329357147217, + "learning_rate": 3.4333539545715754e-05, + "loss": 0.0954, + "step": 71030 + }, + { + "epoch": 2.581582963878189, + "grad_norm": 2.707500457763672, + "learning_rate": 3.432872524531704e-05, + "loss": 0.1674, + "step": 71040 + }, + { + "epoch": 2.581946362380987, + "grad_norm": 1.7513278722763062, + "learning_rate": 3.432391054297674e-05, + "loss": 0.0758, + "step": 71050 + }, + { + "epoch": 2.5823097608837853, + "grad_norm": 0.9239100813865662, + "learning_rate": 3.431909543890231e-05, + "loss": 0.0919, + "step": 71060 + }, + { + "epoch": 2.5826731593865833, + "grad_norm": 0.21719126403331757, + "learning_rate": 3.431427993330122e-05, + "loss": 0.1458, + "step": 71070 + }, + { + "epoch": 2.5830365578893817, + "grad_norm": 0.7722142338752747, + "learning_rate": 3.430946402638095e-05, + "loss": 0.0977, + "step": 71080 + }, + { + "epoch": 2.5833999563921797, + "grad_norm": 0.7812473773956299, + "learning_rate": 3.430464771834899e-05, + "loss": 0.1204, + "step": 71090 + }, + { + "epoch": 2.5837633548949777, + "grad_norm": 1.0319454669952393, + "learning_rate": 3.429983100941287e-05, + "loss": 0.0921, + "step": 71100 + }, + { + "epoch": 2.584126753397776, + "grad_norm": 0.8233940601348877, + "learning_rate": 3.429501389978013e-05, + "loss": 0.0931, + "step": 71110 + }, + { + "epoch": 2.584490151900574, + "grad_norm": 0.5543156862258911, + "learning_rate": 3.42901963896583e-05, + "loss": 0.0936, + "step": 71120 + }, + { + "epoch": 2.5848535504033725, + "grad_norm": 0.849062979221344, + "learning_rate": 3.4285378479254964e-05, + "loss": 0.1034, + "step": 71130 + }, + { + "epoch": 2.5852169489061705, + "grad_norm": 0.7621930837631226, + "learning_rate": 3.428056016877771e-05, + "loss": 0.1219, + "step": 71140 + }, + { + "epoch": 2.5855803474089685, + "grad_norm": 1.097886323928833, + "learning_rate": 3.427574145843413e-05, + "loss": 0.1034, + "step": 71150 + }, + { + "epoch": 2.585943745911767, + "grad_norm": 1.2844264507293701, + "learning_rate": 3.4270922348431866e-05, + "loss": 0.0961, + "step": 71160 + }, + { + "epoch": 2.586307144414565, + "grad_norm": 0.6416186094284058, + "learning_rate": 3.4266102838978544e-05, + "loss": 0.0982, + "step": 71170 + }, + { + "epoch": 2.5866705429173633, + "grad_norm": 1.0426020622253418, + "learning_rate": 3.426128293028181e-05, + "loss": 0.108, + "step": 71180 + }, + { + "epoch": 2.5870339414201613, + "grad_norm": 1.2115471363067627, + "learning_rate": 3.425646262254935e-05, + "loss": 0.0981, + "step": 71190 + }, + { + "epoch": 2.5873973399229593, + "grad_norm": 1.362383484840393, + "learning_rate": 3.425164191598885e-05, + "loss": 0.0936, + "step": 71200 + }, + { + "epoch": 2.5877607384257577, + "grad_norm": 1.0915354490280151, + "learning_rate": 3.4246820810808025e-05, + "loss": 0.0832, + "step": 71210 + }, + { + "epoch": 2.5881241369285557, + "grad_norm": 1.0872890949249268, + "learning_rate": 3.424199930721459e-05, + "loss": 0.1135, + "step": 71220 + }, + { + "epoch": 2.588487535431354, + "grad_norm": 0.5933959484100342, + "learning_rate": 3.4237177405416276e-05, + "loss": 0.0971, + "step": 71230 + }, + { + "epoch": 2.588850933934152, + "grad_norm": 1.2194724082946777, + "learning_rate": 3.423235510562086e-05, + "loss": 0.1171, + "step": 71240 + }, + { + "epoch": 2.58921433243695, + "grad_norm": 1.0390851497650146, + "learning_rate": 3.422753240803612e-05, + "loss": 1.326, + "step": 71250 + }, + { + "epoch": 2.5895777309397485, + "grad_norm": 1.630076289176941, + "learning_rate": 3.4222709312869825e-05, + "loss": 0.0837, + "step": 71260 + }, + { + "epoch": 2.589941129442547, + "grad_norm": 0.4006626307964325, + "learning_rate": 3.421788582032981e-05, + "loss": 0.1116, + "step": 71270 + }, + { + "epoch": 2.590304527945345, + "grad_norm": 0.5512908697128296, + "learning_rate": 3.4213061930623884e-05, + "loss": 0.1169, + "step": 71280 + }, + { + "epoch": 2.590667926448143, + "grad_norm": 1.0259326696395874, + "learning_rate": 3.420823764395991e-05, + "loss": 0.1221, + "step": 71290 + }, + { + "epoch": 2.5910313249509414, + "grad_norm": 1.1377673149108887, + "learning_rate": 3.420341296054574e-05, + "loss": 0.1241, + "step": 71300 + }, + { + "epoch": 2.5913947234537393, + "grad_norm": 0.9114333987236023, + "learning_rate": 3.419858788058924e-05, + "loss": 0.1012, + "step": 71310 + }, + { + "epoch": 2.5917581219565378, + "grad_norm": 0.6368651390075684, + "learning_rate": 3.4193762404298327e-05, + "loss": 0.1096, + "step": 71320 + }, + { + "epoch": 2.5921215204593357, + "grad_norm": 1.080757975578308, + "learning_rate": 3.4188936531880894e-05, + "loss": 0.1046, + "step": 71330 + }, + { + "epoch": 2.5924849189621337, + "grad_norm": 11.998626708984375, + "learning_rate": 3.418411026354489e-05, + "loss": 0.2426, + "step": 71340 + }, + { + "epoch": 2.592848317464932, + "grad_norm": 1.4404159784317017, + "learning_rate": 3.417928359949824e-05, + "loss": 0.0751, + "step": 71350 + }, + { + "epoch": 2.59321171596773, + "grad_norm": 0.42481374740600586, + "learning_rate": 3.417445653994893e-05, + "loss": 0.0891, + "step": 71360 + }, + { + "epoch": 2.5935751144705286, + "grad_norm": 0.6405854225158691, + "learning_rate": 3.416962908510493e-05, + "loss": 0.1497, + "step": 71370 + }, + { + "epoch": 2.5939385129733266, + "grad_norm": 0.6601307392120361, + "learning_rate": 3.416480123517424e-05, + "loss": 0.1041, + "step": 71380 + }, + { + "epoch": 2.5943019114761245, + "grad_norm": 6.7601318359375, + "learning_rate": 3.415997299036486e-05, + "loss": 0.1162, + "step": 71390 + }, + { + "epoch": 2.594665309978923, + "grad_norm": 0.7878421545028687, + "learning_rate": 3.415514435088485e-05, + "loss": 0.089, + "step": 71400 + }, + { + "epoch": 2.594665309978923, + "eval_loss": 0.32694903016090393, + "eval_runtime": 180.1568, + "eval_samples_per_second": 41.153, + "eval_steps_per_second": 5.146, + "eval_wer": 0.15186886198195582, + "step": 71400 + }, + { + "epoch": 2.595028708481721, + "grad_norm": 0.8284702897071838, + "learning_rate": 3.415031531694224e-05, + "loss": 0.1116, + "step": 71410 + }, + { + "epoch": 2.5953921069845194, + "grad_norm": 0.6205730438232422, + "learning_rate": 3.41454858887451e-05, + "loss": 0.1055, + "step": 71420 + }, + { + "epoch": 2.5957555054873174, + "grad_norm": 0.6210823655128479, + "learning_rate": 3.414065606650151e-05, + "loss": 0.1, + "step": 71430 + }, + { + "epoch": 2.5961189039901154, + "grad_norm": 1.2746903896331787, + "learning_rate": 3.4135825850419576e-05, + "loss": 0.0938, + "step": 71440 + }, + { + "epoch": 2.596482302492914, + "grad_norm": 0.6673762202262878, + "learning_rate": 3.4130995240707406e-05, + "loss": 1.6946, + "step": 71450 + }, + { + "epoch": 2.5968457009957118, + "grad_norm": 0.6017360687255859, + "learning_rate": 3.4126164237573145e-05, + "loss": 0.0974, + "step": 71460 + }, + { + "epoch": 2.59720909949851, + "grad_norm": 0.5965964198112488, + "learning_rate": 3.4121332841224926e-05, + "loss": 0.1109, + "step": 71470 + }, + { + "epoch": 2.597572498001308, + "grad_norm": 0.8033668398857117, + "learning_rate": 3.411650105187094e-05, + "loss": 0.1007, + "step": 71480 + }, + { + "epoch": 2.597935896504106, + "grad_norm": 0.9280270338058472, + "learning_rate": 3.411166886971936e-05, + "loss": 0.1341, + "step": 71490 + }, + { + "epoch": 2.5982992950069046, + "grad_norm": 0.6610667705535889, + "learning_rate": 3.4106836294978386e-05, + "loss": 0.088, + "step": 71500 + }, + { + "epoch": 2.5986626935097026, + "grad_norm": 0.6038778424263, + "learning_rate": 3.410200332785624e-05, + "loss": 0.0901, + "step": 71510 + }, + { + "epoch": 2.599026092012501, + "grad_norm": 0.673305094242096, + "learning_rate": 3.409716996856115e-05, + "loss": 0.1105, + "step": 71520 + }, + { + "epoch": 2.599389490515299, + "grad_norm": 0.5786300301551819, + "learning_rate": 3.409233621730139e-05, + "loss": 0.0854, + "step": 71530 + }, + { + "epoch": 2.599752889018097, + "grad_norm": 0.8676998615264893, + "learning_rate": 3.40875020742852e-05, + "loss": 0.1241, + "step": 71540 + }, + { + "epoch": 2.6001162875208954, + "grad_norm": 1.203029990196228, + "learning_rate": 3.4083151010791036e-05, + "loss": 2.9622, + "step": 71550 + }, + { + "epoch": 2.600479686023694, + "grad_norm": 0.34861287474632263, + "learning_rate": 3.40783161240115e-05, + "loss": 0.1029, + "step": 71560 + }, + { + "epoch": 2.600843084526492, + "grad_norm": 0.649398148059845, + "learning_rate": 3.407348084607961e-05, + "loss": 0.1265, + "step": 71570 + }, + { + "epoch": 2.60120648302929, + "grad_norm": 1.2412714958190918, + "learning_rate": 3.406864517720373e-05, + "loss": 0.0755, + "step": 71580 + }, + { + "epoch": 2.6015698815320882, + "grad_norm": 0.3836827278137207, + "learning_rate": 3.406380911759219e-05, + "loss": 0.1238, + "step": 71590 + }, + { + "epoch": 2.601933280034886, + "grad_norm": 0.7698721885681152, + "learning_rate": 3.405897266745337e-05, + "loss": 0.09, + "step": 71600 + }, + { + "epoch": 2.6022966785376846, + "grad_norm": 3.227402448654175, + "learning_rate": 3.4054135826995636e-05, + "loss": 0.5652, + "step": 71610 + }, + { + "epoch": 2.6026600770404826, + "grad_norm": 0.5352892279624939, + "learning_rate": 3.4049298596427415e-05, + "loss": 0.1299, + "step": 71620 + }, + { + "epoch": 2.6030234755432806, + "grad_norm": 1.8280853033065796, + "learning_rate": 3.40444609759571e-05, + "loss": 0.0985, + "step": 71630 + }, + { + "epoch": 2.603386874046079, + "grad_norm": 0.6967837810516357, + "learning_rate": 3.403962296579316e-05, + "loss": 0.1074, + "step": 71640 + }, + { + "epoch": 2.603750272548877, + "grad_norm": 0.4568573832511902, + "learning_rate": 3.403478456614402e-05, + "loss": 0.0929, + "step": 71650 + }, + { + "epoch": 2.6041136710516755, + "grad_norm": 0.7679555416107178, + "learning_rate": 3.402994577721816e-05, + "loss": 0.0802, + "step": 71660 + }, + { + "epoch": 2.6044770695544734, + "grad_norm": 0.9601152539253235, + "learning_rate": 3.402510659922407e-05, + "loss": 0.1268, + "step": 71670 + }, + { + "epoch": 2.6048404680572714, + "grad_norm": 0.4837740659713745, + "learning_rate": 3.4020267032370245e-05, + "loss": 0.172, + "step": 71680 + }, + { + "epoch": 2.60520386656007, + "grad_norm": 0.4992314279079437, + "learning_rate": 3.401542707686521e-05, + "loss": 0.1196, + "step": 71690 + }, + { + "epoch": 2.605567265062868, + "grad_norm": 0.8764163255691528, + "learning_rate": 3.4010586732917495e-05, + "loss": 0.4405, + "step": 71700 + }, + { + "epoch": 2.6059306635656663, + "grad_norm": 1.7109190225601196, + "learning_rate": 3.400574600073566e-05, + "loss": 0.1298, + "step": 71710 + }, + { + "epoch": 2.6062940620684643, + "grad_norm": 1.2784879207611084, + "learning_rate": 3.4000904880528275e-05, + "loss": 0.1156, + "step": 71720 + }, + { + "epoch": 2.6066574605712622, + "grad_norm": 8.60650634765625, + "learning_rate": 3.399606337250392e-05, + "loss": 0.125, + "step": 71730 + }, + { + "epoch": 2.6070208590740607, + "grad_norm": 0.4801369309425354, + "learning_rate": 3.39912214768712e-05, + "loss": 0.1208, + "step": 71740 + }, + { + "epoch": 2.6073842575768587, + "grad_norm": 4.337435722351074, + "learning_rate": 3.398637919383873e-05, + "loss": 0.1001, + "step": 71750 + }, + { + "epoch": 2.607747656079657, + "grad_norm": 0.7785841822624207, + "learning_rate": 3.398153652361517e-05, + "loss": 0.088, + "step": 71760 + }, + { + "epoch": 2.608111054582455, + "grad_norm": 0.7540931105613708, + "learning_rate": 3.3976693466409155e-05, + "loss": 0.0973, + "step": 71770 + }, + { + "epoch": 2.608474453085253, + "grad_norm": 0.8233292698860168, + "learning_rate": 3.3971850022429354e-05, + "loss": 0.1088, + "step": 71780 + }, + { + "epoch": 2.6088378515880515, + "grad_norm": 3.5180065631866455, + "learning_rate": 3.396700619188446e-05, + "loss": 0.1259, + "step": 71790 + }, + { + "epoch": 2.6092012500908495, + "grad_norm": 0.8610531687736511, + "learning_rate": 3.396216197498317e-05, + "loss": 0.0909, + "step": 71800 + }, + { + "epoch": 2.609564648593648, + "grad_norm": 0.7979753613471985, + "learning_rate": 3.395731737193421e-05, + "loss": 0.0987, + "step": 71810 + }, + { + "epoch": 2.609928047096446, + "grad_norm": 3.379258871078491, + "learning_rate": 3.3952472382946313e-05, + "loss": 0.0889, + "step": 71820 + }, + { + "epoch": 2.610291445599244, + "grad_norm": 1.4110392332077026, + "learning_rate": 3.394762700822824e-05, + "loss": 0.0718, + "step": 71830 + }, + { + "epoch": 2.6106548441020423, + "grad_norm": 0.41368210315704346, + "learning_rate": 3.3942781247988754e-05, + "loss": 0.0993, + "step": 71840 + }, + { + "epoch": 2.6110182426048407, + "grad_norm": 0.6575911045074463, + "learning_rate": 3.3937935102436636e-05, + "loss": 0.0845, + "step": 71850 + }, + { + "epoch": 2.6113816411076387, + "grad_norm": 0.545257568359375, + "learning_rate": 3.39330885717807e-05, + "loss": 0.0896, + "step": 71860 + }, + { + "epoch": 2.6117450396104367, + "grad_norm": 3.2522082328796387, + "learning_rate": 3.392824165622976e-05, + "loss": 0.1424, + "step": 71870 + }, + { + "epoch": 2.612108438113235, + "grad_norm": 1.4843670129776, + "learning_rate": 3.392339435599265e-05, + "loss": 0.0788, + "step": 71880 + }, + { + "epoch": 2.612471836616033, + "grad_norm": 1.475480318069458, + "learning_rate": 3.3918546671278235e-05, + "loss": 0.1693, + "step": 71890 + }, + { + "epoch": 2.6128352351188315, + "grad_norm": 0.9142501354217529, + "learning_rate": 3.3913698602295376e-05, + "loss": 0.0975, + "step": 71900 + }, + { + "epoch": 2.6131986336216295, + "grad_norm": 1.1580731868743896, + "learning_rate": 3.390885014925295e-05, + "loss": 0.1173, + "step": 71910 + }, + { + "epoch": 2.6135620321244275, + "grad_norm": 0.5489696860313416, + "learning_rate": 3.3904001312359874e-05, + "loss": 0.111, + "step": 71920 + }, + { + "epoch": 2.613925430627226, + "grad_norm": 1.6776854991912842, + "learning_rate": 3.3899152091825064e-05, + "loss": 0.1057, + "step": 71930 + }, + { + "epoch": 2.614288829130024, + "grad_norm": 0.8543124198913574, + "learning_rate": 3.3894302487857446e-05, + "loss": 0.1383, + "step": 71940 + }, + { + "epoch": 2.6146522276328223, + "grad_norm": 0.4847543239593506, + "learning_rate": 3.388945250066599e-05, + "loss": 0.1408, + "step": 71950 + }, + { + "epoch": 2.6150156261356203, + "grad_norm": 0.8432245254516602, + "learning_rate": 3.388460213045965e-05, + "loss": 0.1051, + "step": 71960 + }, + { + "epoch": 2.6153790246384183, + "grad_norm": 0.6041918396949768, + "learning_rate": 3.387975137744742e-05, + "loss": 0.0875, + "step": 71970 + }, + { + "epoch": 2.6157424231412167, + "grad_norm": 0.6646948456764221, + "learning_rate": 3.387490024183829e-05, + "loss": 0.0961, + "step": 71980 + }, + { + "epoch": 2.6161058216440147, + "grad_norm": 1.7802671194076538, + "learning_rate": 3.387004872384129e-05, + "loss": 0.1365, + "step": 71990 + }, + { + "epoch": 2.616469220146813, + "grad_norm": 5.307714462280273, + "learning_rate": 3.3865196823665454e-05, + "loss": 0.0987, + "step": 72000 + }, + { + "epoch": 2.616469220146813, + "eval_loss": 0.3264125883579254, + "eval_runtime": 179.4492, + "eval_samples_per_second": 41.315, + "eval_steps_per_second": 5.166, + "eval_wer": 0.15330295714052317, + "step": 72000 + }, + { + "epoch": 2.616832618649611, + "grad_norm": 2.6637954711914062, + "learning_rate": 3.386034454151982e-05, + "loss": 0.0894, + "step": 72010 + }, + { + "epoch": 2.617196017152409, + "grad_norm": 1.1595145463943481, + "learning_rate": 3.385549187761347e-05, + "loss": 0.1152, + "step": 72020 + }, + { + "epoch": 2.6175594156552076, + "grad_norm": 0.6032492518424988, + "learning_rate": 3.3850638832155486e-05, + "loss": 0.097, + "step": 72030 + }, + { + "epoch": 2.6179228141580055, + "grad_norm": 0.9775734543800354, + "learning_rate": 3.3845785405354955e-05, + "loss": 0.109, + "step": 72040 + }, + { + "epoch": 2.618286212660804, + "grad_norm": 1.095192790031433, + "learning_rate": 3.384093159742102e-05, + "loss": 3.8308, + "step": 72050 + }, + { + "epoch": 2.618649611163602, + "grad_norm": 0.4287867248058319, + "learning_rate": 3.383607740856278e-05, + "loss": 0.109, + "step": 72060 + }, + { + "epoch": 2.6190130096664, + "grad_norm": 0.8537576794624329, + "learning_rate": 3.3831222838989416e-05, + "loss": 0.1129, + "step": 72070 + }, + { + "epoch": 2.6193764081691984, + "grad_norm": 0.6496450901031494, + "learning_rate": 3.382636788891008e-05, + "loss": 0.1073, + "step": 72080 + }, + { + "epoch": 2.6197398066719964, + "grad_norm": 1.3108080625534058, + "learning_rate": 3.382151255853396e-05, + "loss": 0.0974, + "step": 72090 + }, + { + "epoch": 2.620103205174795, + "grad_norm": 0.6950684785842896, + "learning_rate": 3.381665684807024e-05, + "loss": 0.0906, + "step": 72100 + }, + { + "epoch": 2.6204666036775928, + "grad_norm": 0.5899102091789246, + "learning_rate": 3.381180075772815e-05, + "loss": 0.0891, + "step": 72110 + }, + { + "epoch": 2.6208300021803907, + "grad_norm": 1.007866382598877, + "learning_rate": 3.380694428771692e-05, + "loss": 0.1396, + "step": 72120 + }, + { + "epoch": 2.621193400683189, + "grad_norm": 1.360654592514038, + "learning_rate": 3.38020874382458e-05, + "loss": 0.1011, + "step": 72130 + }, + { + "epoch": 2.6215567991859876, + "grad_norm": 0.9132879972457886, + "learning_rate": 3.3797230209524046e-05, + "loss": 0.1458, + "step": 72140 + }, + { + "epoch": 2.6219201976887856, + "grad_norm": 0.9610080122947693, + "learning_rate": 3.379237260176093e-05, + "loss": 0.0807, + "step": 72150 + }, + { + "epoch": 2.6222835961915836, + "grad_norm": 0.8496606349945068, + "learning_rate": 3.378751461516578e-05, + "loss": 0.1041, + "step": 72160 + }, + { + "epoch": 2.622646994694382, + "grad_norm": 0.5927808284759521, + "learning_rate": 3.3782656249947894e-05, + "loss": 1.5747, + "step": 72170 + }, + { + "epoch": 2.62301039319718, + "grad_norm": 0.6998677849769592, + "learning_rate": 3.3777797506316586e-05, + "loss": 0.1019, + "step": 72180 + }, + { + "epoch": 2.6233737916999784, + "grad_norm": 0.7553665637969971, + "learning_rate": 3.3772938384481225e-05, + "loss": 0.1208, + "step": 72190 + }, + { + "epoch": 2.6237371902027764, + "grad_norm": 0.3797171115875244, + "learning_rate": 3.376807888465116e-05, + "loss": 0.1391, + "step": 72200 + }, + { + "epoch": 2.6241005887055744, + "grad_norm": 0.8174279928207397, + "learning_rate": 3.376321900703576e-05, + "loss": 0.0969, + "step": 72210 + }, + { + "epoch": 2.624463987208373, + "grad_norm": 3.2531015872955322, + "learning_rate": 3.375835875184445e-05, + "loss": 0.1024, + "step": 72220 + }, + { + "epoch": 2.624827385711171, + "grad_norm": 1.0118309259414673, + "learning_rate": 3.3753498119286616e-05, + "loss": 0.1033, + "step": 72230 + }, + { + "epoch": 2.6251907842139692, + "grad_norm": 1.3845003843307495, + "learning_rate": 3.374863710957169e-05, + "loss": 0.0972, + "step": 72240 + }, + { + "epoch": 2.625554182716767, + "grad_norm": 0.5679929256439209, + "learning_rate": 3.3743775722909124e-05, + "loss": 0.0885, + "step": 72250 + }, + { + "epoch": 2.625917581219565, + "grad_norm": 0.30551066994667053, + "learning_rate": 3.373891395950838e-05, + "loss": 0.0825, + "step": 72260 + }, + { + "epoch": 2.6262809797223636, + "grad_norm": 0.40965649485588074, + "learning_rate": 3.373405181957891e-05, + "loss": 0.1317, + "step": 72270 + }, + { + "epoch": 2.6266443782251616, + "grad_norm": 0.6095037460327148, + "learning_rate": 3.3729189303330236e-05, + "loss": 0.0956, + "step": 72280 + }, + { + "epoch": 2.62700777672796, + "grad_norm": 0.6658949851989746, + "learning_rate": 3.3724326410971844e-05, + "loss": 0.7257, + "step": 72290 + }, + { + "epoch": 2.627371175230758, + "grad_norm": 3.251826524734497, + "learning_rate": 3.371946314271327e-05, + "loss": 0.1154, + "step": 72300 + }, + { + "epoch": 2.627734573733556, + "grad_norm": 0.6652829051017761, + "learning_rate": 3.371459949876406e-05, + "loss": 0.0783, + "step": 72310 + }, + { + "epoch": 2.6280979722363544, + "grad_norm": 7.321001052856445, + "learning_rate": 3.370973547933376e-05, + "loss": 0.1234, + "step": 72320 + }, + { + "epoch": 2.6284613707391524, + "grad_norm": 0.5794792771339417, + "learning_rate": 3.370487108463195e-05, + "loss": 0.0981, + "step": 72330 + }, + { + "epoch": 2.628824769241951, + "grad_norm": 2.157670736312866, + "learning_rate": 3.370000631486822e-05, + "loss": 0.1266, + "step": 72340 + }, + { + "epoch": 2.629188167744749, + "grad_norm": 0.7778168320655823, + "learning_rate": 3.369514117025216e-05, + "loss": 0.1093, + "step": 72350 + }, + { + "epoch": 2.629551566247547, + "grad_norm": 0.4507717490196228, + "learning_rate": 3.3690275650993416e-05, + "loss": 0.0935, + "step": 72360 + }, + { + "epoch": 2.6299149647503453, + "grad_norm": 0.2999439835548401, + "learning_rate": 3.36854097573016e-05, + "loss": 0.1175, + "step": 72370 + }, + { + "epoch": 2.6302783632531432, + "grad_norm": 0.6613568663597107, + "learning_rate": 3.36805434893864e-05, + "loss": 0.0982, + "step": 72380 + }, + { + "epoch": 2.6306417617559417, + "grad_norm": 2.211897611618042, + "learning_rate": 3.367567684745745e-05, + "loss": 0.0877, + "step": 72390 + }, + { + "epoch": 2.6310051602587397, + "grad_norm": 2.100520372390747, + "learning_rate": 3.367080983172446e-05, + "loss": 0.1534, + "step": 72400 + }, + { + "epoch": 2.6313685587615376, + "grad_norm": 0.6530410647392273, + "learning_rate": 3.366594244239713e-05, + "loss": 0.0819, + "step": 72410 + }, + { + "epoch": 2.631731957264336, + "grad_norm": 0.8508390188217163, + "learning_rate": 3.366107467968517e-05, + "loss": 1.7912, + "step": 72420 + }, + { + "epoch": 2.6320953557671345, + "grad_norm": 2.8727540969848633, + "learning_rate": 3.365620654379831e-05, + "loss": 0.1243, + "step": 72430 + }, + { + "epoch": 2.6324587542699325, + "grad_norm": 0.42828473448753357, + "learning_rate": 3.3651338034946314e-05, + "loss": 0.1423, + "step": 72440 + }, + { + "epoch": 2.6328221527727305, + "grad_norm": 0.5628288388252258, + "learning_rate": 3.364646915333895e-05, + "loss": 0.4869, + "step": 72450 + }, + { + "epoch": 2.633185551275529, + "grad_norm": 0.48309531807899475, + "learning_rate": 3.364159989918598e-05, + "loss": 0.095, + "step": 72460 + }, + { + "epoch": 2.633548949778327, + "grad_norm": 0.5635913014411926, + "learning_rate": 3.3636730272697234e-05, + "loss": 0.0948, + "step": 72470 + }, + { + "epoch": 2.6339123482811253, + "grad_norm": 0.7630922198295593, + "learning_rate": 3.3631860274082504e-05, + "loss": 0.108, + "step": 72480 + }, + { + "epoch": 2.6342757467839233, + "grad_norm": 1.6271787881851196, + "learning_rate": 3.3626989903551626e-05, + "loss": 0.1169, + "step": 72490 + }, + { + "epoch": 2.6346391452867213, + "grad_norm": 1.0612133741378784, + "learning_rate": 3.3622119161314446e-05, + "loss": 0.1574, + "step": 72500 + }, + { + "epoch": 2.6350025437895197, + "grad_norm": 0.5140257477760315, + "learning_rate": 3.361724804758083e-05, + "loss": 0.0765, + "step": 72510 + }, + { + "epoch": 2.6353659422923177, + "grad_norm": 1.7995847463607788, + "learning_rate": 3.361237656256066e-05, + "loss": 0.1069, + "step": 72520 + }, + { + "epoch": 2.635729340795116, + "grad_norm": 0.6248586177825928, + "learning_rate": 3.360750470646383e-05, + "loss": 0.0842, + "step": 72530 + }, + { + "epoch": 2.636092739297914, + "grad_norm": 0.8050362467765808, + "learning_rate": 3.360263247950023e-05, + "loss": 0.2037, + "step": 72540 + }, + { + "epoch": 2.636456137800712, + "grad_norm": 1.0852928161621094, + "learning_rate": 3.359775988187983e-05, + "loss": 0.0985, + "step": 72550 + }, + { + "epoch": 2.6368195363035105, + "grad_norm": 0.591783344745636, + "learning_rate": 3.359288691381253e-05, + "loss": 0.0948, + "step": 72560 + }, + { + "epoch": 2.6371829348063085, + "grad_norm": 0.6417847275733948, + "learning_rate": 3.358801357550831e-05, + "loss": 0.1011, + "step": 72570 + }, + { + "epoch": 2.637546333309107, + "grad_norm": 0.8405566811561584, + "learning_rate": 3.358313986717714e-05, + "loss": 0.1016, + "step": 72580 + }, + { + "epoch": 2.637909731811905, + "grad_norm": 1.3981549739837646, + "learning_rate": 3.357826578902901e-05, + "loss": 0.1164, + "step": 72590 + }, + { + "epoch": 2.638273130314703, + "grad_norm": 0.6139928698539734, + "learning_rate": 3.357339134127393e-05, + "loss": 0.091, + "step": 72600 + }, + { + "epoch": 2.638273130314703, + "eval_loss": 0.3287167251110077, + "eval_runtime": 180.0474, + "eval_samples_per_second": 41.178, + "eval_steps_per_second": 5.149, + "eval_wer": 0.15324849783070416, + "step": 72600 + }, + { + "epoch": 2.6386365288175013, + "grad_norm": 0.5749704241752625, + "learning_rate": 3.356851652412193e-05, + "loss": 0.1381, + "step": 72610 + }, + { + "epoch": 2.6389999273202993, + "grad_norm": 0.48276287317276, + "learning_rate": 3.3563641337783035e-05, + "loss": 0.1009, + "step": 72620 + }, + { + "epoch": 2.6393633258230977, + "grad_norm": 1.9593979120254517, + "learning_rate": 3.35587657824673e-05, + "loss": 0.1092, + "step": 72630 + }, + { + "epoch": 2.6397267243258957, + "grad_norm": 1.3498990535736084, + "learning_rate": 3.35538898583848e-05, + "loss": 0.1033, + "step": 72640 + }, + { + "epoch": 2.6400901228286937, + "grad_norm": 4.862055778503418, + "learning_rate": 3.354901356574563e-05, + "loss": 0.0958, + "step": 72650 + }, + { + "epoch": 2.640453521331492, + "grad_norm": 0.3533124029636383, + "learning_rate": 3.354413690475987e-05, + "loss": 0.0717, + "step": 72660 + }, + { + "epoch": 2.64081691983429, + "grad_norm": 1.32888662815094, + "learning_rate": 3.3539259875637664e-05, + "loss": 0.1151, + "step": 72670 + }, + { + "epoch": 2.6411803183370886, + "grad_norm": 0.8028721213340759, + "learning_rate": 3.353438247858912e-05, + "loss": 0.1194, + "step": 72680 + }, + { + "epoch": 2.6415437168398865, + "grad_norm": 0.8875879645347595, + "learning_rate": 3.352950471382441e-05, + "loss": 0.1026, + "step": 72690 + }, + { + "epoch": 2.6419071153426845, + "grad_norm": 0.956411600112915, + "learning_rate": 3.3524626581553684e-05, + "loss": 0.0575, + "step": 72700 + }, + { + "epoch": 2.642270513845483, + "grad_norm": 2.3979716300964355, + "learning_rate": 3.351974808198713e-05, + "loss": 0.1078, + "step": 72710 + }, + { + "epoch": 2.6426339123482814, + "grad_norm": 3.241748571395874, + "learning_rate": 3.351486921533495e-05, + "loss": 0.1079, + "step": 72720 + }, + { + "epoch": 2.6429973108510794, + "grad_norm": 0.5039170384407043, + "learning_rate": 3.350998998180735e-05, + "loss": 0.112, + "step": 72730 + }, + { + "epoch": 2.6433607093538773, + "grad_norm": 1.3268622159957886, + "learning_rate": 3.350511038161456e-05, + "loss": 0.115, + "step": 72740 + }, + { + "epoch": 2.6437241078566758, + "grad_norm": 0.8192645907402039, + "learning_rate": 3.350023041496682e-05, + "loss": 0.0873, + "step": 72750 + }, + { + "epoch": 2.6440875063594738, + "grad_norm": 0.7651393413543701, + "learning_rate": 3.34953500820744e-05, + "loss": 0.0978, + "step": 72760 + }, + { + "epoch": 2.644450904862272, + "grad_norm": 3.417607069015503, + "learning_rate": 3.3490469383147564e-05, + "loss": 0.1212, + "step": 72770 + }, + { + "epoch": 2.64481430336507, + "grad_norm": 6.00585412979126, + "learning_rate": 3.348558831839661e-05, + "loss": 0.1212, + "step": 72780 + }, + { + "epoch": 2.645177701867868, + "grad_norm": 0.36312735080718994, + "learning_rate": 3.3480706888031865e-05, + "loss": 0.1236, + "step": 72790 + }, + { + "epoch": 2.6455411003706666, + "grad_norm": 0.6871273517608643, + "learning_rate": 3.347582509226362e-05, + "loss": 0.0875, + "step": 72800 + }, + { + "epoch": 2.6459044988734646, + "grad_norm": 1.383457899093628, + "learning_rate": 3.3470942931302236e-05, + "loss": 0.1213, + "step": 72810 + }, + { + "epoch": 2.646267897376263, + "grad_norm": 0.7942748665809631, + "learning_rate": 3.346606040535805e-05, + "loss": 0.0837, + "step": 72820 + }, + { + "epoch": 2.646631295879061, + "grad_norm": 1.3881112337112427, + "learning_rate": 3.346117751464146e-05, + "loss": 0.085, + "step": 72830 + }, + { + "epoch": 2.646994694381859, + "grad_norm": 2.80416202545166, + "learning_rate": 3.345629425936283e-05, + "loss": 4.2814, + "step": 72840 + }, + { + "epoch": 2.6473580928846574, + "grad_norm": 0.7120780348777771, + "learning_rate": 3.345141063973256e-05, + "loss": 0.0874, + "step": 72850 + }, + { + "epoch": 2.6477214913874554, + "grad_norm": 1.7287395000457764, + "learning_rate": 3.344652665596108e-05, + "loss": 0.1066, + "step": 72860 + }, + { + "epoch": 2.648084889890254, + "grad_norm": 0.6621074080467224, + "learning_rate": 3.344164230825882e-05, + "loss": 0.1096, + "step": 72870 + }, + { + "epoch": 2.648448288393052, + "grad_norm": 1.1361407041549683, + "learning_rate": 3.343675759683623e-05, + "loss": 0.1008, + "step": 72880 + }, + { + "epoch": 2.64881168689585, + "grad_norm": 0.6596959829330444, + "learning_rate": 3.3431872521903766e-05, + "loss": 0.1222, + "step": 72890 + }, + { + "epoch": 2.649175085398648, + "grad_norm": 1.1007713079452515, + "learning_rate": 3.342698708367192e-05, + "loss": 0.0837, + "step": 72900 + }, + { + "epoch": 2.649538483901446, + "grad_norm": 0.754059910774231, + "learning_rate": 3.342210128235119e-05, + "loss": 0.1006, + "step": 72910 + }, + { + "epoch": 2.6499018824042446, + "grad_norm": 0.3821747303009033, + "learning_rate": 3.341721511815208e-05, + "loss": 0.1176, + "step": 72920 + }, + { + "epoch": 2.6502652809070426, + "grad_norm": 1.3366892337799072, + "learning_rate": 3.341232859128511e-05, + "loss": 0.0974, + "step": 72930 + }, + { + "epoch": 2.6506286794098406, + "grad_norm": 0.8863315582275391, + "learning_rate": 3.340744170196084e-05, + "loss": 0.1181, + "step": 72940 + }, + { + "epoch": 2.650992077912639, + "grad_norm": 0.8968802094459534, + "learning_rate": 3.3402554450389826e-05, + "loss": 0.1012, + "step": 72950 + }, + { + "epoch": 2.651355476415437, + "grad_norm": 0.6080183982849121, + "learning_rate": 3.339766683678262e-05, + "loss": 0.0997, + "step": 72960 + }, + { + "epoch": 2.6517188749182354, + "grad_norm": 0.8404228687286377, + "learning_rate": 3.339277886134985e-05, + "loss": 0.098, + "step": 72970 + }, + { + "epoch": 2.6520822734210334, + "grad_norm": 0.5841569900512695, + "learning_rate": 3.338789052430208e-05, + "loss": 0.1032, + "step": 72980 + }, + { + "epoch": 2.6524456719238314, + "grad_norm": 1.0763466358184814, + "learning_rate": 3.3383001825849966e-05, + "loss": 0.0971, + "step": 72990 + }, + { + "epoch": 2.65280907042663, + "grad_norm": 2.3036322593688965, + "learning_rate": 3.337811276620412e-05, + "loss": 1.71, + "step": 73000 + }, + { + "epoch": 2.6531724689294283, + "grad_norm": 0.29687023162841797, + "learning_rate": 3.337322334557521e-05, + "loss": 0.1814, + "step": 73010 + }, + { + "epoch": 2.6535358674322262, + "grad_norm": 1.0260635614395142, + "learning_rate": 3.3368333564173905e-05, + "loss": 0.1138, + "step": 73020 + }, + { + "epoch": 2.6538992659350242, + "grad_norm": 1.9731409549713135, + "learning_rate": 3.3363443422210875e-05, + "loss": 0.1022, + "step": 73030 + }, + { + "epoch": 2.6542626644378227, + "grad_norm": 1.9720983505249023, + "learning_rate": 3.335855291989682e-05, + "loss": 0.1169, + "step": 73040 + }, + { + "epoch": 2.6546260629406206, + "grad_norm": 0.8619999885559082, + "learning_rate": 3.335366205744246e-05, + "loss": 0.0857, + "step": 73050 + }, + { + "epoch": 2.654989461443419, + "grad_norm": 1.4044042825698853, + "learning_rate": 3.334877083505853e-05, + "loss": 0.0945, + "step": 73060 + }, + { + "epoch": 2.655352859946217, + "grad_norm": 0.6870516538619995, + "learning_rate": 3.3343879252955765e-05, + "loss": 0.0916, + "step": 73070 + }, + { + "epoch": 2.655716258449015, + "grad_norm": 1.0098764896392822, + "learning_rate": 3.3338987311344935e-05, + "loss": 0.1125, + "step": 73080 + }, + { + "epoch": 2.6560796569518135, + "grad_norm": 3.241283655166626, + "learning_rate": 3.333409501043681e-05, + "loss": 0.1192, + "step": 73090 + }, + { + "epoch": 2.6564430554546115, + "grad_norm": 0.5080071687698364, + "learning_rate": 3.332920235044219e-05, + "loss": 0.0923, + "step": 73100 + }, + { + "epoch": 2.65680645395741, + "grad_norm": 0.7751922607421875, + "learning_rate": 3.332430933157187e-05, + "loss": 0.1025, + "step": 73110 + }, + { + "epoch": 2.657169852460208, + "grad_norm": 1.3310539722442627, + "learning_rate": 3.3319415954036674e-05, + "loss": 0.1309, + "step": 73120 + }, + { + "epoch": 2.657533250963006, + "grad_norm": 2.5508885383605957, + "learning_rate": 3.331452221804745e-05, + "loss": 0.1121, + "step": 73130 + }, + { + "epoch": 2.6578966494658043, + "grad_norm": 4.273713111877441, + "learning_rate": 3.330962812381505e-05, + "loss": 0.1043, + "step": 73140 + }, + { + "epoch": 2.6582600479686023, + "grad_norm": 0.32448074221611023, + "learning_rate": 3.3304733671550336e-05, + "loss": 0.865, + "step": 73150 + }, + { + "epoch": 2.6586234464714007, + "grad_norm": 0.39604759216308594, + "learning_rate": 3.329983886146419e-05, + "loss": 0.0736, + "step": 73160 + }, + { + "epoch": 2.6589868449741987, + "grad_norm": 2.2893431186676025, + "learning_rate": 3.3294943693767536e-05, + "loss": 0.1108, + "step": 73170 + }, + { + "epoch": 2.6593502434769967, + "grad_norm": 1.7954707145690918, + "learning_rate": 3.3290048168671256e-05, + "loss": 0.0905, + "step": 73180 + }, + { + "epoch": 2.659713641979795, + "grad_norm": 0.5648924708366394, + "learning_rate": 3.3285152286386305e-05, + "loss": 0.1076, + "step": 73190 + }, + { + "epoch": 2.660077040482593, + "grad_norm": 0.6963376998901367, + "learning_rate": 3.3280256047123614e-05, + "loss": 0.0812, + "step": 73200 + }, + { + "epoch": 2.660077040482593, + "eval_loss": 0.3232385516166687, + "eval_runtime": 181.4857, + "eval_samples_per_second": 40.852, + "eval_steps_per_second": 5.108, + "eval_wer": 0.1502078530324759, + "step": 73200 + }, + { + "epoch": 2.6604404389853915, + "grad_norm": 15.696502685546875, + "learning_rate": 3.3275359451094157e-05, + "loss": 0.0925, + "step": 73210 + }, + { + "epoch": 2.6608038374881895, + "grad_norm": 0.43037521839141846, + "learning_rate": 3.327046249850891e-05, + "loss": 0.1148, + "step": 73220 + }, + { + "epoch": 2.6611672359909875, + "grad_norm": 2.093749523162842, + "learning_rate": 3.326556518957885e-05, + "loss": 0.098, + "step": 73230 + }, + { + "epoch": 2.661530634493786, + "grad_norm": 0.5571810603141785, + "learning_rate": 3.3260667524514996e-05, + "loss": 0.1274, + "step": 73240 + }, + { + "epoch": 2.661894032996584, + "grad_norm": 0.46131211519241333, + "learning_rate": 3.3255769503528374e-05, + "loss": 0.0872, + "step": 73250 + }, + { + "epoch": 2.6622574314993823, + "grad_norm": 2.3554348945617676, + "learning_rate": 3.325087112683002e-05, + "loss": 0.1181, + "step": 73260 + }, + { + "epoch": 2.6626208300021803, + "grad_norm": 0.8151417970657349, + "learning_rate": 3.324597239463097e-05, + "loss": 0.1176, + "step": 73270 + }, + { + "epoch": 2.6629842285049783, + "grad_norm": 0.97243332862854, + "learning_rate": 3.324107330714233e-05, + "loss": 0.1183, + "step": 73280 + }, + { + "epoch": 2.6633476270077767, + "grad_norm": 0.8408851623535156, + "learning_rate": 3.3236173864575154e-05, + "loss": 0.1119, + "step": 73290 + }, + { + "epoch": 2.663711025510575, + "grad_norm": 0.8413365483283997, + "learning_rate": 3.323127406714055e-05, + "loss": 0.1135, + "step": 73300 + }, + { + "epoch": 2.664074424013373, + "grad_norm": 1.1940609216690063, + "learning_rate": 3.3226373915049636e-05, + "loss": 0.114, + "step": 73310 + }, + { + "epoch": 2.664437822516171, + "grad_norm": 0.761283814907074, + "learning_rate": 3.3221473408513534e-05, + "loss": 0.0885, + "step": 73320 + }, + { + "epoch": 2.6648012210189695, + "grad_norm": 7.94679594039917, + "learning_rate": 3.3216572547743396e-05, + "loss": 0.0983, + "step": 73330 + }, + { + "epoch": 2.6651646195217675, + "grad_norm": 0.7168159484863281, + "learning_rate": 3.321167133295038e-05, + "loss": 0.1152, + "step": 73340 + }, + { + "epoch": 2.665528018024566, + "grad_norm": 1.2092477083206177, + "learning_rate": 3.3206769764345676e-05, + "loss": 0.0965, + "step": 73350 + }, + { + "epoch": 2.665891416527364, + "grad_norm": 0.47513625025749207, + "learning_rate": 3.320186784214045e-05, + "loss": 0.4253, + "step": 73360 + }, + { + "epoch": 2.666254815030162, + "grad_norm": 1.22835111618042, + "learning_rate": 3.319696556654592e-05, + "loss": 0.1073, + "step": 73370 + }, + { + "epoch": 2.6666182135329604, + "grad_norm": 0.9767407774925232, + "learning_rate": 3.319206293777332e-05, + "loss": 0.1118, + "step": 73380 + }, + { + "epoch": 2.6669816120357583, + "grad_norm": 0.8150458335876465, + "learning_rate": 3.318715995603387e-05, + "loss": 0.0999, + "step": 73390 + }, + { + "epoch": 2.6673450105385568, + "grad_norm": 0.8495771288871765, + "learning_rate": 3.3182256621538826e-05, + "loss": 0.0753, + "step": 73400 + }, + { + "epoch": 2.6677084090413548, + "grad_norm": 1.7628092765808105, + "learning_rate": 3.317735293449946e-05, + "loss": 0.1248, + "step": 73410 + }, + { + "epoch": 2.6680718075441527, + "grad_norm": 0.7195779085159302, + "learning_rate": 3.317244889512704e-05, + "loss": 0.1246, + "step": 73420 + }, + { + "epoch": 2.668435206046951, + "grad_norm": 1.0770323276519775, + "learning_rate": 3.316754450363289e-05, + "loss": 0.0859, + "step": 73430 + }, + { + "epoch": 2.668798604549749, + "grad_norm": 0.6442772746086121, + "learning_rate": 3.31626397602283e-05, + "loss": 0.1092, + "step": 73440 + }, + { + "epoch": 2.6691620030525476, + "grad_norm": 0.9785648584365845, + "learning_rate": 3.315773466512461e-05, + "loss": 0.1202, + "step": 73450 + }, + { + "epoch": 2.6695254015553456, + "grad_norm": 0.7215647101402283, + "learning_rate": 3.315282921853316e-05, + "loss": 0.1158, + "step": 73460 + }, + { + "epoch": 2.6698888000581436, + "grad_norm": 0.5394707918167114, + "learning_rate": 3.31479234206653e-05, + "loss": 0.1174, + "step": 73470 + }, + { + "epoch": 2.670252198560942, + "grad_norm": 0.6166718602180481, + "learning_rate": 3.3143017271732416e-05, + "loss": 0.1115, + "step": 73480 + }, + { + "epoch": 2.67061559706374, + "grad_norm": 0.7951743006706238, + "learning_rate": 3.3138110771945876e-05, + "loss": 0.0974, + "step": 73490 + }, + { + "epoch": 2.6709789955665384, + "grad_norm": 1.5483492612838745, + "learning_rate": 3.313320392151711e-05, + "loss": 0.0771, + "step": 73500 + }, + { + "epoch": 2.6713423940693364, + "grad_norm": 0.6423888206481934, + "learning_rate": 3.3128296720657524e-05, + "loss": 0.0939, + "step": 73510 + }, + { + "epoch": 2.6717057925721344, + "grad_norm": 0.5538840293884277, + "learning_rate": 3.3123389169578556e-05, + "loss": 0.108, + "step": 73520 + }, + { + "epoch": 2.672069191074933, + "grad_norm": 0.5549068450927734, + "learning_rate": 3.311848126849165e-05, + "loss": 0.1466, + "step": 73530 + }, + { + "epoch": 2.6724325895777308, + "grad_norm": 0.7564308643341064, + "learning_rate": 3.311357301760827e-05, + "loss": 0.1241, + "step": 73540 + }, + { + "epoch": 2.672795988080529, + "grad_norm": 0.683080792427063, + "learning_rate": 3.310866441713989e-05, + "loss": 0.0924, + "step": 73550 + }, + { + "epoch": 2.673159386583327, + "grad_norm": 0.5799837708473206, + "learning_rate": 3.3103755467298024e-05, + "loss": 0.1, + "step": 73560 + }, + { + "epoch": 2.673522785086125, + "grad_norm": 0.9366778135299683, + "learning_rate": 3.309884616829416e-05, + "loss": 0.0942, + "step": 73570 + }, + { + "epoch": 2.6738861835889236, + "grad_norm": 2.1698505878448486, + "learning_rate": 3.309393652033984e-05, + "loss": 0.1085, + "step": 73580 + }, + { + "epoch": 2.674249582091722, + "grad_norm": 0.8438801169395447, + "learning_rate": 3.308902652364658e-05, + "loss": 1.4071, + "step": 73590 + }, + { + "epoch": 2.67461298059452, + "grad_norm": 0.5139632225036621, + "learning_rate": 3.308411617842595e-05, + "loss": 0.0993, + "step": 73600 + }, + { + "epoch": 2.674976379097318, + "grad_norm": 0.5203242301940918, + "learning_rate": 3.3079205484889534e-05, + "loss": 0.0798, + "step": 73610 + }, + { + "epoch": 2.6753397776001164, + "grad_norm": 0.8717512488365173, + "learning_rate": 3.307429444324888e-05, + "loss": 0.1186, + "step": 73620 + }, + { + "epoch": 2.6757031761029144, + "grad_norm": 2.0422654151916504, + "learning_rate": 3.3069383053715617e-05, + "loss": 0.1053, + "step": 73630 + }, + { + "epoch": 2.676066574605713, + "grad_norm": 1.5940972566604614, + "learning_rate": 3.306447131650135e-05, + "loss": 0.129, + "step": 73640 + }, + { + "epoch": 2.676429973108511, + "grad_norm": 0.8113188743591309, + "learning_rate": 3.30595592318177e-05, + "loss": 0.0888, + "step": 73650 + }, + { + "epoch": 2.676793371611309, + "grad_norm": 0.5296781063079834, + "learning_rate": 3.305464679987632e-05, + "loss": 0.0927, + "step": 73660 + }, + { + "epoch": 2.6771567701141072, + "grad_norm": 1.491728663444519, + "learning_rate": 3.304973402088887e-05, + "loss": 0.1386, + "step": 73670 + }, + { + "epoch": 2.6775201686169052, + "grad_norm": 0.6483957767486572, + "learning_rate": 3.304482089506703e-05, + "loss": 0.0994, + "step": 73680 + }, + { + "epoch": 2.6778835671197037, + "grad_norm": 1.6856306791305542, + "learning_rate": 3.303990742262247e-05, + "loss": 0.1227, + "step": 73690 + }, + { + "epoch": 2.6782469656225016, + "grad_norm": 1.752510905265808, + "learning_rate": 3.3034993603766906e-05, + "loss": 0.0956, + "step": 73700 + }, + { + "epoch": 2.6786103641252996, + "grad_norm": 0.6596788167953491, + "learning_rate": 3.303007943871206e-05, + "loss": 0.1126, + "step": 73710 + }, + { + "epoch": 2.678973762628098, + "grad_norm": 1.1651583909988403, + "learning_rate": 3.302516492766966e-05, + "loss": 0.1116, + "step": 73720 + }, + { + "epoch": 2.679337161130896, + "grad_norm": 25.031896591186523, + "learning_rate": 3.302025007085146e-05, + "loss": 0.1139, + "step": 73730 + }, + { + "epoch": 2.6797005596336945, + "grad_norm": 0.4031795561313629, + "learning_rate": 3.301533486846922e-05, + "loss": 0.0996, + "step": 73740 + }, + { + "epoch": 2.6800639581364925, + "grad_norm": 0.6333705186843872, + "learning_rate": 3.301041932073472e-05, + "loss": 0.4087, + "step": 73750 + }, + { + "epoch": 2.6804273566392904, + "grad_norm": 0.9752139449119568, + "learning_rate": 3.300550342785975e-05, + "loss": 0.1121, + "step": 73760 + }, + { + "epoch": 2.680790755142089, + "grad_norm": 0.5102497339248657, + "learning_rate": 3.300058719005612e-05, + "loss": 0.1125, + "step": 73770 + }, + { + "epoch": 2.681154153644887, + "grad_norm": 0.661267876625061, + "learning_rate": 3.299567060753565e-05, + "loss": 0.116, + "step": 73780 + }, + { + "epoch": 2.6815175521476853, + "grad_norm": 4.50182580947876, + "learning_rate": 3.2990753680510194e-05, + "loss": 0.1047, + "step": 73790 + }, + { + "epoch": 2.6818809506504833, + "grad_norm": 1.0736654996871948, + "learning_rate": 3.298583640919159e-05, + "loss": 0.0967, + "step": 73800 + }, + { + "epoch": 2.6818809506504833, + "eval_loss": 0.3250260353088379, + "eval_runtime": 180.3631, + "eval_samples_per_second": 41.106, + "eval_steps_per_second": 5.14, + "eval_wer": 0.150552761994663, + "step": 73800 + }, + { + "epoch": 2.6822443491532812, + "grad_norm": 0.7733132839202881, + "learning_rate": 3.2980918793791706e-05, + "loss": 0.0863, + "step": 73810 + }, + { + "epoch": 2.6826077476560797, + "grad_norm": 1.0003288984298706, + "learning_rate": 3.2976000834522424e-05, + "loss": 0.1159, + "step": 73820 + }, + { + "epoch": 2.6829711461588777, + "grad_norm": 1.0312319993972778, + "learning_rate": 3.2971082531595646e-05, + "loss": 0.1081, + "step": 73830 + }, + { + "epoch": 2.683334544661676, + "grad_norm": 0.9150594472885132, + "learning_rate": 3.2966163885223286e-05, + "loss": 0.1042, + "step": 73840 + }, + { + "epoch": 2.683697943164474, + "grad_norm": 1.0201760530471802, + "learning_rate": 3.2961244895617265e-05, + "loss": 0.0894, + "step": 73850 + }, + { + "epoch": 2.684061341667272, + "grad_norm": 0.7284217476844788, + "learning_rate": 3.295632556298953e-05, + "loss": 0.0844, + "step": 73860 + }, + { + "epoch": 2.6844247401700705, + "grad_norm": 0.5448662042617798, + "learning_rate": 3.2951405887552033e-05, + "loss": 0.0978, + "step": 73870 + }, + { + "epoch": 2.684788138672869, + "grad_norm": 0.558380126953125, + "learning_rate": 3.294648586951675e-05, + "loss": 0.1168, + "step": 73880 + }, + { + "epoch": 2.685151537175667, + "grad_norm": 0.5779474377632141, + "learning_rate": 3.2941565509095676e-05, + "loss": 0.0737, + "step": 73890 + }, + { + "epoch": 2.685514935678465, + "grad_norm": 1.018198847770691, + "learning_rate": 3.293664480650078e-05, + "loss": 0.0783, + "step": 73900 + }, + { + "epoch": 2.6858783341812633, + "grad_norm": 1.5002427101135254, + "learning_rate": 3.293172376194411e-05, + "loss": 0.103, + "step": 73910 + }, + { + "epoch": 2.6862417326840613, + "grad_norm": 0.6482456922531128, + "learning_rate": 3.2926802375637675e-05, + "loss": 0.1128, + "step": 73920 + }, + { + "epoch": 2.6866051311868597, + "grad_norm": 0.38328006863594055, + "learning_rate": 3.292188064779354e-05, + "loss": 0.09, + "step": 73930 + }, + { + "epoch": 2.6869685296896577, + "grad_norm": 0.3640558421611786, + "learning_rate": 3.2916958578623746e-05, + "loss": 0.0808, + "step": 73940 + }, + { + "epoch": 2.6873319281924557, + "grad_norm": 0.7267985343933105, + "learning_rate": 3.2912036168340376e-05, + "loss": 0.0819, + "step": 73950 + }, + { + "epoch": 2.687695326695254, + "grad_norm": 0.9190502166748047, + "learning_rate": 3.2907113417155525e-05, + "loss": 0.0973, + "step": 73960 + }, + { + "epoch": 2.688058725198052, + "grad_norm": 1.3987020254135132, + "learning_rate": 3.290219032528128e-05, + "loss": 0.0987, + "step": 73970 + }, + { + "epoch": 2.6884221237008505, + "grad_norm": 0.7504600286483765, + "learning_rate": 3.289726689292978e-05, + "loss": 0.1197, + "step": 73980 + }, + { + "epoch": 2.6887855222036485, + "grad_norm": 0.8399171829223633, + "learning_rate": 3.2892343120313144e-05, + "loss": 0.1476, + "step": 73990 + }, + { + "epoch": 2.6891489207064465, + "grad_norm": 0.37751835584640503, + "learning_rate": 3.288741900764353e-05, + "loss": 0.0819, + "step": 74000 + }, + { + "epoch": 2.689512319209245, + "grad_norm": 49.60390853881836, + "learning_rate": 3.288249455513308e-05, + "loss": 0.8904, + "step": 74010 + }, + { + "epoch": 2.689875717712043, + "grad_norm": 0.541854977607727, + "learning_rate": 3.2877569762994e-05, + "loss": 0.1234, + "step": 74020 + }, + { + "epoch": 2.6902391162148414, + "grad_norm": 1.2786191701889038, + "learning_rate": 3.2872644631438466e-05, + "loss": 0.0906, + "step": 74030 + }, + { + "epoch": 2.6906025147176393, + "grad_norm": 0.8454063534736633, + "learning_rate": 3.2867719160678676e-05, + "loss": 0.1212, + "step": 74040 + }, + { + "epoch": 2.6909659132204373, + "grad_norm": 0.7831797003746033, + "learning_rate": 3.286279335092687e-05, + "loss": 0.0792, + "step": 74050 + }, + { + "epoch": 2.6913293117232358, + "grad_norm": 0.46578606963157654, + "learning_rate": 3.285786720239526e-05, + "loss": 0.0707, + "step": 74060 + }, + { + "epoch": 2.6916927102260337, + "grad_norm": 0.9215951561927795, + "learning_rate": 3.285294071529613e-05, + "loss": 0.1329, + "step": 74070 + }, + { + "epoch": 2.692056108728832, + "grad_norm": 0.5061529278755188, + "learning_rate": 3.284801388984171e-05, + "loss": 0.1024, + "step": 74080 + }, + { + "epoch": 2.69241950723163, + "grad_norm": 1.1045541763305664, + "learning_rate": 3.2843086726244307e-05, + "loss": 0.0937, + "step": 74090 + }, + { + "epoch": 2.692782905734428, + "grad_norm": 0.4563618004322052, + "learning_rate": 3.28381592247162e-05, + "loss": 0.0796, + "step": 74100 + }, + { + "epoch": 2.6931463042372266, + "grad_norm": 0.6136996150016785, + "learning_rate": 3.28332313854697e-05, + "loss": 0.0797, + "step": 74110 + }, + { + "epoch": 2.6935097027400245, + "grad_norm": 0.3746108114719391, + "learning_rate": 3.2828303208717125e-05, + "loss": 0.0847, + "step": 74120 + }, + { + "epoch": 2.693873101242823, + "grad_norm": 9.6745023727417, + "learning_rate": 3.282337469467082e-05, + "loss": 0.1017, + "step": 74130 + }, + { + "epoch": 2.694236499745621, + "grad_norm": 0.8015979528427124, + "learning_rate": 3.281844584354314e-05, + "loss": 0.7825, + "step": 74140 + }, + { + "epoch": 2.694599898248419, + "grad_norm": 0.7004625797271729, + "learning_rate": 3.281351665554644e-05, + "loss": 0.1076, + "step": 74150 + }, + { + "epoch": 2.6949632967512174, + "grad_norm": 0.49726399779319763, + "learning_rate": 3.2808587130893107e-05, + "loss": 0.09, + "step": 74160 + }, + { + "epoch": 2.695326695254016, + "grad_norm": 0.7523669600486755, + "learning_rate": 3.280365726979555e-05, + "loss": 0.1085, + "step": 74170 + }, + { + "epoch": 2.695690093756814, + "grad_norm": 0.8919450044631958, + "learning_rate": 3.279872707246615e-05, + "loss": 0.1102, + "step": 74180 + }, + { + "epoch": 2.6960534922596118, + "grad_norm": 2.7779541015625, + "learning_rate": 3.279379653911736e-05, + "loss": 0.1118, + "step": 74190 + }, + { + "epoch": 2.69641689076241, + "grad_norm": 1.5841718912124634, + "learning_rate": 3.27888656699616e-05, + "loss": 0.214, + "step": 74200 + }, + { + "epoch": 2.696780289265208, + "grad_norm": 1.1387628316879272, + "learning_rate": 3.278393446521134e-05, + "loss": 0.0854, + "step": 74210 + }, + { + "epoch": 2.6971436877680066, + "grad_norm": 3.4580607414245605, + "learning_rate": 3.277900292507904e-05, + "loss": 0.1282, + "step": 74220 + }, + { + "epoch": 2.6975070862708046, + "grad_norm": 1.5573906898498535, + "learning_rate": 3.277407104977717e-05, + "loss": 0.1394, + "step": 74230 + }, + { + "epoch": 2.6978704847736026, + "grad_norm": 1.1080759763717651, + "learning_rate": 3.276913883951824e-05, + "loss": 0.1507, + "step": 74240 + }, + { + "epoch": 2.698233883276401, + "grad_norm": 0.7681221961975098, + "learning_rate": 3.276420629451476e-05, + "loss": 0.0866, + "step": 74250 + }, + { + "epoch": 2.698597281779199, + "grad_norm": 0.5694213509559631, + "learning_rate": 3.275927341497927e-05, + "loss": 0.0853, + "step": 74260 + }, + { + "epoch": 2.6989606802819974, + "grad_norm": 1.044425129890442, + "learning_rate": 3.275434020112428e-05, + "loss": 0.1089, + "step": 74270 + }, + { + "epoch": 2.6993240787847954, + "grad_norm": 0.5060895681381226, + "learning_rate": 3.274940665316237e-05, + "loss": 0.0757, + "step": 74280 + }, + { + "epoch": 2.6996874772875934, + "grad_norm": 3.3999178409576416, + "learning_rate": 3.274447277130611e-05, + "loss": 0.112, + "step": 74290 + }, + { + "epoch": 2.700050875790392, + "grad_norm": 1.1808927059173584, + "learning_rate": 3.273953855576805e-05, + "loss": 0.0899, + "step": 74300 + }, + { + "epoch": 2.70041427429319, + "grad_norm": 0.481868177652359, + "learning_rate": 3.273460400676083e-05, + "loss": 0.0935, + "step": 74310 + }, + { + "epoch": 2.7007776727959882, + "grad_norm": 3.8246326446533203, + "learning_rate": 3.272966912449703e-05, + "loss": 0.094, + "step": 74320 + }, + { + "epoch": 2.7011410712987862, + "grad_norm": 0.8496780395507812, + "learning_rate": 3.27247339091893e-05, + "loss": 0.1554, + "step": 74330 + }, + { + "epoch": 2.701504469801584, + "grad_norm": 1.1947115659713745, + "learning_rate": 3.271979836105026e-05, + "loss": 0.1302, + "step": 74340 + }, + { + "epoch": 2.7018678683043826, + "grad_norm": 2.5246219635009766, + "learning_rate": 3.271486248029258e-05, + "loss": 0.088, + "step": 74350 + }, + { + "epoch": 2.7022312668071806, + "grad_norm": 1.4565434455871582, + "learning_rate": 3.270992626712893e-05, + "loss": 0.0862, + "step": 74360 + }, + { + "epoch": 2.702594665309979, + "grad_norm": 0.473636269569397, + "learning_rate": 3.270498972177198e-05, + "loss": 0.1168, + "step": 74370 + }, + { + "epoch": 2.702958063812777, + "grad_norm": 2.2672886848449707, + "learning_rate": 3.270005284443445e-05, + "loss": 0.2479, + "step": 74380 + }, + { + "epoch": 2.703321462315575, + "grad_norm": 2.7625412940979004, + "learning_rate": 3.269511563532903e-05, + "loss": 0.1127, + "step": 74390 + }, + { + "epoch": 2.7036848608183734, + "grad_norm": 1.6143194437026978, + "learning_rate": 3.2690178094668455e-05, + "loss": 0.1106, + "step": 74400 + }, + { + "epoch": 2.7036848608183734, + "eval_loss": 0.30357053875923157, + "eval_runtime": 179.9214, + "eval_samples_per_second": 41.207, + "eval_steps_per_second": 5.152, + "eval_wer": 0.15205946956632235, + "step": 74400 + }, + { + "epoch": 2.7040482593211714, + "grad_norm": 1.560831069946289, + "learning_rate": 3.268524022266547e-05, + "loss": 0.0948, + "step": 74410 + }, + { + "epoch": 2.70441165782397, + "grad_norm": 0.5551114082336426, + "learning_rate": 3.2680302019532836e-05, + "loss": 0.1299, + "step": 74420 + }, + { + "epoch": 2.704775056326768, + "grad_norm": 1.2796604633331299, + "learning_rate": 3.2675363485483304e-05, + "loss": 0.1122, + "step": 74430 + }, + { + "epoch": 2.705138454829566, + "grad_norm": 1.2246671915054321, + "learning_rate": 3.2670424620729675e-05, + "loss": 0.1183, + "step": 74440 + }, + { + "epoch": 2.7055018533323643, + "grad_norm": 2.03642201423645, + "learning_rate": 3.266548542548474e-05, + "loss": 0.0848, + "step": 74450 + }, + { + "epoch": 2.7058652518351627, + "grad_norm": 1.559633493423462, + "learning_rate": 3.2660545899961305e-05, + "loss": 0.0841, + "step": 74460 + }, + { + "epoch": 2.7062286503379607, + "grad_norm": 0.7185676097869873, + "learning_rate": 3.265560604437221e-05, + "loss": 0.104, + "step": 74470 + }, + { + "epoch": 2.7065920488407587, + "grad_norm": 0.627554178237915, + "learning_rate": 3.265066585893029e-05, + "loss": 0.1016, + "step": 74480 + }, + { + "epoch": 2.706955447343557, + "grad_norm": 0.7802649736404419, + "learning_rate": 3.26457253438484e-05, + "loss": 0.0987, + "step": 74490 + }, + { + "epoch": 2.707318845846355, + "grad_norm": 0.4407544732093811, + "learning_rate": 3.2640784499339404e-05, + "loss": 0.0948, + "step": 74500 + }, + { + "epoch": 2.7076822443491535, + "grad_norm": 0.7778434157371521, + "learning_rate": 3.263584332561619e-05, + "loss": 0.0943, + "step": 74510 + }, + { + "epoch": 2.7080456428519515, + "grad_norm": 0.46505939960479736, + "learning_rate": 3.263090182289166e-05, + "loss": 0.1109, + "step": 74520 + }, + { + "epoch": 2.7084090413547495, + "grad_norm": 0.6025874614715576, + "learning_rate": 3.2625959991378715e-05, + "loss": 0.0936, + "step": 74530 + }, + { + "epoch": 2.708772439857548, + "grad_norm": 1.0582184791564941, + "learning_rate": 3.262101783129029e-05, + "loss": 0.0809, + "step": 74540 + }, + { + "epoch": 2.709135838360346, + "grad_norm": 1.1490769386291504, + "learning_rate": 3.261607534283932e-05, + "loss": 0.0848, + "step": 74550 + }, + { + "epoch": 2.7094992368631443, + "grad_norm": 0.6131067276000977, + "learning_rate": 3.2611132526238766e-05, + "loss": 0.084, + "step": 74560 + }, + { + "epoch": 2.7098626353659423, + "grad_norm": 1.1936076879501343, + "learning_rate": 3.26061893817016e-05, + "loss": 0.0876, + "step": 74570 + }, + { + "epoch": 2.7102260338687403, + "grad_norm": 1.389906406402588, + "learning_rate": 3.260124590944078e-05, + "loss": 0.0985, + "step": 74580 + }, + { + "epoch": 2.7105894323715387, + "grad_norm": 1.8453727960586548, + "learning_rate": 3.259630210966933e-05, + "loss": 0.0959, + "step": 74590 + }, + { + "epoch": 2.7109528308743367, + "grad_norm": 0.9837631583213806, + "learning_rate": 3.259135798260025e-05, + "loss": 0.0882, + "step": 74600 + }, + { + "epoch": 2.711316229377135, + "grad_norm": 1.1993751525878906, + "learning_rate": 3.2586413528446555e-05, + "loss": 0.0676, + "step": 74610 + }, + { + "epoch": 2.711679627879933, + "grad_norm": 6.072772026062012, + "learning_rate": 3.25814687474213e-05, + "loss": 0.0994, + "step": 74620 + }, + { + "epoch": 2.712043026382731, + "grad_norm": 1.2597107887268066, + "learning_rate": 3.257652363973753e-05, + "loss": 0.1082, + "step": 74630 + }, + { + "epoch": 2.7124064248855295, + "grad_norm": 1.3290597200393677, + "learning_rate": 3.257157820560831e-05, + "loss": 0.128, + "step": 74640 + }, + { + "epoch": 2.7127698233883275, + "grad_norm": 2.4594569206237793, + "learning_rate": 3.256663244524673e-05, + "loss": 0.0824, + "step": 74650 + }, + { + "epoch": 2.713133221891126, + "grad_norm": 0.3981126844882965, + "learning_rate": 3.256168635886588e-05, + "loss": 0.1033, + "step": 74660 + }, + { + "epoch": 2.713496620393924, + "grad_norm": 0.6342089772224426, + "learning_rate": 3.255673994667887e-05, + "loss": 0.1291, + "step": 74670 + }, + { + "epoch": 2.713860018896722, + "grad_norm": 1.3177196979522705, + "learning_rate": 3.2551793208898826e-05, + "loss": 0.1083, + "step": 74680 + }, + { + "epoch": 2.7142234173995203, + "grad_norm": 1.9709926843643188, + "learning_rate": 3.2546846145738873e-05, + "loss": 0.1416, + "step": 74690 + }, + { + "epoch": 2.7145868159023183, + "grad_norm": 2.019582986831665, + "learning_rate": 3.2541898757412174e-05, + "loss": 0.1671, + "step": 74700 + }, + { + "epoch": 2.7149502144051167, + "grad_norm": 0.7411203980445862, + "learning_rate": 3.25369510441319e-05, + "loss": 0.0905, + "step": 74710 + }, + { + "epoch": 2.7153136129079147, + "grad_norm": 0.7094716429710388, + "learning_rate": 3.2532003006111215e-05, + "loss": 0.1127, + "step": 74720 + }, + { + "epoch": 2.7156770114107127, + "grad_norm": 0.6261359453201294, + "learning_rate": 3.252705464356332e-05, + "loss": 0.0934, + "step": 74730 + }, + { + "epoch": 2.716040409913511, + "grad_norm": 0.4718007445335388, + "learning_rate": 3.252210595670142e-05, + "loss": 0.094, + "step": 74740 + }, + { + "epoch": 2.7164038084163096, + "grad_norm": 0.8705607652664185, + "learning_rate": 3.2517156945738734e-05, + "loss": 0.097, + "step": 74750 + }, + { + "epoch": 2.7167672069191076, + "grad_norm": 0.4735512435436249, + "learning_rate": 3.251220761088851e-05, + "loss": 0.0741, + "step": 74760 + }, + { + "epoch": 2.7171306054219055, + "grad_norm": 0.6891077756881714, + "learning_rate": 3.250725795236398e-05, + "loss": 0.1007, + "step": 74770 + }, + { + "epoch": 2.717494003924704, + "grad_norm": 2.493716239929199, + "learning_rate": 3.250230797037843e-05, + "loss": 0.0849, + "step": 74780 + }, + { + "epoch": 2.717857402427502, + "grad_norm": 1.8947222232818604, + "learning_rate": 3.249735766514512e-05, + "loss": 0.1433, + "step": 74790 + }, + { + "epoch": 2.7182208009303004, + "grad_norm": 0.5233703255653381, + "learning_rate": 3.2492407036877334e-05, + "loss": 0.0772, + "step": 74800 + }, + { + "epoch": 2.7185841994330984, + "grad_norm": 0.4244493544101715, + "learning_rate": 3.2487456085788395e-05, + "loss": 0.0771, + "step": 74810 + }, + { + "epoch": 2.7189475979358964, + "grad_norm": 0.5100713968276978, + "learning_rate": 3.24825048120916e-05, + "loss": 0.0915, + "step": 74820 + }, + { + "epoch": 2.719310996438695, + "grad_norm": 1.360521674156189, + "learning_rate": 3.2477553216000314e-05, + "loss": 0.1032, + "step": 74830 + }, + { + "epoch": 2.7196743949414928, + "grad_norm": 0.9262562990188599, + "learning_rate": 3.2472601297727853e-05, + "loss": 0.0992, + "step": 74840 + }, + { + "epoch": 2.720037793444291, + "grad_norm": 1.0304359197616577, + "learning_rate": 3.246764905748759e-05, + "loss": 1.7738, + "step": 74850 + }, + { + "epoch": 2.720401191947089, + "grad_norm": 0.621614933013916, + "learning_rate": 3.246269649549291e-05, + "loss": 0.0865, + "step": 74860 + }, + { + "epoch": 2.720764590449887, + "grad_norm": 0.544062077999115, + "learning_rate": 3.245774361195718e-05, + "loss": 0.0947, + "step": 74870 + }, + { + "epoch": 2.7211279889526856, + "grad_norm": 0.8662183284759521, + "learning_rate": 3.2452790407093814e-05, + "loss": 0.1065, + "step": 74880 + }, + { + "epoch": 2.7214913874554836, + "grad_norm": 0.6098036170005798, + "learning_rate": 3.244783688111622e-05, + "loss": 0.1058, + "step": 74890 + }, + { + "epoch": 2.721854785958282, + "grad_norm": 0.5739080309867859, + "learning_rate": 3.2442883034237845e-05, + "loss": 1.0575, + "step": 74900 + }, + { + "epoch": 2.72221818446108, + "grad_norm": 0.8640351891517639, + "learning_rate": 3.2437928866672124e-05, + "loss": 0.0948, + "step": 74910 + }, + { + "epoch": 2.722581582963878, + "grad_norm": 1.041825294494629, + "learning_rate": 3.2432974378632504e-05, + "loss": 0.1079, + "step": 74920 + }, + { + "epoch": 2.7229449814666764, + "grad_norm": 1.7764942646026611, + "learning_rate": 3.242801957033247e-05, + "loss": 0.1058, + "step": 74930 + }, + { + "epoch": 2.7233083799694744, + "grad_norm": 0.5012884140014648, + "learning_rate": 3.24230644419855e-05, + "loss": 0.0924, + "step": 74940 + }, + { + "epoch": 2.723671778472273, + "grad_norm": 1.0178241729736328, + "learning_rate": 3.241810899380509e-05, + "loss": 0.0794, + "step": 74950 + }, + { + "epoch": 2.724035176975071, + "grad_norm": 0.5228786468505859, + "learning_rate": 3.241315322600476e-05, + "loss": 0.0947, + "step": 74960 + }, + { + "epoch": 2.724398575477869, + "grad_norm": 0.7717702388763428, + "learning_rate": 3.2408197138798035e-05, + "loss": 0.1196, + "step": 74970 + }, + { + "epoch": 2.724761973980667, + "grad_norm": 0.5628354549407959, + "learning_rate": 3.240324073239846e-05, + "loss": 0.086, + "step": 74980 + }, + { + "epoch": 2.725125372483465, + "grad_norm": 0.535048246383667, + "learning_rate": 3.239828400701957e-05, + "loss": 0.1447, + "step": 74990 + }, + { + "epoch": 2.7254887709862636, + "grad_norm": 0.42276647686958313, + "learning_rate": 3.2393326962874953e-05, + "loss": 0.0784, + "step": 75000 + }, + { + "epoch": 2.7254887709862636, + "eval_loss": 0.3183054029941559, + "eval_runtime": 180.0805, + "eval_samples_per_second": 41.17, + "eval_steps_per_second": 5.148, + "eval_wer": 0.1470129068564271, + "step": 75000 + }, + { + "epoch": 2.7258521694890616, + "grad_norm": 1.1505578756332397, + "learning_rate": 3.238836960017818e-05, + "loss": 0.0862, + "step": 75010 + }, + { + "epoch": 2.7262155679918596, + "grad_norm": 2.7842421531677246, + "learning_rate": 3.238341191914285e-05, + "loss": 0.1111, + "step": 75020 + }, + { + "epoch": 2.726578966494658, + "grad_norm": 0.8966996073722839, + "learning_rate": 3.237845391998257e-05, + "loss": 0.085, + "step": 75030 + }, + { + "epoch": 2.7269423649974565, + "grad_norm": 2.870903491973877, + "learning_rate": 3.237349560291096e-05, + "loss": 0.103, + "step": 75040 + }, + { + "epoch": 2.7273057635002544, + "grad_norm": 1.8146476745605469, + "learning_rate": 3.236853696814167e-05, + "loss": 0.0806, + "step": 75050 + }, + { + "epoch": 2.7276691620030524, + "grad_norm": 1.370487928390503, + "learning_rate": 3.236357801588833e-05, + "loss": 0.0923, + "step": 75060 + }, + { + "epoch": 2.728032560505851, + "grad_norm": 0.7427790760993958, + "learning_rate": 3.235861874636462e-05, + "loss": 0.1101, + "step": 75070 + }, + { + "epoch": 2.728395959008649, + "grad_norm": 0.7309651970863342, + "learning_rate": 3.23536591597842e-05, + "loss": 0.1063, + "step": 75080 + }, + { + "epoch": 2.7287593575114473, + "grad_norm": 0.5744786262512207, + "learning_rate": 3.2348699256360784e-05, + "loss": 0.1202, + "step": 75090 + }, + { + "epoch": 2.7291227560142453, + "grad_norm": 0.7631998658180237, + "learning_rate": 3.234373903630806e-05, + "loss": 0.7446, + "step": 75100 + }, + { + "epoch": 2.7294861545170432, + "grad_norm": 0.4925616979598999, + "learning_rate": 3.233877849983974e-05, + "loss": 0.0862, + "step": 75110 + }, + { + "epoch": 2.7298495530198417, + "grad_norm": 2.4710123538970947, + "learning_rate": 3.233381764716958e-05, + "loss": 0.0956, + "step": 75120 + }, + { + "epoch": 2.7302129515226397, + "grad_norm": 0.8318620920181274, + "learning_rate": 3.23288564785113e-05, + "loss": 0.1057, + "step": 75130 + }, + { + "epoch": 2.730576350025438, + "grad_norm": 0.6408945918083191, + "learning_rate": 3.2323894994078674e-05, + "loss": 0.123, + "step": 75140 + }, + { + "epoch": 2.730939748528236, + "grad_norm": 1.6067343950271606, + "learning_rate": 3.2318933194085474e-05, + "loss": 0.1345, + "step": 75150 + }, + { + "epoch": 2.731303147031034, + "grad_norm": 0.9870972633361816, + "learning_rate": 3.231397107874548e-05, + "loss": 0.0888, + "step": 75160 + }, + { + "epoch": 2.7316665455338325, + "grad_norm": 0.7536956071853638, + "learning_rate": 3.23090086482725e-05, + "loss": 0.1541, + "step": 75170 + }, + { + "epoch": 2.7320299440366305, + "grad_norm": 0.45174309611320496, + "learning_rate": 3.2304045902880334e-05, + "loss": 0.0858, + "step": 75180 + }, + { + "epoch": 2.732393342539429, + "grad_norm": 0.9457273483276367, + "learning_rate": 3.229908284278283e-05, + "loss": 0.1079, + "step": 75190 + }, + { + "epoch": 2.732756741042227, + "grad_norm": 0.7832821011543274, + "learning_rate": 3.229411946819381e-05, + "loss": 0.0844, + "step": 75200 + }, + { + "epoch": 2.733120139545025, + "grad_norm": 0.7583007216453552, + "learning_rate": 3.228915577932713e-05, + "loss": 0.1036, + "step": 75210 + }, + { + "epoch": 2.7334835380478233, + "grad_norm": 0.38852185010910034, + "learning_rate": 3.2284191776396675e-05, + "loss": 0.092, + "step": 75220 + }, + { + "epoch": 2.7338469365506213, + "grad_norm": 0.9784302115440369, + "learning_rate": 3.22792274596163e-05, + "loss": 0.1215, + "step": 75230 + }, + { + "epoch": 2.7342103350534197, + "grad_norm": 0.6491109728813171, + "learning_rate": 3.227426282919992e-05, + "loss": 0.118, + "step": 75240 + }, + { + "epoch": 2.7345737335562177, + "grad_norm": 1.2278261184692383, + "learning_rate": 3.226929788536143e-05, + "loss": 0.7961, + "step": 75250 + }, + { + "epoch": 2.7349371320590157, + "grad_norm": 0.7250826358795166, + "learning_rate": 3.226433262831477e-05, + "loss": 0.2077, + "step": 75260 + }, + { + "epoch": 2.735300530561814, + "grad_norm": 0.6161037683486938, + "learning_rate": 3.2259367058273855e-05, + "loss": 0.0958, + "step": 75270 + }, + { + "epoch": 2.735663929064612, + "grad_norm": 0.6900900602340698, + "learning_rate": 3.2254401175452646e-05, + "loss": 0.0848, + "step": 75280 + }, + { + "epoch": 2.7360273275674105, + "grad_norm": 1.1437780857086182, + "learning_rate": 3.2249434980065106e-05, + "loss": 0.1336, + "step": 75290 + }, + { + "epoch": 2.7363907260702085, + "grad_norm": 2.3000125885009766, + "learning_rate": 3.2244468472325194e-05, + "loss": 0.0859, + "step": 75300 + }, + { + "epoch": 2.7367541245730065, + "grad_norm": 1.2666622400283813, + "learning_rate": 3.2239501652446926e-05, + "loss": 0.1021, + "step": 75310 + }, + { + "epoch": 2.737117523075805, + "grad_norm": 0.4458122253417969, + "learning_rate": 3.2234534520644275e-05, + "loss": 0.0954, + "step": 75320 + }, + { + "epoch": 2.7374809215786033, + "grad_norm": 1.7729071378707886, + "learning_rate": 3.2229567077131285e-05, + "loss": 1.6003, + "step": 75330 + }, + { + "epoch": 2.7378443200814013, + "grad_norm": 0.8443679213523865, + "learning_rate": 3.222459932212196e-05, + "loss": 0.0879, + "step": 75340 + }, + { + "epoch": 2.7382077185841993, + "grad_norm": 0.7365388870239258, + "learning_rate": 3.221963125583037e-05, + "loss": 0.0823, + "step": 75350 + }, + { + "epoch": 2.7385711170869977, + "grad_norm": 0.6535968780517578, + "learning_rate": 3.2214662878470546e-05, + "loss": 0.1252, + "step": 75360 + }, + { + "epoch": 2.7389345155897957, + "grad_norm": 0.674757719039917, + "learning_rate": 3.220969419025657e-05, + "loss": 0.1166, + "step": 75370 + }, + { + "epoch": 2.739297914092594, + "grad_norm": 0.6909737586975098, + "learning_rate": 3.220472519140253e-05, + "loss": 0.1192, + "step": 75380 + }, + { + "epoch": 2.739661312595392, + "grad_norm": 0.7484961152076721, + "learning_rate": 3.219975588212251e-05, + "loss": 0.1064, + "step": 75390 + }, + { + "epoch": 2.74002471109819, + "grad_norm": 2.3563716411590576, + "learning_rate": 3.219478626263063e-05, + "loss": 0.0755, + "step": 75400 + }, + { + "epoch": 2.7403881096009886, + "grad_norm": 0.4629516005516052, + "learning_rate": 3.2189816333141004e-05, + "loss": 0.0974, + "step": 75410 + }, + { + "epoch": 2.7407515081037865, + "grad_norm": 0.487054705619812, + "learning_rate": 3.2184846093867774e-05, + "loss": 0.1004, + "step": 75420 + }, + { + "epoch": 2.741114906606585, + "grad_norm": 3.042552947998047, + "learning_rate": 3.2179875545025096e-05, + "loss": 0.1113, + "step": 75430 + }, + { + "epoch": 2.741478305109383, + "grad_norm": 0.5376294255256653, + "learning_rate": 3.2174904686827114e-05, + "loss": 0.0972, + "step": 75440 + }, + { + "epoch": 2.741841703612181, + "grad_norm": 0.9033780694007874, + "learning_rate": 3.216993351948803e-05, + "loss": 0.0784, + "step": 75450 + }, + { + "epoch": 2.7422051021149794, + "grad_norm": 0.29921913146972656, + "learning_rate": 3.2164962043222015e-05, + "loss": 0.076, + "step": 75460 + }, + { + "epoch": 2.7425685006177773, + "grad_norm": 0.30889561772346497, + "learning_rate": 3.2159990258243286e-05, + "loss": 0.09, + "step": 75470 + }, + { + "epoch": 2.742931899120576, + "grad_norm": 4.390368938446045, + "learning_rate": 3.2155018164766044e-05, + "loss": 0.087, + "step": 75480 + }, + { + "epoch": 2.7432952976233738, + "grad_norm": 2.617569923400879, + "learning_rate": 3.2150045763004526e-05, + "loss": 0.0989, + "step": 75490 + }, + { + "epoch": 2.7436586961261717, + "grad_norm": 0.5497812032699585, + "learning_rate": 3.214507305317298e-05, + "loss": 0.0869, + "step": 75500 + }, + { + "epoch": 2.74402209462897, + "grad_norm": 2.1748311519622803, + "learning_rate": 3.214010003548566e-05, + "loss": 0.1087, + "step": 75510 + }, + { + "epoch": 2.744385493131768, + "grad_norm": 0.7790930271148682, + "learning_rate": 3.213512671015683e-05, + "loss": 1.0444, + "step": 75520 + }, + { + "epoch": 2.7447488916345666, + "grad_norm": 0.8997694849967957, + "learning_rate": 3.2130153077400784e-05, + "loss": 0.2153, + "step": 75530 + }, + { + "epoch": 2.7451122901373646, + "grad_norm": 0.6659709811210632, + "learning_rate": 3.2125179137431805e-05, + "loss": 0.1144, + "step": 75540 + }, + { + "epoch": 2.7454756886401626, + "grad_norm": 0.6743984818458557, + "learning_rate": 3.212020489046421e-05, + "loss": 0.0858, + "step": 75550 + }, + { + "epoch": 2.745839087142961, + "grad_norm": 0.6624968647956848, + "learning_rate": 3.2115230336712316e-05, + "loss": 0.0913, + "step": 75560 + }, + { + "epoch": 2.7462024856457594, + "grad_norm": 0.6814375519752502, + "learning_rate": 3.211025547639047e-05, + "loss": 0.089, + "step": 75570 + }, + { + "epoch": 2.7465658841485574, + "grad_norm": 0.9000943303108215, + "learning_rate": 3.210528030971301e-05, + "loss": 0.1184, + "step": 75580 + }, + { + "epoch": 2.7469292826513554, + "grad_norm": 1.0685985088348389, + "learning_rate": 3.21003048368943e-05, + "loss": 0.1682, + "step": 75590 + }, + { + "epoch": 2.7472926811541534, + "grad_norm": 0.4757719933986664, + "learning_rate": 3.209532905814872e-05, + "loss": 0.1029, + "step": 75600 + }, + { + "epoch": 2.7472926811541534, + "eval_loss": 0.304624080657959, + "eval_runtime": 179.9156, + "eval_samples_per_second": 41.208, + "eval_steps_per_second": 5.152, + "eval_wer": 0.1494363461433732, + "step": 75600 + }, + { + "epoch": 2.747656079656952, + "grad_norm": 1.4344089031219482, + "learning_rate": 3.209035297369066e-05, + "loss": 0.0954, + "step": 75610 + }, + { + "epoch": 2.7480194781597502, + "grad_norm": 0.8026723265647888, + "learning_rate": 3.208537658373451e-05, + "loss": 0.1161, + "step": 75620 + }, + { + "epoch": 2.748382876662548, + "grad_norm": 0.8414619565010071, + "learning_rate": 3.20803998884947e-05, + "loss": 0.0969, + "step": 75630 + }, + { + "epoch": 2.748746275165346, + "grad_norm": 0.46038514375686646, + "learning_rate": 3.2075422888185645e-05, + "loss": 0.1183, + "step": 75640 + }, + { + "epoch": 2.7491096736681446, + "grad_norm": 0.6400403380393982, + "learning_rate": 3.207044558302179e-05, + "loss": 0.0962, + "step": 75650 + }, + { + "epoch": 2.7494730721709426, + "grad_norm": 0.637139618396759, + "learning_rate": 3.206596574790073e-05, + "loss": 1.817, + "step": 75660 + }, + { + "epoch": 2.749836470673741, + "grad_norm": 1.1132330894470215, + "learning_rate": 3.206098786410359e-05, + "loss": 0.1059, + "step": 75670 + }, + { + "epoch": 2.750199869176539, + "grad_norm": 0.6702316999435425, + "learning_rate": 3.2056009676073615e-05, + "loss": 0.0735, + "step": 75680 + }, + { + "epoch": 2.750563267679337, + "grad_norm": 0.7996656894683838, + "learning_rate": 3.205103118402528e-05, + "loss": 0.1147, + "step": 75690 + }, + { + "epoch": 2.7509266661821354, + "grad_norm": 0.6503117680549622, + "learning_rate": 3.204605238817311e-05, + "loss": 0.0837, + "step": 75700 + }, + { + "epoch": 2.7512900646849334, + "grad_norm": 0.7380549907684326, + "learning_rate": 3.204107328873161e-05, + "loss": 0.0742, + "step": 75710 + }, + { + "epoch": 2.751653463187732, + "grad_norm": 1.3897452354431152, + "learning_rate": 3.203609388591531e-05, + "loss": 0.1324, + "step": 75720 + }, + { + "epoch": 2.75201686169053, + "grad_norm": 0.6932911276817322, + "learning_rate": 3.203111417993876e-05, + "loss": 0.0847, + "step": 75730 + }, + { + "epoch": 2.752380260193328, + "grad_norm": 0.9964193105697632, + "learning_rate": 3.2026134171016516e-05, + "loss": 0.098, + "step": 75740 + }, + { + "epoch": 2.7527436586961262, + "grad_norm": 2.219566822052002, + "learning_rate": 3.2021153859363154e-05, + "loss": 0.0866, + "step": 75750 + }, + { + "epoch": 2.7531070571989242, + "grad_norm": 0.4468567967414856, + "learning_rate": 3.201617324519325e-05, + "loss": 0.0779, + "step": 75760 + }, + { + "epoch": 2.7534704557017227, + "grad_norm": 1.1400572061538696, + "learning_rate": 3.2011192328721406e-05, + "loss": 0.1104, + "step": 75770 + }, + { + "epoch": 2.7538338542045206, + "grad_norm": 0.7320595383644104, + "learning_rate": 3.2006211110162234e-05, + "loss": 0.0955, + "step": 75780 + }, + { + "epoch": 2.7541972527073186, + "grad_norm": 1.0638219118118286, + "learning_rate": 3.200122958973034e-05, + "loss": 0.1557, + "step": 75790 + }, + { + "epoch": 2.754560651210117, + "grad_norm": 0.8229318261146545, + "learning_rate": 3.1996247767640385e-05, + "loss": 0.0807, + "step": 75800 + }, + { + "epoch": 2.754924049712915, + "grad_norm": 0.5131879448890686, + "learning_rate": 3.1991265644107005e-05, + "loss": 0.0859, + "step": 75810 + }, + { + "epoch": 2.7552874482157135, + "grad_norm": 0.7201241850852966, + "learning_rate": 3.198628321934486e-05, + "loss": 0.1255, + "step": 75820 + }, + { + "epoch": 2.7556508467185115, + "grad_norm": 0.8084592819213867, + "learning_rate": 3.198130049356863e-05, + "loss": 0.0959, + "step": 75830 + }, + { + "epoch": 2.7560142452213094, + "grad_norm": 0.9655843377113342, + "learning_rate": 3.197631746699301e-05, + "loss": 0.1137, + "step": 75840 + }, + { + "epoch": 2.756377643724108, + "grad_norm": 0.6856592893600464, + "learning_rate": 3.197133413983268e-05, + "loss": 0.0953, + "step": 75850 + }, + { + "epoch": 2.7567410422269063, + "grad_norm": 0.8975215554237366, + "learning_rate": 3.196635051230237e-05, + "loss": 0.2702, + "step": 75860 + }, + { + "epoch": 2.7571044407297043, + "grad_norm": 1.0239101648330688, + "learning_rate": 3.19613665846168e-05, + "loss": 0.8354, + "step": 75870 + }, + { + "epoch": 2.7574678392325023, + "grad_norm": 1.5141791105270386, + "learning_rate": 3.195638235699072e-05, + "loss": 0.1123, + "step": 75880 + }, + { + "epoch": 2.7578312377353003, + "grad_norm": 0.787190318107605, + "learning_rate": 3.195139782963887e-05, + "loss": 0.1198, + "step": 75890 + }, + { + "epoch": 2.7581946362380987, + "grad_norm": 0.6342429518699646, + "learning_rate": 3.1946413002776024e-05, + "loss": 0.0838, + "step": 75900 + }, + { + "epoch": 2.758558034740897, + "grad_norm": 2.490267753601074, + "learning_rate": 3.194142787661695e-05, + "loss": 0.0861, + "step": 75910 + }, + { + "epoch": 2.758921433243695, + "grad_norm": 0.7890759110450745, + "learning_rate": 3.1936442451376454e-05, + "loss": 0.1203, + "step": 75920 + }, + { + "epoch": 2.759284831746493, + "grad_norm": 1.0969079732894897, + "learning_rate": 3.193145672726933e-05, + "loss": 0.1042, + "step": 75930 + }, + { + "epoch": 2.7596482302492915, + "grad_norm": 0.42303451895713806, + "learning_rate": 3.1926470704510395e-05, + "loss": 0.0979, + "step": 75940 + }, + { + "epoch": 2.7600116287520895, + "grad_norm": 0.7856914401054382, + "learning_rate": 3.192148438331448e-05, + "loss": 0.081, + "step": 75950 + }, + { + "epoch": 2.760375027254888, + "grad_norm": 0.7306569814682007, + "learning_rate": 3.191649776389644e-05, + "loss": 0.0688, + "step": 75960 + }, + { + "epoch": 2.760738425757686, + "grad_norm": 0.43717941641807556, + "learning_rate": 3.1911510846471115e-05, + "loss": 0.1219, + "step": 75970 + }, + { + "epoch": 2.761101824260484, + "grad_norm": 0.6672983169555664, + "learning_rate": 3.190652363125337e-05, + "loss": 0.1003, + "step": 75980 + }, + { + "epoch": 2.7614652227632823, + "grad_norm": 2.3338167667388916, + "learning_rate": 3.190153611845811e-05, + "loss": 0.117, + "step": 75990 + }, + { + "epoch": 2.7618286212660803, + "grad_norm": 0.893578052520752, + "learning_rate": 3.1896548308300206e-05, + "loss": 0.0922, + "step": 76000 + }, + { + "epoch": 2.7621920197688787, + "grad_norm": 1.831598162651062, + "learning_rate": 3.189156020099458e-05, + "loss": 0.0921, + "step": 76010 + }, + { + "epoch": 2.7625554182716767, + "grad_norm": 0.4112573564052582, + "learning_rate": 3.1886571796756136e-05, + "loss": 0.1237, + "step": 76020 + }, + { + "epoch": 2.7629188167744747, + "grad_norm": 2.476116418838501, + "learning_rate": 3.1881583095799816e-05, + "loss": 0.097, + "step": 76030 + }, + { + "epoch": 2.763282215277273, + "grad_norm": 0.5987531542778015, + "learning_rate": 3.1876594098340575e-05, + "loss": 0.117, + "step": 76040 + }, + { + "epoch": 2.763645613780071, + "grad_norm": 0.4382152855396271, + "learning_rate": 3.187160480459335e-05, + "loss": 0.0773, + "step": 76050 + }, + { + "epoch": 2.7640090122828695, + "grad_norm": 0.4360668659210205, + "learning_rate": 3.186661521477313e-05, + "loss": 0.0785, + "step": 76060 + }, + { + "epoch": 2.7643724107856675, + "grad_norm": 2.2010788917541504, + "learning_rate": 3.1861625329094894e-05, + "loss": 0.1354, + "step": 76070 + }, + { + "epoch": 2.7647358092884655, + "grad_norm": 0.8447809815406799, + "learning_rate": 3.185663514777363e-05, + "loss": 0.0982, + "step": 76080 + }, + { + "epoch": 2.765099207791264, + "grad_norm": 1.7606275081634521, + "learning_rate": 3.185164467102436e-05, + "loss": 0.1429, + "step": 76090 + }, + { + "epoch": 2.765462606294062, + "grad_norm": 0.8025608062744141, + "learning_rate": 3.1846653899062094e-05, + "loss": 0.0833, + "step": 76100 + }, + { + "epoch": 2.7658260047968604, + "grad_norm": 1.0630611181259155, + "learning_rate": 3.184166283210188e-05, + "loss": 0.0663, + "step": 76110 + }, + { + "epoch": 2.7661894032996583, + "grad_norm": 0.47696417570114136, + "learning_rate": 3.1836671470358744e-05, + "loss": 0.1282, + "step": 76120 + }, + { + "epoch": 2.7665528018024563, + "grad_norm": 4.783881187438965, + "learning_rate": 3.183167981404777e-05, + "loss": 0.1214, + "step": 76130 + }, + { + "epoch": 2.7669162003052548, + "grad_norm": 1.0744116306304932, + "learning_rate": 3.1826687863384006e-05, + "loss": 0.1492, + "step": 76140 + }, + { + "epoch": 2.767279598808053, + "grad_norm": 1.5960917472839355, + "learning_rate": 3.182169561858257e-05, + "loss": 0.0879, + "step": 76150 + }, + { + "epoch": 2.767642997310851, + "grad_norm": 0.9969580769538879, + "learning_rate": 3.1816703079858535e-05, + "loss": 0.0919, + "step": 76160 + }, + { + "epoch": 2.768006395813649, + "grad_norm": 0.3668254613876343, + "learning_rate": 3.181171024742701e-05, + "loss": 0.1718, + "step": 76170 + }, + { + "epoch": 2.768369794316447, + "grad_norm": 0.7729851603507996, + "learning_rate": 3.180671712150314e-05, + "loss": 0.0842, + "step": 76180 + }, + { + "epoch": 2.7687331928192456, + "grad_norm": 0.6386042833328247, + "learning_rate": 3.1801723702302034e-05, + "loss": 0.1199, + "step": 76190 + }, + { + "epoch": 2.769096591322044, + "grad_norm": 2.096891164779663, + "learning_rate": 3.179672999003887e-05, + "loss": 0.1033, + "step": 76200 + }, + { + "epoch": 2.769096591322044, + "eval_loss": 0.31721433997154236, + "eval_runtime": 179.2645, + "eval_samples_per_second": 41.358, + "eval_steps_per_second": 5.171, + "eval_wer": 0.14798409788153286, + "step": 76200 + }, + { + "epoch": 2.769459989824842, + "grad_norm": 2.548231601715088, + "learning_rate": 3.1791735984928784e-05, + "loss": 0.1199, + "step": 76210 + }, + { + "epoch": 2.76982338832764, + "grad_norm": 1.2589582204818726, + "learning_rate": 3.178674168718696e-05, + "loss": 0.1165, + "step": 76220 + }, + { + "epoch": 2.7701867868304384, + "grad_norm": 1.0916184186935425, + "learning_rate": 3.178174709702858e-05, + "loss": 0.09, + "step": 76230 + }, + { + "epoch": 2.7705501853332364, + "grad_norm": 0.47342580556869507, + "learning_rate": 3.177675221466885e-05, + "loss": 0.0816, + "step": 76240 + }, + { + "epoch": 2.770913583836035, + "grad_norm": 1.0653049945831299, + "learning_rate": 3.177175704032298e-05, + "loss": 0.1169, + "step": 76250 + }, + { + "epoch": 2.771276982338833, + "grad_norm": 0.5227024555206299, + "learning_rate": 3.176676157420619e-05, + "loss": 0.0903, + "step": 76260 + }, + { + "epoch": 2.771640380841631, + "grad_norm": 1.8630784749984741, + "learning_rate": 3.1761765816533726e-05, + "loss": 0.1136, + "step": 76270 + }, + { + "epoch": 2.772003779344429, + "grad_norm": 1.7517484426498413, + "learning_rate": 3.175676976752083e-05, + "loss": 0.0897, + "step": 76280 + }, + { + "epoch": 2.772367177847227, + "grad_norm": 0.7945340871810913, + "learning_rate": 3.175177342738276e-05, + "loss": 0.1031, + "step": 76290 + }, + { + "epoch": 2.7727305763500256, + "grad_norm": 0.6702117919921875, + "learning_rate": 3.174677679633481e-05, + "loss": 0.3309, + "step": 76300 + }, + { + "epoch": 2.7730939748528236, + "grad_norm": 0.8197999000549316, + "learning_rate": 3.174177987459223e-05, + "loss": 0.1137, + "step": 76310 + }, + { + "epoch": 2.7734573733556216, + "grad_norm": 0.35699373483657837, + "learning_rate": 3.1736782662370354e-05, + "loss": 0.092, + "step": 76320 + }, + { + "epoch": 2.77382077185842, + "grad_norm": 0.566719114780426, + "learning_rate": 3.173178515988449e-05, + "loss": 0.1059, + "step": 76330 + }, + { + "epoch": 2.774184170361218, + "grad_norm": 0.6718754172325134, + "learning_rate": 3.172678736734995e-05, + "loss": 0.1299, + "step": 76340 + }, + { + "epoch": 2.7745475688640164, + "grad_norm": 0.5272148847579956, + "learning_rate": 3.1721789284982075e-05, + "loss": 0.1943, + "step": 76350 + }, + { + "epoch": 2.7749109673668144, + "grad_norm": 0.7239329218864441, + "learning_rate": 3.1716790912996214e-05, + "loss": 0.0834, + "step": 76360 + }, + { + "epoch": 2.7752743658696124, + "grad_norm": 1.7969343662261963, + "learning_rate": 3.171179225160774e-05, + "loss": 0.1131, + "step": 76370 + }, + { + "epoch": 2.775637764372411, + "grad_norm": 1.348568320274353, + "learning_rate": 3.1706793301032e-05, + "loss": 0.1328, + "step": 76380 + }, + { + "epoch": 2.776001162875209, + "grad_norm": 0.6011419892311096, + "learning_rate": 3.170179406148441e-05, + "loss": 0.1409, + "step": 76390 + }, + { + "epoch": 2.7763645613780072, + "grad_norm": 1.8177915811538696, + "learning_rate": 3.169679453318036e-05, + "loss": 0.0737, + "step": 76400 + }, + { + "epoch": 2.7767279598808052, + "grad_norm": 0.592851996421814, + "learning_rate": 3.1691794716335266e-05, + "loss": 0.0998, + "step": 76410 + }, + { + "epoch": 2.777091358383603, + "grad_norm": 1.3811548948287964, + "learning_rate": 3.168679461116454e-05, + "loss": 0.0754, + "step": 76420 + }, + { + "epoch": 2.7774547568864016, + "grad_norm": 2.7338156700134277, + "learning_rate": 3.168179421788363e-05, + "loss": 0.1265, + "step": 76430 + }, + { + "epoch": 2.7778181553892, + "grad_norm": 0.604120135307312, + "learning_rate": 3.167679353670798e-05, + "loss": 0.1086, + "step": 76440 + }, + { + "epoch": 2.778181553891998, + "grad_norm": 0.8220155239105225, + "learning_rate": 3.1671792567853045e-05, + "loss": 1.4637, + "step": 76450 + }, + { + "epoch": 2.778544952394796, + "grad_norm": 0.8582079410552979, + "learning_rate": 3.166679131153432e-05, + "loss": 0.1023, + "step": 76460 + }, + { + "epoch": 2.778908350897594, + "grad_norm": 0.5946437120437622, + "learning_rate": 3.166178976796727e-05, + "loss": 0.1233, + "step": 76470 + }, + { + "epoch": 2.7792717494003925, + "grad_norm": 1.113297700881958, + "learning_rate": 3.165678793736741e-05, + "loss": 0.1044, + "step": 76480 + }, + { + "epoch": 2.779635147903191, + "grad_norm": 96.74727630615234, + "learning_rate": 3.165178581995023e-05, + "loss": 1.901, + "step": 76490 + }, + { + "epoch": 2.779998546405989, + "grad_norm": 0.4184577465057373, + "learning_rate": 3.164678341593127e-05, + "loss": 0.0768, + "step": 76500 + }, + { + "epoch": 2.780361944908787, + "grad_norm": 0.558016300201416, + "learning_rate": 3.164178072552606e-05, + "loss": 0.2095, + "step": 76510 + }, + { + "epoch": 2.7807253434115853, + "grad_norm": 0.7608421444892883, + "learning_rate": 3.1636777748950156e-05, + "loss": 0.0934, + "step": 76520 + }, + { + "epoch": 2.7810887419143833, + "grad_norm": 0.43345919251441956, + "learning_rate": 3.163177448641911e-05, + "loss": 0.0852, + "step": 76530 + }, + { + "epoch": 2.7814521404171817, + "grad_norm": 3.386565923690796, + "learning_rate": 3.1626770938148496e-05, + "loss": 0.4973, + "step": 76540 + }, + { + "epoch": 2.7818155389199797, + "grad_norm": Infinity, + "learning_rate": 3.1622267500575804e-05, + "loss": 2.2209, + "step": 76550 + }, + { + "epoch": 2.7821789374227777, + "grad_norm": 1.2026207447052002, + "learning_rate": 3.161726340999396e-05, + "loss": 0.091, + "step": 76560 + }, + { + "epoch": 2.782542335925576, + "grad_norm": 0.6033660769462585, + "learning_rate": 3.1612259034297784e-05, + "loss": 0.1076, + "step": 76570 + }, + { + "epoch": 2.782905734428374, + "grad_norm": 0.6164398193359375, + "learning_rate": 3.1607254373702885e-05, + "loss": 0.1115, + "step": 76580 + }, + { + "epoch": 2.7832691329311725, + "grad_norm": 0.4211709201335907, + "learning_rate": 3.1602249428424916e-05, + "loss": 0.1061, + "step": 76590 + }, + { + "epoch": 2.7836325314339705, + "grad_norm": 0.8229207396507263, + "learning_rate": 3.1597244198679496e-05, + "loss": 0.1703, + "step": 76600 + }, + { + "epoch": 2.7839959299367685, + "grad_norm": 0.2660597860813141, + "learning_rate": 3.159223868468231e-05, + "loss": 0.0873, + "step": 76610 + }, + { + "epoch": 2.784359328439567, + "grad_norm": 0.8367421627044678, + "learning_rate": 3.1587232886649006e-05, + "loss": 0.0906, + "step": 76620 + }, + { + "epoch": 2.784722726942365, + "grad_norm": 0.4646151661872864, + "learning_rate": 3.158222680479527e-05, + "loss": 0.0922, + "step": 76630 + }, + { + "epoch": 2.7850861254451633, + "grad_norm": 1.0064074993133545, + "learning_rate": 3.1577220439336814e-05, + "loss": 0.0827, + "step": 76640 + }, + { + "epoch": 2.7854495239479613, + "grad_norm": 0.7360056638717651, + "learning_rate": 3.157221379048932e-05, + "loss": 0.1526, + "step": 76650 + }, + { + "epoch": 2.7858129224507593, + "grad_norm": 0.4394819736480713, + "learning_rate": 3.1567206858468524e-05, + "loss": 0.0802, + "step": 76660 + }, + { + "epoch": 2.7861763209535577, + "grad_norm": 2.16237735748291, + "learning_rate": 3.1562199643490156e-05, + "loss": 0.1079, + "step": 76670 + }, + { + "epoch": 2.7865397194563557, + "grad_norm": 1.211832046508789, + "learning_rate": 3.155719214576994e-05, + "loss": 0.0896, + "step": 76680 + }, + { + "epoch": 2.786903117959154, + "grad_norm": 0.561252772808075, + "learning_rate": 3.1552184365523654e-05, + "loss": 0.1002, + "step": 76690 + }, + { + "epoch": 2.787266516461952, + "grad_norm": 0.947999894618988, + "learning_rate": 3.1547176302967046e-05, + "loss": 0.075, + "step": 76700 + }, + { + "epoch": 2.78762991496475, + "grad_norm": 0.3279600739479065, + "learning_rate": 3.154216795831591e-05, + "loss": 0.0808, + "step": 76710 + }, + { + "epoch": 2.7879933134675485, + "grad_norm": 0.4340432584285736, + "learning_rate": 3.1537159331786046e-05, + "loss": 0.1208, + "step": 76720 + }, + { + "epoch": 2.788356711970347, + "grad_norm": 1.4407846927642822, + "learning_rate": 3.1532150423593234e-05, + "loss": 0.0973, + "step": 76730 + }, + { + "epoch": 2.788720110473145, + "grad_norm": 1.0775196552276611, + "learning_rate": 3.152714123395331e-05, + "loss": 0.1013, + "step": 76740 + }, + { + "epoch": 2.789083508975943, + "grad_norm": 1.0762931108474731, + "learning_rate": 3.152213176308209e-05, + "loss": 0.0797, + "step": 76750 + }, + { + "epoch": 2.789446907478741, + "grad_norm": 0.645371675491333, + "learning_rate": 3.1517122011195414e-05, + "loss": 0.0918, + "step": 76760 + }, + { + "epoch": 2.7898103059815393, + "grad_norm": 0.4548865556716919, + "learning_rate": 3.151211197850914e-05, + "loss": 0.1365, + "step": 76770 + }, + { + "epoch": 2.7901737044843378, + "grad_norm": 0.9523658156394958, + "learning_rate": 3.1507101665239136e-05, + "loss": 0.103, + "step": 76780 + }, + { + "epoch": 2.7905371029871358, + "grad_norm": 0.6093083024024963, + "learning_rate": 3.150209107160127e-05, + "loss": 0.0966, + "step": 76790 + }, + { + "epoch": 2.7909005014899337, + "grad_norm": 1.1917424201965332, + "learning_rate": 3.149708019781143e-05, + "loss": 0.0723, + "step": 76800 + }, + { + "epoch": 2.7909005014899337, + "eval_loss": 0.33958899974823, + "eval_runtime": 180.4871, + "eval_samples_per_second": 41.078, + "eval_steps_per_second": 5.136, + "eval_wer": 0.14761195926443627, + "step": 76800 + }, + { + "epoch": 2.791263899992732, + "grad_norm": 0.5231362581253052, + "learning_rate": 3.149206904408553e-05, + "loss": 0.0774, + "step": 76810 + }, + { + "epoch": 2.79162729849553, + "grad_norm": 0.3892790973186493, + "learning_rate": 3.148705761063947e-05, + "loss": 0.1298, + "step": 76820 + }, + { + "epoch": 2.7919906969983286, + "grad_norm": 1.2342190742492676, + "learning_rate": 3.1482045897689174e-05, + "loss": 0.0784, + "step": 76830 + }, + { + "epoch": 2.7923540955011266, + "grad_norm": 0.6379334926605225, + "learning_rate": 3.147703390545059e-05, + "loss": 0.1218, + "step": 76840 + }, + { + "epoch": 2.7927174940039245, + "grad_norm": 4.186805248260498, + "learning_rate": 3.1472021634139656e-05, + "loss": 0.1091, + "step": 76850 + }, + { + "epoch": 2.793080892506723, + "grad_norm": 0.9693048596382141, + "learning_rate": 3.146700908397234e-05, + "loss": 0.081, + "step": 76860 + }, + { + "epoch": 2.793444291009521, + "grad_norm": 0.41208523511886597, + "learning_rate": 3.146199625516461e-05, + "loss": 0.1323, + "step": 76870 + }, + { + "epoch": 2.7938076895123194, + "grad_norm": 0.6204960346221924, + "learning_rate": 3.145698314793245e-05, + "loss": 0.0843, + "step": 76880 + }, + { + "epoch": 2.7941710880151174, + "grad_norm": 0.9049692153930664, + "learning_rate": 3.145196976249187e-05, + "loss": 0.1092, + "step": 76890 + }, + { + "epoch": 2.7945344865179154, + "grad_norm": 0.9340922832489014, + "learning_rate": 3.144695609905887e-05, + "loss": 0.1021, + "step": 76900 + }, + { + "epoch": 2.794897885020714, + "grad_norm": 0.40798988938331604, + "learning_rate": 3.144194215784946e-05, + "loss": 0.1096, + "step": 76910 + }, + { + "epoch": 2.7952612835235118, + "grad_norm": 0.4020129144191742, + "learning_rate": 3.143692793907968e-05, + "loss": 0.0928, + "step": 76920 + }, + { + "epoch": 2.79562468202631, + "grad_norm": 0.621026873588562, + "learning_rate": 3.1431913442965585e-05, + "loss": 0.0978, + "step": 76930 + }, + { + "epoch": 2.795988080529108, + "grad_norm": 0.428535133600235, + "learning_rate": 3.1426898669723225e-05, + "loss": 0.1034, + "step": 76940 + }, + { + "epoch": 2.796351479031906, + "grad_norm": 1.3301656246185303, + "learning_rate": 3.1421883619568665e-05, + "loss": 0.0813, + "step": 76950 + }, + { + "epoch": 2.7967148775347046, + "grad_norm": 0.4475337266921997, + "learning_rate": 3.141686829271799e-05, + "loss": 0.0865, + "step": 76960 + }, + { + "epoch": 2.7970782760375026, + "grad_norm": 2.5869007110595703, + "learning_rate": 3.1411852689387294e-05, + "loss": 0.1085, + "step": 76970 + }, + { + "epoch": 2.797441674540301, + "grad_norm": 1.98558509349823, + "learning_rate": 3.140683680979268e-05, + "loss": 0.0887, + "step": 76980 + }, + { + "epoch": 2.797805073043099, + "grad_norm": 0.4044126570224762, + "learning_rate": 3.1401820654150267e-05, + "loss": 0.1273, + "step": 76990 + }, + { + "epoch": 2.798168471545897, + "grad_norm": 0.5848196148872375, + "learning_rate": 3.139680422267617e-05, + "loss": 0.9749, + "step": 77000 + }, + { + "epoch": 2.7985318700486954, + "grad_norm": 1.0990394353866577, + "learning_rate": 3.139178751558655e-05, + "loss": 0.0864, + "step": 77010 + }, + { + "epoch": 2.798895268551494, + "grad_norm": 0.5997377634048462, + "learning_rate": 3.138677053309753e-05, + "loss": 0.1654, + "step": 77020 + }, + { + "epoch": 2.799258667054292, + "grad_norm": 2.957549571990967, + "learning_rate": 3.138175327542531e-05, + "loss": 0.0771, + "step": 77030 + }, + { + "epoch": 2.79962206555709, + "grad_norm": 1.3711345195770264, + "learning_rate": 3.137673574278604e-05, + "loss": 0.1682, + "step": 77040 + }, + { + "epoch": 2.799985464059888, + "grad_norm": 1.1909111738204956, + "learning_rate": 3.137171793539591e-05, + "loss": 0.1002, + "step": 77050 + }, + { + "epoch": 2.8003488625626862, + "grad_norm": 0.622058093547821, + "learning_rate": 3.136669985347113e-05, + "loss": 0.0842, + "step": 77060 + }, + { + "epoch": 2.8007122610654847, + "grad_norm": 1.0402450561523438, + "learning_rate": 3.136168149722791e-05, + "loss": 0.1096, + "step": 77070 + }, + { + "epoch": 2.8010756595682826, + "grad_norm": 4.3762969970703125, + "learning_rate": 3.135666286688247e-05, + "loss": 0.0888, + "step": 77080 + }, + { + "epoch": 2.8014390580710806, + "grad_norm": 1.1344795227050781, + "learning_rate": 3.135164396265103e-05, + "loss": 0.1097, + "step": 77090 + }, + { + "epoch": 2.801802456573879, + "grad_norm": 0.5318688154220581, + "learning_rate": 3.134662478474987e-05, + "loss": 0.0926, + "step": 77100 + }, + { + "epoch": 2.802165855076677, + "grad_norm": 1.101820468902588, + "learning_rate": 3.1341605333395216e-05, + "loss": 0.0989, + "step": 77110 + }, + { + "epoch": 2.8025292535794755, + "grad_norm": 0.3960217237472534, + "learning_rate": 3.133658560880336e-05, + "loss": 0.1064, + "step": 77120 + }, + { + "epoch": 2.8028926520822735, + "grad_norm": 0.6371271014213562, + "learning_rate": 3.133156561119057e-05, + "loss": 0.0957, + "step": 77130 + }, + { + "epoch": 2.8032560505850714, + "grad_norm": 0.6579133868217468, + "learning_rate": 3.132654534077315e-05, + "loss": 0.0861, + "step": 77140 + }, + { + "epoch": 2.80361944908787, + "grad_norm": 0.3245817720890045, + "learning_rate": 3.13215247977674e-05, + "loss": 2.9585, + "step": 77150 + }, + { + "epoch": 2.803982847590668, + "grad_norm": 0.4539554715156555, + "learning_rate": 3.131650398238963e-05, + "loss": 3.7161, + "step": 77160 + }, + { + "epoch": 2.8043462460934663, + "grad_norm": 0.5258297324180603, + "learning_rate": 3.1311482894856194e-05, + "loss": 0.0938, + "step": 77170 + }, + { + "epoch": 2.8047096445962643, + "grad_norm": 1.8863434791564941, + "learning_rate": 3.13064615353834e-05, + "loss": 0.1022, + "step": 77180 + }, + { + "epoch": 2.8050730430990622, + "grad_norm": 0.4523390531539917, + "learning_rate": 3.130143990418763e-05, + "loss": 0.1094, + "step": 77190 + }, + { + "epoch": 2.8054364416018607, + "grad_norm": 1.1494712829589844, + "learning_rate": 3.1296418001485225e-05, + "loss": 0.0911, + "step": 77200 + }, + { + "epoch": 2.8057998401046587, + "grad_norm": 1.3108868598937988, + "learning_rate": 3.129139582749258e-05, + "loss": 0.1123, + "step": 77210 + }, + { + "epoch": 2.806163238607457, + "grad_norm": 0.6619325280189514, + "learning_rate": 3.128637338242607e-05, + "loss": 0.1178, + "step": 77220 + }, + { + "epoch": 2.806526637110255, + "grad_norm": 0.9496577978134155, + "learning_rate": 3.128135066650209e-05, + "loss": 0.0903, + "step": 77230 + }, + { + "epoch": 2.806890035613053, + "grad_norm": 1.0447412729263306, + "learning_rate": 3.127632767993707e-05, + "loss": 0.1962, + "step": 77240 + }, + { + "epoch": 2.8072534341158515, + "grad_norm": 0.6359366774559021, + "learning_rate": 3.127130442294742e-05, + "loss": 0.0872, + "step": 77250 + }, + { + "epoch": 2.8076168326186495, + "grad_norm": 0.36711859703063965, + "learning_rate": 3.126628089574957e-05, + "loss": 0.1117, + "step": 77260 + }, + { + "epoch": 2.807980231121448, + "grad_norm": 0.46006709337234497, + "learning_rate": 3.1261257098559975e-05, + "loss": 0.1075, + "step": 77270 + }, + { + "epoch": 2.808343629624246, + "grad_norm": 0.4880903959274292, + "learning_rate": 3.125623303159509e-05, + "loss": 0.0933, + "step": 77280 + }, + { + "epoch": 2.808707028127044, + "grad_norm": 0.6547648906707764, + "learning_rate": 3.125120869507138e-05, + "loss": 0.0927, + "step": 77290 + }, + { + "epoch": 2.8090704266298423, + "grad_norm": 0.9286164045333862, + "learning_rate": 3.124618408920533e-05, + "loss": 0.0701, + "step": 77300 + }, + { + "epoch": 2.8094338251326407, + "grad_norm": 0.7774373292922974, + "learning_rate": 3.1241159214213436e-05, + "loss": 0.0885, + "step": 77310 + }, + { + "epoch": 2.8097972236354387, + "grad_norm": 0.6464956402778625, + "learning_rate": 3.12361340703122e-05, + "loss": 0.1133, + "step": 77320 + }, + { + "epoch": 2.8101606221382367, + "grad_norm": 0.6208813190460205, + "learning_rate": 3.123110865771813e-05, + "loss": 0.1143, + "step": 77330 + }, + { + "epoch": 2.8105240206410347, + "grad_norm": 0.5069448351860046, + "learning_rate": 3.122608297664776e-05, + "loss": 0.0991, + "step": 77340 + }, + { + "epoch": 2.810887419143833, + "grad_norm": 0.6329632997512817, + "learning_rate": 3.122105702731762e-05, + "loss": 0.0925, + "step": 77350 + }, + { + "epoch": 2.8112508176466315, + "grad_norm": 0.8677617311477661, + "learning_rate": 3.121603080994428e-05, + "loss": 0.0819, + "step": 77360 + }, + { + "epoch": 2.8116142161494295, + "grad_norm": 0.7725453972816467, + "learning_rate": 3.1211004324744274e-05, + "loss": 0.1081, + "step": 77370 + }, + { + "epoch": 2.8119776146522275, + "grad_norm": 0.7517724633216858, + "learning_rate": 3.12059775719342e-05, + "loss": 0.0873, + "step": 77380 + }, + { + "epoch": 2.812341013155026, + "grad_norm": 0.8779316544532776, + "learning_rate": 3.1200950551730636e-05, + "loss": 0.1038, + "step": 77390 + }, + { + "epoch": 2.812704411657824, + "grad_norm": 0.4052380323410034, + "learning_rate": 3.119592326435016e-05, + "loss": 0.0791, + "step": 77400 + }, + { + "epoch": 2.812704411657824, + "eval_loss": 0.3295031487941742, + "eval_runtime": 180.5257, + "eval_samples_per_second": 41.069, + "eval_steps_per_second": 5.135, + "eval_wer": 0.14710367237279212, + "step": 77400 + }, + { + "epoch": 2.8130678101606224, + "grad_norm": 0.4020283818244934, + "learning_rate": 3.1190895710009416e-05, + "loss": 0.1089, + "step": 77410 + }, + { + "epoch": 2.8134312086634203, + "grad_norm": 0.852902889251709, + "learning_rate": 3.118586788892499e-05, + "loss": 0.0949, + "step": 77420 + }, + { + "epoch": 2.8137946071662183, + "grad_norm": 0.7513383030891418, + "learning_rate": 3.1180839801313536e-05, + "loss": 0.11, + "step": 77430 + }, + { + "epoch": 2.8141580056690167, + "grad_norm": 0.7311908006668091, + "learning_rate": 3.117581144739168e-05, + "loss": 0.1077, + "step": 77440 + }, + { + "epoch": 2.8145214041718147, + "grad_norm": 0.7238545417785645, + "learning_rate": 3.117078282737608e-05, + "loss": 0.0743, + "step": 77450 + }, + { + "epoch": 2.814884802674613, + "grad_norm": 0.6685813069343567, + "learning_rate": 3.116575394148341e-05, + "loss": 0.0895, + "step": 77460 + }, + { + "epoch": 2.815248201177411, + "grad_norm": 0.6721900105476379, + "learning_rate": 3.116072478993034e-05, + "loss": 2.3187, + "step": 77470 + }, + { + "epoch": 2.815611599680209, + "grad_norm": 0.5871604084968567, + "learning_rate": 3.1155695372933553e-05, + "loss": 0.0879, + "step": 77480 + }, + { + "epoch": 2.8159749981830076, + "grad_norm": 0.6194286942481995, + "learning_rate": 3.1150665690709755e-05, + "loss": 0.1139, + "step": 77490 + }, + { + "epoch": 2.8163383966858055, + "grad_norm": 2.4753482341766357, + "learning_rate": 3.114563574347566e-05, + "loss": 0.0882, + "step": 77500 + }, + { + "epoch": 2.816701795188604, + "grad_norm": 0.6420596241950989, + "learning_rate": 3.1140605531447985e-05, + "loss": 0.0677, + "step": 77510 + }, + { + "epoch": 2.817065193691402, + "grad_norm": 0.7851647734642029, + "learning_rate": 3.1135575054843464e-05, + "loss": 0.0943, + "step": 77520 + }, + { + "epoch": 2.8174285921942, + "grad_norm": 0.5447911024093628, + "learning_rate": 3.113054431387885e-05, + "loss": 0.0936, + "step": 77530 + }, + { + "epoch": 2.8177919906969984, + "grad_norm": 1.930080771446228, + "learning_rate": 3.1125513308770886e-05, + "loss": 0.0979, + "step": 77540 + }, + { + "epoch": 2.8181553891997964, + "grad_norm": 1.2513458728790283, + "learning_rate": 3.112048203973636e-05, + "loss": 0.0968, + "step": 77550 + }, + { + "epoch": 2.818518787702595, + "grad_norm": 0.7506178617477417, + "learning_rate": 3.1115450506992025e-05, + "loss": 0.0754, + "step": 77560 + }, + { + "epoch": 2.8188821862053928, + "grad_norm": 2.6080875396728516, + "learning_rate": 3.11104187107547e-05, + "loss": 0.902, + "step": 77570 + }, + { + "epoch": 2.8192455847081908, + "grad_norm": 1.5008831024169922, + "learning_rate": 3.110538665124117e-05, + "loss": 0.1221, + "step": 77580 + }, + { + "epoch": 2.819608983210989, + "grad_norm": 0.6769622564315796, + "learning_rate": 3.1100354328668244e-05, + "loss": 0.1255, + "step": 77590 + }, + { + "epoch": 2.8199723817137876, + "grad_norm": 1.557826280593872, + "learning_rate": 3.109532174325277e-05, + "loss": 0.4934, + "step": 77600 + }, + { + "epoch": 2.8203357802165856, + "grad_norm": 0.48424941301345825, + "learning_rate": 3.1090288895211554e-05, + "loss": 0.1016, + "step": 77610 + }, + { + "epoch": 2.8206991787193836, + "grad_norm": 0.5457457900047302, + "learning_rate": 3.1085255784761466e-05, + "loss": 0.1036, + "step": 77620 + }, + { + "epoch": 2.821062577222182, + "grad_norm": 0.729720950126648, + "learning_rate": 3.108022241211936e-05, + "loss": 0.0936, + "step": 77630 + }, + { + "epoch": 2.82142597572498, + "grad_norm": 3.1648247241973877, + "learning_rate": 3.1075188777502104e-05, + "loss": 0.1059, + "step": 77640 + }, + { + "epoch": 2.8217893742277784, + "grad_norm": 2.022939920425415, + "learning_rate": 3.107015488112658e-05, + "loss": 2.3178, + "step": 77650 + }, + { + "epoch": 2.8221527727305764, + "grad_norm": 0.5269419550895691, + "learning_rate": 3.1065120723209676e-05, + "loss": 0.1035, + "step": 77660 + }, + { + "epoch": 2.8225161712333744, + "grad_norm": 2.0776426792144775, + "learning_rate": 3.10600863039683e-05, + "loss": 0.1209, + "step": 77670 + }, + { + "epoch": 2.822879569736173, + "grad_norm": 1.718711495399475, + "learning_rate": 3.105505162361936e-05, + "loss": 0.0931, + "step": 77680 + }, + { + "epoch": 2.823242968238971, + "grad_norm": 0.9783419370651245, + "learning_rate": 3.10500166823798e-05, + "loss": 0.1366, + "step": 77690 + }, + { + "epoch": 2.8236063667417692, + "grad_norm": 0.3474352955818176, + "learning_rate": 3.1044981480466544e-05, + "loss": 0.0823, + "step": 77700 + }, + { + "epoch": 2.823969765244567, + "grad_norm": 0.4555971622467041, + "learning_rate": 3.103994601809655e-05, + "loss": 0.0809, + "step": 77710 + }, + { + "epoch": 2.824333163747365, + "grad_norm": 0.5148143768310547, + "learning_rate": 3.103491029548676e-05, + "loss": 0.1185, + "step": 77720 + }, + { + "epoch": 2.8246965622501636, + "grad_norm": 0.7520084381103516, + "learning_rate": 3.102987431285416e-05, + "loss": 0.0865, + "step": 77730 + }, + { + "epoch": 2.8250599607529616, + "grad_norm": 2.1148874759674072, + "learning_rate": 3.102483807041574e-05, + "loss": 0.0923, + "step": 77740 + }, + { + "epoch": 2.82542335925576, + "grad_norm": 0.8044182658195496, + "learning_rate": 3.1019801568388476e-05, + "loss": 0.0848, + "step": 77750 + }, + { + "epoch": 2.825786757758558, + "grad_norm": 0.5779685974121094, + "learning_rate": 3.1014764806989385e-05, + "loss": 0.0902, + "step": 77760 + }, + { + "epoch": 2.826150156261356, + "grad_norm": 0.9560330510139465, + "learning_rate": 3.1009727786435474e-05, + "loss": 1.5009, + "step": 77770 + }, + { + "epoch": 2.8265135547641544, + "grad_norm": 0.7544772624969482, + "learning_rate": 3.100469050694378e-05, + "loss": 0.1239, + "step": 77780 + }, + { + "epoch": 2.8268769532669524, + "grad_norm": 0.824269711971283, + "learning_rate": 3.099965296873134e-05, + "loss": 0.0934, + "step": 77790 + }, + { + "epoch": 2.827240351769751, + "grad_norm": 0.6971185803413391, + "learning_rate": 3.099461517201519e-05, + "loss": 0.0923, + "step": 77800 + }, + { + "epoch": 2.827603750272549, + "grad_norm": 0.81715989112854, + "learning_rate": 3.098957711701241e-05, + "loss": 0.1041, + "step": 77810 + }, + { + "epoch": 2.827967148775347, + "grad_norm": 0.5701466798782349, + "learning_rate": 3.098453880394006e-05, + "loss": 0.1016, + "step": 77820 + }, + { + "epoch": 2.8283305472781453, + "grad_norm": 6.4445881843566895, + "learning_rate": 3.0979500233015224e-05, + "loss": 0.0902, + "step": 77830 + }, + { + "epoch": 2.8286939457809432, + "grad_norm": 2.981534004211426, + "learning_rate": 3.0974461404455e-05, + "loss": 0.1224, + "step": 77840 + }, + { + "epoch": 2.8290573442837417, + "grad_norm": 0.878025472164154, + "learning_rate": 3.096942231847649e-05, + "loss": 0.0965, + "step": 77850 + }, + { + "epoch": 2.8294207427865397, + "grad_norm": 0.45175373554229736, + "learning_rate": 3.096438297529681e-05, + "loss": 0.0821, + "step": 77860 + }, + { + "epoch": 2.8297841412893376, + "grad_norm": 1.207901954650879, + "learning_rate": 3.0959343375133096e-05, + "loss": 0.1296, + "step": 77870 + }, + { + "epoch": 2.830147539792136, + "grad_norm": 2.8622663021087646, + "learning_rate": 3.0954303518202476e-05, + "loss": 0.0949, + "step": 77880 + }, + { + "epoch": 2.8305109382949345, + "grad_norm": 1.028940200805664, + "learning_rate": 3.0949263404722104e-05, + "loss": 0.1271, + "step": 77890 + }, + { + "epoch": 2.8308743367977325, + "grad_norm": 0.8942374587059021, + "learning_rate": 3.094422303490913e-05, + "loss": 1.3726, + "step": 77900 + }, + { + "epoch": 2.8312377353005305, + "grad_norm": 1.3904191255569458, + "learning_rate": 3.093918240898075e-05, + "loss": 0.0858, + "step": 77910 + }, + { + "epoch": 2.831601133803329, + "grad_norm": 3.551011562347412, + "learning_rate": 3.093414152715412e-05, + "loss": 0.106, + "step": 77920 + }, + { + "epoch": 2.831964532306127, + "grad_norm": 1.3634971380233765, + "learning_rate": 3.092910038964645e-05, + "loss": 0.1102, + "step": 77930 + }, + { + "epoch": 2.8323279308089253, + "grad_norm": 0.9616494178771973, + "learning_rate": 3.092405899667494e-05, + "loss": 0.0946, + "step": 77940 + }, + { + "epoch": 2.8326913293117233, + "grad_norm": 1.1661938428878784, + "learning_rate": 3.0919017348456805e-05, + "loss": 0.0848, + "step": 77950 + }, + { + "epoch": 2.8330547278145213, + "grad_norm": 1.016788125038147, + "learning_rate": 3.091397544520927e-05, + "loss": 0.0958, + "step": 77960 + }, + { + "epoch": 2.8334181263173197, + "grad_norm": 0.3791126012802124, + "learning_rate": 3.090893328714958e-05, + "loss": 0.0954, + "step": 77970 + }, + { + "epoch": 2.8337815248201177, + "grad_norm": 1.1772645711898804, + "learning_rate": 3.0903890874494975e-05, + "loss": 0.117, + "step": 77980 + }, + { + "epoch": 2.834144923322916, + "grad_norm": 0.7023350596427917, + "learning_rate": 3.089884820746272e-05, + "loss": 0.0857, + "step": 77990 + }, + { + "epoch": 2.834508321825714, + "grad_norm": 0.8230845332145691, + "learning_rate": 3.0893805286270085e-05, + "loss": 0.085, + "step": 78000 + }, + { + "epoch": 2.834508321825714, + "eval_loss": 0.34080591797828674, + "eval_runtime": 179.5232, + "eval_samples_per_second": 41.298, + "eval_steps_per_second": 5.164, + "eval_wer": 0.1498084847604698, + "step": 78000 + }, + { + "epoch": 2.834871720328512, + "grad_norm": 0.7763819098472595, + "learning_rate": 3.088876211113435e-05, + "loss": 0.0876, + "step": 78010 + }, + { + "epoch": 2.8352351188313105, + "grad_norm": 0.9296404719352722, + "learning_rate": 3.088371868227281e-05, + "loss": 0.115, + "step": 78020 + }, + { + "epoch": 2.8355985173341085, + "grad_norm": 0.7724182605743408, + "learning_rate": 3.087867499990276e-05, + "loss": 0.1133, + "step": 78030 + }, + { + "epoch": 2.835961915836907, + "grad_norm": 0.6081514954566956, + "learning_rate": 3.087363106424152e-05, + "loss": 0.1185, + "step": 78040 + }, + { + "epoch": 2.836325314339705, + "grad_norm": 0.692362904548645, + "learning_rate": 3.086858687550642e-05, + "loss": 0.0796, + "step": 78050 + }, + { + "epoch": 2.836688712842503, + "grad_norm": 0.7458900213241577, + "learning_rate": 3.0863542433914794e-05, + "loss": 0.0985, + "step": 78060 + }, + { + "epoch": 2.8370521113453013, + "grad_norm": 0.2584981322288513, + "learning_rate": 3.0858497739683984e-05, + "loss": 0.102, + "step": 78070 + }, + { + "epoch": 2.8374155098480993, + "grad_norm": 1.8372403383255005, + "learning_rate": 3.085345279303136e-05, + "loss": 0.1121, + "step": 78080 + }, + { + "epoch": 2.8377789083508977, + "grad_norm": 2.2560524940490723, + "learning_rate": 3.0848407594174266e-05, + "loss": 0.1086, + "step": 78090 + }, + { + "epoch": 2.8381423068536957, + "grad_norm": 1.1488791704177856, + "learning_rate": 3.0843362143330104e-05, + "loss": 1.6247, + "step": 78100 + }, + { + "epoch": 2.8385057053564937, + "grad_norm": 1.206886649131775, + "learning_rate": 3.083831644071626e-05, + "loss": 0.0846, + "step": 78110 + }, + { + "epoch": 2.838869103859292, + "grad_norm": 0.6960283517837524, + "learning_rate": 3.083327048655013e-05, + "loss": 0.1261, + "step": 78120 + }, + { + "epoch": 2.83923250236209, + "grad_norm": 0.6910631656646729, + "learning_rate": 3.082822428104914e-05, + "loss": 0.0979, + "step": 78130 + }, + { + "epoch": 2.8395959008648886, + "grad_norm": 1.3443272113800049, + "learning_rate": 3.082317782443069e-05, + "loss": 0.1082, + "step": 78140 + }, + { + "epoch": 2.8399592993676865, + "grad_norm": 0.290623277425766, + "learning_rate": 3.081813111691223e-05, + "loss": 0.0773, + "step": 78150 + }, + { + "epoch": 2.8403226978704845, + "grad_norm": 0.5689085721969604, + "learning_rate": 3.08130841587112e-05, + "loss": 0.0972, + "step": 78160 + }, + { + "epoch": 2.840686096373283, + "grad_norm": 0.45181599259376526, + "learning_rate": 3.080803695004506e-05, + "loss": 0.1002, + "step": 78170 + }, + { + "epoch": 2.8410494948760814, + "grad_norm": 0.31175151467323303, + "learning_rate": 3.080298949113127e-05, + "loss": 0.0951, + "step": 78180 + }, + { + "epoch": 2.8414128933788794, + "grad_norm": 0.6608039140701294, + "learning_rate": 3.0797941782187314e-05, + "loss": 0.1207, + "step": 78190 + }, + { + "epoch": 2.8417762918816774, + "grad_norm": 0.9375587701797485, + "learning_rate": 3.079289382343068e-05, + "loss": 0.0875, + "step": 78200 + }, + { + "epoch": 2.842139690384476, + "grad_norm": 0.581164538860321, + "learning_rate": 3.078784561507885e-05, + "loss": 0.204, + "step": 78210 + }, + { + "epoch": 2.8425030888872738, + "grad_norm": 0.8400561809539795, + "learning_rate": 3.078279715734935e-05, + "loss": 0.106, + "step": 78220 + }, + { + "epoch": 2.842866487390072, + "grad_norm": 0.5431386232376099, + "learning_rate": 3.07777484504597e-05, + "loss": 0.1134, + "step": 78230 + }, + { + "epoch": 2.84322988589287, + "grad_norm": 0.5342890620231628, + "learning_rate": 3.077269949462742e-05, + "loss": 0.1102, + "step": 78240 + }, + { + "epoch": 2.843593284395668, + "grad_norm": 0.6654142737388611, + "learning_rate": 3.076765029007006e-05, + "loss": 0.0818, + "step": 78250 + }, + { + "epoch": 2.8439566828984666, + "grad_norm": 0.49340054392814636, + "learning_rate": 3.076260083700518e-05, + "loss": 0.0899, + "step": 78260 + }, + { + "epoch": 2.8443200814012646, + "grad_norm": 0.5866402387619019, + "learning_rate": 3.0757551135650325e-05, + "loss": 0.1185, + "step": 78270 + }, + { + "epoch": 2.844683479904063, + "grad_norm": 0.45987945795059204, + "learning_rate": 3.075250118622308e-05, + "loss": 0.1405, + "step": 78280 + }, + { + "epoch": 2.845046878406861, + "grad_norm": 1.2310110330581665, + "learning_rate": 3.0747450988941025e-05, + "loss": 0.1569, + "step": 78290 + }, + { + "epoch": 2.845410276909659, + "grad_norm": 1.251125693321228, + "learning_rate": 3.074240054402175e-05, + "loss": 0.0897, + "step": 78300 + }, + { + "epoch": 2.8457736754124574, + "grad_norm": 0.369094580411911, + "learning_rate": 3.0737349851682876e-05, + "loss": 0.0805, + "step": 78310 + }, + { + "epoch": 2.8461370739152554, + "grad_norm": 0.8887357711791992, + "learning_rate": 3.0732298912142e-05, + "loss": 0.1093, + "step": 78320 + }, + { + "epoch": 2.846500472418054, + "grad_norm": 0.629465639591217, + "learning_rate": 3.072724772561677e-05, + "loss": 0.1114, + "step": 78330 + }, + { + "epoch": 2.846863870920852, + "grad_norm": 1.0231704711914062, + "learning_rate": 3.072219629232481e-05, + "loss": 0.1372, + "step": 78340 + }, + { + "epoch": 2.84722726942365, + "grad_norm": 0.8119713664054871, + "learning_rate": 3.071714461248377e-05, + "loss": 0.0918, + "step": 78350 + }, + { + "epoch": 2.847590667926448, + "grad_norm": 0.886022686958313, + "learning_rate": 3.071209268631131e-05, + "loss": 0.1005, + "step": 78360 + }, + { + "epoch": 2.847954066429246, + "grad_norm": 0.7678380012512207, + "learning_rate": 3.07070405140251e-05, + "loss": 0.1497, + "step": 78370 + }, + { + "epoch": 2.8483174649320446, + "grad_norm": 1.3270221948623657, + "learning_rate": 3.070198809584283e-05, + "loss": 0.1596, + "step": 78380 + }, + { + "epoch": 2.8486808634348426, + "grad_norm": 0.3739996552467346, + "learning_rate": 3.0696935431982165e-05, + "loss": 0.1301, + "step": 78390 + }, + { + "epoch": 2.8490442619376406, + "grad_norm": 1.079307198524475, + "learning_rate": 3.0691882522660834e-05, + "loss": 0.0879, + "step": 78400 + }, + { + "epoch": 2.849407660440439, + "grad_norm": 2.8571724891662598, + "learning_rate": 3.068682936809652e-05, + "loss": 0.0914, + "step": 78410 + }, + { + "epoch": 2.849771058943237, + "grad_norm": 0.8942508697509766, + "learning_rate": 3.068177596850698e-05, + "loss": 0.1367, + "step": 78420 + }, + { + "epoch": 2.8501344574460354, + "grad_norm": 0.7308377027511597, + "learning_rate": 3.0676722324109924e-05, + "loss": 0.0927, + "step": 78430 + }, + { + "epoch": 2.8504978559488334, + "grad_norm": 0.5024714469909668, + "learning_rate": 3.06716684351231e-05, + "loss": 0.114, + "step": 78440 + }, + { + "epoch": 2.8508612544516314, + "grad_norm": 0.8501279354095459, + "learning_rate": 3.066661430176426e-05, + "loss": 0.0976, + "step": 78450 + }, + { + "epoch": 2.85122465295443, + "grad_norm": 1.5030112266540527, + "learning_rate": 3.066155992425118e-05, + "loss": 0.0904, + "step": 78460 + }, + { + "epoch": 2.8515880514572283, + "grad_norm": 0.9223312139511108, + "learning_rate": 3.065650530280162e-05, + "loss": 0.088, + "step": 78470 + }, + { + "epoch": 2.8519514499600263, + "grad_norm": 0.8321495652198792, + "learning_rate": 3.0651450437633375e-05, + "loss": 0.0871, + "step": 78480 + }, + { + "epoch": 2.8523148484628242, + "grad_norm": 0.5586594343185425, + "learning_rate": 3.064639532896423e-05, + "loss": 0.0893, + "step": 78490 + }, + { + "epoch": 2.8526782469656227, + "grad_norm": 2.804748296737671, + "learning_rate": 3.064133997701201e-05, + "loss": 0.0791, + "step": 78500 + }, + { + "epoch": 2.8530416454684207, + "grad_norm": 0.7210208177566528, + "learning_rate": 3.063628438199453e-05, + "loss": 0.0806, + "step": 78510 + }, + { + "epoch": 2.853405043971219, + "grad_norm": 0.3784767687320709, + "learning_rate": 3.063122854412959e-05, + "loss": 0.1137, + "step": 78520 + }, + { + "epoch": 2.853768442474017, + "grad_norm": 0.754591703414917, + "learning_rate": 3.062617246363506e-05, + "loss": 0.0763, + "step": 78530 + }, + { + "epoch": 2.854131840976815, + "grad_norm": 0.5262603759765625, + "learning_rate": 3.062111614072877e-05, + "loss": 0.1768, + "step": 78540 + }, + { + "epoch": 2.8544952394796135, + "grad_norm": 0.6021800637245178, + "learning_rate": 3.0616059575628596e-05, + "loss": 0.0806, + "step": 78550 + }, + { + "epoch": 2.8548586379824115, + "grad_norm": 0.5301395654678345, + "learning_rate": 3.061100276855239e-05, + "loss": 0.1013, + "step": 78560 + }, + { + "epoch": 2.85522203648521, + "grad_norm": 0.6743770837783813, + "learning_rate": 3.060594571971804e-05, + "loss": 0.0996, + "step": 78570 + }, + { + "epoch": 2.855585434988008, + "grad_norm": 0.4729292094707489, + "learning_rate": 3.060088842934344e-05, + "loss": 0.1166, + "step": 78580 + }, + { + "epoch": 2.855948833490806, + "grad_norm": 1.35919189453125, + "learning_rate": 3.059583089764648e-05, + "loss": 0.0929, + "step": 78590 + }, + { + "epoch": 2.8563122319936043, + "grad_norm": 1.0586267709732056, + "learning_rate": 3.059077312484507e-05, + "loss": 0.1016, + "step": 78600 + }, + { + "epoch": 2.8563122319936043, + "eval_loss": 0.3411506116390228, + "eval_runtime": 179.4197, + "eval_samples_per_second": 41.322, + "eval_steps_per_second": 5.167, + "eval_wer": 0.1493818868335542, + "step": 78600 + }, + { + "epoch": 2.8566756304964023, + "grad_norm": 1.362656593322754, + "learning_rate": 3.0585715111157145e-05, + "loss": 0.0997, + "step": 78610 + }, + { + "epoch": 2.8570390289992007, + "grad_norm": 0.3799169361591339, + "learning_rate": 3.058065685680063e-05, + "loss": 0.0939, + "step": 78620 + }, + { + "epoch": 2.8574024275019987, + "grad_norm": 0.5108311772346497, + "learning_rate": 3.0575598361993476e-05, + "loss": 0.1104, + "step": 78630 + }, + { + "epoch": 2.8577658260047967, + "grad_norm": 0.7714293599128723, + "learning_rate": 3.057053962695361e-05, + "loss": 0.1018, + "step": 78640 + }, + { + "epoch": 2.858129224507595, + "grad_norm": 0.46769365668296814, + "learning_rate": 3.056548065189902e-05, + "loss": 0.0867, + "step": 78650 + }, + { + "epoch": 2.858492623010393, + "grad_norm": 0.6245691180229187, + "learning_rate": 3.056042143704767e-05, + "loss": 0.1026, + "step": 78660 + }, + { + "epoch": 2.8588560215131915, + "grad_norm": 0.45852673053741455, + "learning_rate": 3.055536198261755e-05, + "loss": 0.3827, + "step": 78670 + }, + { + "epoch": 2.8592194200159895, + "grad_norm": 1.1499156951904297, + "learning_rate": 3.055030228882663e-05, + "loss": 0.1123, + "step": 78680 + }, + { + "epoch": 2.8595828185187875, + "grad_norm": 0.426826536655426, + "learning_rate": 3.054524235589295e-05, + "loss": 0.095, + "step": 78690 + }, + { + "epoch": 2.859946217021586, + "grad_norm": 0.8750178217887878, + "learning_rate": 3.05401821840345e-05, + "loss": 0.0875, + "step": 78700 + }, + { + "epoch": 2.860309615524384, + "grad_norm": 0.49052000045776367, + "learning_rate": 3.053512177346932e-05, + "loss": 0.0902, + "step": 78710 + }, + { + "epoch": 2.8606730140271823, + "grad_norm": 0.41709479689598083, + "learning_rate": 3.0530061124415426e-05, + "loss": 0.1248, + "step": 78720 + }, + { + "epoch": 2.8610364125299803, + "grad_norm": 1.0585765838623047, + "learning_rate": 3.052500023709088e-05, + "loss": 0.0996, + "step": 78730 + }, + { + "epoch": 2.8613998110327783, + "grad_norm": 1.3335462808609009, + "learning_rate": 3.051993911171373e-05, + "loss": 0.1188, + "step": 78740 + }, + { + "epoch": 2.8617632095355767, + "grad_norm": 4.254768371582031, + "learning_rate": 3.051487774850204e-05, + "loss": 1.4491, + "step": 78750 + }, + { + "epoch": 2.862126608038375, + "grad_norm": 0.6429024934768677, + "learning_rate": 3.0509816147673897e-05, + "loss": 0.0981, + "step": 78760 + }, + { + "epoch": 2.862490006541173, + "grad_norm": 0.5360010266304016, + "learning_rate": 3.050475430944738e-05, + "loss": 0.1222, + "step": 78770 + }, + { + "epoch": 2.862853405043971, + "grad_norm": 0.7741981148719788, + "learning_rate": 3.0499692234040577e-05, + "loss": 0.0939, + "step": 78780 + }, + { + "epoch": 2.8632168035467696, + "grad_norm": 0.3562833368778229, + "learning_rate": 3.0494629921671614e-05, + "loss": 0.1132, + "step": 78790 + }, + { + "epoch": 2.8635802020495675, + "grad_norm": 0.3981534242630005, + "learning_rate": 3.0489567372558585e-05, + "loss": 0.1176, + "step": 78800 + }, + { + "epoch": 2.863943600552366, + "grad_norm": 0.6364398002624512, + "learning_rate": 3.0484504586919643e-05, + "loss": 0.1225, + "step": 78810 + }, + { + "epoch": 2.864306999055164, + "grad_norm": 0.9307785034179688, + "learning_rate": 3.047944156497291e-05, + "loss": 0.1134, + "step": 78820 + }, + { + "epoch": 2.864670397557962, + "grad_norm": 0.3588425815105438, + "learning_rate": 3.047437830693653e-05, + "loss": 0.1112, + "step": 78830 + }, + { + "epoch": 2.8650337960607604, + "grad_norm": 2.3050026893615723, + "learning_rate": 3.0469314813028672e-05, + "loss": 0.1121, + "step": 78840 + }, + { + "epoch": 2.8653971945635583, + "grad_norm": 3.113504648208618, + "learning_rate": 3.0464251083467492e-05, + "loss": 0.085, + "step": 78850 + }, + { + "epoch": 2.8657605930663568, + "grad_norm": 1.4952332973480225, + "learning_rate": 3.0459187118471177e-05, + "loss": 0.0785, + "step": 78860 + }, + { + "epoch": 2.8661239915691548, + "grad_norm": 3.147885322570801, + "learning_rate": 3.0454122918257915e-05, + "loss": 0.1065, + "step": 78870 + }, + { + "epoch": 2.8664873900719527, + "grad_norm": 1.8741129636764526, + "learning_rate": 3.0449058483045906e-05, + "loss": 0.1066, + "step": 78880 + }, + { + "epoch": 2.866850788574751, + "grad_norm": 0.5779256224632263, + "learning_rate": 3.044399381305335e-05, + "loss": 0.1182, + "step": 78890 + }, + { + "epoch": 2.867214187077549, + "grad_norm": 0.8008689880371094, + "learning_rate": 3.043892890849847e-05, + "loss": 0.0872, + "step": 78900 + }, + { + "epoch": 2.8675775855803476, + "grad_norm": 0.38459739089012146, + "learning_rate": 3.0433863769599498e-05, + "loss": 0.0808, + "step": 78910 + }, + { + "epoch": 2.8679409840831456, + "grad_norm": 0.524728536605835, + "learning_rate": 3.0428798396574663e-05, + "loss": 0.0871, + "step": 78920 + }, + { + "epoch": 2.8683043825859436, + "grad_norm": 0.9773525595664978, + "learning_rate": 3.042373278964223e-05, + "loss": 0.0752, + "step": 78930 + }, + { + "epoch": 2.868667781088742, + "grad_norm": 2.2767083644866943, + "learning_rate": 3.041866694902045e-05, + "loss": 0.164, + "step": 78940 + }, + { + "epoch": 2.86903117959154, + "grad_norm": 1.0827412605285645, + "learning_rate": 3.0413600874927578e-05, + "loss": 0.0826, + "step": 78950 + }, + { + "epoch": 2.8693945780943384, + "grad_norm": 0.5473418831825256, + "learning_rate": 3.040853456758192e-05, + "loss": 0.0948, + "step": 78960 + }, + { + "epoch": 2.8697579765971364, + "grad_norm": 1.4309738874435425, + "learning_rate": 3.0403468027201742e-05, + "loss": 0.1169, + "step": 78970 + }, + { + "epoch": 2.8701213750999344, + "grad_norm": 0.6939014196395874, + "learning_rate": 3.0398401254005353e-05, + "loss": 0.0897, + "step": 78980 + }, + { + "epoch": 2.870484773602733, + "grad_norm": 1.1855500936508179, + "learning_rate": 3.0393334248211064e-05, + "loss": 0.115, + "step": 78990 + }, + { + "epoch": 2.870848172105531, + "grad_norm": 0.3316340446472168, + "learning_rate": 3.0388267010037193e-05, + "loss": 0.0837, + "step": 79000 + }, + { + "epoch": 2.871211570608329, + "grad_norm": 0.5338824987411499, + "learning_rate": 3.0383199539702067e-05, + "loss": 1.6163, + "step": 79010 + }, + { + "epoch": 2.871574969111127, + "grad_norm": 0.6817033290863037, + "learning_rate": 3.0378131837424024e-05, + "loss": 0.0984, + "step": 79020 + }, + { + "epoch": 2.871938367613925, + "grad_norm": 1.228437066078186, + "learning_rate": 3.0373063903421416e-05, + "loss": 0.088, + "step": 79030 + }, + { + "epoch": 2.8723017661167236, + "grad_norm": 1.157533884048462, + "learning_rate": 3.0367995737912604e-05, + "loss": 0.1136, + "step": 79040 + }, + { + "epoch": 2.872665164619522, + "grad_norm": 0.695222795009613, + "learning_rate": 3.0362927341115954e-05, + "loss": 0.074, + "step": 79050 + }, + { + "epoch": 2.87302856312232, + "grad_norm": 0.658486008644104, + "learning_rate": 3.0357858713249844e-05, + "loss": 0.9512, + "step": 79060 + }, + { + "epoch": 2.873391961625118, + "grad_norm": 0.7332690358161926, + "learning_rate": 3.035278985453267e-05, + "loss": 0.12, + "step": 79070 + }, + { + "epoch": 2.8737553601279164, + "grad_norm": 0.8998427987098694, + "learning_rate": 3.034772076518283e-05, + "loss": 0.087, + "step": 79080 + }, + { + "epoch": 2.8741187586307144, + "grad_norm": 0.6652089953422546, + "learning_rate": 3.034265144541872e-05, + "loss": 0.1126, + "step": 79090 + }, + { + "epoch": 2.874482157133513, + "grad_norm": 0.5899316668510437, + "learning_rate": 3.0337581895458773e-05, + "loss": 0.097, + "step": 79100 + }, + { + "epoch": 2.874845555636311, + "grad_norm": 2.4437944889068604, + "learning_rate": 3.0332512115521407e-05, + "loss": 0.0967, + "step": 79110 + }, + { + "epoch": 2.875208954139109, + "grad_norm": 1.678469181060791, + "learning_rate": 3.0327442105825076e-05, + "loss": 0.1091, + "step": 79120 + }, + { + "epoch": 2.8755723526419072, + "grad_norm": 2.5215954780578613, + "learning_rate": 3.032237186658821e-05, + "loss": 0.0954, + "step": 79130 + }, + { + "epoch": 2.8759357511447052, + "grad_norm": 0.5362206697463989, + "learning_rate": 3.031730139802929e-05, + "loss": 0.0925, + "step": 79140 + }, + { + "epoch": 2.8762991496475037, + "grad_norm": 0.8719716668128967, + "learning_rate": 3.0312230700366766e-05, + "loss": 0.0721, + "step": 79150 + }, + { + "epoch": 2.8766625481503016, + "grad_norm": 0.5796521306037903, + "learning_rate": 3.030715977381912e-05, + "loss": 0.085, + "step": 79160 + }, + { + "epoch": 2.8770259466530996, + "grad_norm": 0.5834314227104187, + "learning_rate": 3.0302088618604844e-05, + "loss": 0.0967, + "step": 79170 + }, + { + "epoch": 2.877389345155898, + "grad_norm": 0.9963647127151489, + "learning_rate": 3.029701723494243e-05, + "loss": 0.1186, + "step": 79180 + }, + { + "epoch": 2.877752743658696, + "grad_norm": 0.852916419506073, + "learning_rate": 3.02919456230504e-05, + "loss": 0.1209, + "step": 79190 + }, + { + "epoch": 2.8781161421614945, + "grad_norm": 0.6775915622711182, + "learning_rate": 3.028687378314725e-05, + "loss": 0.093, + "step": 79200 + }, + { + "epoch": 2.8781161421614945, + "eval_loss": 0.3184477686882019, + "eval_runtime": 179.9919, + "eval_samples_per_second": 41.191, + "eval_steps_per_second": 5.15, + "eval_wer": 0.14663169168769402, + "step": 79200 + }, + { + "epoch": 2.8784795406642925, + "grad_norm": 1.629595160484314, + "learning_rate": 3.028180171545153e-05, + "loss": 0.0897, + "step": 79210 + }, + { + "epoch": 2.8788429391670904, + "grad_norm": 0.8693638443946838, + "learning_rate": 3.027672942018176e-05, + "loss": 0.8562, + "step": 79220 + }, + { + "epoch": 2.879206337669889, + "grad_norm": 0.8512022495269775, + "learning_rate": 3.02716568975565e-05, + "loss": 0.0919, + "step": 79230 + }, + { + "epoch": 2.879569736172687, + "grad_norm": 0.3746062219142914, + "learning_rate": 3.0266584147794295e-05, + "loss": 0.1221, + "step": 79240 + }, + { + "epoch": 2.8799331346754853, + "grad_norm": 0.9358767867088318, + "learning_rate": 3.026151117111372e-05, + "loss": 0.1888, + "step": 79250 + }, + { + "epoch": 2.8802965331782833, + "grad_norm": 0.5931565165519714, + "learning_rate": 3.025643796773335e-05, + "loss": 0.1583, + "step": 79260 + }, + { + "epoch": 2.8806599316810813, + "grad_norm": 0.6608014702796936, + "learning_rate": 3.0251364537871767e-05, + "loss": 0.1107, + "step": 79270 + }, + { + "epoch": 2.8810233301838797, + "grad_norm": 0.8677799105644226, + "learning_rate": 3.0246290881747574e-05, + "loss": 0.1313, + "step": 79280 + }, + { + "epoch": 2.8813867286866777, + "grad_norm": 1.321589469909668, + "learning_rate": 3.0241216999579368e-05, + "loss": 0.1476, + "step": 79290 + }, + { + "epoch": 2.881750127189476, + "grad_norm": 1.2019727230072021, + "learning_rate": 3.0236142891585777e-05, + "loss": 0.0955, + "step": 79300 + }, + { + "epoch": 2.882113525692274, + "grad_norm": 0.7772002220153809, + "learning_rate": 3.023106855798542e-05, + "loss": 0.0799, + "step": 79310 + }, + { + "epoch": 2.882476924195072, + "grad_norm": 0.4837106764316559, + "learning_rate": 3.0225993998996925e-05, + "loss": 0.1102, + "step": 79320 + }, + { + "epoch": 2.8828403226978705, + "grad_norm": 1.1968311071395874, + "learning_rate": 3.0220919214838946e-05, + "loss": 0.0921, + "step": 79330 + }, + { + "epoch": 2.883203721200669, + "grad_norm": 0.2687516510486603, + "learning_rate": 3.021584420573013e-05, + "loss": 0.4854, + "step": 79340 + }, + { + "epoch": 2.883567119703467, + "grad_norm": 0.661100447177887, + "learning_rate": 3.0210768971889152e-05, + "loss": 0.0993, + "step": 79350 + }, + { + "epoch": 2.883930518206265, + "grad_norm": 0.4699995219707489, + "learning_rate": 3.0205693513534672e-05, + "loss": 0.1043, + "step": 79360 + }, + { + "epoch": 2.8842939167090633, + "grad_norm": 0.5575037598609924, + "learning_rate": 3.0200617830885386e-05, + "loss": 0.113, + "step": 79370 + }, + { + "epoch": 2.8846573152118613, + "grad_norm": 0.5147402286529541, + "learning_rate": 3.0195541924159974e-05, + "loss": 0.0983, + "step": 79380 + }, + { + "epoch": 2.8850207137146597, + "grad_norm": 0.8548463582992554, + "learning_rate": 3.0190465793577155e-05, + "loss": 0.0788, + "step": 79390 + }, + { + "epoch": 2.8853841122174577, + "grad_norm": 0.46500858664512634, + "learning_rate": 3.018538943935563e-05, + "loss": 0.0641, + "step": 79400 + }, + { + "epoch": 2.8857475107202557, + "grad_norm": 0.479408860206604, + "learning_rate": 3.018031286171412e-05, + "loss": 0.1539, + "step": 79410 + }, + { + "epoch": 2.886110909223054, + "grad_norm": 0.9867441654205322, + "learning_rate": 3.0175236060871366e-05, + "loss": 0.0999, + "step": 79420 + }, + { + "epoch": 2.886474307725852, + "grad_norm": 0.8628416061401367, + "learning_rate": 3.0170159037046096e-05, + "loss": 0.1166, + "step": 79430 + }, + { + "epoch": 2.8868377062286505, + "grad_norm": 1.9563437700271606, + "learning_rate": 3.0165081790457077e-05, + "loss": 0.1211, + "step": 79440 + }, + { + "epoch": 2.8872011047314485, + "grad_norm": 0.6726937890052795, + "learning_rate": 3.0160004321323053e-05, + "loss": 0.0926, + "step": 79450 + }, + { + "epoch": 2.8875645032342465, + "grad_norm": 0.509483814239502, + "learning_rate": 3.0154926629862813e-05, + "loss": 0.0803, + "step": 79460 + }, + { + "epoch": 2.887927901737045, + "grad_norm": 0.7619544863700867, + "learning_rate": 3.014984871629512e-05, + "loss": 0.1251, + "step": 79470 + }, + { + "epoch": 2.888291300239843, + "grad_norm": 1.0748878717422485, + "learning_rate": 3.0144770580838767e-05, + "loss": 0.1073, + "step": 79480 + }, + { + "epoch": 2.8886546987426414, + "grad_norm": 0.7058316469192505, + "learning_rate": 3.0139692223712555e-05, + "loss": 0.1058, + "step": 79490 + }, + { + "epoch": 2.8890180972454393, + "grad_norm": 1.453240990638733, + "learning_rate": 3.013461364513529e-05, + "loss": 0.105, + "step": 79500 + }, + { + "epoch": 2.8893814957482373, + "grad_norm": 1.7408169507980347, + "learning_rate": 3.0129534845325803e-05, + "loss": 1.6102, + "step": 79510 + }, + { + "epoch": 2.8897448942510358, + "grad_norm": 0.4194059669971466, + "learning_rate": 3.0124455824502894e-05, + "loss": 0.0936, + "step": 79520 + }, + { + "epoch": 2.8901082927538337, + "grad_norm": 7.034887790679932, + "learning_rate": 3.0119376582885427e-05, + "loss": 0.1072, + "step": 79530 + }, + { + "epoch": 2.890471691256632, + "grad_norm": 1.7293283939361572, + "learning_rate": 3.0114297120692236e-05, + "loss": 0.1205, + "step": 79540 + }, + { + "epoch": 2.89083508975943, + "grad_norm": 3.1167123317718506, + "learning_rate": 3.0109217438142184e-05, + "loss": 0.0817, + "step": 79550 + }, + { + "epoch": 2.891198488262228, + "grad_norm": 1.0231302976608276, + "learning_rate": 3.0104137535454124e-05, + "loss": 0.1408, + "step": 79560 + }, + { + "epoch": 2.8915618867650266, + "grad_norm": 0.3560333251953125, + "learning_rate": 3.0099057412846942e-05, + "loss": 0.127, + "step": 79570 + }, + { + "epoch": 2.8919252852678246, + "grad_norm": 0.43482980132102966, + "learning_rate": 3.009397707053952e-05, + "loss": 0.079, + "step": 79580 + }, + { + "epoch": 2.892288683770623, + "grad_norm": 0.8485931158065796, + "learning_rate": 3.008889650875074e-05, + "loss": 0.1206, + "step": 79590 + }, + { + "epoch": 2.892652082273421, + "grad_norm": 2.297492504119873, + "learning_rate": 3.0083815727699526e-05, + "loss": 0.0991, + "step": 79600 + }, + { + "epoch": 2.893015480776219, + "grad_norm": 0.5528286695480347, + "learning_rate": 3.0078734727604775e-05, + "loss": 0.0757, + "step": 79610 + }, + { + "epoch": 2.8933788792790174, + "grad_norm": 0.5743618607521057, + "learning_rate": 3.0073653508685424e-05, + "loss": 0.1121, + "step": 79620 + }, + { + "epoch": 2.893742277781816, + "grad_norm": 0.8158531785011292, + "learning_rate": 3.006857207116039e-05, + "loss": 0.1045, + "step": 79630 + }, + { + "epoch": 2.894105676284614, + "grad_norm": 0.7049798369407654, + "learning_rate": 3.0063490415248613e-05, + "loss": 0.1012, + "step": 79640 + }, + { + "epoch": 2.8944690747874118, + "grad_norm": 0.3521101772785187, + "learning_rate": 3.0058408541169057e-05, + "loss": 0.0765, + "step": 79650 + }, + { + "epoch": 2.89483247329021, + "grad_norm": 1.0171892642974854, + "learning_rate": 3.0053326449140674e-05, + "loss": 0.1011, + "step": 79660 + }, + { + "epoch": 2.895195871793008, + "grad_norm": 0.5262839794158936, + "learning_rate": 3.0048244139382438e-05, + "loss": 0.1322, + "step": 79670 + }, + { + "epoch": 2.8955592702958066, + "grad_norm": 1.3580801486968994, + "learning_rate": 3.0043161612113313e-05, + "loss": 0.0946, + "step": 79680 + }, + { + "epoch": 2.8959226687986046, + "grad_norm": 1.4954004287719727, + "learning_rate": 3.0038078867552306e-05, + "loss": 0.0929, + "step": 79690 + }, + { + "epoch": 2.8962860673014026, + "grad_norm": 0.6515393257141113, + "learning_rate": 3.0032995905918405e-05, + "loss": 0.1044, + "step": 79700 + }, + { + "epoch": 2.896649465804201, + "grad_norm": 0.647331953048706, + "learning_rate": 3.002791272743061e-05, + "loss": 0.0747, + "step": 79710 + }, + { + "epoch": 2.897012864306999, + "grad_norm": 0.9838120341300964, + "learning_rate": 3.0022829332307962e-05, + "loss": 0.1169, + "step": 79720 + }, + { + "epoch": 2.8973762628097974, + "grad_norm": 0.6499975919723511, + "learning_rate": 3.001774572076945e-05, + "loss": 0.1144, + "step": 79730 + }, + { + "epoch": 2.8977396613125954, + "grad_norm": 0.8443338871002197, + "learning_rate": 3.0012661893034143e-05, + "loss": 0.1184, + "step": 79740 + }, + { + "epoch": 2.8981030598153934, + "grad_norm": 0.49458226561546326, + "learning_rate": 3.0007577849321062e-05, + "loss": 0.0847, + "step": 79750 + }, + { + "epoch": 2.898466458318192, + "grad_norm": 0.5407196283340454, + "learning_rate": 3.0002493589849272e-05, + "loss": 0.0981, + "step": 79760 + }, + { + "epoch": 2.89882985682099, + "grad_norm": 0.4729011654853821, + "learning_rate": 2.9997409114837833e-05, + "loss": 0.2098, + "step": 79770 + }, + { + "epoch": 2.8991932553237882, + "grad_norm": 0.9962542653083801, + "learning_rate": 2.9992324424505815e-05, + "loss": 0.1022, + "step": 79780 + }, + { + "epoch": 2.8995566538265862, + "grad_norm": 0.4466484487056732, + "learning_rate": 2.9987239519072296e-05, + "loss": 0.0995, + "step": 79790 + }, + { + "epoch": 2.899920052329384, + "grad_norm": 1.0443413257598877, + "learning_rate": 2.9982154398756372e-05, + "loss": 0.0956, + "step": 79800 + }, + { + "epoch": 2.899920052329384, + "eval_loss": 0.33830875158309937, + "eval_runtime": 179.6694, + "eval_samples_per_second": 41.265, + "eval_steps_per_second": 5.159, + "eval_wer": 0.14741227512843322, + "step": 79800 + }, + { + "epoch": 2.9002834508321826, + "grad_norm": 0.4343029856681824, + "learning_rate": 2.9977577606928674e-05, + "loss": 3.598, + "step": 79810 + }, + { + "epoch": 2.9006468493349806, + "grad_norm": 0.4609208405017853, + "learning_rate": 2.9972492078939808e-05, + "loss": 0.3644, + "step": 79820 + }, + { + "epoch": 2.901010247837779, + "grad_norm": 0.8700145483016968, + "learning_rate": 2.9967406336703952e-05, + "loss": 0.1192, + "step": 79830 + }, + { + "epoch": 2.901373646340577, + "grad_norm": 0.6192082762718201, + "learning_rate": 2.9962320380440228e-05, + "loss": 0.1008, + "step": 79840 + }, + { + "epoch": 2.901737044843375, + "grad_norm": 0.41174137592315674, + "learning_rate": 2.995723421036778e-05, + "loss": 0.1181, + "step": 79850 + }, + { + "epoch": 2.9021004433461735, + "grad_norm": 3.049891710281372, + "learning_rate": 2.9952147826705745e-05, + "loss": 0.0981, + "step": 79860 + }, + { + "epoch": 2.9024638418489714, + "grad_norm": 0.4502425193786621, + "learning_rate": 2.9947061229673275e-05, + "loss": 0.1306, + "step": 79870 + }, + { + "epoch": 2.90282724035177, + "grad_norm": 0.5543062686920166, + "learning_rate": 2.9941974419489545e-05, + "loss": 0.0982, + "step": 79880 + }, + { + "epoch": 2.903190638854568, + "grad_norm": 0.9228424429893494, + "learning_rate": 2.9936887396373715e-05, + "loss": 0.1304, + "step": 79890 + }, + { + "epoch": 2.903554037357366, + "grad_norm": 0.6268784999847412, + "learning_rate": 2.9931800160544975e-05, + "loss": 0.0823, + "step": 79900 + }, + { + "epoch": 2.9039174358601643, + "grad_norm": 0.6508156657218933, + "learning_rate": 2.9926712712222516e-05, + "loss": 0.0938, + "step": 79910 + }, + { + "epoch": 2.9042808343629627, + "grad_norm": 0.45321550965309143, + "learning_rate": 2.9921625051625533e-05, + "loss": 0.1246, + "step": 79920 + }, + { + "epoch": 2.9046442328657607, + "grad_norm": 0.6320390105247498, + "learning_rate": 2.9916537178973242e-05, + "loss": 0.0835, + "step": 79930 + }, + { + "epoch": 2.9050076313685587, + "grad_norm": 0.7819294929504395, + "learning_rate": 2.9911449094484852e-05, + "loss": 0.1086, + "step": 79940 + }, + { + "epoch": 2.905371029871357, + "grad_norm": 0.7224891185760498, + "learning_rate": 2.9906360798379594e-05, + "loss": 0.0825, + "step": 79950 + }, + { + "epoch": 2.905734428374155, + "grad_norm": 0.7941370606422424, + "learning_rate": 2.990127229087671e-05, + "loss": 0.0839, + "step": 79960 + }, + { + "epoch": 2.9060978268769535, + "grad_norm": 0.5782437920570374, + "learning_rate": 2.9896183572195442e-05, + "loss": 0.097, + "step": 79970 + }, + { + "epoch": 2.9064612253797515, + "grad_norm": 1.7239668369293213, + "learning_rate": 2.989109464255504e-05, + "loss": 0.0919, + "step": 79980 + }, + { + "epoch": 2.9068246238825495, + "grad_norm": 1.2387197017669678, + "learning_rate": 2.988600550217478e-05, + "loss": 0.0851, + "step": 79990 + }, + { + "epoch": 2.907188022385348, + "grad_norm": 2.639697313308716, + "learning_rate": 2.9880916151273926e-05, + "loss": 0.0829, + "step": 80000 + }, + { + "epoch": 2.907551420888146, + "grad_norm": 0.9989453554153442, + "learning_rate": 2.9875826590071754e-05, + "loss": 0.0789, + "step": 80010 + }, + { + "epoch": 2.9079148193909443, + "grad_norm": 1.7626229524612427, + "learning_rate": 2.987073681878757e-05, + "loss": 0.0905, + "step": 80020 + }, + { + "epoch": 2.9082782178937423, + "grad_norm": 2.506680488586426, + "learning_rate": 2.986564683764066e-05, + "loss": 0.1006, + "step": 80030 + }, + { + "epoch": 2.9086416163965403, + "grad_norm": 1.299718976020813, + "learning_rate": 2.9860556646850347e-05, + "loss": 0.1024, + "step": 80040 + }, + { + "epoch": 2.9090050148993387, + "grad_norm": 1.0177974700927734, + "learning_rate": 2.9855466246635943e-05, + "loss": 0.0788, + "step": 80050 + }, + { + "epoch": 2.9093684134021367, + "grad_norm": 0.6795012354850769, + "learning_rate": 2.9850375637216767e-05, + "loss": 0.1015, + "step": 80060 + }, + { + "epoch": 2.909731811904935, + "grad_norm": 0.24664345383644104, + "learning_rate": 2.9845284818812164e-05, + "loss": 0.0944, + "step": 80070 + }, + { + "epoch": 2.910095210407733, + "grad_norm": 0.5156140923500061, + "learning_rate": 2.984019379164148e-05, + "loss": 0.0917, + "step": 80080 + }, + { + "epoch": 2.910458608910531, + "grad_norm": 1.331092357635498, + "learning_rate": 2.9835102555924065e-05, + "loss": 0.1069, + "step": 80090 + }, + { + "epoch": 2.9108220074133295, + "grad_norm": 0.8283532857894897, + "learning_rate": 2.983001111187928e-05, + "loss": 0.0973, + "step": 80100 + }, + { + "epoch": 2.9111854059161275, + "grad_norm": 0.8295063972473145, + "learning_rate": 2.9824919459726507e-05, + "loss": 0.0805, + "step": 80110 + }, + { + "epoch": 2.911548804418926, + "grad_norm": 1.1419576406478882, + "learning_rate": 2.981982759968513e-05, + "loss": 0.0974, + "step": 80120 + }, + { + "epoch": 2.911912202921724, + "grad_norm": 0.5481380224227905, + "learning_rate": 2.9814735531974513e-05, + "loss": 0.1007, + "step": 80130 + }, + { + "epoch": 2.912275601424522, + "grad_norm": 1.488004207611084, + "learning_rate": 2.9809643256814092e-05, + "loss": 0.1599, + "step": 80140 + }, + { + "epoch": 2.9126389999273203, + "grad_norm": 1.346227765083313, + "learning_rate": 2.980455077442324e-05, + "loss": 0.0826, + "step": 80150 + }, + { + "epoch": 2.9130023984301183, + "grad_norm": 0.6229421496391296, + "learning_rate": 2.9799458085021396e-05, + "loss": 0.1042, + "step": 80160 + }, + { + "epoch": 2.9133657969329168, + "grad_norm": 0.7724307179450989, + "learning_rate": 2.979436518882798e-05, + "loss": 0.1068, + "step": 80170 + }, + { + "epoch": 2.9137291954357147, + "grad_norm": 0.4120637774467468, + "learning_rate": 2.9789272086062426e-05, + "loss": 0.0949, + "step": 80180 + }, + { + "epoch": 2.9140925939385127, + "grad_norm": 0.8677302598953247, + "learning_rate": 2.9784178776944178e-05, + "loss": 0.1253, + "step": 80190 + }, + { + "epoch": 2.914455992441311, + "grad_norm": 0.5817800760269165, + "learning_rate": 2.9779085261692686e-05, + "loss": 0.0754, + "step": 80200 + }, + { + "epoch": 2.9148193909441096, + "grad_norm": 2.1507725715637207, + "learning_rate": 2.977399154052742e-05, + "loss": 0.2402, + "step": 80210 + }, + { + "epoch": 2.9151827894469076, + "grad_norm": 0.4526066184043884, + "learning_rate": 2.9768897613667836e-05, + "loss": 0.1243, + "step": 80220 + }, + { + "epoch": 2.9155461879497055, + "grad_norm": 1.391575574874878, + "learning_rate": 2.976380348133343e-05, + "loss": 0.1129, + "step": 80230 + }, + { + "epoch": 2.915909586452504, + "grad_norm": 0.7592736482620239, + "learning_rate": 2.9758709143743678e-05, + "loss": 0.1376, + "step": 80240 + }, + { + "epoch": 2.916272984955302, + "grad_norm": 0.671796441078186, + "learning_rate": 2.975361460111808e-05, + "loss": 0.0608, + "step": 80250 + }, + { + "epoch": 2.9166363834581004, + "grad_norm": 2.7102513313293457, + "learning_rate": 2.9748519853676142e-05, + "loss": 0.0919, + "step": 80260 + }, + { + "epoch": 2.9169997819608984, + "grad_norm": 1.3529448509216309, + "learning_rate": 2.974342490163738e-05, + "loss": 0.1079, + "step": 80270 + }, + { + "epoch": 2.9173631804636964, + "grad_norm": 0.8166323900222778, + "learning_rate": 2.9738329745221317e-05, + "loss": 0.076, + "step": 80280 + }, + { + "epoch": 2.917726578966495, + "grad_norm": 0.824936032295227, + "learning_rate": 2.973323438464748e-05, + "loss": 0.1155, + "step": 80290 + }, + { + "epoch": 2.9180899774692928, + "grad_norm": 0.4837649166584015, + "learning_rate": 2.972813882013542e-05, + "loss": 0.0775, + "step": 80300 + }, + { + "epoch": 2.918453375972091, + "grad_norm": 1.6542714834213257, + "learning_rate": 2.9723043051904676e-05, + "loss": 0.1013, + "step": 80310 + }, + { + "epoch": 2.918816774474889, + "grad_norm": 0.2745031416416168, + "learning_rate": 2.9717947080174813e-05, + "loss": 0.0914, + "step": 80320 + }, + { + "epoch": 2.919180172977687, + "grad_norm": 1.0925464630126953, + "learning_rate": 2.9712850905165402e-05, + "loss": 0.1577, + "step": 80330 + }, + { + "epoch": 2.9195435714804856, + "grad_norm": 0.6618020534515381, + "learning_rate": 2.9707754527096004e-05, + "loss": 0.1105, + "step": 80340 + }, + { + "epoch": 2.9199069699832836, + "grad_norm": 0.9204813838005066, + "learning_rate": 2.9702657946186223e-05, + "loss": 0.0855, + "step": 80350 + }, + { + "epoch": 2.920270368486082, + "grad_norm": 0.6669716835021973, + "learning_rate": 2.9697561162655634e-05, + "loss": 0.1003, + "step": 80360 + }, + { + "epoch": 2.92063376698888, + "grad_norm": 0.4183257818222046, + "learning_rate": 2.9692464176723855e-05, + "loss": 0.0998, + "step": 80370 + }, + { + "epoch": 2.920997165491678, + "grad_norm": 6.260075569152832, + "learning_rate": 2.9687366988610493e-05, + "loss": 0.1007, + "step": 80380 + }, + { + "epoch": 2.9213605639944764, + "grad_norm": 2.819657325744629, + "learning_rate": 2.9682269598535162e-05, + "loss": 0.0982, + "step": 80390 + }, + { + "epoch": 2.9217239624972744, + "grad_norm": 0.5224602818489075, + "learning_rate": 2.967717200671749e-05, + "loss": 0.1009, + "step": 80400 + }, + { + "epoch": 2.9217239624972744, + "eval_loss": 0.3313393294811249, + "eval_runtime": 180.6766, + "eval_samples_per_second": 41.035, + "eval_steps_per_second": 5.131, + "eval_wer": 0.14564234755931527, + "step": 80400 + }, + { + "epoch": 2.922087361000073, + "grad_norm": 0.504077672958374, + "learning_rate": 2.9672074213377122e-05, + "loss": 0.0892, + "step": 80410 + }, + { + "epoch": 2.922450759502871, + "grad_norm": 0.34101128578186035, + "learning_rate": 2.96669762187337e-05, + "loss": 0.1225, + "step": 80420 + }, + { + "epoch": 2.922814158005669, + "grad_norm": 0.7131312489509583, + "learning_rate": 2.966187802300688e-05, + "loss": 0.0804, + "step": 80430 + }, + { + "epoch": 2.9231775565084672, + "grad_norm": 1.1708521842956543, + "learning_rate": 2.9656779626416314e-05, + "loss": 0.1506, + "step": 80440 + }, + { + "epoch": 2.923540955011265, + "grad_norm": 1.453748106956482, + "learning_rate": 2.965168102918169e-05, + "loss": 0.1353, + "step": 80450 + }, + { + "epoch": 2.9239043535140636, + "grad_norm": 0.7086220979690552, + "learning_rate": 2.964658223152268e-05, + "loss": 0.0892, + "step": 80460 + }, + { + "epoch": 2.9242677520168616, + "grad_norm": 0.8955743312835693, + "learning_rate": 2.9641483233658973e-05, + "loss": 0.0989, + "step": 80470 + }, + { + "epoch": 2.9246311505196596, + "grad_norm": 0.5515048503875732, + "learning_rate": 2.9636384035810265e-05, + "loss": 0.1187, + "step": 80480 + }, + { + "epoch": 2.924994549022458, + "grad_norm": 1.164106011390686, + "learning_rate": 2.963128463819627e-05, + "loss": 0.1057, + "step": 80490 + }, + { + "epoch": 2.9253579475252565, + "grad_norm": 0.8334591388702393, + "learning_rate": 2.9626185041036696e-05, + "loss": 0.4326, + "step": 80500 + }, + { + "epoch": 2.9257213460280544, + "grad_norm": 0.49100545048713684, + "learning_rate": 2.9621085244551272e-05, + "loss": 0.087, + "step": 80510 + }, + { + "epoch": 2.9260847445308524, + "grad_norm": 0.5234443545341492, + "learning_rate": 2.9615985248959722e-05, + "loss": 0.0982, + "step": 80520 + }, + { + "epoch": 2.926448143033651, + "grad_norm": 1.0264884233474731, + "learning_rate": 2.961088505448179e-05, + "loss": 0.0965, + "step": 80530 + }, + { + "epoch": 2.926811541536449, + "grad_norm": 0.7391953468322754, + "learning_rate": 2.9605784661337233e-05, + "loss": 0.4112, + "step": 80540 + }, + { + "epoch": 2.9271749400392473, + "grad_norm": 4.123349666595459, + "learning_rate": 2.9600684069745803e-05, + "loss": 0.0957, + "step": 80550 + }, + { + "epoch": 2.9275383385420453, + "grad_norm": 2.021984338760376, + "learning_rate": 2.9595583279927257e-05, + "loss": 0.0952, + "step": 80560 + }, + { + "epoch": 2.9279017370448432, + "grad_norm": 0.546364426612854, + "learning_rate": 2.9590482292101383e-05, + "loss": 0.1167, + "step": 80570 + }, + { + "epoch": 2.9282651355476417, + "grad_norm": 0.6582480669021606, + "learning_rate": 2.9585381106487963e-05, + "loss": 0.1149, + "step": 80580 + }, + { + "epoch": 2.9286285340504397, + "grad_norm": 0.6442203521728516, + "learning_rate": 2.958027972330678e-05, + "loss": 0.114, + "step": 80590 + }, + { + "epoch": 2.928991932553238, + "grad_norm": 0.36104217171669006, + "learning_rate": 2.9575178142777643e-05, + "loss": 0.1073, + "step": 80600 + }, + { + "epoch": 2.929355331056036, + "grad_norm": 1.933703064918518, + "learning_rate": 2.9570076365120363e-05, + "loss": 0.1025, + "step": 80610 + }, + { + "epoch": 2.929718729558834, + "grad_norm": 0.6946010589599609, + "learning_rate": 2.956497439055475e-05, + "loss": 0.098, + "step": 80620 + }, + { + "epoch": 2.9300821280616325, + "grad_norm": 0.6851116418838501, + "learning_rate": 2.9559872219300633e-05, + "loss": 0.089, + "step": 80630 + }, + { + "epoch": 2.9304455265644305, + "grad_norm": 0.7841261625289917, + "learning_rate": 2.9554769851577847e-05, + "loss": 0.0956, + "step": 80640 + }, + { + "epoch": 2.930808925067229, + "grad_norm": 0.7110610008239746, + "learning_rate": 2.9549667287606225e-05, + "loss": 3.0796, + "step": 80650 + }, + { + "epoch": 2.931172323570027, + "grad_norm": 1.172564148902893, + "learning_rate": 2.954456452760564e-05, + "loss": 0.1005, + "step": 80660 + }, + { + "epoch": 2.931535722072825, + "grad_norm": 0.44309452176094055, + "learning_rate": 2.9539461571795928e-05, + "loss": 0.1204, + "step": 80670 + }, + { + "epoch": 2.9318991205756233, + "grad_norm": 1.936556339263916, + "learning_rate": 2.9534358420396978e-05, + "loss": 0.1174, + "step": 80680 + }, + { + "epoch": 2.9322625190784213, + "grad_norm": 0.7564278841018677, + "learning_rate": 2.9529255073628653e-05, + "loss": 0.1195, + "step": 80690 + }, + { + "epoch": 2.9326259175812197, + "grad_norm": 0.443446546792984, + "learning_rate": 2.952415153171084e-05, + "loss": 0.081, + "step": 80700 + }, + { + "epoch": 2.9329893160840177, + "grad_norm": 1.2691240310668945, + "learning_rate": 2.9519047794863434e-05, + "loss": 0.0897, + "step": 80710 + }, + { + "epoch": 2.9333527145868157, + "grad_norm": 0.9182204008102417, + "learning_rate": 2.9513943863306337e-05, + "loss": 0.1097, + "step": 80720 + }, + { + "epoch": 2.933716113089614, + "grad_norm": 0.8882256150245667, + "learning_rate": 2.950883973725947e-05, + "loss": 0.0928, + "step": 80730 + }, + { + "epoch": 2.934079511592412, + "grad_norm": 0.7944663166999817, + "learning_rate": 2.9503735416942735e-05, + "loss": 0.0942, + "step": 80740 + }, + { + "epoch": 2.9344429100952105, + "grad_norm": 0.4034799337387085, + "learning_rate": 2.9498630902576057e-05, + "loss": 0.0881, + "step": 80750 + }, + { + "epoch": 2.9348063085980085, + "grad_norm": 0.37826791405677795, + "learning_rate": 2.949352619437939e-05, + "loss": 0.0925, + "step": 80760 + }, + { + "epoch": 2.9351697071008065, + "grad_norm": 0.5798398852348328, + "learning_rate": 2.948842129257266e-05, + "loss": 0.2732, + "step": 80770 + }, + { + "epoch": 2.935533105603605, + "grad_norm": 1.879731297492981, + "learning_rate": 2.948331619737583e-05, + "loss": 0.1367, + "step": 80780 + }, + { + "epoch": 2.9358965041064033, + "grad_norm": 0.9150714874267578, + "learning_rate": 2.9478210909008856e-05, + "loss": 0.1096, + "step": 80790 + }, + { + "epoch": 2.9362599026092013, + "grad_norm": 1.1447314023971558, + "learning_rate": 2.947310542769171e-05, + "loss": 0.0902, + "step": 80800 + }, + { + "epoch": 2.9366233011119993, + "grad_norm": 4.874231338500977, + "learning_rate": 2.9467999753644367e-05, + "loss": 0.1206, + "step": 80810 + }, + { + "epoch": 2.9369866996147977, + "grad_norm": 0.6774386763572693, + "learning_rate": 2.9462893887086807e-05, + "loss": 1.3499, + "step": 80820 + }, + { + "epoch": 2.9373500981175957, + "grad_norm": 1.529350996017456, + "learning_rate": 2.945778782823903e-05, + "loss": 0.0881, + "step": 80830 + }, + { + "epoch": 2.937713496620394, + "grad_norm": 1.6456125974655151, + "learning_rate": 2.945268157732104e-05, + "loss": 0.1505, + "step": 80840 + }, + { + "epoch": 2.938076895123192, + "grad_norm": 0.8206561207771301, + "learning_rate": 2.944757513455284e-05, + "loss": 0.0873, + "step": 80850 + }, + { + "epoch": 2.93844029362599, + "grad_norm": 1.830176830291748, + "learning_rate": 2.9442468500154453e-05, + "loss": 0.1002, + "step": 80860 + }, + { + "epoch": 2.9388036921287886, + "grad_norm": 1.3282181024551392, + "learning_rate": 2.9437361674345905e-05, + "loss": 0.113, + "step": 80870 + }, + { + "epoch": 2.9391670906315865, + "grad_norm": 1.0826009511947632, + "learning_rate": 2.943225465734723e-05, + "loss": 0.0938, + "step": 80880 + }, + { + "epoch": 2.939530489134385, + "grad_norm": 0.4472619891166687, + "learning_rate": 2.9427147449378467e-05, + "loss": 0.0999, + "step": 80890 + }, + { + "epoch": 2.939893887637183, + "grad_norm": 0.5009836554527283, + "learning_rate": 2.9422040050659682e-05, + "loss": 0.0867, + "step": 80900 + }, + { + "epoch": 2.940257286139981, + "grad_norm": 1.1963528394699097, + "learning_rate": 2.941693246141092e-05, + "loss": 0.1074, + "step": 80910 + }, + { + "epoch": 2.9406206846427794, + "grad_norm": 0.3960399627685547, + "learning_rate": 2.941182468185225e-05, + "loss": 0.1298, + "step": 80920 + }, + { + "epoch": 2.9409840831455774, + "grad_norm": 0.7249006032943726, + "learning_rate": 2.940671671220376e-05, + "loss": 0.093, + "step": 80930 + }, + { + "epoch": 2.941347481648376, + "grad_norm": 0.5996330976486206, + "learning_rate": 2.9401608552685517e-05, + "loss": 0.1213, + "step": 80940 + }, + { + "epoch": 2.9417108801511738, + "grad_norm": 1.9566066265106201, + "learning_rate": 2.9396500203517628e-05, + "loss": 0.0795, + "step": 80950 + }, + { + "epoch": 2.9420742786539718, + "grad_norm": 1.3713960647583008, + "learning_rate": 2.9391391664920186e-05, + "loss": 0.0879, + "step": 80960 + }, + { + "epoch": 2.94243767715677, + "grad_norm": 0.639777660369873, + "learning_rate": 2.9386282937113306e-05, + "loss": 0.121, + "step": 80970 + }, + { + "epoch": 2.942801075659568, + "grad_norm": 0.6077538728713989, + "learning_rate": 2.9381174020317093e-05, + "loss": 0.1197, + "step": 80980 + }, + { + "epoch": 2.9431644741623666, + "grad_norm": 1.1664655208587646, + "learning_rate": 2.937606491475169e-05, + "loss": 0.1132, + "step": 80990 + }, + { + "epoch": 2.9435278726651646, + "grad_norm": 0.44675686955451965, + "learning_rate": 2.9370955620637213e-05, + "loss": 0.0951, + "step": 81000 + }, + { + "epoch": 2.9435278726651646, + "eval_loss": 0.3302467167377472, + "eval_runtime": 180.9443, + "eval_samples_per_second": 40.974, + "eval_steps_per_second": 5.123, + "eval_wer": 0.14665892134260353, + "step": 81000 + }, + { + "epoch": 2.9438912711679626, + "grad_norm": 0.5193939208984375, + "learning_rate": 2.936584613819381e-05, + "loss": 0.0736, + "step": 81010 + }, + { + "epoch": 2.944254669670761, + "grad_norm": 4.043830394744873, + "learning_rate": 2.9360736467641632e-05, + "loss": 0.146, + "step": 81020 + }, + { + "epoch": 2.944618068173559, + "grad_norm": 0.8960684537887573, + "learning_rate": 2.9355626609200832e-05, + "loss": 0.0933, + "step": 81030 + }, + { + "epoch": 2.9449814666763574, + "grad_norm": 0.4079131782054901, + "learning_rate": 2.9350516563091586e-05, + "loss": 0.1555, + "step": 81040 + }, + { + "epoch": 2.9453448651791554, + "grad_norm": 0.5445089936256409, + "learning_rate": 2.934540632953406e-05, + "loss": 0.1163, + "step": 81050 + }, + { + "epoch": 2.9457082636819534, + "grad_norm": 4.034743309020996, + "learning_rate": 2.934029590874843e-05, + "loss": 0.0817, + "step": 81060 + }, + { + "epoch": 2.946071662184752, + "grad_norm": 1.0341869592666626, + "learning_rate": 2.933518530095489e-05, + "loss": 0.1143, + "step": 81070 + }, + { + "epoch": 2.9464350606875502, + "grad_norm": 1.2305265665054321, + "learning_rate": 2.9330074506373644e-05, + "loss": 0.0945, + "step": 81080 + }, + { + "epoch": 2.946798459190348, + "grad_norm": 1.0462018251419067, + "learning_rate": 2.9324963525224897e-05, + "loss": 0.1181, + "step": 81090 + }, + { + "epoch": 2.947161857693146, + "grad_norm": 1.0071947574615479, + "learning_rate": 2.9319852357728857e-05, + "loss": 0.0935, + "step": 81100 + }, + { + "epoch": 2.9475252561959446, + "grad_norm": 0.3526189625263214, + "learning_rate": 2.931474100410575e-05, + "loss": 0.0801, + "step": 81110 + }, + { + "epoch": 2.9478886546987426, + "grad_norm": 0.22515632212162018, + "learning_rate": 2.9309629464575804e-05, + "loss": 0.0955, + "step": 81120 + }, + { + "epoch": 2.948252053201541, + "grad_norm": 1.0929369926452637, + "learning_rate": 2.930451773935926e-05, + "loss": 0.1307, + "step": 81130 + }, + { + "epoch": 2.948615451704339, + "grad_norm": 1.116426944732666, + "learning_rate": 2.9299405828676364e-05, + "loss": 0.1067, + "step": 81140 + }, + { + "epoch": 2.948978850207137, + "grad_norm": 1.081275224685669, + "learning_rate": 2.9294293732747358e-05, + "loss": 0.09, + "step": 81150 + }, + { + "epoch": 2.9493422487099354, + "grad_norm": 0.4264758825302124, + "learning_rate": 2.9289181451792526e-05, + "loss": 0.0715, + "step": 81160 + }, + { + "epoch": 2.9497056472127334, + "grad_norm": 0.48512741923332214, + "learning_rate": 2.928406898603212e-05, + "loss": 0.0862, + "step": 81170 + }, + { + "epoch": 2.950069045715532, + "grad_norm": 1.373490571975708, + "learning_rate": 2.9278956335686426e-05, + "loss": 0.0913, + "step": 81180 + }, + { + "epoch": 2.95043244421833, + "grad_norm": 0.8015252351760864, + "learning_rate": 2.9273843500975728e-05, + "loss": 0.1061, + "step": 81190 + }, + { + "epoch": 2.950795842721128, + "grad_norm": 1.1844451427459717, + "learning_rate": 2.926873048212032e-05, + "loss": 0.0869, + "step": 81200 + }, + { + "epoch": 2.9511592412239263, + "grad_norm": 0.6692541241645813, + "learning_rate": 2.9263617279340504e-05, + "loss": 0.0701, + "step": 81210 + }, + { + "epoch": 2.9515226397267242, + "grad_norm": 0.5705549120903015, + "learning_rate": 2.9258503892856585e-05, + "loss": 0.0923, + "step": 81220 + }, + { + "epoch": 2.9518860382295227, + "grad_norm": 3.464637041091919, + "learning_rate": 2.925339032288889e-05, + "loss": 0.0887, + "step": 81230 + }, + { + "epoch": 2.9522494367323207, + "grad_norm": 1.3816555738449097, + "learning_rate": 2.9248276569657745e-05, + "loss": 0.1061, + "step": 81240 + }, + { + "epoch": 2.9526128352351186, + "grad_norm": 0.42636001110076904, + "learning_rate": 2.9243162633383463e-05, + "loss": 0.0889, + "step": 81250 + }, + { + "epoch": 2.952976233737917, + "grad_norm": 1.3730337619781494, + "learning_rate": 2.9238048514286413e-05, + "loss": 0.0846, + "step": 81260 + }, + { + "epoch": 2.953339632240715, + "grad_norm": 0.708302915096283, + "learning_rate": 2.9232934212586925e-05, + "loss": 0.1031, + "step": 81270 + }, + { + "epoch": 2.9537030307435135, + "grad_norm": 0.8678138852119446, + "learning_rate": 2.9227819728505367e-05, + "loss": 0.1032, + "step": 81280 + }, + { + "epoch": 2.9540664292463115, + "grad_norm": 1.3964858055114746, + "learning_rate": 2.9222705062262096e-05, + "loss": 0.0804, + "step": 81290 + }, + { + "epoch": 2.9544298277491094, + "grad_norm": 0.6818703413009644, + "learning_rate": 2.921759021407749e-05, + "loss": 0.0871, + "step": 81300 + }, + { + "epoch": 2.954793226251908, + "grad_norm": 1.078911542892456, + "learning_rate": 2.9212475184171932e-05, + "loss": 0.084, + "step": 81310 + }, + { + "epoch": 2.955156624754706, + "grad_norm": 9.535181999206543, + "learning_rate": 2.9207359972765795e-05, + "loss": 0.0994, + "step": 81320 + }, + { + "epoch": 2.9555200232575043, + "grad_norm": 0.8145607113838196, + "learning_rate": 2.9202244580079496e-05, + "loss": 0.0932, + "step": 81330 + }, + { + "epoch": 2.9558834217603023, + "grad_norm": 0.7624212503433228, + "learning_rate": 2.919712900633343e-05, + "loss": 0.0864, + "step": 81340 + }, + { + "epoch": 2.9562468202631003, + "grad_norm": 0.8702255487442017, + "learning_rate": 2.9192013251748007e-05, + "loss": 0.0634, + "step": 81350 + }, + { + "epoch": 2.9566102187658987, + "grad_norm": 0.5847821235656738, + "learning_rate": 2.9186897316543644e-05, + "loss": 0.0777, + "step": 81360 + }, + { + "epoch": 2.956973617268697, + "grad_norm": 0.6588742136955261, + "learning_rate": 2.9181781200940776e-05, + "loss": 0.1158, + "step": 81370 + }, + { + "epoch": 2.957337015771495, + "grad_norm": 0.8579931855201721, + "learning_rate": 2.917666490515984e-05, + "loss": 0.0941, + "step": 81380 + }, + { + "epoch": 2.957700414274293, + "grad_norm": 0.9896190166473389, + "learning_rate": 2.9171548429421264e-05, + "loss": 0.1239, + "step": 81390 + }, + { + "epoch": 2.9580638127770915, + "grad_norm": 3.1119399070739746, + "learning_rate": 2.9166431773945514e-05, + "loss": 0.0973, + "step": 81400 + }, + { + "epoch": 2.9584272112798895, + "grad_norm": 0.6430637240409851, + "learning_rate": 2.9161314938953037e-05, + "loss": 0.0964, + "step": 81410 + }, + { + "epoch": 2.958790609782688, + "grad_norm": 0.16759249567985535, + "learning_rate": 2.9156197924664312e-05, + "loss": 0.0869, + "step": 81420 + }, + { + "epoch": 2.959154008285486, + "grad_norm": 0.5628185272216797, + "learning_rate": 2.915108073129981e-05, + "loss": 0.0885, + "step": 81430 + }, + { + "epoch": 2.959517406788284, + "grad_norm": 1.6805976629257202, + "learning_rate": 2.914596335908e-05, + "loss": 0.1402, + "step": 81440 + }, + { + "epoch": 2.9598808052910823, + "grad_norm": 0.996425986289978, + "learning_rate": 2.9140845808225388e-05, + "loss": 0.0823, + "step": 81450 + }, + { + "epoch": 2.9602442037938803, + "grad_norm": 0.6585590243339539, + "learning_rate": 2.9135728078956453e-05, + "loss": 0.0735, + "step": 81460 + }, + { + "epoch": 2.9606076022966787, + "grad_norm": 12.562392234802246, + "learning_rate": 2.9130610171493722e-05, + "loss": 0.1128, + "step": 81470 + }, + { + "epoch": 2.9609710007994767, + "grad_norm": 0.5774283409118652, + "learning_rate": 2.9125492086057682e-05, + "loss": 0.0844, + "step": 81480 + }, + { + "epoch": 2.9613343993022747, + "grad_norm": 0.5809444785118103, + "learning_rate": 2.912037382286888e-05, + "loss": 0.1085, + "step": 81490 + }, + { + "epoch": 2.961697797805073, + "grad_norm": 0.5010820627212524, + "learning_rate": 2.9115255382147827e-05, + "loss": 0.0803, + "step": 81500 + }, + { + "epoch": 2.962061196307871, + "grad_norm": 1.3801108598709106, + "learning_rate": 2.9110136764115055e-05, + "loss": 0.0661, + "step": 81510 + }, + { + "epoch": 2.9624245948106696, + "grad_norm": 1.027056336402893, + "learning_rate": 2.9105017968991123e-05, + "loss": 0.109, + "step": 81520 + }, + { + "epoch": 2.9627879933134675, + "grad_norm": 0.978444516658783, + "learning_rate": 2.909989899699656e-05, + "loss": 0.0935, + "step": 81530 + }, + { + "epoch": 2.9631513918162655, + "grad_norm": 2.2912325859069824, + "learning_rate": 2.909477984835195e-05, + "loss": 0.0905, + "step": 81540 + }, + { + "epoch": 2.963514790319064, + "grad_norm": 1.8258506059646606, + "learning_rate": 2.9089660523277833e-05, + "loss": 0.0858, + "step": 81550 + }, + { + "epoch": 2.963878188821862, + "grad_norm": 1.961013674736023, + "learning_rate": 2.9084541021994804e-05, + "loss": 0.0858, + "step": 81560 + }, + { + "epoch": 2.9642415873246604, + "grad_norm": 0.25523585081100464, + "learning_rate": 2.9079421344723435e-05, + "loss": 0.0943, + "step": 81570 + }, + { + "epoch": 2.9646049858274583, + "grad_norm": 0.8947268724441528, + "learning_rate": 2.9074301491684313e-05, + "loss": 0.1359, + "step": 81580 + }, + { + "epoch": 2.9649683843302563, + "grad_norm": 0.5048563480377197, + "learning_rate": 2.906918146309804e-05, + "loss": 0.101, + "step": 81590 + }, + { + "epoch": 2.9653317828330548, + "grad_norm": 0.5647857785224915, + "learning_rate": 2.9064061259185206e-05, + "loss": 0.0856, + "step": 81600 + }, + { + "epoch": 2.9653317828330548, + "eval_loss": 0.3227691650390625, + "eval_runtime": 180.1665, + "eval_samples_per_second": 41.151, + "eval_steps_per_second": 5.145, + "eval_wer": 0.14830177718881044, + "step": 81600 + }, + { + "epoch": 2.9656951813358527, + "grad_norm": 0.39782488346099854, + "learning_rate": 2.905894088016644e-05, + "loss": 0.0894, + "step": 81610 + }, + { + "epoch": 2.966058579838651, + "grad_norm": 0.8222696185112, + "learning_rate": 2.9053820326262354e-05, + "loss": 0.1044, + "step": 81620 + }, + { + "epoch": 2.966421978341449, + "grad_norm": 1.1362643241882324, + "learning_rate": 2.904869959769357e-05, + "loss": 0.0877, + "step": 81630 + }, + { + "epoch": 2.966785376844247, + "grad_norm": 1.2552587985992432, + "learning_rate": 2.904357869468073e-05, + "loss": 0.1026, + "step": 81640 + }, + { + "epoch": 2.9671487753470456, + "grad_norm": 0.7745124101638794, + "learning_rate": 2.9038457617444466e-05, + "loss": 0.0873, + "step": 81650 + }, + { + "epoch": 2.967512173849844, + "grad_norm": 2.436732769012451, + "learning_rate": 2.9033336366205432e-05, + "loss": 0.0781, + "step": 81660 + }, + { + "epoch": 2.967875572352642, + "grad_norm": 0.5839672684669495, + "learning_rate": 2.9028214941184284e-05, + "loss": 0.1152, + "step": 81670 + }, + { + "epoch": 2.96823897085544, + "grad_norm": 0.9086770415306091, + "learning_rate": 2.9023093342601694e-05, + "loss": 0.0919, + "step": 81680 + }, + { + "epoch": 2.9686023693582384, + "grad_norm": 0.9452347159385681, + "learning_rate": 2.901797157067832e-05, + "loss": 0.1022, + "step": 81690 + }, + { + "epoch": 2.9689657678610364, + "grad_norm": 0.7514773607254028, + "learning_rate": 2.9012849625634847e-05, + "loss": 1.7501, + "step": 81700 + }, + { + "epoch": 2.969329166363835, + "grad_norm": 1.142943263053894, + "learning_rate": 2.9007727507691966e-05, + "loss": 0.0906, + "step": 81710 + }, + { + "epoch": 2.969692564866633, + "grad_norm": 1.344696283340454, + "learning_rate": 2.9002605217070363e-05, + "loss": 0.106, + "step": 81720 + }, + { + "epoch": 2.970055963369431, + "grad_norm": 0.7408128976821899, + "learning_rate": 2.899748275399074e-05, + "loss": 0.1039, + "step": 81730 + }, + { + "epoch": 2.970419361872229, + "grad_norm": 1.7010142803192139, + "learning_rate": 2.8992360118673816e-05, + "loss": 0.1071, + "step": 81740 + }, + { + "epoch": 2.970782760375027, + "grad_norm": 1.1163867712020874, + "learning_rate": 2.8987237311340286e-05, + "loss": 0.0827, + "step": 81750 + }, + { + "epoch": 2.9711461588778256, + "grad_norm": 2.118901491165161, + "learning_rate": 2.8982114332210903e-05, + "loss": 0.1033, + "step": 81760 + }, + { + "epoch": 2.9715095573806236, + "grad_norm": 0.469307541847229, + "learning_rate": 2.897699118150637e-05, + "loss": 0.1066, + "step": 81770 + }, + { + "epoch": 2.9718729558834216, + "grad_norm": 0.7060539722442627, + "learning_rate": 2.8971867859447444e-05, + "loss": 0.1095, + "step": 81780 + }, + { + "epoch": 2.97223635438622, + "grad_norm": 1.0039600133895874, + "learning_rate": 2.8966744366254856e-05, + "loss": 0.1412, + "step": 81790 + }, + { + "epoch": 2.972599752889018, + "grad_norm": 1.0242499113082886, + "learning_rate": 2.8961620702149373e-05, + "loss": 0.0833, + "step": 81800 + }, + { + "epoch": 2.9729631513918164, + "grad_norm": 0.5664736032485962, + "learning_rate": 2.8956496867351752e-05, + "loss": 0.0793, + "step": 81810 + }, + { + "epoch": 2.9733265498946144, + "grad_norm": 0.5009458065032959, + "learning_rate": 2.8951372862082753e-05, + "loss": 0.0925, + "step": 81820 + }, + { + "epoch": 2.9736899483974124, + "grad_norm": 1.5864497423171997, + "learning_rate": 2.894624868656316e-05, + "loss": 0.09, + "step": 81830 + }, + { + "epoch": 2.974053346900211, + "grad_norm": 4.366128444671631, + "learning_rate": 2.894112434101375e-05, + "loss": 0.2078, + "step": 81840 + }, + { + "epoch": 2.974416745403009, + "grad_norm": 0.7028205394744873, + "learning_rate": 2.8935999825655312e-05, + "loss": 0.2251, + "step": 81850 + }, + { + "epoch": 2.9747801439058073, + "grad_norm": 0.5308458805084229, + "learning_rate": 2.8930875140708645e-05, + "loss": 0.0903, + "step": 81860 + }, + { + "epoch": 2.9751435424086052, + "grad_norm": 3.357011318206787, + "learning_rate": 2.892575028639456e-05, + "loss": 0.1105, + "step": 81870 + }, + { + "epoch": 2.975506940911403, + "grad_norm": 0.9237788319587708, + "learning_rate": 2.8920625262933864e-05, + "loss": 0.0774, + "step": 81880 + }, + { + "epoch": 2.9758703394142016, + "grad_norm": 0.480029821395874, + "learning_rate": 2.891550007054737e-05, + "loss": 0.1154, + "step": 81890 + }, + { + "epoch": 2.9762337379169996, + "grad_norm": 0.6338282823562622, + "learning_rate": 2.891037470945591e-05, + "loss": 4.1728, + "step": 81900 + }, + { + "epoch": 2.976597136419798, + "grad_norm": 0.628488302230835, + "learning_rate": 2.8905249179880318e-05, + "loss": 0.078, + "step": 81910 + }, + { + "epoch": 2.976960534922596, + "grad_norm": 1.6177397966384888, + "learning_rate": 2.8900123482041437e-05, + "loss": 0.089, + "step": 81920 + }, + { + "epoch": 2.977323933425394, + "grad_norm": 1.245584487915039, + "learning_rate": 2.8894997616160103e-05, + "loss": 0.1122, + "step": 81930 + }, + { + "epoch": 2.9776873319281925, + "grad_norm": 0.6176816821098328, + "learning_rate": 2.888987158245719e-05, + "loss": 0.0911, + "step": 81940 + }, + { + "epoch": 2.978050730430991, + "grad_norm": 0.7683790326118469, + "learning_rate": 2.888474538115355e-05, + "loss": 2.4532, + "step": 81950 + }, + { + "epoch": 2.978414128933789, + "grad_norm": 0.7443512082099915, + "learning_rate": 2.8879619012470045e-05, + "loss": 0.109, + "step": 81960 + }, + { + "epoch": 2.978777527436587, + "grad_norm": 1.6451610326766968, + "learning_rate": 2.8874492476627568e-05, + "loss": 0.1023, + "step": 81970 + }, + { + "epoch": 2.9791409259393853, + "grad_norm": 0.39093294739723206, + "learning_rate": 2.886936577384699e-05, + "loss": 0.0989, + "step": 81980 + }, + { + "epoch": 2.9795043244421833, + "grad_norm": 0.4320976436138153, + "learning_rate": 2.886423890434922e-05, + "loss": 0.1103, + "step": 81990 + }, + { + "epoch": 2.9798677229449817, + "grad_norm": 0.45375579595565796, + "learning_rate": 2.8859111868355128e-05, + "loss": 0.1922, + "step": 82000 + }, + { + "epoch": 2.9802311214477797, + "grad_norm": 1.53719162940979, + "learning_rate": 2.8853984666085644e-05, + "loss": 0.5349, + "step": 82010 + }, + { + "epoch": 2.9805945199505777, + "grad_norm": 0.5638075470924377, + "learning_rate": 2.8848857297761676e-05, + "loss": 0.1004, + "step": 82020 + }, + { + "epoch": 2.980957918453376, + "grad_norm": 0.7393288612365723, + "learning_rate": 2.8843729763604138e-05, + "loss": 0.1003, + "step": 82030 + }, + { + "epoch": 2.981321316956174, + "grad_norm": 0.5188916325569153, + "learning_rate": 2.8838602063833962e-05, + "loss": 0.109, + "step": 82040 + }, + { + "epoch": 2.9816847154589725, + "grad_norm": 1.9435770511627197, + "learning_rate": 2.883347419867208e-05, + "loss": 0.0678, + "step": 82050 + }, + { + "epoch": 2.9820481139617705, + "grad_norm": 0.795820415019989, + "learning_rate": 2.882834616833944e-05, + "loss": 0.0863, + "step": 82060 + }, + { + "epoch": 2.9824115124645685, + "grad_norm": 0.5096336007118225, + "learning_rate": 2.882321797305697e-05, + "loss": 0.0888, + "step": 82070 + }, + { + "epoch": 2.982774910967367, + "grad_norm": 1.0155811309814453, + "learning_rate": 2.881808961304565e-05, + "loss": 0.0809, + "step": 82080 + }, + { + "epoch": 2.983138309470165, + "grad_norm": 0.9700034856796265, + "learning_rate": 2.8812961088526436e-05, + "loss": 0.1124, + "step": 82090 + }, + { + "epoch": 2.9835017079729633, + "grad_norm": 1.9299287796020508, + "learning_rate": 2.8807832399720292e-05, + "loss": 0.0942, + "step": 82100 + }, + { + "epoch": 2.9838651064757613, + "grad_norm": 0.7534053325653076, + "learning_rate": 2.8802703546848204e-05, + "loss": 0.1195, + "step": 82110 + }, + { + "epoch": 2.9842285049785593, + "grad_norm": 1.0273375511169434, + "learning_rate": 2.8797574530131138e-05, + "loss": 0.1052, + "step": 82120 + }, + { + "epoch": 2.9845919034813577, + "grad_norm": 0.5971968173980713, + "learning_rate": 2.8792445349790108e-05, + "loss": 0.0975, + "step": 82130 + }, + { + "epoch": 2.9849553019841557, + "grad_norm": 1.0438076257705688, + "learning_rate": 2.8787316006046096e-05, + "loss": 0.1066, + "step": 82140 + }, + { + "epoch": 2.985318700486954, + "grad_norm": 1.0743767023086548, + "learning_rate": 2.8782186499120116e-05, + "loss": 0.0779, + "step": 82150 + }, + { + "epoch": 2.985682098989752, + "grad_norm": 0.9644399285316467, + "learning_rate": 2.8777056829233172e-05, + "loss": 0.085, + "step": 82160 + }, + { + "epoch": 2.98604549749255, + "grad_norm": 0.2966119647026062, + "learning_rate": 2.8771926996606297e-05, + "loss": 0.0879, + "step": 82170 + }, + { + "epoch": 2.9864088959953485, + "grad_norm": 0.8855867981910706, + "learning_rate": 2.87667970014605e-05, + "loss": 0.1638, + "step": 82180 + }, + { + "epoch": 2.9867722944981465, + "grad_norm": 0.5556718111038208, + "learning_rate": 2.8761666844016822e-05, + "loss": 0.1023, + "step": 82190 + }, + { + "epoch": 2.987135693000945, + "grad_norm": 0.35973209142684937, + "learning_rate": 2.8756536524496313e-05, + "loss": 0.0726, + "step": 82200 + }, + { + "epoch": 2.987135693000945, + "eval_loss": 0.32470783591270447, + "eval_runtime": 179.3561, + "eval_samples_per_second": 41.337, + "eval_steps_per_second": 5.168, + "eval_wer": 0.1449252999800316, + "step": 82200 + }, + { + "epoch": 2.987499091503743, + "grad_norm": 0.5686795711517334, + "learning_rate": 2.8751406043119998e-05, + "loss": 0.0811, + "step": 82210 + }, + { + "epoch": 2.987862490006541, + "grad_norm": 0.5881648659706116, + "learning_rate": 2.8746275400108956e-05, + "loss": 0.1118, + "step": 82220 + }, + { + "epoch": 2.9882258885093393, + "grad_norm": 0.4261440336704254, + "learning_rate": 2.8741144595684227e-05, + "loss": 0.0932, + "step": 82230 + }, + { + "epoch": 2.9885892870121378, + "grad_norm": 0.6575589776039124, + "learning_rate": 2.8736013630066894e-05, + "loss": 0.1028, + "step": 82240 + }, + { + "epoch": 2.9889526855149358, + "grad_norm": 0.4109443426132202, + "learning_rate": 2.8730882503478024e-05, + "loss": 0.0768, + "step": 82250 + }, + { + "epoch": 2.9893160840177337, + "grad_norm": 0.4477255046367645, + "learning_rate": 2.8725751216138706e-05, + "loss": 0.0736, + "step": 82260 + }, + { + "epoch": 2.989679482520532, + "grad_norm": 0.5772513747215271, + "learning_rate": 2.8720619768270023e-05, + "loss": 0.0978, + "step": 82270 + }, + { + "epoch": 2.99004288102333, + "grad_norm": 0.8295323252677917, + "learning_rate": 2.871548816009307e-05, + "loss": 0.0975, + "step": 82280 + }, + { + "epoch": 2.9904062795261286, + "grad_norm": 1.1587345600128174, + "learning_rate": 2.8710356391828953e-05, + "loss": 0.2043, + "step": 82290 + }, + { + "epoch": 2.9907696780289266, + "grad_norm": 0.5980029702186584, + "learning_rate": 2.8705224463698778e-05, + "loss": 0.0955, + "step": 82300 + }, + { + "epoch": 2.9911330765317246, + "grad_norm": 0.8250631093978882, + "learning_rate": 2.8700092375923666e-05, + "loss": 0.0931, + "step": 82310 + }, + { + "epoch": 2.991496475034523, + "grad_norm": 0.9287375211715698, + "learning_rate": 2.8694960128724735e-05, + "loss": 0.1066, + "step": 82320 + }, + { + "epoch": 2.991859873537321, + "grad_norm": 1.5283560752868652, + "learning_rate": 2.868982772232312e-05, + "loss": 0.0921, + "step": 82330 + }, + { + "epoch": 2.9922232720401194, + "grad_norm": 1.8111027479171753, + "learning_rate": 2.8684695156939955e-05, + "loss": 0.1164, + "step": 82340 + }, + { + "epoch": 2.9925866705429174, + "grad_norm": 0.3591112494468689, + "learning_rate": 2.86795624327964e-05, + "loss": 0.0832, + "step": 82350 + }, + { + "epoch": 2.9929500690457154, + "grad_norm": 1.000104308128357, + "learning_rate": 2.8674429550113578e-05, + "loss": 0.0898, + "step": 82360 + }, + { + "epoch": 2.993313467548514, + "grad_norm": 3.1828064918518066, + "learning_rate": 2.8669296509112666e-05, + "loss": 0.144, + "step": 82370 + }, + { + "epoch": 2.993676866051312, + "grad_norm": 0.5351777076721191, + "learning_rate": 2.866416331001482e-05, + "loss": 0.0879, + "step": 82380 + }, + { + "epoch": 2.99404026455411, + "grad_norm": 0.941906213760376, + "learning_rate": 2.865902995304121e-05, + "loss": 0.1301, + "step": 82390 + }, + { + "epoch": 2.994403663056908, + "grad_norm": 2.9968576431274414, + "learning_rate": 2.8653896438413024e-05, + "loss": 0.0856, + "step": 82400 + }, + { + "epoch": 2.994767061559706, + "grad_norm": 0.454728364944458, + "learning_rate": 2.8648762766351438e-05, + "loss": 0.0795, + "step": 82410 + }, + { + "epoch": 2.9951304600625046, + "grad_norm": 0.9574378728866577, + "learning_rate": 2.864362893707765e-05, + "loss": 0.0924, + "step": 82420 + }, + { + "epoch": 2.9954938585653026, + "grad_norm": 0.537486732006073, + "learning_rate": 2.8638494950812854e-05, + "loss": 0.0825, + "step": 82430 + }, + { + "epoch": 2.995857257068101, + "grad_norm": 0.4603738486766815, + "learning_rate": 2.863336080777826e-05, + "loss": 0.1493, + "step": 82440 + }, + { + "epoch": 2.996220655570899, + "grad_norm": 1.2842907905578613, + "learning_rate": 2.862822650819507e-05, + "loss": 0.0693, + "step": 82450 + }, + { + "epoch": 2.996584054073697, + "grad_norm": 1.0182005167007446, + "learning_rate": 2.862309205228451e-05, + "loss": 0.1398, + "step": 82460 + }, + { + "epoch": 2.9969474525764954, + "grad_norm": 0.6332042217254639, + "learning_rate": 2.8617957440267806e-05, + "loss": 0.1113, + "step": 82470 + }, + { + "epoch": 2.9973108510792934, + "grad_norm": 0.7729670405387878, + "learning_rate": 2.861282267236619e-05, + "loss": 0.0994, + "step": 82480 + }, + { + "epoch": 2.997674249582092, + "grad_norm": 1.1133567094802856, + "learning_rate": 2.8607687748800898e-05, + "loss": 0.1356, + "step": 82490 + }, + { + "epoch": 2.99803764808489, + "grad_norm": 0.6192561388015747, + "learning_rate": 2.860255266979318e-05, + "loss": 0.0742, + "step": 82500 + }, + { + "epoch": 2.998401046587688, + "grad_norm": 0.7631018757820129, + "learning_rate": 2.8597417435564282e-05, + "loss": 0.0794, + "step": 82510 + }, + { + "epoch": 2.9987644450904862, + "grad_norm": 0.49496951699256897, + "learning_rate": 2.859228204633547e-05, + "loss": 0.1244, + "step": 82520 + }, + { + "epoch": 2.9991278435932847, + "grad_norm": 0.5484241247177124, + "learning_rate": 2.8587146502328e-05, + "loss": 0.0882, + "step": 82530 + }, + { + "epoch": 2.9994912420960826, + "grad_norm": 0.8066346049308777, + "learning_rate": 2.8582010803763153e-05, + "loss": 0.1345, + "step": 82540 + }, + { + "epoch": 2.9998546405988806, + "grad_norm": 0.8499393463134766, + "learning_rate": 2.8576874950862205e-05, + "loss": 0.0929, + "step": 82550 + }, + { + "epoch": 3.000218039101679, + "grad_norm": 0.6916410326957703, + "learning_rate": 2.8571738943846445e-05, + "loss": 0.0913, + "step": 82560 + }, + { + "epoch": 3.000581437604477, + "grad_norm": 0.7126309871673584, + "learning_rate": 2.8566602782937162e-05, + "loss": 0.0894, + "step": 82570 + }, + { + "epoch": 3.000944836107275, + "grad_norm": 0.8689286708831787, + "learning_rate": 2.8561466468355648e-05, + "loss": 0.2882, + "step": 82580 + }, + { + "epoch": 3.0013082346100735, + "grad_norm": 0.8167956471443176, + "learning_rate": 2.8556330000323223e-05, + "loss": 0.0969, + "step": 82590 + }, + { + "epoch": 3.0016716331128714, + "grad_norm": 1.8583896160125732, + "learning_rate": 2.8551193379061186e-05, + "loss": 0.0697, + "step": 82600 + }, + { + "epoch": 3.00203503161567, + "grad_norm": 0.6422175168991089, + "learning_rate": 2.8546056604790866e-05, + "loss": 0.0794, + "step": 82610 + }, + { + "epoch": 3.002398430118468, + "grad_norm": 0.8441867232322693, + "learning_rate": 2.8540919677733584e-05, + "loss": 0.0738, + "step": 82620 + }, + { + "epoch": 3.0027618286212663, + "grad_norm": 0.6802086234092712, + "learning_rate": 2.8535782598110672e-05, + "loss": 0.0804, + "step": 82630 + }, + { + "epoch": 3.0031252271240643, + "grad_norm": 1.3518569469451904, + "learning_rate": 2.8530645366143467e-05, + "loss": 0.0901, + "step": 82640 + }, + { + "epoch": 3.0034886256268623, + "grad_norm": 0.32762405276298523, + "learning_rate": 2.852550798205331e-05, + "loss": 0.0784, + "step": 82650 + }, + { + "epoch": 3.0038520241296607, + "grad_norm": 0.43205514550209045, + "learning_rate": 2.8520370446061567e-05, + "loss": 0.0897, + "step": 82660 + }, + { + "epoch": 3.0042154226324587, + "grad_norm": 0.6062584519386292, + "learning_rate": 2.8515232758389582e-05, + "loss": 0.0806, + "step": 82670 + }, + { + "epoch": 3.004578821135257, + "grad_norm": 0.8513742089271545, + "learning_rate": 2.8510094919258722e-05, + "loss": 0.0741, + "step": 82680 + }, + { + "epoch": 3.004942219638055, + "grad_norm": 0.43795377016067505, + "learning_rate": 2.850495692889037e-05, + "loss": 0.0878, + "step": 82690 + }, + { + "epoch": 3.005305618140853, + "grad_norm": 1.4670331478118896, + "learning_rate": 2.8499818787505884e-05, + "loss": 0.0879, + "step": 82700 + }, + { + "epoch": 3.0056690166436515, + "grad_norm": 0.5501256585121155, + "learning_rate": 2.8494680495326665e-05, + "loss": 0.073, + "step": 82710 + }, + { + "epoch": 3.0060324151464495, + "grad_norm": 0.9265702962875366, + "learning_rate": 2.84895420525741e-05, + "loss": 0.0781, + "step": 82720 + }, + { + "epoch": 3.006395813649248, + "grad_norm": 0.5779339075088501, + "learning_rate": 2.8484403459469576e-05, + "loss": 0.0829, + "step": 82730 + }, + { + "epoch": 3.006759212152046, + "grad_norm": 0.31466546654701233, + "learning_rate": 2.8479264716234504e-05, + "loss": 0.0862, + "step": 82740 + }, + { + "epoch": 3.0071226106548443, + "grad_norm": 2.829972505569458, + "learning_rate": 2.84741258230903e-05, + "loss": 0.0945, + "step": 82750 + }, + { + "epoch": 3.0074860091576423, + "grad_norm": 0.4769342243671417, + "learning_rate": 2.8468986780258382e-05, + "loss": 0.0896, + "step": 82760 + }, + { + "epoch": 3.0078494076604403, + "grad_norm": 0.9461230635643005, + "learning_rate": 2.8463847587960157e-05, + "loss": 0.0749, + "step": 82770 + }, + { + "epoch": 3.0082128061632387, + "grad_norm": 0.4310390055179596, + "learning_rate": 2.8458708246417064e-05, + "loss": 0.1645, + "step": 82780 + }, + { + "epoch": 3.0085762046660367, + "grad_norm": 0.46994635462760925, + "learning_rate": 2.845356875585054e-05, + "loss": 0.0835, + "step": 82790 + }, + { + "epoch": 3.008939603168835, + "grad_norm": 1.9111509323120117, + "learning_rate": 2.844842911648203e-05, + "loss": 0.0781, + "step": 82800 + }, + { + "epoch": 3.008939603168835, + "eval_loss": 0.33264267444610596, + "eval_runtime": 179.8864, + "eval_samples_per_second": 41.215, + "eval_steps_per_second": 5.153, + "eval_wer": 0.14235663586690145, + "step": 82800 + }, + { + "epoch": 3.009303001671633, + "grad_norm": 0.5043010711669922, + "learning_rate": 2.8443289328532973e-05, + "loss": 0.0914, + "step": 82810 + }, + { + "epoch": 3.009666400174431, + "grad_norm": 1.3565329313278198, + "learning_rate": 2.8438149392224832e-05, + "loss": 0.0642, + "step": 82820 + }, + { + "epoch": 3.0100297986772295, + "grad_norm": 0.710573136806488, + "learning_rate": 2.843300930777907e-05, + "loss": 0.0714, + "step": 82830 + }, + { + "epoch": 3.0103931971800275, + "grad_norm": 0.28599199652671814, + "learning_rate": 2.8427869075417145e-05, + "loss": 0.0736, + "step": 82840 + }, + { + "epoch": 3.010756595682826, + "grad_norm": 1.063194751739502, + "learning_rate": 2.8422728695360546e-05, + "loss": 0.087, + "step": 82850 + }, + { + "epoch": 3.011119994185624, + "grad_norm": 1.3806395530700684, + "learning_rate": 2.841758816783074e-05, + "loss": 0.1306, + "step": 82860 + }, + { + "epoch": 3.011483392688422, + "grad_norm": 0.4422304332256317, + "learning_rate": 2.841244749304922e-05, + "loss": 0.0665, + "step": 82870 + }, + { + "epoch": 3.0118467911912203, + "grad_norm": 0.5602436661720276, + "learning_rate": 2.840730667123748e-05, + "loss": 0.0796, + "step": 82880 + }, + { + "epoch": 3.0122101896940183, + "grad_norm": 0.39872846007347107, + "learning_rate": 2.8402165702617016e-05, + "loss": 0.0937, + "step": 82890 + }, + { + "epoch": 3.0125735881968168, + "grad_norm": 0.5337156057357788, + "learning_rate": 2.8397024587409344e-05, + "loss": 0.08, + "step": 82900 + }, + { + "epoch": 3.0129369866996147, + "grad_norm": 0.46881571412086487, + "learning_rate": 2.8391883325835962e-05, + "loss": 0.0913, + "step": 82910 + }, + { + "epoch": 3.013300385202413, + "grad_norm": 2.061661720275879, + "learning_rate": 2.8386741918118404e-05, + "loss": 0.0662, + "step": 82920 + }, + { + "epoch": 3.013663783705211, + "grad_norm": 1.4211331605911255, + "learning_rate": 2.838160036447818e-05, + "loss": 0.0677, + "step": 82930 + }, + { + "epoch": 3.014027182208009, + "grad_norm": 1.1780022382736206, + "learning_rate": 2.837645866513683e-05, + "loss": 0.1062, + "step": 82940 + }, + { + "epoch": 3.0143905807108076, + "grad_norm": 1.3207381963729858, + "learning_rate": 2.837131682031589e-05, + "loss": 0.1001, + "step": 82950 + }, + { + "epoch": 3.0147539792136056, + "grad_norm": 0.9408227801322937, + "learning_rate": 2.8366174830236896e-05, + "loss": 0.0934, + "step": 82960 + }, + { + "epoch": 3.015117377716404, + "grad_norm": 10.08785629272461, + "learning_rate": 2.8361032695121416e-05, + "loss": 0.0665, + "step": 82970 + }, + { + "epoch": 3.015480776219202, + "grad_norm": 0.6198272705078125, + "learning_rate": 2.835589041519099e-05, + "loss": 0.0794, + "step": 82980 + }, + { + "epoch": 3.015844174722, + "grad_norm": 0.44829145073890686, + "learning_rate": 2.8350747990667192e-05, + "loss": 0.0853, + "step": 82990 + }, + { + "epoch": 3.0162075732247984, + "grad_norm": 0.6157195568084717, + "learning_rate": 2.834560542177158e-05, + "loss": 0.0791, + "step": 83000 + }, + { + "epoch": 3.0165709717275964, + "grad_norm": 0.40746578574180603, + "learning_rate": 2.8340462708725735e-05, + "loss": 0.076, + "step": 83010 + }, + { + "epoch": 3.016934370230395, + "grad_norm": 5.672430038452148, + "learning_rate": 2.833531985175124e-05, + "loss": 0.0816, + "step": 83020 + }, + { + "epoch": 3.0172977687331928, + "grad_norm": 1.2975281476974487, + "learning_rate": 2.8330176851069672e-05, + "loss": 0.0738, + "step": 83030 + }, + { + "epoch": 3.017661167235991, + "grad_norm": 0.22850178182125092, + "learning_rate": 2.8325033706902643e-05, + "loss": 0.1293, + "step": 83040 + }, + { + "epoch": 3.018024565738789, + "grad_norm": 1.8061107397079468, + "learning_rate": 2.8319890419471728e-05, + "loss": 0.0873, + "step": 83050 + }, + { + "epoch": 3.018387964241587, + "grad_norm": 0.5460423827171326, + "learning_rate": 2.8314746988998564e-05, + "loss": 0.0753, + "step": 83060 + }, + { + "epoch": 3.0187513627443856, + "grad_norm": 0.9149671196937561, + "learning_rate": 2.8310117779454733e-05, + "loss": 3.5039, + "step": 83070 + }, + { + "epoch": 3.0191147612471836, + "grad_norm": 1.0777734518051147, + "learning_rate": 2.83049740778118e-05, + "loss": 0.0834, + "step": 83080 + }, + { + "epoch": 3.019478159749982, + "grad_norm": 1.6478363275527954, + "learning_rate": 2.829983023376929e-05, + "loss": 0.1151, + "step": 83090 + }, + { + "epoch": 3.01984155825278, + "grad_norm": 0.5667656660079956, + "learning_rate": 2.8294686247548845e-05, + "loss": 0.099, + "step": 83100 + }, + { + "epoch": 3.020204956755578, + "grad_norm": 0.7810095548629761, + "learning_rate": 2.828954211937209e-05, + "loss": 0.0826, + "step": 83110 + }, + { + "epoch": 3.0205683552583764, + "grad_norm": 0.45061448216438293, + "learning_rate": 2.8284397849460664e-05, + "loss": 0.0758, + "step": 83120 + }, + { + "epoch": 3.0209317537611744, + "grad_norm": 2.199354887008667, + "learning_rate": 2.8279253438036228e-05, + "loss": 0.0767, + "step": 83130 + }, + { + "epoch": 3.021295152263973, + "grad_norm": 0.7062342166900635, + "learning_rate": 2.827410888532042e-05, + "loss": 0.0847, + "step": 83140 + }, + { + "epoch": 3.021658550766771, + "grad_norm": 1.1551927328109741, + "learning_rate": 2.826896419153492e-05, + "loss": 0.0789, + "step": 83150 + }, + { + "epoch": 3.022021949269569, + "grad_norm": 2.4032013416290283, + "learning_rate": 2.826381935690137e-05, + "loss": 0.1152, + "step": 83160 + }, + { + "epoch": 3.0223853477723672, + "grad_norm": 1.0286635160446167, + "learning_rate": 2.825867438164147e-05, + "loss": 0.0724, + "step": 83170 + }, + { + "epoch": 3.022748746275165, + "grad_norm": 0.9156087636947632, + "learning_rate": 2.8253529265976886e-05, + "loss": 0.0808, + "step": 83180 + }, + { + "epoch": 3.0231121447779636, + "grad_norm": 0.3878247141838074, + "learning_rate": 2.8248384010129293e-05, + "loss": 0.0736, + "step": 83190 + }, + { + "epoch": 3.0234755432807616, + "grad_norm": 0.7282688021659851, + "learning_rate": 2.8243238614320394e-05, + "loss": 0.0954, + "step": 83200 + }, + { + "epoch": 3.02383894178356, + "grad_norm": 0.40208378434181213, + "learning_rate": 2.8238093078771876e-05, + "loss": 0.0893, + "step": 83210 + }, + { + "epoch": 3.024202340286358, + "grad_norm": 4.144686698913574, + "learning_rate": 2.823294740370546e-05, + "loss": 0.079, + "step": 83220 + }, + { + "epoch": 3.024565738789156, + "grad_norm": 1.0858464241027832, + "learning_rate": 2.8227801589342838e-05, + "loss": 0.0808, + "step": 83230 + }, + { + "epoch": 3.0249291372919545, + "grad_norm": 1.1500715017318726, + "learning_rate": 2.822265563590573e-05, + "loss": 0.0714, + "step": 83240 + }, + { + "epoch": 3.0252925357947524, + "grad_norm": 2.532526731491089, + "learning_rate": 2.8217509543615862e-05, + "loss": 0.0838, + "step": 83250 + }, + { + "epoch": 3.025655934297551, + "grad_norm": 0.6312755942344666, + "learning_rate": 2.8212363312694945e-05, + "loss": 0.0853, + "step": 83260 + }, + { + "epoch": 3.026019332800349, + "grad_norm": 0.5501546859741211, + "learning_rate": 2.8207216943364734e-05, + "loss": 0.0705, + "step": 83270 + }, + { + "epoch": 3.026382731303147, + "grad_norm": 6.8282060623168945, + "learning_rate": 2.8202070435846946e-05, + "loss": 0.076, + "step": 83280 + }, + { + "epoch": 3.0267461298059453, + "grad_norm": 0.48019352555274963, + "learning_rate": 2.8196923790363344e-05, + "loss": 0.0919, + "step": 83290 + }, + { + "epoch": 3.0271095283087432, + "grad_norm": 0.48283651471138, + "learning_rate": 2.8191777007135667e-05, + "loss": 0.0932, + "step": 83300 + }, + { + "epoch": 3.0274729268115417, + "grad_norm": 0.7604033350944519, + "learning_rate": 2.8186630086385672e-05, + "loss": 0.1037, + "step": 83310 + }, + { + "epoch": 3.0278363253143397, + "grad_norm": 1.1358963251113892, + "learning_rate": 2.818148302833513e-05, + "loss": 0.0804, + "step": 83320 + }, + { + "epoch": 3.028199723817138, + "grad_norm": 1.2052534818649292, + "learning_rate": 2.81763358332058e-05, + "loss": 0.0753, + "step": 83330 + }, + { + "epoch": 3.028563122319936, + "grad_norm": 0.5695772767066956, + "learning_rate": 2.8171188501219464e-05, + "loss": 0.0833, + "step": 83340 + }, + { + "epoch": 3.028926520822734, + "grad_norm": 0.7455033659934998, + "learning_rate": 2.81660410325979e-05, + "loss": 0.0772, + "step": 83350 + }, + { + "epoch": 3.0292899193255325, + "grad_norm": 0.7446203231811523, + "learning_rate": 2.816089342756289e-05, + "loss": 0.1103, + "step": 83360 + }, + { + "epoch": 3.0296533178283305, + "grad_norm": 2.548922061920166, + "learning_rate": 2.8155745686336234e-05, + "loss": 0.0707, + "step": 83370 + }, + { + "epoch": 3.030016716331129, + "grad_norm": 1.2039028406143188, + "learning_rate": 2.815059780913971e-05, + "loss": 0.0767, + "step": 83380 + }, + { + "epoch": 3.030380114833927, + "grad_norm": 0.5070446729660034, + "learning_rate": 2.8145449796195152e-05, + "loss": 0.1082, + "step": 83390 + }, + { + "epoch": 3.030743513336725, + "grad_norm": 3.70131778717041, + "learning_rate": 2.814030164772434e-05, + "loss": 0.0848, + "step": 83400 + }, + { + "epoch": 3.030743513336725, + "eval_loss": 0.3220129907131195, + "eval_runtime": 179.9233, + "eval_samples_per_second": 41.206, + "eval_steps_per_second": 5.152, + "eval_wer": 0.14291938206836458, + "step": 83400 + }, + { + "epoch": 3.0311069118395233, + "grad_norm": 0.40809836983680725, + "learning_rate": 2.813515336394911e-05, + "loss": 0.0843, + "step": 83410 + }, + { + "epoch": 3.0314703103423213, + "grad_norm": 0.9957777261734009, + "learning_rate": 2.8130004945091283e-05, + "loss": 3.893, + "step": 83420 + }, + { + "epoch": 3.0318337088451197, + "grad_norm": 0.45973557233810425, + "learning_rate": 2.8124856391372673e-05, + "loss": 0.0776, + "step": 83430 + }, + { + "epoch": 3.0321971073479177, + "grad_norm": 0.7182989120483398, + "learning_rate": 2.811970770301512e-05, + "loss": 0.0772, + "step": 83440 + }, + { + "epoch": 3.0325605058507157, + "grad_norm": 0.5667139291763306, + "learning_rate": 2.811455888024046e-05, + "loss": 0.0785, + "step": 83450 + }, + { + "epoch": 3.032923904353514, + "grad_norm": 0.8791239857673645, + "learning_rate": 2.810940992327054e-05, + "loss": 0.0729, + "step": 83460 + }, + { + "epoch": 3.033287302856312, + "grad_norm": 1.0834791660308838, + "learning_rate": 2.810426083232721e-05, + "loss": 0.0763, + "step": 83470 + }, + { + "epoch": 3.0336507013591105, + "grad_norm": 1.9368480443954468, + "learning_rate": 2.809911160763233e-05, + "loss": 0.0822, + "step": 83480 + }, + { + "epoch": 3.0340140998619085, + "grad_norm": 1.531395673751831, + "learning_rate": 2.8093962249407755e-05, + "loss": 0.1231, + "step": 83490 + }, + { + "epoch": 3.034377498364707, + "grad_norm": 1.5916883945465088, + "learning_rate": 2.8088812757875347e-05, + "loss": 0.236, + "step": 83500 + }, + { + "epoch": 3.034740896867505, + "grad_norm": 0.34502482414245605, + "learning_rate": 2.8083663133256998e-05, + "loss": 0.0986, + "step": 83510 + }, + { + "epoch": 3.035104295370303, + "grad_norm": 1.0849754810333252, + "learning_rate": 2.8078513375774574e-05, + "loss": 0.0704, + "step": 83520 + }, + { + "epoch": 3.0354676938731013, + "grad_norm": 0.6940005421638489, + "learning_rate": 2.8073363485649962e-05, + "loss": 0.078, + "step": 83530 + }, + { + "epoch": 3.0358310923758993, + "grad_norm": 0.4291388988494873, + "learning_rate": 2.8068213463105054e-05, + "loss": 0.0762, + "step": 83540 + }, + { + "epoch": 3.0361944908786977, + "grad_norm": 1.4870105981826782, + "learning_rate": 2.8063063308361736e-05, + "loss": 0.0967, + "step": 83550 + }, + { + "epoch": 3.0365578893814957, + "grad_norm": 0.8705921769142151, + "learning_rate": 2.805791302164193e-05, + "loss": 0.069, + "step": 83560 + }, + { + "epoch": 3.0369212878842937, + "grad_norm": 1.3815371990203857, + "learning_rate": 2.805276260316752e-05, + "loss": 0.0682, + "step": 83570 + }, + { + "epoch": 3.037284686387092, + "grad_norm": 0.4686858654022217, + "learning_rate": 2.8047612053160443e-05, + "loss": 0.0705, + "step": 83580 + }, + { + "epoch": 3.03764808488989, + "grad_norm": 1.2113518714904785, + "learning_rate": 2.80424613718426e-05, + "loss": 0.3061, + "step": 83590 + }, + { + "epoch": 3.0380114833926886, + "grad_norm": 1.3406014442443848, + "learning_rate": 2.803731055943592e-05, + "loss": 0.0936, + "step": 83600 + }, + { + "epoch": 3.0383748818954865, + "grad_norm": 2.528625726699829, + "learning_rate": 2.803215961616234e-05, + "loss": 0.0922, + "step": 83610 + }, + { + "epoch": 3.038738280398285, + "grad_norm": 0.6336283683776855, + "learning_rate": 2.8027008542243784e-05, + "loss": 0.086, + "step": 83620 + }, + { + "epoch": 3.039101678901083, + "grad_norm": 1.0377708673477173, + "learning_rate": 2.8021857337902208e-05, + "loss": 0.0734, + "step": 83630 + }, + { + "epoch": 3.039465077403881, + "grad_norm": 0.5187166929244995, + "learning_rate": 2.8016706003359545e-05, + "loss": 0.0859, + "step": 83640 + }, + { + "epoch": 3.0398284759066794, + "grad_norm": 0.650485634803772, + "learning_rate": 2.801155453883775e-05, + "loss": 0.091, + "step": 83650 + }, + { + "epoch": 3.0401918744094774, + "grad_norm": 1.6549148559570312, + "learning_rate": 2.800640294455879e-05, + "loss": 0.0801, + "step": 83660 + }, + { + "epoch": 3.040555272912276, + "grad_norm": 0.75333172082901, + "learning_rate": 2.8001251220744628e-05, + "loss": 0.0705, + "step": 83670 + }, + { + "epoch": 3.0409186714150738, + "grad_norm": 1.4784330129623413, + "learning_rate": 2.799609936761723e-05, + "loss": 0.0881, + "step": 83680 + }, + { + "epoch": 3.0412820699178718, + "grad_norm": 0.5964809656143188, + "learning_rate": 2.7990947385398563e-05, + "loss": 0.0899, + "step": 83690 + }, + { + "epoch": 3.04164546842067, + "grad_norm": 0.6587772369384766, + "learning_rate": 2.7985795274310622e-05, + "loss": 0.081, + "step": 83700 + }, + { + "epoch": 3.042008866923468, + "grad_norm": 0.6639897227287292, + "learning_rate": 2.7980643034575375e-05, + "loss": 0.1012, + "step": 83710 + }, + { + "epoch": 3.0423722654262666, + "grad_norm": 0.4024466872215271, + "learning_rate": 2.797549066641484e-05, + "loss": 0.0641, + "step": 83720 + }, + { + "epoch": 3.0427356639290646, + "grad_norm": 0.6157781481742859, + "learning_rate": 2.797033817005099e-05, + "loss": 0.074, + "step": 83730 + }, + { + "epoch": 3.0430990624318626, + "grad_norm": 0.43719834089279175, + "learning_rate": 2.7965185545705835e-05, + "loss": 0.0881, + "step": 83740 + }, + { + "epoch": 3.043462460934661, + "grad_norm": 1.6078770160675049, + "learning_rate": 2.796003279360139e-05, + "loss": 0.0706, + "step": 83750 + }, + { + "epoch": 3.043825859437459, + "grad_norm": 0.7965851426124573, + "learning_rate": 2.7954879913959658e-05, + "loss": 0.0649, + "step": 83760 + }, + { + "epoch": 3.0441892579402574, + "grad_norm": 0.8984066843986511, + "learning_rate": 2.7949726907002672e-05, + "loss": 0.0769, + "step": 83770 + }, + { + "epoch": 3.0445526564430554, + "grad_norm": 2.9094557762145996, + "learning_rate": 2.794457377295244e-05, + "loss": 0.0899, + "step": 83780 + }, + { + "epoch": 3.044916054945854, + "grad_norm": 0.49150341749191284, + "learning_rate": 2.793942051203101e-05, + "loss": 1.4083, + "step": 83790 + }, + { + "epoch": 3.045279453448652, + "grad_norm": 2.3720130920410156, + "learning_rate": 2.7934267124460407e-05, + "loss": 0.0894, + "step": 83800 + }, + { + "epoch": 3.04564285195145, + "grad_norm": 0.31627193093299866, + "learning_rate": 2.7929113610462665e-05, + "loss": 0.0815, + "step": 83810 + }, + { + "epoch": 3.046006250454248, + "grad_norm": 0.43195366859436035, + "learning_rate": 2.792395997025985e-05, + "loss": 0.0851, + "step": 83820 + }, + { + "epoch": 3.046369648957046, + "grad_norm": 0.6193608641624451, + "learning_rate": 2.7918806204073995e-05, + "loss": 0.0718, + "step": 83830 + }, + { + "epoch": 3.0467330474598446, + "grad_norm": 0.6569747924804688, + "learning_rate": 2.791365231212717e-05, + "loss": 0.097, + "step": 83840 + }, + { + "epoch": 3.0470964459626426, + "grad_norm": 2.10140323638916, + "learning_rate": 2.7908498294641437e-05, + "loss": 0.0959, + "step": 83850 + }, + { + "epoch": 3.0474598444654406, + "grad_norm": 0.7795642018318176, + "learning_rate": 2.7903344151838856e-05, + "loss": 0.0731, + "step": 83860 + }, + { + "epoch": 3.047823242968239, + "grad_norm": 0.37401872873306274, + "learning_rate": 2.7898189883941507e-05, + "loss": 0.6129, + "step": 83870 + }, + { + "epoch": 3.048186641471037, + "grad_norm": 0.9135899543762207, + "learning_rate": 2.7893035491171466e-05, + "loss": 0.0889, + "step": 83880 + }, + { + "epoch": 3.0485500399738354, + "grad_norm": 0.572894275188446, + "learning_rate": 2.7887880973750817e-05, + "loss": 0.0953, + "step": 83890 + }, + { + "epoch": 3.0489134384766334, + "grad_norm": 0.4888230264186859, + "learning_rate": 2.7882726331901655e-05, + "loss": 0.0821, + "step": 83900 + }, + { + "epoch": 3.049276836979432, + "grad_norm": 0.5517320036888123, + "learning_rate": 2.787757156584608e-05, + "loss": 0.0905, + "step": 83910 + }, + { + "epoch": 3.04964023548223, + "grad_norm": 0.5624126195907593, + "learning_rate": 2.7872416675806174e-05, + "loss": 0.0658, + "step": 83920 + }, + { + "epoch": 3.050003633985028, + "grad_norm": 0.606499969959259, + "learning_rate": 2.7867261662004058e-05, + "loss": 0.0772, + "step": 83930 + }, + { + "epoch": 3.0503670324878263, + "grad_norm": 0.30192533135414124, + "learning_rate": 2.7862106524661835e-05, + "loss": 0.0782, + "step": 83940 + }, + { + "epoch": 3.0507304309906242, + "grad_norm": 0.5810015797615051, + "learning_rate": 2.7856951264001623e-05, + "loss": 0.0724, + "step": 83950 + }, + { + "epoch": 3.0510938294934227, + "grad_norm": 0.8986351490020752, + "learning_rate": 2.7851795880245556e-05, + "loss": 0.082, + "step": 83960 + }, + { + "epoch": 3.0514572279962207, + "grad_norm": 0.655348539352417, + "learning_rate": 2.7846640373615745e-05, + "loss": 0.0694, + "step": 83970 + }, + { + "epoch": 3.0518206264990186, + "grad_norm": 0.9153086543083191, + "learning_rate": 2.784148474433433e-05, + "loss": 0.0749, + "step": 83980 + }, + { + "epoch": 3.052184025001817, + "grad_norm": 0.4834425151348114, + "learning_rate": 2.783632899262345e-05, + "loss": 0.072, + "step": 83990 + }, + { + "epoch": 3.052547423504615, + "grad_norm": 0.8401370644569397, + "learning_rate": 2.783117311870524e-05, + "loss": 0.0781, + "step": 84000 + }, + { + "epoch": 3.052547423504615, + "eval_loss": 0.3286387026309967, + "eval_runtime": 180.0952, + "eval_samples_per_second": 41.167, + "eval_steps_per_second": 5.147, + "eval_wer": 0.14074100967560405, + "step": 84000 + }, + { + "epoch": 3.0529108220074135, + "grad_norm": 0.7353171706199646, + "learning_rate": 2.7826017122801855e-05, + "loss": 0.0685, + "step": 84010 + }, + { + "epoch": 3.0532742205102115, + "grad_norm": 0.4934822916984558, + "learning_rate": 2.7820861005135445e-05, + "loss": 0.0677, + "step": 84020 + }, + { + "epoch": 3.0536376190130095, + "grad_norm": 0.6039226651191711, + "learning_rate": 2.781570476592818e-05, + "loss": 0.071, + "step": 84030 + }, + { + "epoch": 3.054001017515808, + "grad_norm": 0.7619723677635193, + "learning_rate": 2.7810548405402215e-05, + "loss": 0.0811, + "step": 84040 + }, + { + "epoch": 3.054364416018606, + "grad_norm": 0.9137590527534485, + "learning_rate": 2.7805391923779704e-05, + "loss": 0.104, + "step": 84050 + }, + { + "epoch": 3.0547278145214043, + "grad_norm": 0.40205273032188416, + "learning_rate": 2.7800235321282854e-05, + "loss": 0.0756, + "step": 84060 + }, + { + "epoch": 3.0550912130242023, + "grad_norm": 0.5475990772247314, + "learning_rate": 2.779507859813381e-05, + "loss": 0.0918, + "step": 84070 + }, + { + "epoch": 3.0554546115270007, + "grad_norm": 0.5701851844787598, + "learning_rate": 2.778992175455479e-05, + "loss": 0.0744, + "step": 84080 + }, + { + "epoch": 3.0558180100297987, + "grad_norm": 0.2219076156616211, + "learning_rate": 2.7784764790767957e-05, + "loss": 0.2494, + "step": 84090 + }, + { + "epoch": 3.0561814085325967, + "grad_norm": 4.648733615875244, + "learning_rate": 2.7779607706995524e-05, + "loss": 0.1106, + "step": 84100 + }, + { + "epoch": 3.056544807035395, + "grad_norm": 0.6483043432235718, + "learning_rate": 2.7774450503459687e-05, + "loss": 0.0938, + "step": 84110 + }, + { + "epoch": 3.056908205538193, + "grad_norm": 0.7879406213760376, + "learning_rate": 2.7769293180382645e-05, + "loss": 0.0727, + "step": 84120 + }, + { + "epoch": 3.0572716040409915, + "grad_norm": 0.540431022644043, + "learning_rate": 2.7764135737986613e-05, + "loss": 0.0692, + "step": 84130 + }, + { + "epoch": 3.0576350025437895, + "grad_norm": 0.498374879360199, + "learning_rate": 2.7758978176493805e-05, + "loss": 0.0985, + "step": 84140 + }, + { + "epoch": 3.0579984010465875, + "grad_norm": 2.0039825439453125, + "learning_rate": 2.775382049612645e-05, + "loss": 0.1214, + "step": 84150 + }, + { + "epoch": 3.058361799549386, + "grad_norm": 0.6101000905036926, + "learning_rate": 2.7748662697106757e-05, + "loss": 0.0777, + "step": 84160 + }, + { + "epoch": 3.058725198052184, + "grad_norm": 1.092410683631897, + "learning_rate": 2.7743504779656976e-05, + "loss": 0.0606, + "step": 84170 + }, + { + "epoch": 3.0590885965549823, + "grad_norm": 0.5803566575050354, + "learning_rate": 2.773834674399934e-05, + "loss": 0.0756, + "step": 84180 + }, + { + "epoch": 3.0594519950577803, + "grad_norm": 0.4680107831954956, + "learning_rate": 2.7733188590356073e-05, + "loss": 0.0878, + "step": 84190 + }, + { + "epoch": 3.0598153935605787, + "grad_norm": 4.1969380378723145, + "learning_rate": 2.7728030318949448e-05, + "loss": 0.0911, + "step": 84200 + }, + { + "epoch": 3.0601787920633767, + "grad_norm": 0.45293834805488586, + "learning_rate": 2.7722871930001694e-05, + "loss": 0.1119, + "step": 84210 + }, + { + "epoch": 3.0605421905661747, + "grad_norm": 1.130631685256958, + "learning_rate": 2.7718229279634755e-05, + "loss": 2.6943, + "step": 84220 + }, + { + "epoch": 3.060905589068973, + "grad_norm": 0.3156570494174957, + "learning_rate": 2.7713070667971197e-05, + "loss": 0.0641, + "step": 84230 + }, + { + "epoch": 3.061268987571771, + "grad_norm": 0.860851526260376, + "learning_rate": 2.7707911939411078e-05, + "loss": 0.093, + "step": 84240 + }, + { + "epoch": 3.0616323860745696, + "grad_norm": 1.0920404195785522, + "learning_rate": 2.7702753094176664e-05, + "loss": 0.1426, + "step": 84250 + }, + { + "epoch": 3.0619957845773675, + "grad_norm": 0.4944891333580017, + "learning_rate": 2.769759413249024e-05, + "loss": 0.1016, + "step": 84260 + }, + { + "epoch": 3.0623591830801655, + "grad_norm": 0.503200113773346, + "learning_rate": 2.7692435054574084e-05, + "loss": 0.0753, + "step": 84270 + }, + { + "epoch": 3.062722581582964, + "grad_norm": 0.9437207579612732, + "learning_rate": 2.7687275860650476e-05, + "loss": 0.0665, + "step": 84280 + }, + { + "epoch": 3.063085980085762, + "grad_norm": 0.91017085313797, + "learning_rate": 2.7682116550941716e-05, + "loss": 0.1268, + "step": 84290 + }, + { + "epoch": 3.0634493785885604, + "grad_norm": 1.9001520872116089, + "learning_rate": 2.7676957125670096e-05, + "loss": 0.0818, + "step": 84300 + }, + { + "epoch": 3.0638127770913584, + "grad_norm": 0.6754614114761353, + "learning_rate": 2.767179758505792e-05, + "loss": 0.0902, + "step": 84310 + }, + { + "epoch": 3.0641761755941563, + "grad_norm": 1.6156865358352661, + "learning_rate": 2.766663792932749e-05, + "loss": 0.0744, + "step": 84320 + }, + { + "epoch": 3.0645395740969548, + "grad_norm": 0.5703209042549133, + "learning_rate": 2.7661478158701122e-05, + "loss": 0.082, + "step": 84330 + }, + { + "epoch": 3.0649029725997528, + "grad_norm": 0.4021599590778351, + "learning_rate": 2.7656318273401128e-05, + "loss": 0.0933, + "step": 84340 + }, + { + "epoch": 3.065266371102551, + "grad_norm": 0.5536443591117859, + "learning_rate": 2.7651158273649825e-05, + "loss": 0.0867, + "step": 84350 + }, + { + "epoch": 3.065629769605349, + "grad_norm": 0.4906123876571655, + "learning_rate": 2.7645998159669557e-05, + "loss": 0.0784, + "step": 84360 + }, + { + "epoch": 3.0659931681081476, + "grad_norm": 1.6186575889587402, + "learning_rate": 2.7640837931682627e-05, + "loss": 0.1294, + "step": 84370 + }, + { + "epoch": 3.0663565666109456, + "grad_norm": 1.145196557044983, + "learning_rate": 2.76356775899114e-05, + "loss": 0.0663, + "step": 84380 + }, + { + "epoch": 3.0667199651137436, + "grad_norm": 0.4692659378051758, + "learning_rate": 2.76305171345782e-05, + "loss": 0.091, + "step": 84390 + }, + { + "epoch": 3.067083363616542, + "grad_norm": 3.6405162811279297, + "learning_rate": 2.7625356565905376e-05, + "loss": 0.0835, + "step": 84400 + }, + { + "epoch": 3.06744676211934, + "grad_norm": 1.138875126838684, + "learning_rate": 2.762019588411528e-05, + "loss": 0.1015, + "step": 84410 + }, + { + "epoch": 3.0678101606221384, + "grad_norm": 0.8834431767463684, + "learning_rate": 2.7615035089430262e-05, + "loss": 0.0729, + "step": 84420 + }, + { + "epoch": 3.0681735591249364, + "grad_norm": 0.36101579666137695, + "learning_rate": 2.7609874182072693e-05, + "loss": 0.0694, + "step": 84430 + }, + { + "epoch": 3.0685369576277344, + "grad_norm": 0.5472438931465149, + "learning_rate": 2.760471316226492e-05, + "loss": 0.0904, + "step": 84440 + }, + { + "epoch": 3.068900356130533, + "grad_norm": 0.7334174513816833, + "learning_rate": 2.7599552030229337e-05, + "loss": 0.1055, + "step": 84450 + }, + { + "epoch": 3.069263754633331, + "grad_norm": 3.81465220451355, + "learning_rate": 2.75943907861883e-05, + "loss": 0.0912, + "step": 84460 + }, + { + "epoch": 3.069627153136129, + "grad_norm": 0.5324450135231018, + "learning_rate": 2.7589229430364196e-05, + "loss": 0.3292, + "step": 84470 + }, + { + "epoch": 3.069990551638927, + "grad_norm": 0.7580345273017883, + "learning_rate": 2.758406796297941e-05, + "loss": 0.1129, + "step": 84480 + }, + { + "epoch": 3.0703539501417256, + "grad_norm": 0.8137314915657043, + "learning_rate": 2.757890638425632e-05, + "loss": 0.0886, + "step": 84490 + }, + { + "epoch": 3.0707173486445236, + "grad_norm": 1.109212875366211, + "learning_rate": 2.7573744694417343e-05, + "loss": 0.0996, + "step": 84500 + }, + { + "epoch": 3.0710807471473216, + "grad_norm": 0.991165816783905, + "learning_rate": 2.7568582893684852e-05, + "loss": 0.1194, + "step": 84510 + }, + { + "epoch": 3.07144414565012, + "grad_norm": 0.8121623396873474, + "learning_rate": 2.7563420982281275e-05, + "loss": 0.0811, + "step": 84520 + }, + { + "epoch": 3.071807544152918, + "grad_norm": 0.5951569080352783, + "learning_rate": 2.7558258960429007e-05, + "loss": 0.0876, + "step": 84530 + }, + { + "epoch": 3.0721709426557164, + "grad_norm": 0.7901829481124878, + "learning_rate": 2.7553096828350454e-05, + "loss": 0.0911, + "step": 84540 + }, + { + "epoch": 3.0725343411585144, + "grad_norm": 0.6474528312683105, + "learning_rate": 2.7547934586268048e-05, + "loss": 0.1048, + "step": 84550 + }, + { + "epoch": 3.0728977396613124, + "grad_norm": 0.7994125485420227, + "learning_rate": 2.75427722344042e-05, + "loss": 0.0767, + "step": 84560 + }, + { + "epoch": 3.073261138164111, + "grad_norm": 0.5310266017913818, + "learning_rate": 2.7537609772981353e-05, + "loss": 0.0821, + "step": 84570 + }, + { + "epoch": 3.073624536666909, + "grad_norm": 0.7052491307258606, + "learning_rate": 2.753244720222193e-05, + "loss": 0.0586, + "step": 84580 + }, + { + "epoch": 3.0739879351697073, + "grad_norm": 0.32586991786956787, + "learning_rate": 2.7527284522348362e-05, + "loss": 0.096, + "step": 84590 + }, + { + "epoch": 3.0743513336725052, + "grad_norm": 1.7913528680801392, + "learning_rate": 2.7522121733583102e-05, + "loss": 0.0981, + "step": 84600 + }, + { + "epoch": 3.0743513336725052, + "eval_loss": 0.33099600672721863, + "eval_runtime": 180.5075, + "eval_samples_per_second": 41.073, + "eval_steps_per_second": 5.136, + "eval_wer": 0.14155789932288926, + "step": 84600 + }, + { + "epoch": 3.0747147321753037, + "grad_norm": 0.8068686723709106, + "learning_rate": 2.7516958836148583e-05, + "loss": 0.121, + "step": 84610 + }, + { + "epoch": 3.0750781306781017, + "grad_norm": 2.0272300243377686, + "learning_rate": 2.7511795830267268e-05, + "loss": 0.2779, + "step": 84620 + }, + { + "epoch": 3.0754415291808996, + "grad_norm": 0.571723997592926, + "learning_rate": 2.7506632716161603e-05, + "loss": 0.0819, + "step": 84630 + }, + { + "epoch": 3.075804927683698, + "grad_norm": 0.5370312929153442, + "learning_rate": 2.7501469494054056e-05, + "loss": 0.1944, + "step": 84640 + }, + { + "epoch": 3.076168326186496, + "grad_norm": 0.9437683820724487, + "learning_rate": 2.749630616416709e-05, + "loss": 0.0791, + "step": 84650 + }, + { + "epoch": 3.0765317246892945, + "grad_norm": 0.29546335339546204, + "learning_rate": 2.749114272672317e-05, + "loss": 0.1011, + "step": 84660 + }, + { + "epoch": 3.0768951231920925, + "grad_norm": 1.31322181224823, + "learning_rate": 2.7485979181944778e-05, + "loss": 0.0709, + "step": 84670 + }, + { + "epoch": 3.0772585216948904, + "grad_norm": 0.3159250319004059, + "learning_rate": 2.748081553005438e-05, + "loss": 0.0715, + "step": 84680 + }, + { + "epoch": 3.077621920197689, + "grad_norm": 0.9355630278587341, + "learning_rate": 2.747565177127448e-05, + "loss": 0.5317, + "step": 84690 + }, + { + "epoch": 3.077985318700487, + "grad_norm": 0.462240993976593, + "learning_rate": 2.7470487905827542e-05, + "loss": 0.0819, + "step": 84700 + }, + { + "epoch": 3.0783487172032853, + "grad_norm": 1.28692626953125, + "learning_rate": 2.7465323933936077e-05, + "loss": 0.0753, + "step": 84710 + }, + { + "epoch": 3.0787121157060833, + "grad_norm": 0.413011372089386, + "learning_rate": 2.7460159855822577e-05, + "loss": 0.0573, + "step": 84720 + }, + { + "epoch": 3.0790755142088813, + "grad_norm": 0.4972487986087799, + "learning_rate": 2.745499567170953e-05, + "loss": 0.7906, + "step": 84730 + }, + { + "epoch": 3.0794389127116797, + "grad_norm": 0.48013266921043396, + "learning_rate": 2.744983138181947e-05, + "loss": 0.0815, + "step": 84740 + }, + { + "epoch": 3.0798023112144777, + "grad_norm": 0.597145676612854, + "learning_rate": 2.7444666986374877e-05, + "loss": 0.0945, + "step": 84750 + }, + { + "epoch": 3.080165709717276, + "grad_norm": 1.169415831565857, + "learning_rate": 2.7439502485598296e-05, + "loss": 0.0948, + "step": 84760 + }, + { + "epoch": 3.080529108220074, + "grad_norm": 0.42434608936309814, + "learning_rate": 2.7434337879712217e-05, + "loss": 0.937, + "step": 84770 + }, + { + "epoch": 3.0808925067228725, + "grad_norm": 1.248987078666687, + "learning_rate": 2.7429173168939194e-05, + "loss": 0.0861, + "step": 84780 + }, + { + "epoch": 3.0812559052256705, + "grad_norm": 0.3468557894229889, + "learning_rate": 2.742400835350174e-05, + "loss": 0.1311, + "step": 84790 + }, + { + "epoch": 3.0816193037284685, + "grad_norm": 0.5820382237434387, + "learning_rate": 2.7418843433622387e-05, + "loss": 0.0835, + "step": 84800 + }, + { + "epoch": 3.081982702231267, + "grad_norm": 0.7529087066650391, + "learning_rate": 2.741367840952368e-05, + "loss": 0.0695, + "step": 84810 + }, + { + "epoch": 3.082346100734065, + "grad_norm": 0.4199211895465851, + "learning_rate": 2.7408513281428156e-05, + "loss": 0.0736, + "step": 84820 + }, + { + "epoch": 3.0827094992368633, + "grad_norm": 0.3389349579811096, + "learning_rate": 2.7403348049558363e-05, + "loss": 0.0825, + "step": 84830 + }, + { + "epoch": 3.0830728977396613, + "grad_norm": 0.3386542797088623, + "learning_rate": 2.739818271413686e-05, + "loss": 0.0791, + "step": 84840 + }, + { + "epoch": 3.0834362962424593, + "grad_norm": 1.0355015993118286, + "learning_rate": 2.7393017275386186e-05, + "loss": 0.0781, + "step": 84850 + }, + { + "epoch": 3.0837996947452577, + "grad_norm": 0.6449688673019409, + "learning_rate": 2.7387851733528918e-05, + "loss": 0.078, + "step": 84860 + }, + { + "epoch": 3.0841630932480557, + "grad_norm": 0.6027774214744568, + "learning_rate": 2.7382686088787613e-05, + "loss": 0.0954, + "step": 84870 + }, + { + "epoch": 3.084526491750854, + "grad_norm": 0.3841930627822876, + "learning_rate": 2.737752034138485e-05, + "loss": 0.0701, + "step": 84880 + }, + { + "epoch": 3.084889890253652, + "grad_norm": 0.4170621633529663, + "learning_rate": 2.7372354491543184e-05, + "loss": 0.0901, + "step": 84890 + }, + { + "epoch": 3.0852532887564506, + "grad_norm": 0.8120031356811523, + "learning_rate": 2.736718853948521e-05, + "loss": 0.0809, + "step": 84900 + }, + { + "epoch": 3.0856166872592485, + "grad_norm": 0.36227869987487793, + "learning_rate": 2.73620224854335e-05, + "loss": 0.1081, + "step": 84910 + }, + { + "epoch": 3.0859800857620465, + "grad_norm": 0.6383411884307861, + "learning_rate": 2.7356856329610646e-05, + "loss": 0.0825, + "step": 84920 + }, + { + "epoch": 3.086343484264845, + "grad_norm": 0.4589132070541382, + "learning_rate": 2.7351690072239246e-05, + "loss": 0.0631, + "step": 84930 + }, + { + "epoch": 3.086706882767643, + "grad_norm": 0.5622755885124207, + "learning_rate": 2.734652371354188e-05, + "loss": 0.0708, + "step": 84940 + }, + { + "epoch": 3.0870702812704414, + "grad_norm": 2.5855283737182617, + "learning_rate": 2.7341357253741155e-05, + "loss": 0.0931, + "step": 84950 + }, + { + "epoch": 3.0874336797732393, + "grad_norm": 0.4554671049118042, + "learning_rate": 2.7336190693059688e-05, + "loss": 0.0817, + "step": 84960 + }, + { + "epoch": 3.0877970782760373, + "grad_norm": 0.824824869632721, + "learning_rate": 2.7331024031720065e-05, + "loss": 0.0966, + "step": 84970 + }, + { + "epoch": 3.0881604767788358, + "grad_norm": 1.0286164283752441, + "learning_rate": 2.7325857269944916e-05, + "loss": 0.0838, + "step": 84980 + }, + { + "epoch": 3.0885238752816337, + "grad_norm": 0.5027440190315247, + "learning_rate": 2.7320690407956844e-05, + "loss": 0.0819, + "step": 84990 + }, + { + "epoch": 3.088887273784432, + "grad_norm": 0.5407890677452087, + "learning_rate": 2.7315523445978486e-05, + "loss": 0.1, + "step": 85000 + }, + { + "epoch": 3.08925067228723, + "grad_norm": 0.7916487455368042, + "learning_rate": 2.731035638423246e-05, + "loss": 0.0742, + "step": 85010 + }, + { + "epoch": 3.089614070790028, + "grad_norm": 189.06448364257812, + "learning_rate": 2.7305189222941403e-05, + "loss": 3.2542, + "step": 85020 + }, + { + "epoch": 3.0899774692928266, + "grad_norm": 1.7703773975372314, + "learning_rate": 2.730002196232794e-05, + "loss": 0.0749, + "step": 85030 + }, + { + "epoch": 3.0903408677956246, + "grad_norm": 0.6317950487136841, + "learning_rate": 2.7294854602614712e-05, + "loss": 0.0938, + "step": 85040 + }, + { + "epoch": 3.090704266298423, + "grad_norm": 1.0689524412155151, + "learning_rate": 2.7289687144024368e-05, + "loss": 0.0934, + "step": 85050 + }, + { + "epoch": 3.091067664801221, + "grad_norm": 0.41926833987236023, + "learning_rate": 2.7284519586779545e-05, + "loss": 0.0997, + "step": 85060 + }, + { + "epoch": 3.0914310633040194, + "grad_norm": 0.8323477506637573, + "learning_rate": 2.727935193110291e-05, + "loss": 0.0701, + "step": 85070 + }, + { + "epoch": 3.0917944618068174, + "grad_norm": 1.0824620723724365, + "learning_rate": 2.7274184177217104e-05, + "loss": 0.0732, + "step": 85080 + }, + { + "epoch": 3.0921578603096154, + "grad_norm": 0.6392784118652344, + "learning_rate": 2.72690163253448e-05, + "loss": 0.1011, + "step": 85090 + }, + { + "epoch": 3.092521258812414, + "grad_norm": 1.3124550580978394, + "learning_rate": 2.7263848375708646e-05, + "loss": 0.0908, + "step": 85100 + }, + { + "epoch": 3.092884657315212, + "grad_norm": 0.42463892698287964, + "learning_rate": 2.7258680328531326e-05, + "loss": 0.0768, + "step": 85110 + }, + { + "epoch": 3.09324805581801, + "grad_norm": 0.8673943281173706, + "learning_rate": 2.725351218403551e-05, + "loss": 0.101, + "step": 85120 + }, + { + "epoch": 3.093611454320808, + "grad_norm": 0.26485633850097656, + "learning_rate": 2.7248343942443866e-05, + "loss": 0.0684, + "step": 85130 + }, + { + "epoch": 3.093974852823606, + "grad_norm": 1.3009984493255615, + "learning_rate": 2.7243175603979088e-05, + "loss": 0.1174, + "step": 85140 + }, + { + "epoch": 3.0943382513264046, + "grad_norm": 1.3685442209243774, + "learning_rate": 2.723800716886385e-05, + "loss": 0.0942, + "step": 85150 + }, + { + "epoch": 3.0947016498292026, + "grad_norm": 1.106117844581604, + "learning_rate": 2.7232838637320846e-05, + "loss": 0.1083, + "step": 85160 + }, + { + "epoch": 3.095065048332001, + "grad_norm": 0.48407310247421265, + "learning_rate": 2.722767000957277e-05, + "loss": 0.0596, + "step": 85170 + }, + { + "epoch": 3.095428446834799, + "grad_norm": 0.6600604057312012, + "learning_rate": 2.7222501285842315e-05, + "loss": 0.0893, + "step": 85180 + }, + { + "epoch": 3.0957918453375974, + "grad_norm": 0.9965558648109436, + "learning_rate": 2.7217332466352192e-05, + "loss": 0.1042, + "step": 85190 + }, + { + "epoch": 3.0961552438403954, + "grad_norm": 0.9833908081054688, + "learning_rate": 2.7212163551325105e-05, + "loss": 0.0964, + "step": 85200 + }, + { + "epoch": 3.0961552438403954, + "eval_loss": 0.3095722794532776, + "eval_runtime": 179.4671, + "eval_samples_per_second": 41.311, + "eval_steps_per_second": 5.165, + "eval_wer": 0.14271969793236153, + "step": 85200 + }, + { + "epoch": 3.0965186423431934, + "grad_norm": 2.14083194732666, + "learning_rate": 2.720699454098376e-05, + "loss": 0.087, + "step": 85210 + }, + { + "epoch": 3.096882040845992, + "grad_norm": 1.5555180311203003, + "learning_rate": 2.720182543555087e-05, + "loss": 0.0806, + "step": 85220 + }, + { + "epoch": 3.09724543934879, + "grad_norm": 0.7186011075973511, + "learning_rate": 2.7196656235249157e-05, + "loss": 0.0788, + "step": 85230 + }, + { + "epoch": 3.0976088378515882, + "grad_norm": 0.28633779287338257, + "learning_rate": 2.719148694030134e-05, + "loss": 0.1674, + "step": 85240 + }, + { + "epoch": 3.0979722363543862, + "grad_norm": 0.8628358244895935, + "learning_rate": 2.718631755093016e-05, + "loss": 0.0893, + "step": 85250 + }, + { + "epoch": 3.098335634857184, + "grad_norm": 0.41827306151390076, + "learning_rate": 2.718114806735832e-05, + "loss": 0.0989, + "step": 85260 + }, + { + "epoch": 3.0986990333599826, + "grad_norm": 0.5456732511520386, + "learning_rate": 2.7175978489808577e-05, + "loss": 0.0693, + "step": 85270 + }, + { + "epoch": 3.0990624318627806, + "grad_norm": 0.7632073760032654, + "learning_rate": 2.7170808818503667e-05, + "loss": 0.1152, + "step": 85280 + }, + { + "epoch": 3.099425830365579, + "grad_norm": 0.419622540473938, + "learning_rate": 2.716563905366632e-05, + "loss": 0.0821, + "step": 85290 + }, + { + "epoch": 3.099789228868377, + "grad_norm": 0.8410579562187195, + "learning_rate": 2.71604691955193e-05, + "loss": 0.0906, + "step": 85300 + }, + { + "epoch": 3.100152627371175, + "grad_norm": 1.2106753587722778, + "learning_rate": 2.7155299244285343e-05, + "loss": 0.0966, + "step": 85310 + }, + { + "epoch": 3.1005160258739735, + "grad_norm": 1.0297480821609497, + "learning_rate": 2.7150129200187213e-05, + "loss": 0.0814, + "step": 85320 + }, + { + "epoch": 3.1008794243767714, + "grad_norm": 0.5016379356384277, + "learning_rate": 2.7144959063447667e-05, + "loss": 0.0808, + "step": 85330 + }, + { + "epoch": 3.10124282287957, + "grad_norm": 0.8718425035476685, + "learning_rate": 2.7139788834289463e-05, + "loss": 0.094, + "step": 85340 + }, + { + "epoch": 3.101606221382368, + "grad_norm": 1.8094639778137207, + "learning_rate": 2.7134618512935372e-05, + "loss": 0.097, + "step": 85350 + }, + { + "epoch": 3.1019696198851663, + "grad_norm": 0.45816463232040405, + "learning_rate": 2.7129448099608167e-05, + "loss": 0.0649, + "step": 85360 + }, + { + "epoch": 3.1023330183879643, + "grad_norm": 5.5397868156433105, + "learning_rate": 2.7124277594530624e-05, + "loss": 0.0717, + "step": 85370 + }, + { + "epoch": 3.1026964168907623, + "grad_norm": 0.3643113076686859, + "learning_rate": 2.7119106997925513e-05, + "loss": 0.0649, + "step": 85380 + }, + { + "epoch": 3.1030598153935607, + "grad_norm": 1.1016013622283936, + "learning_rate": 2.711393631001562e-05, + "loss": 0.0926, + "step": 85390 + }, + { + "epoch": 3.1034232138963587, + "grad_norm": 2.1157453060150146, + "learning_rate": 2.7108765531023733e-05, + "loss": 0.0879, + "step": 85400 + }, + { + "epoch": 3.103786612399157, + "grad_norm": 0.9492454528808594, + "learning_rate": 2.7103594661172644e-05, + "loss": 0.0777, + "step": 85410 + }, + { + "epoch": 3.104150010901955, + "grad_norm": 0.3126516342163086, + "learning_rate": 2.7098423700685143e-05, + "loss": 0.0567, + "step": 85420 + }, + { + "epoch": 3.104513409404753, + "grad_norm": 0.3263763189315796, + "learning_rate": 2.7093252649784035e-05, + "loss": 0.067, + "step": 85430 + }, + { + "epoch": 3.1048768079075515, + "grad_norm": 0.8375474214553833, + "learning_rate": 2.7088081508692115e-05, + "loss": 0.0907, + "step": 85440 + }, + { + "epoch": 3.1052402064103495, + "grad_norm": 0.8583451509475708, + "learning_rate": 2.7082910277632194e-05, + "loss": 0.1196, + "step": 85450 + }, + { + "epoch": 3.105603604913148, + "grad_norm": 1.2481297254562378, + "learning_rate": 2.707773895682708e-05, + "loss": 0.0821, + "step": 85460 + }, + { + "epoch": 3.105967003415946, + "grad_norm": 0.3148916959762573, + "learning_rate": 2.7072567546499585e-05, + "loss": 0.0726, + "step": 85470 + }, + { + "epoch": 3.1063304019187443, + "grad_norm": 0.3623039126396179, + "learning_rate": 2.7067396046872533e-05, + "loss": 0.0876, + "step": 85480 + }, + { + "epoch": 3.1066938004215423, + "grad_norm": 0.5890102982521057, + "learning_rate": 2.7062224458168733e-05, + "loss": 0.1002, + "step": 85490 + }, + { + "epoch": 3.1070571989243403, + "grad_norm": 1.0585196018218994, + "learning_rate": 2.705705278061103e-05, + "loss": 0.0956, + "step": 85500 + }, + { + "epoch": 3.1074205974271387, + "grad_norm": 0.3192172944545746, + "learning_rate": 2.7051881014422232e-05, + "loss": 0.1025, + "step": 85510 + }, + { + "epoch": 3.1077839959299367, + "grad_norm": 0.47871172428131104, + "learning_rate": 2.7046709159825184e-05, + "loss": 0.092, + "step": 85520 + }, + { + "epoch": 3.108147394432735, + "grad_norm": 0.516779899597168, + "learning_rate": 2.704153721704273e-05, + "loss": 0.0672, + "step": 85530 + }, + { + "epoch": 3.108510792935533, + "grad_norm": 0.5075317621231079, + "learning_rate": 2.7036365186297685e-05, + "loss": 0.0803, + "step": 85540 + }, + { + "epoch": 3.108874191438331, + "grad_norm": 0.664806604385376, + "learning_rate": 2.703119306781292e-05, + "loss": 0.0831, + "step": 85550 + }, + { + "epoch": 3.1092375899411295, + "grad_norm": 0.8135377168655396, + "learning_rate": 2.702602086181127e-05, + "loss": 0.1029, + "step": 85560 + }, + { + "epoch": 3.1096009884439275, + "grad_norm": 1.3879525661468506, + "learning_rate": 2.702084856851559e-05, + "loss": 0.0779, + "step": 85570 + }, + { + "epoch": 3.109964386946726, + "grad_norm": 0.43086570501327515, + "learning_rate": 2.7015676188148732e-05, + "loss": 0.0809, + "step": 85580 + }, + { + "epoch": 3.110327785449524, + "grad_norm": 0.5117005705833435, + "learning_rate": 2.7010503720933567e-05, + "loss": 0.103, + "step": 85590 + }, + { + "epoch": 3.110691183952322, + "grad_norm": 1.2577043771743774, + "learning_rate": 2.7005331167092945e-05, + "loss": 0.1145, + "step": 85600 + }, + { + "epoch": 3.1110545824551203, + "grad_norm": 0.43926846981048584, + "learning_rate": 2.7000158526849734e-05, + "loss": 0.5229, + "step": 85610 + }, + { + "epoch": 3.1114179809579183, + "grad_norm": 0.6876174807548523, + "learning_rate": 2.6994985800426813e-05, + "loss": 0.0711, + "step": 85620 + }, + { + "epoch": 3.1117813794607168, + "grad_norm": 0.5573021769523621, + "learning_rate": 2.6989812988047053e-05, + "loss": 0.0801, + "step": 85630 + }, + { + "epoch": 3.1121447779635147, + "grad_norm": 0.8710441589355469, + "learning_rate": 2.6984640089933332e-05, + "loss": 0.0774, + "step": 85640 + }, + { + "epoch": 3.112508176466313, + "grad_norm": 0.9022230505943298, + "learning_rate": 2.697946710630853e-05, + "loss": 0.0983, + "step": 85650 + }, + { + "epoch": 3.112871574969111, + "grad_norm": 1.3726192712783813, + "learning_rate": 2.6974294037395533e-05, + "loss": 0.1686, + "step": 85660 + }, + { + "epoch": 3.113234973471909, + "grad_norm": 0.6950212121009827, + "learning_rate": 2.6969120883417228e-05, + "loss": 0.0732, + "step": 85670 + }, + { + "epoch": 3.1135983719747076, + "grad_norm": 0.5118197202682495, + "learning_rate": 2.696394764459651e-05, + "loss": 0.1856, + "step": 85680 + }, + { + "epoch": 3.1139617704775056, + "grad_norm": 0.7940520644187927, + "learning_rate": 2.6958774321156278e-05, + "loss": 0.0907, + "step": 85690 + }, + { + "epoch": 3.114325168980304, + "grad_norm": 4.099023342132568, + "learning_rate": 2.6953600913319427e-05, + "loss": 0.0699, + "step": 85700 + }, + { + "epoch": 3.114688567483102, + "grad_norm": 0.6201004385948181, + "learning_rate": 2.694842742130887e-05, + "loss": 0.0649, + "step": 85710 + }, + { + "epoch": 3.1150519659859, + "grad_norm": 1.4649277925491333, + "learning_rate": 2.6943253845347506e-05, + "loss": 0.0812, + "step": 85720 + }, + { + "epoch": 3.1154153644886984, + "grad_norm": 1.2290292978286743, + "learning_rate": 2.6938080185658242e-05, + "loss": 0.088, + "step": 85730 + }, + { + "epoch": 3.1157787629914964, + "grad_norm": 3.397437334060669, + "learning_rate": 2.6932906442464005e-05, + "loss": 0.1178, + "step": 85740 + }, + { + "epoch": 3.116142161494295, + "grad_norm": 1.0771185159683228, + "learning_rate": 2.69277326159877e-05, + "loss": 0.086, + "step": 85750 + }, + { + "epoch": 3.116505559997093, + "grad_norm": 0.9583094716072083, + "learning_rate": 2.692255870645226e-05, + "loss": 0.0721, + "step": 85760 + }, + { + "epoch": 3.116868958499891, + "grad_norm": 0.3439558744430542, + "learning_rate": 2.6917384714080606e-05, + "loss": 0.0629, + "step": 85770 + }, + { + "epoch": 3.117232357002689, + "grad_norm": 0.909960150718689, + "learning_rate": 2.691221063909567e-05, + "loss": 0.0839, + "step": 85780 + }, + { + "epoch": 3.117595755505487, + "grad_norm": 0.5029392242431641, + "learning_rate": 2.6907036481720377e-05, + "loss": 0.09, + "step": 85790 + }, + { + "epoch": 3.1179591540082856, + "grad_norm": 0.8775815963745117, + "learning_rate": 2.6901862242177667e-05, + "loss": 0.0989, + "step": 85800 + }, + { + "epoch": 3.1179591540082856, + "eval_loss": 0.33487364649772644, + "eval_runtime": 179.4409, + "eval_samples_per_second": 41.317, + "eval_steps_per_second": 5.166, + "eval_wer": 0.14279231034545356, + "step": 85800 + }, + { + "epoch": 3.1183225525110836, + "grad_norm": 0.7273783683776855, + "learning_rate": 2.689668792069048e-05, + "loss": 0.0869, + "step": 85810 + }, + { + "epoch": 3.118685951013882, + "grad_norm": 0.24581924080848694, + "learning_rate": 2.689151351748176e-05, + "loss": 0.0647, + "step": 85820 + }, + { + "epoch": 3.11904934951668, + "grad_norm": 0.9781889915466309, + "learning_rate": 2.688633903277445e-05, + "loss": 0.0781, + "step": 85830 + }, + { + "epoch": 3.119412748019478, + "grad_norm": 0.4281591773033142, + "learning_rate": 2.688116446679151e-05, + "loss": 0.1053, + "step": 85840 + }, + { + "epoch": 3.1197761465222764, + "grad_norm": 1.24199378490448, + "learning_rate": 2.6875989819755876e-05, + "loss": 0.0792, + "step": 85850 + }, + { + "epoch": 3.1201395450250744, + "grad_norm": 1.8490883111953735, + "learning_rate": 2.6870815091890523e-05, + "loss": 0.1136, + "step": 85860 + }, + { + "epoch": 3.120502943527873, + "grad_norm": 0.7298690676689148, + "learning_rate": 2.6865640283418398e-05, + "loss": 0.082, + "step": 85870 + }, + { + "epoch": 3.120866342030671, + "grad_norm": 0.5065509080886841, + "learning_rate": 2.6860465394562478e-05, + "loss": 0.0723, + "step": 85880 + }, + { + "epoch": 3.121229740533469, + "grad_norm": 0.9484612941741943, + "learning_rate": 2.6855290425545713e-05, + "loss": 0.1147, + "step": 85890 + }, + { + "epoch": 3.1215931390362672, + "grad_norm": 0.5501681566238403, + "learning_rate": 2.68501153765911e-05, + "loss": 0.1155, + "step": 85900 + }, + { + "epoch": 3.121956537539065, + "grad_norm": 1.5682904720306396, + "learning_rate": 2.684494024792159e-05, + "loss": 0.1119, + "step": 85910 + }, + { + "epoch": 3.1223199360418636, + "grad_norm": 0.8644644618034363, + "learning_rate": 2.6839765039760168e-05, + "loss": 0.0715, + "step": 85920 + }, + { + "epoch": 3.1226833345446616, + "grad_norm": 0.5618588924407959, + "learning_rate": 2.683458975232982e-05, + "loss": 0.0808, + "step": 85930 + }, + { + "epoch": 3.12304673304746, + "grad_norm": 0.45507627725601196, + "learning_rate": 2.682941438585352e-05, + "loss": 0.0747, + "step": 85940 + }, + { + "epoch": 3.123410131550258, + "grad_norm": 0.4180305302143097, + "learning_rate": 2.682423894055427e-05, + "loss": 0.0642, + "step": 85950 + }, + { + "epoch": 3.123773530053056, + "grad_norm": 1.2814691066741943, + "learning_rate": 2.6819063416655054e-05, + "loss": 0.1001, + "step": 85960 + }, + { + "epoch": 3.1241369285558545, + "grad_norm": 0.5419327020645142, + "learning_rate": 2.6813887814378864e-05, + "loss": 1.3428, + "step": 85970 + }, + { + "epoch": 3.1245003270586524, + "grad_norm": 0.9683062434196472, + "learning_rate": 2.6808712133948705e-05, + "loss": 0.0685, + "step": 85980 + }, + { + "epoch": 3.124863725561451, + "grad_norm": 1.1266988515853882, + "learning_rate": 2.6803536375587572e-05, + "loss": 0.103, + "step": 85990 + }, + { + "epoch": 3.125227124064249, + "grad_norm": 0.9783998727798462, + "learning_rate": 2.679836053951848e-05, + "loss": 0.0788, + "step": 86000 + }, + { + "epoch": 3.125590522567047, + "grad_norm": 2.445962905883789, + "learning_rate": 2.6793184625964425e-05, + "loss": 0.0841, + "step": 86010 + }, + { + "epoch": 3.1259539210698453, + "grad_norm": 0.41320154070854187, + "learning_rate": 2.678800863514843e-05, + "loss": 0.0663, + "step": 86020 + }, + { + "epoch": 3.1263173195726432, + "grad_norm": 0.4643288850784302, + "learning_rate": 2.6782832567293504e-05, + "loss": 0.0801, + "step": 86030 + }, + { + "epoch": 3.1266807180754417, + "grad_norm": 0.5545969605445862, + "learning_rate": 2.677765642262266e-05, + "loss": 0.0976, + "step": 86040 + }, + { + "epoch": 3.1270441165782397, + "grad_norm": 0.8363248109817505, + "learning_rate": 2.677248020135893e-05, + "loss": 0.0715, + "step": 86050 + }, + { + "epoch": 3.127407515081038, + "grad_norm": 1.5308492183685303, + "learning_rate": 2.6767303903725332e-05, + "loss": 0.1622, + "step": 86060 + }, + { + "epoch": 3.127770913583836, + "grad_norm": 2.161001682281494, + "learning_rate": 2.6762127529944903e-05, + "loss": 0.081, + "step": 86070 + }, + { + "epoch": 3.128134312086634, + "grad_norm": 0.6405352354049683, + "learning_rate": 2.6756951080240662e-05, + "loss": 0.0689, + "step": 86080 + }, + { + "epoch": 3.1284977105894325, + "grad_norm": 12.528480529785156, + "learning_rate": 2.675177455483565e-05, + "loss": 0.9386, + "step": 86090 + }, + { + "epoch": 3.1288611090922305, + "grad_norm": 3.0616352558135986, + "learning_rate": 2.674659795395291e-05, + "loss": 0.0755, + "step": 86100 + }, + { + "epoch": 3.129224507595029, + "grad_norm": 0.49330297112464905, + "learning_rate": 2.6741421277815475e-05, + "loss": 0.0792, + "step": 86110 + }, + { + "epoch": 3.129587906097827, + "grad_norm": 7.257319450378418, + "learning_rate": 2.6736244526646398e-05, + "loss": 0.0865, + "step": 86120 + }, + { + "epoch": 3.129951304600625, + "grad_norm": 1.1979786157608032, + "learning_rate": 2.6731067700668712e-05, + "loss": 0.0974, + "step": 86130 + }, + { + "epoch": 3.1303147031034233, + "grad_norm": 0.2251403033733368, + "learning_rate": 2.6725890800105486e-05, + "loss": 0.0755, + "step": 86140 + }, + { + "epoch": 3.1306781016062213, + "grad_norm": 0.9207643270492554, + "learning_rate": 2.6720713825179767e-05, + "loss": 0.1739, + "step": 86150 + }, + { + "epoch": 3.1310415001090197, + "grad_norm": 0.5833568572998047, + "learning_rate": 2.671553677611461e-05, + "loss": 0.0747, + "step": 86160 + }, + { + "epoch": 3.1314048986118177, + "grad_norm": 0.5385452508926392, + "learning_rate": 2.6710359653133078e-05, + "loss": 2.187, + "step": 86170 + }, + { + "epoch": 3.1317682971146157, + "grad_norm": 0.7017218470573425, + "learning_rate": 2.670518245645823e-05, + "loss": 0.0838, + "step": 86180 + }, + { + "epoch": 3.132131695617414, + "grad_norm": 0.682905375957489, + "learning_rate": 2.670000518631314e-05, + "loss": 0.0946, + "step": 86190 + }, + { + "epoch": 3.132495094120212, + "grad_norm": 0.5854523181915283, + "learning_rate": 2.669482784292087e-05, + "loss": 0.1085, + "step": 86200 + }, + { + "epoch": 3.1328584926230105, + "grad_norm": 0.4975448548793793, + "learning_rate": 2.6689650426504504e-05, + "loss": 0.0851, + "step": 86210 + }, + { + "epoch": 3.1332218911258085, + "grad_norm": 3.5613021850585938, + "learning_rate": 2.6684472937287115e-05, + "loss": 0.0911, + "step": 86220 + }, + { + "epoch": 3.1335852896286065, + "grad_norm": 1.3843308687210083, + "learning_rate": 2.6679295375491776e-05, + "loss": 0.0627, + "step": 86230 + }, + { + "epoch": 3.133948688131405, + "grad_norm": 3.7349839210510254, + "learning_rate": 2.6674117741341575e-05, + "loss": 0.0982, + "step": 86240 + }, + { + "epoch": 3.134312086634203, + "grad_norm": 0.9458445906639099, + "learning_rate": 2.66689400350596e-05, + "loss": 0.1023, + "step": 86250 + }, + { + "epoch": 3.1346754851370013, + "grad_norm": 0.45296090841293335, + "learning_rate": 2.6663762256868928e-05, + "loss": 0.0625, + "step": 86260 + }, + { + "epoch": 3.1350388836397993, + "grad_norm": 0.7236283421516418, + "learning_rate": 2.665858440699267e-05, + "loss": 0.0708, + "step": 86270 + }, + { + "epoch": 3.1354022821425978, + "grad_norm": 2.072012424468994, + "learning_rate": 2.665340648565391e-05, + "loss": 0.0683, + "step": 86280 + }, + { + "epoch": 3.1357656806453957, + "grad_norm": 0.8755659461021423, + "learning_rate": 2.6648228493075744e-05, + "loss": 0.0866, + "step": 86290 + }, + { + "epoch": 3.1361290791481937, + "grad_norm": 0.7825614213943481, + "learning_rate": 2.6643050429481275e-05, + "loss": 0.0887, + "step": 86300 + }, + { + "epoch": 3.136492477650992, + "grad_norm": 0.9423545002937317, + "learning_rate": 2.6637872295093612e-05, + "loss": 0.093, + "step": 86310 + }, + { + "epoch": 3.13685587615379, + "grad_norm": 0.37489980459213257, + "learning_rate": 2.6632694090135856e-05, + "loss": 0.0797, + "step": 86320 + }, + { + "epoch": 3.1372192746565886, + "grad_norm": 0.4927680790424347, + "learning_rate": 2.6627515814831126e-05, + "loss": 0.0708, + "step": 86330 + }, + { + "epoch": 3.1375826731593865, + "grad_norm": 0.7253931760787964, + "learning_rate": 2.6622337469402537e-05, + "loss": 0.0974, + "step": 86340 + }, + { + "epoch": 3.137946071662185, + "grad_norm": 0.8817862272262573, + "learning_rate": 2.6617159054073182e-05, + "loss": 0.0893, + "step": 86350 + }, + { + "epoch": 3.138309470164983, + "grad_norm": 0.9500261545181274, + "learning_rate": 2.6611980569066208e-05, + "loss": 0.0715, + "step": 86360 + }, + { + "epoch": 3.138672868667781, + "grad_norm": 0.5115967988967896, + "learning_rate": 2.660680201460472e-05, + "loss": 0.0713, + "step": 86370 + }, + { + "epoch": 3.1390362671705794, + "grad_norm": 0.752263069152832, + "learning_rate": 2.6601623390911857e-05, + "loss": 0.0685, + "step": 86380 + }, + { + "epoch": 3.1393996656733774, + "grad_norm": 0.39566388726234436, + "learning_rate": 2.6596444698210738e-05, + "loss": 1.6043, + "step": 86390 + }, + { + "epoch": 3.139763064176176, + "grad_norm": 0.3248102068901062, + "learning_rate": 2.6591265936724495e-05, + "loss": 0.2024, + "step": 86400 + }, + { + "epoch": 3.139763064176176, + "eval_loss": 0.30238452553749084, + "eval_runtime": 179.688, + "eval_samples_per_second": 41.26, + "eval_steps_per_second": 5.159, + "eval_wer": 0.142565396554541, + "step": 86400 + }, + { + "epoch": 3.1401264626789738, + "grad_norm": 0.7450832724571228, + "learning_rate": 2.6586087106676272e-05, + "loss": 0.0831, + "step": 86410 + }, + { + "epoch": 3.1404898611817718, + "grad_norm": 0.35402756929397583, + "learning_rate": 2.658090820828919e-05, + "loss": 0.0802, + "step": 86420 + }, + { + "epoch": 3.14085325968457, + "grad_norm": 0.7039386034011841, + "learning_rate": 2.65757292417864e-05, + "loss": 0.0693, + "step": 86430 + }, + { + "epoch": 3.141216658187368, + "grad_norm": 0.5584103465080261, + "learning_rate": 2.6570550207391043e-05, + "loss": 0.0747, + "step": 86440 + }, + { + "epoch": 3.1415800566901666, + "grad_norm": 0.5645958185195923, + "learning_rate": 2.656537110532627e-05, + "loss": 0.0887, + "step": 86450 + }, + { + "epoch": 3.1419434551929646, + "grad_norm": 0.7481971979141235, + "learning_rate": 2.656019193581522e-05, + "loss": 0.1027, + "step": 86460 + }, + { + "epoch": 3.1423068536957626, + "grad_norm": 0.48159149289131165, + "learning_rate": 2.6555012699081057e-05, + "loss": 0.0642, + "step": 86470 + }, + { + "epoch": 3.142670252198561, + "grad_norm": 0.6558395028114319, + "learning_rate": 2.654983339534693e-05, + "loss": 0.0767, + "step": 86480 + }, + { + "epoch": 3.143033650701359, + "grad_norm": 0.8240875601768494, + "learning_rate": 2.654465402483599e-05, + "loss": 0.1036, + "step": 86490 + }, + { + "epoch": 3.1433970492041574, + "grad_norm": 1.9741828441619873, + "learning_rate": 2.6539474587771406e-05, + "loss": 0.0872, + "step": 86500 + }, + { + "epoch": 3.1437604477069554, + "grad_norm": 0.4783799350261688, + "learning_rate": 2.6534295084376337e-05, + "loss": 3.1495, + "step": 86510 + }, + { + "epoch": 3.1441238462097534, + "grad_norm": 0.5813152194023132, + "learning_rate": 2.652911551487396e-05, + "loss": 0.078, + "step": 86520 + }, + { + "epoch": 3.144487244712552, + "grad_norm": 1.1872795820236206, + "learning_rate": 2.6523935879487432e-05, + "loss": 0.0817, + "step": 86530 + }, + { + "epoch": 3.14485064321535, + "grad_norm": 0.5647823214530945, + "learning_rate": 2.6518756178439925e-05, + "loss": 1.1158, + "step": 86540 + }, + { + "epoch": 3.1452140417181482, + "grad_norm": 1.1907984018325806, + "learning_rate": 2.6513576411954627e-05, + "loss": 0.1104, + "step": 86550 + }, + { + "epoch": 3.145577440220946, + "grad_norm": 0.5368396043777466, + "learning_rate": 2.6508396580254697e-05, + "loss": 0.0929, + "step": 86560 + }, + { + "epoch": 3.1459408387237446, + "grad_norm": 0.34880682826042175, + "learning_rate": 2.650321668356333e-05, + "loss": 0.0714, + "step": 86570 + }, + { + "epoch": 3.1463042372265426, + "grad_norm": 0.4075338542461395, + "learning_rate": 2.6498036722103703e-05, + "loss": 0.0836, + "step": 86580 + }, + { + "epoch": 3.1466676357293406, + "grad_norm": 5.59214973449707, + "learning_rate": 2.6492856696099006e-05, + "loss": 0.0829, + "step": 86590 + }, + { + "epoch": 3.147031034232139, + "grad_norm": 0.9753894209861755, + "learning_rate": 2.6487676605772426e-05, + "loss": 0.0899, + "step": 86600 + }, + { + "epoch": 3.147394432734937, + "grad_norm": 0.3571523129940033, + "learning_rate": 2.648249645134715e-05, + "loss": 0.0722, + "step": 86610 + }, + { + "epoch": 3.1477578312377354, + "grad_norm": 0.5043341517448425, + "learning_rate": 2.647731623304638e-05, + "loss": 0.0663, + "step": 86620 + }, + { + "epoch": 3.1481212297405334, + "grad_norm": 5.329759120941162, + "learning_rate": 2.647213595109331e-05, + "loss": 0.0844, + "step": 86630 + }, + { + "epoch": 3.148484628243332, + "grad_norm": 0.3437232971191406, + "learning_rate": 2.6466955605711136e-05, + "loss": 0.1001, + "step": 86640 + }, + { + "epoch": 3.14884802674613, + "grad_norm": 2.181110143661499, + "learning_rate": 2.646177519712307e-05, + "loss": 0.0972, + "step": 86650 + }, + { + "epoch": 3.149211425248928, + "grad_norm": 0.8751040101051331, + "learning_rate": 2.6456594725552302e-05, + "loss": 0.0811, + "step": 86660 + }, + { + "epoch": 3.1495748237517263, + "grad_norm": 0.36447498202323914, + "learning_rate": 2.6451414191222062e-05, + "loss": 0.0606, + "step": 86670 + }, + { + "epoch": 3.1499382222545242, + "grad_norm": 0.47651347517967224, + "learning_rate": 2.644623359435554e-05, + "loss": 0.0665, + "step": 86680 + }, + { + "epoch": 3.1503016207573227, + "grad_norm": 0.4340047836303711, + "learning_rate": 2.6441052935175964e-05, + "loss": 0.0787, + "step": 86690 + }, + { + "epoch": 3.1506650192601207, + "grad_norm": 6.734339714050293, + "learning_rate": 2.6435872213906538e-05, + "loss": 0.0808, + "step": 86700 + }, + { + "epoch": 3.1510284177629186, + "grad_norm": 0.3784515857696533, + "learning_rate": 2.6430691430770494e-05, + "loss": 0.0739, + "step": 86710 + }, + { + "epoch": 3.151391816265717, + "grad_norm": 1.0528838634490967, + "learning_rate": 2.6425510585991047e-05, + "loss": 0.0704, + "step": 86720 + }, + { + "epoch": 3.151755214768515, + "grad_norm": 0.8241010308265686, + "learning_rate": 2.6420329679791412e-05, + "loss": 0.0657, + "step": 86730 + }, + { + "epoch": 3.1521186132713135, + "grad_norm": 0.5599542260169983, + "learning_rate": 2.6415148712394833e-05, + "loss": 0.0743, + "step": 86740 + }, + { + "epoch": 3.1524820117741115, + "grad_norm": 1.3045130968093872, + "learning_rate": 2.6409967684024522e-05, + "loss": 0.0808, + "step": 86750 + }, + { + "epoch": 3.1528454102769095, + "grad_norm": 0.8639160394668579, + "learning_rate": 2.640478659490373e-05, + "loss": 0.0784, + "step": 86760 + }, + { + "epoch": 3.153208808779708, + "grad_norm": 0.4310116767883301, + "learning_rate": 2.6399605445255677e-05, + "loss": 0.0792, + "step": 86770 + }, + { + "epoch": 3.153572207282506, + "grad_norm": 0.40688809752464294, + "learning_rate": 2.6394424235303606e-05, + "loss": 0.069, + "step": 86780 + }, + { + "epoch": 3.1539356057853043, + "grad_norm": 0.7720523476600647, + "learning_rate": 2.638924296527076e-05, + "loss": 0.0791, + "step": 86790 + }, + { + "epoch": 3.1542990042881023, + "grad_norm": 0.520601212978363, + "learning_rate": 2.638406163538037e-05, + "loss": 0.0901, + "step": 86800 + }, + { + "epoch": 3.1546624027909003, + "grad_norm": 0.37290602922439575, + "learning_rate": 2.6378880245855698e-05, + "loss": 0.1093, + "step": 86810 + }, + { + "epoch": 3.1550258012936987, + "grad_norm": 0.9572335481643677, + "learning_rate": 2.637369879691997e-05, + "loss": 0.6554, + "step": 86820 + }, + { + "epoch": 3.1553891997964967, + "grad_norm": 0.8544325232505798, + "learning_rate": 2.6368517288796456e-05, + "loss": 0.0846, + "step": 86830 + }, + { + "epoch": 3.155752598299295, + "grad_norm": 1.4652429819107056, + "learning_rate": 2.6363335721708403e-05, + "loss": 0.1105, + "step": 86840 + }, + { + "epoch": 3.156115996802093, + "grad_norm": 3.825007438659668, + "learning_rate": 2.6358154095879063e-05, + "loss": 0.095, + "step": 86850 + }, + { + "epoch": 3.1564793953048915, + "grad_norm": 1.822260856628418, + "learning_rate": 2.6352972411531696e-05, + "loss": 0.0825, + "step": 86860 + }, + { + "epoch": 3.1568427938076895, + "grad_norm": 0.46824485063552856, + "learning_rate": 2.6347790668889553e-05, + "loss": 0.0716, + "step": 86870 + }, + { + "epoch": 3.1572061923104875, + "grad_norm": 1.1813397407531738, + "learning_rate": 2.6342608868175916e-05, + "loss": 0.0891, + "step": 86880 + }, + { + "epoch": 3.157569590813286, + "grad_norm": 0.7336288690567017, + "learning_rate": 2.6337427009614034e-05, + "loss": 0.1021, + "step": 86890 + }, + { + "epoch": 3.157932989316084, + "grad_norm": 0.4944972097873688, + "learning_rate": 2.6332245093427187e-05, + "loss": 0.0873, + "step": 86900 + }, + { + "epoch": 3.1582963878188823, + "grad_norm": 0.5169403553009033, + "learning_rate": 2.6327063119838634e-05, + "loss": 0.0966, + "step": 86910 + }, + { + "epoch": 3.1586597863216803, + "grad_norm": 0.6052831411361694, + "learning_rate": 2.6321881089071655e-05, + "loss": 0.0665, + "step": 86920 + }, + { + "epoch": 3.1590231848244787, + "grad_norm": 0.4833454489707947, + "learning_rate": 2.6316699001349526e-05, + "loss": 0.0732, + "step": 86930 + }, + { + "epoch": 3.1593865833272767, + "grad_norm": 0.6335532069206238, + "learning_rate": 2.6311516856895512e-05, + "loss": 0.0874, + "step": 86940 + }, + { + "epoch": 3.1597499818300747, + "grad_norm": 1.6006765365600586, + "learning_rate": 2.630633465593291e-05, + "loss": 0.1257, + "step": 86950 + }, + { + "epoch": 3.160113380332873, + "grad_norm": 0.3963083028793335, + "learning_rate": 2.6301152398684998e-05, + "loss": 0.0822, + "step": 86960 + }, + { + "epoch": 3.160476778835671, + "grad_norm": 0.44627153873443604, + "learning_rate": 2.6295970085375054e-05, + "loss": 0.0622, + "step": 86970 + }, + { + "epoch": 3.1608401773384696, + "grad_norm": 0.7904446125030518, + "learning_rate": 2.629078771622637e-05, + "loss": 0.0753, + "step": 86980 + }, + { + "epoch": 3.1612035758412675, + "grad_norm": 0.5941157937049866, + "learning_rate": 2.628560529146224e-05, + "loss": 0.0884, + "step": 86990 + }, + { + "epoch": 3.1615669743440655, + "grad_norm": 0.6121344566345215, + "learning_rate": 2.6280422811305948e-05, + "loss": 0.0678, + "step": 87000 + }, + { + "epoch": 3.1615669743440655, + "eval_loss": 0.33216938376426697, + "eval_runtime": 180.0386, + "eval_samples_per_second": 41.18, + "eval_steps_per_second": 5.149, + "eval_wer": 0.1409860765697896, + "step": 87000 + }, + { + "epoch": 3.161930372846864, + "grad_norm": 1.1521292924880981, + "learning_rate": 2.6275240275980795e-05, + "loss": 0.0904, + "step": 87010 + }, + { + "epoch": 3.162293771349662, + "grad_norm": 0.48100772500038147, + "learning_rate": 2.6270057685710074e-05, + "loss": 0.0725, + "step": 87020 + }, + { + "epoch": 3.1626571698524604, + "grad_norm": 0.4358821511268616, + "learning_rate": 2.6264875040717092e-05, + "loss": 0.0714, + "step": 87030 + }, + { + "epoch": 3.1630205683552584, + "grad_norm": 0.907632052898407, + "learning_rate": 2.625969234122514e-05, + "loss": 0.0949, + "step": 87040 + }, + { + "epoch": 3.1633839668580563, + "grad_norm": 0.6242339015007019, + "learning_rate": 2.6254509587457527e-05, + "loss": 0.0856, + "step": 87050 + }, + { + "epoch": 3.1637473653608548, + "grad_norm": 0.8153877854347229, + "learning_rate": 2.6249326779637555e-05, + "loss": 0.1291, + "step": 87060 + }, + { + "epoch": 3.1641107638636528, + "grad_norm": 1.0637160539627075, + "learning_rate": 2.6244143917988544e-05, + "loss": 0.0709, + "step": 87070 + }, + { + "epoch": 3.164474162366451, + "grad_norm": 0.33303794264793396, + "learning_rate": 2.6238961002733796e-05, + "loss": 0.0828, + "step": 87080 + }, + { + "epoch": 3.164837560869249, + "grad_norm": 0.676785945892334, + "learning_rate": 2.623377803409663e-05, + "loss": 0.1132, + "step": 87090 + }, + { + "epoch": 3.165200959372047, + "grad_norm": 1.7492424249649048, + "learning_rate": 2.6228595012300356e-05, + "loss": 0.0876, + "step": 87100 + }, + { + "epoch": 3.1655643578748456, + "grad_norm": 0.5225327610969543, + "learning_rate": 2.622341193756829e-05, + "loss": 0.1208, + "step": 87110 + }, + { + "epoch": 3.1659277563776436, + "grad_norm": 0.5629643201828003, + "learning_rate": 2.6218228810123763e-05, + "loss": 0.0862, + "step": 87120 + }, + { + "epoch": 3.166291154880442, + "grad_norm": 1.335174560546875, + "learning_rate": 2.6213045630190084e-05, + "loss": 0.0746, + "step": 87130 + }, + { + "epoch": 3.16665455338324, + "grad_norm": 0.6635318398475647, + "learning_rate": 2.6207862397990597e-05, + "loss": 0.0903, + "step": 87140 + }, + { + "epoch": 3.1670179518860384, + "grad_norm": 0.9929222464561462, + "learning_rate": 2.620267911374861e-05, + "loss": 0.1024, + "step": 87150 + }, + { + "epoch": 3.1673813503888364, + "grad_norm": 0.5913928151130676, + "learning_rate": 2.619749577768745e-05, + "loss": 0.114, + "step": 87160 + }, + { + "epoch": 3.1677447488916344, + "grad_norm": 0.9411669373512268, + "learning_rate": 2.6192312390030472e-05, + "loss": 0.0878, + "step": 87170 + }, + { + "epoch": 3.168108147394433, + "grad_norm": 2.4124414920806885, + "learning_rate": 2.6187128951000982e-05, + "loss": 0.0937, + "step": 87180 + }, + { + "epoch": 3.168471545897231, + "grad_norm": 0.3180709481239319, + "learning_rate": 2.6181945460822343e-05, + "loss": 0.0956, + "step": 87190 + }, + { + "epoch": 3.168834944400029, + "grad_norm": 0.9713842272758484, + "learning_rate": 2.6176761919717867e-05, + "loss": 0.111, + "step": 87200 + }, + { + "epoch": 3.169198342902827, + "grad_norm": 0.7940172553062439, + "learning_rate": 2.6171578327910918e-05, + "loss": 0.0794, + "step": 87210 + }, + { + "epoch": 3.1695617414056256, + "grad_norm": 0.7418053150177002, + "learning_rate": 2.6166394685624823e-05, + "loss": 0.0692, + "step": 87220 + }, + { + "epoch": 3.1699251399084236, + "grad_norm": 0.4988052546977997, + "learning_rate": 2.6161210993082925e-05, + "loss": 0.0702, + "step": 87230 + }, + { + "epoch": 3.1702885384112216, + "grad_norm": 0.41006624698638916, + "learning_rate": 2.6156027250508587e-05, + "loss": 0.1156, + "step": 87240 + }, + { + "epoch": 3.17065193691402, + "grad_norm": 0.7204731702804565, + "learning_rate": 2.615084345812514e-05, + "loss": 0.0822, + "step": 87250 + }, + { + "epoch": 3.171015335416818, + "grad_norm": 0.5459019541740417, + "learning_rate": 2.6145659616155948e-05, + "loss": 0.076, + "step": 87260 + }, + { + "epoch": 3.1713787339196164, + "grad_norm": 0.5608823299407959, + "learning_rate": 2.6140475724824355e-05, + "loss": 0.076, + "step": 87270 + }, + { + "epoch": 3.1717421324224144, + "grad_norm": 0.6672724485397339, + "learning_rate": 2.613529178435372e-05, + "loss": 0.0617, + "step": 87280 + }, + { + "epoch": 3.1721055309252124, + "grad_norm": 0.4979981482028961, + "learning_rate": 2.6130107794967412e-05, + "loss": 0.0852, + "step": 87290 + }, + { + "epoch": 3.172468929428011, + "grad_norm": 4.004956245422363, + "learning_rate": 2.612492375688877e-05, + "loss": 0.1044, + "step": 87300 + }, + { + "epoch": 3.172832327930809, + "grad_norm": 0.3019026517868042, + "learning_rate": 2.611973967034117e-05, + "loss": 0.1081, + "step": 87310 + }, + { + "epoch": 3.1731957264336073, + "grad_norm": 0.5312141180038452, + "learning_rate": 2.6114555535547964e-05, + "loss": 0.072, + "step": 87320 + }, + { + "epoch": 3.1735591249364052, + "grad_norm": 0.5587911605834961, + "learning_rate": 2.6109371352732537e-05, + "loss": 0.0663, + "step": 87330 + }, + { + "epoch": 3.1739225234392032, + "grad_norm": 0.7223249077796936, + "learning_rate": 2.610418712211824e-05, + "loss": 0.0689, + "step": 87340 + }, + { + "epoch": 3.1742859219420017, + "grad_norm": 0.9574065208435059, + "learning_rate": 2.6099002843928444e-05, + "loss": 0.1149, + "step": 87350 + }, + { + "epoch": 3.1746493204447996, + "grad_norm": 1.298269510269165, + "learning_rate": 2.6093818518386535e-05, + "loss": 0.0749, + "step": 87360 + }, + { + "epoch": 3.175012718947598, + "grad_norm": 215.23536682128906, + "learning_rate": 2.6088634145715867e-05, + "loss": 3.8479, + "step": 87370 + }, + { + "epoch": 3.175376117450396, + "grad_norm": 0.8245293498039246, + "learning_rate": 2.608344972613984e-05, + "loss": 0.0947, + "step": 87380 + }, + { + "epoch": 3.1757395159531945, + "grad_norm": 0.462643027305603, + "learning_rate": 2.6078265259881805e-05, + "loss": 0.0876, + "step": 87390 + }, + { + "epoch": 3.1761029144559925, + "grad_norm": 1.069300651550293, + "learning_rate": 2.607308074716517e-05, + "loss": 0.0977, + "step": 87400 + }, + { + "epoch": 3.1764663129587904, + "grad_norm": 0.46647369861602783, + "learning_rate": 2.6067896188213296e-05, + "loss": 0.1076, + "step": 87410 + }, + { + "epoch": 3.176829711461589, + "grad_norm": 0.46599775552749634, + "learning_rate": 2.6062711583249578e-05, + "loss": 0.1663, + "step": 87420 + }, + { + "epoch": 3.177193109964387, + "grad_norm": 0.6090976595878601, + "learning_rate": 2.60575269324974e-05, + "loss": 0.0798, + "step": 87430 + }, + { + "epoch": 3.1775565084671853, + "grad_norm": 3.0198044776916504, + "learning_rate": 2.6052342236180144e-05, + "loss": 0.0952, + "step": 87440 + }, + { + "epoch": 3.1779199069699833, + "grad_norm": 0.8521358370780945, + "learning_rate": 2.604715749452121e-05, + "loss": 0.0942, + "step": 87450 + }, + { + "epoch": 3.1782833054727813, + "grad_norm": 1.1309806108474731, + "learning_rate": 2.604197270774398e-05, + "loss": 0.0858, + "step": 87460 + }, + { + "epoch": 3.1786467039755797, + "grad_norm": 2.8067383766174316, + "learning_rate": 2.603678787607186e-05, + "loss": 0.0718, + "step": 87470 + }, + { + "epoch": 3.1790101024783777, + "grad_norm": 1.6722538471221924, + "learning_rate": 2.603160299972824e-05, + "loss": 0.0787, + "step": 87480 + }, + { + "epoch": 3.179373500981176, + "grad_norm": 5.742082595825195, + "learning_rate": 2.602641807893651e-05, + "loss": 0.1014, + "step": 87490 + }, + { + "epoch": 3.179736899483974, + "grad_norm": 1.0743470191955566, + "learning_rate": 2.6021233113920078e-05, + "loss": 0.1036, + "step": 87500 + }, + { + "epoch": 3.1801002979867725, + "grad_norm": 2.879175901412964, + "learning_rate": 2.6016048104902345e-05, + "loss": 0.1001, + "step": 87510 + }, + { + "epoch": 3.1804636964895705, + "grad_norm": 0.37631672620773315, + "learning_rate": 2.601086305210672e-05, + "loss": 0.0747, + "step": 87520 + }, + { + "epoch": 3.1808270949923685, + "grad_norm": 0.38719940185546875, + "learning_rate": 2.6005677955756603e-05, + "loss": 0.8272, + "step": 87530 + }, + { + "epoch": 3.181190493495167, + "grad_norm": 0.6106435060501099, + "learning_rate": 2.6000492816075395e-05, + "loss": 0.085, + "step": 87540 + }, + { + "epoch": 3.181553891997965, + "grad_norm": 0.6239719986915588, + "learning_rate": 2.5995307633286515e-05, + "loss": 0.079, + "step": 87550 + }, + { + "epoch": 3.1819172905007633, + "grad_norm": 0.8448728919029236, + "learning_rate": 2.5990122407613366e-05, + "loss": 0.0744, + "step": 87560 + }, + { + "epoch": 3.1822806890035613, + "grad_norm": 0.7143642902374268, + "learning_rate": 2.598493713927937e-05, + "loss": 0.0686, + "step": 87570 + }, + { + "epoch": 3.1826440875063593, + "grad_norm": 0.44550129771232605, + "learning_rate": 2.597975182850793e-05, + "loss": 0.0585, + "step": 87580 + }, + { + "epoch": 3.1830074860091577, + "grad_norm": 0.43901458382606506, + "learning_rate": 2.5974566475522478e-05, + "loss": 0.0831, + "step": 87590 + }, + { + "epoch": 3.1833708845119557, + "grad_norm": 1.0826005935668945, + "learning_rate": 2.5969381080546417e-05, + "loss": 0.0793, + "step": 87600 + }, + { + "epoch": 3.1833708845119557, + "eval_loss": 0.3327307105064392, + "eval_runtime": 180.1166, + "eval_samples_per_second": 41.162, + "eval_steps_per_second": 5.147, + "eval_wer": 0.1424837075898125, + "step": 87600 + }, + { + "epoch": 3.183734283014754, + "grad_norm": 0.43947404623031616, + "learning_rate": 2.596419564380318e-05, + "loss": 0.1618, + "step": 87610 + }, + { + "epoch": 3.184097681517552, + "grad_norm": 1.2502785921096802, + "learning_rate": 2.5959010165516185e-05, + "loss": 0.081, + "step": 87620 + }, + { + "epoch": 3.18446108002035, + "grad_norm": 0.45034274458885193, + "learning_rate": 2.5953824645908848e-05, + "loss": 0.0821, + "step": 87630 + }, + { + "epoch": 3.1848244785231485, + "grad_norm": 0.5315303206443787, + "learning_rate": 2.5948639085204607e-05, + "loss": 0.1093, + "step": 87640 + }, + { + "epoch": 3.1851878770259465, + "grad_norm": 1.181911826133728, + "learning_rate": 2.5943453483626873e-05, + "loss": 0.0766, + "step": 87650 + }, + { + "epoch": 3.185551275528745, + "grad_norm": 0.4337679147720337, + "learning_rate": 2.5938267841399096e-05, + "loss": 0.0886, + "step": 87660 + }, + { + "epoch": 3.185914674031543, + "grad_norm": 4.992573261260986, + "learning_rate": 2.5933082158744686e-05, + "loss": 0.0955, + "step": 87670 + }, + { + "epoch": 3.1862780725343414, + "grad_norm": 0.401584267616272, + "learning_rate": 2.5927896435887094e-05, + "loss": 0.0802, + "step": 87680 + }, + { + "epoch": 3.1866414710371394, + "grad_norm": 0.39341244101524353, + "learning_rate": 2.592271067304975e-05, + "loss": 0.092, + "step": 87690 + }, + { + "epoch": 3.1870048695399373, + "grad_norm": 0.8146529197692871, + "learning_rate": 2.5917524870456074e-05, + "loss": 0.0875, + "step": 87700 + }, + { + "epoch": 3.1873682680427358, + "grad_norm": 0.4977721869945526, + "learning_rate": 2.5912339028329524e-05, + "loss": 0.0709, + "step": 87710 + }, + { + "epoch": 3.1877316665455337, + "grad_norm": 0.9400126338005066, + "learning_rate": 2.5907153146893527e-05, + "loss": 0.0756, + "step": 87720 + }, + { + "epoch": 3.188095065048332, + "grad_norm": 1.8934528827667236, + "learning_rate": 2.590196722637153e-05, + "loss": 0.1116, + "step": 87730 + }, + { + "epoch": 3.18845846355113, + "grad_norm": 0.3290179669857025, + "learning_rate": 2.5896781266986974e-05, + "loss": 0.0794, + "step": 87740 + }, + { + "epoch": 3.188821862053928, + "grad_norm": 0.42296847701072693, + "learning_rate": 2.589159526896331e-05, + "loss": 0.0917, + "step": 87750 + }, + { + "epoch": 3.1891852605567266, + "grad_norm": 1.1522185802459717, + "learning_rate": 2.5886409232523966e-05, + "loss": 0.0855, + "step": 87760 + }, + { + "epoch": 3.1895486590595246, + "grad_norm": 0.9515447616577148, + "learning_rate": 2.5881223157892415e-05, + "loss": 0.0809, + "step": 87770 + }, + { + "epoch": 3.189912057562323, + "grad_norm": 0.8773075342178345, + "learning_rate": 2.587603704529209e-05, + "loss": 0.0754, + "step": 87780 + }, + { + "epoch": 3.190275456065121, + "grad_norm": 0.6473771333694458, + "learning_rate": 2.587085089494644e-05, + "loss": 0.0887, + "step": 87790 + }, + { + "epoch": 3.1906388545679194, + "grad_norm": 1.6101555824279785, + "learning_rate": 2.586566470707893e-05, + "loss": 0.0901, + "step": 87800 + }, + { + "epoch": 3.1910022530707174, + "grad_norm": 0.25950807332992554, + "learning_rate": 2.5860478481912996e-05, + "loss": 0.1501, + "step": 87810 + }, + { + "epoch": 3.1913656515735154, + "grad_norm": 1.339440941810608, + "learning_rate": 2.585529221967212e-05, + "loss": 0.0645, + "step": 87820 + }, + { + "epoch": 3.191729050076314, + "grad_norm": 0.8550190329551697, + "learning_rate": 2.5850105920579736e-05, + "loss": 0.0686, + "step": 87830 + }, + { + "epoch": 3.192092448579112, + "grad_norm": 0.5492476224899292, + "learning_rate": 2.584491958485932e-05, + "loss": 0.0913, + "step": 87840 + }, + { + "epoch": 3.19245584708191, + "grad_norm": 0.8055387735366821, + "learning_rate": 2.5839733212734323e-05, + "loss": 0.0886, + "step": 87850 + }, + { + "epoch": 3.192819245584708, + "grad_norm": 0.5844281315803528, + "learning_rate": 2.5834546804428207e-05, + "loss": 0.0589, + "step": 87860 + }, + { + "epoch": 3.193182644087506, + "grad_norm": 0.47842904925346375, + "learning_rate": 2.582936036016444e-05, + "loss": 0.0664, + "step": 87870 + }, + { + "epoch": 3.1935460425903046, + "grad_norm": 0.5088633894920349, + "learning_rate": 2.5824173880166486e-05, + "loss": 0.0858, + "step": 87880 + }, + { + "epoch": 3.1939094410931026, + "grad_norm": 0.4572700262069702, + "learning_rate": 2.581898736465781e-05, + "loss": 0.0813, + "step": 87890 + }, + { + "epoch": 3.194272839595901, + "grad_norm": 1.1615513563156128, + "learning_rate": 2.5813800813861884e-05, + "loss": 0.1006, + "step": 87900 + }, + { + "epoch": 3.194636238098699, + "grad_norm": 0.410319060087204, + "learning_rate": 2.5808614228002174e-05, + "loss": 0.0918, + "step": 87910 + }, + { + "epoch": 3.194999636601497, + "grad_norm": 0.28735676407814026, + "learning_rate": 2.5803427607302154e-05, + "loss": 0.0691, + "step": 87920 + }, + { + "epoch": 3.1953630351042954, + "grad_norm": 0.9969823956489563, + "learning_rate": 2.5798240951985302e-05, + "loss": 0.0749, + "step": 87930 + }, + { + "epoch": 3.1957264336070934, + "grad_norm": 1.1910429000854492, + "learning_rate": 2.579305426227509e-05, + "loss": 0.1894, + "step": 87940 + }, + { + "epoch": 3.196089832109892, + "grad_norm": 0.9283407926559448, + "learning_rate": 2.5787867538394982e-05, + "loss": 0.0854, + "step": 87950 + }, + { + "epoch": 3.19645323061269, + "grad_norm": 0.5121541619300842, + "learning_rate": 2.5782680780568476e-05, + "loss": 0.4152, + "step": 87960 + }, + { + "epoch": 3.1968166291154883, + "grad_norm": 0.6104360222816467, + "learning_rate": 2.577749398901903e-05, + "loss": 0.0793, + "step": 87970 + }, + { + "epoch": 3.1971800276182862, + "grad_norm": 0.3884080648422241, + "learning_rate": 2.577230716397014e-05, + "loss": 0.0808, + "step": 87980 + }, + { + "epoch": 3.197543426121084, + "grad_norm": 1.0838251113891602, + "learning_rate": 2.5767120305645277e-05, + "loss": 0.0883, + "step": 87990 + }, + { + "epoch": 3.1979068246238826, + "grad_norm": 0.7806040048599243, + "learning_rate": 2.5761933414267936e-05, + "loss": 0.0721, + "step": 88000 + }, + { + "epoch": 3.1982702231266806, + "grad_norm": 0.4943070709705353, + "learning_rate": 2.575674649006159e-05, + "loss": 0.0751, + "step": 88010 + }, + { + "epoch": 3.198633621629479, + "grad_norm": 1.1027365922927856, + "learning_rate": 2.575155953324973e-05, + "loss": 0.0714, + "step": 88020 + }, + { + "epoch": 3.198997020132277, + "grad_norm": 0.5191725492477417, + "learning_rate": 2.5746372544055847e-05, + "loss": 0.0699, + "step": 88030 + }, + { + "epoch": 3.199360418635075, + "grad_norm": 0.9866086840629578, + "learning_rate": 2.5741185522703424e-05, + "loss": 0.1314, + "step": 88040 + }, + { + "epoch": 3.1997238171378735, + "grad_norm": 2.110804319381714, + "learning_rate": 2.5735998469415956e-05, + "loss": 0.0873, + "step": 88050 + }, + { + "epoch": 3.2000872156406714, + "grad_norm": 0.9099189639091492, + "learning_rate": 2.573081138441693e-05, + "loss": 0.1216, + "step": 88060 + }, + { + "epoch": 3.20045061414347, + "grad_norm": 0.6158716082572937, + "learning_rate": 2.572562426792985e-05, + "loss": 0.0683, + "step": 88070 + }, + { + "epoch": 3.200814012646268, + "grad_norm": 0.6630299091339111, + "learning_rate": 2.5720437120178186e-05, + "loss": 0.089, + "step": 88080 + }, + { + "epoch": 3.2011774111490663, + "grad_norm": 0.4174249768257141, + "learning_rate": 2.5715249941385467e-05, + "loss": 0.0949, + "step": 88090 + }, + { + "epoch": 3.2015408096518643, + "grad_norm": 0.7170994877815247, + "learning_rate": 2.5710062731775164e-05, + "loss": 0.0864, + "step": 88100 + }, + { + "epoch": 3.2019042081546623, + "grad_norm": 0.5610363483428955, + "learning_rate": 2.5704875491570784e-05, + "loss": 0.0878, + "step": 88110 + }, + { + "epoch": 3.2022676066574607, + "grad_norm": 0.41790106892585754, + "learning_rate": 2.5699688220995834e-05, + "loss": 3.9644, + "step": 88120 + }, + { + "epoch": 3.2026310051602587, + "grad_norm": 0.8751020431518555, + "learning_rate": 2.5694500920273795e-05, + "loss": 0.0729, + "step": 88130 + }, + { + "epoch": 3.202994403663057, + "grad_norm": 0.5966963768005371, + "learning_rate": 2.5689313589628193e-05, + "loss": 0.187, + "step": 88140 + }, + { + "epoch": 3.203357802165855, + "grad_norm": 0.8189564943313599, + "learning_rate": 2.5684126229282516e-05, + "loss": 0.0749, + "step": 88150 + }, + { + "epoch": 3.203721200668653, + "grad_norm": 1.0785588026046753, + "learning_rate": 2.5678938839460283e-05, + "loss": 0.0835, + "step": 88160 + }, + { + "epoch": 3.2040845991714515, + "grad_norm": 1.5373785495758057, + "learning_rate": 2.5673751420384983e-05, + "loss": 0.0789, + "step": 88170 + }, + { + "epoch": 3.2044479976742495, + "grad_norm": 0.6006679534912109, + "learning_rate": 2.566856397228013e-05, + "loss": 0.0694, + "step": 88180 + }, + { + "epoch": 3.204811396177048, + "grad_norm": 0.9026358127593994, + "learning_rate": 2.5663376495369242e-05, + "loss": 0.0847, + "step": 88190 + }, + { + "epoch": 3.205174794679846, + "grad_norm": 1.896236777305603, + "learning_rate": 2.565818898987581e-05, + "loss": 0.0814, + "step": 88200 + }, + { + "epoch": 3.205174794679846, + "eval_loss": 0.3309068977832794, + "eval_runtime": 180.8021, + "eval_samples_per_second": 41.006, + "eval_steps_per_second": 5.127, + "eval_wer": 0.142601702761087, + "step": 88200 + }, + { + "epoch": 3.205538193182644, + "grad_norm": 0.5485680103302002, + "learning_rate": 2.565300145602336e-05, + "loss": 0.0943, + "step": 88210 + }, + { + "epoch": 3.2059015916854423, + "grad_norm": 0.5330252647399902, + "learning_rate": 2.564833265149393e-05, + "loss": 0.8654, + "step": 88220 + }, + { + "epoch": 3.2062649901882403, + "grad_norm": 1.7009663581848145, + "learning_rate": 2.564314506437512e-05, + "loss": 0.0911, + "step": 88230 + }, + { + "epoch": 3.2066283886910387, + "grad_norm": 0.3746320903301239, + "learning_rate": 2.563795744954548e-05, + "loss": 0.0656, + "step": 88240 + }, + { + "epoch": 3.2069917871938367, + "grad_norm": 1.1512928009033203, + "learning_rate": 2.5632769807228512e-05, + "loss": 0.0897, + "step": 88250 + }, + { + "epoch": 3.207355185696635, + "grad_norm": 3.660646915435791, + "learning_rate": 2.562758213764776e-05, + "loss": 0.0859, + "step": 88260 + }, + { + "epoch": 3.207718584199433, + "grad_norm": 0.3683335781097412, + "learning_rate": 2.5622394441026716e-05, + "loss": 0.1157, + "step": 88270 + }, + { + "epoch": 3.208081982702231, + "grad_norm": 0.9600638151168823, + "learning_rate": 2.561720671758891e-05, + "loss": 0.077, + "step": 88280 + }, + { + "epoch": 3.2084453812050295, + "grad_norm": 1.4279944896697998, + "learning_rate": 2.5612018967557866e-05, + "loss": 0.0889, + "step": 88290 + }, + { + "epoch": 3.2088087797078275, + "grad_norm": 0.37430229783058167, + "learning_rate": 2.5606831191157103e-05, + "loss": 0.0924, + "step": 88300 + }, + { + "epoch": 3.209172178210626, + "grad_norm": 0.4506620764732361, + "learning_rate": 2.5601643388610137e-05, + "loss": 0.095, + "step": 88310 + }, + { + "epoch": 3.209535576713424, + "grad_norm": 0.5026054382324219, + "learning_rate": 2.5596455560140504e-05, + "loss": 0.0703, + "step": 88320 + }, + { + "epoch": 3.209898975216222, + "grad_norm": 0.2740514278411865, + "learning_rate": 2.559126770597173e-05, + "loss": 0.081, + "step": 88330 + }, + { + "epoch": 3.2102623737190203, + "grad_norm": 0.4658343493938446, + "learning_rate": 2.558607982632732e-05, + "loss": 0.1092, + "step": 88340 + }, + { + "epoch": 3.2106257722218183, + "grad_norm": 0.7769015431404114, + "learning_rate": 2.5580891921430822e-05, + "loss": 0.0985, + "step": 88350 + }, + { + "epoch": 3.2109891707246168, + "grad_norm": 0.4521372616291046, + "learning_rate": 2.5575703991505758e-05, + "loss": 0.0765, + "step": 88360 + }, + { + "epoch": 3.2113525692274147, + "grad_norm": 0.41286054253578186, + "learning_rate": 2.5570516036775656e-05, + "loss": 0.0758, + "step": 88370 + }, + { + "epoch": 3.211715967730213, + "grad_norm": 0.5163076519966125, + "learning_rate": 2.5565328057464044e-05, + "loss": 0.0822, + "step": 88380 + }, + { + "epoch": 3.212079366233011, + "grad_norm": 0.8043489456176758, + "learning_rate": 2.556014005379447e-05, + "loss": 0.0742, + "step": 88390 + }, + { + "epoch": 3.212442764735809, + "grad_norm": 5.606634616851807, + "learning_rate": 2.555495202599044e-05, + "loss": 0.0879, + "step": 88400 + }, + { + "epoch": 3.2128061632386076, + "grad_norm": 0.7967556118965149, + "learning_rate": 2.554976397427551e-05, + "loss": 0.0851, + "step": 88410 + }, + { + "epoch": 3.2131695617414056, + "grad_norm": 0.8802252411842346, + "learning_rate": 2.5544575898873208e-05, + "loss": 0.067, + "step": 88420 + }, + { + "epoch": 3.213532960244204, + "grad_norm": 1.1108994483947754, + "learning_rate": 2.553938780000706e-05, + "loss": 0.092, + "step": 88430 + }, + { + "epoch": 3.213896358747002, + "grad_norm": 0.37617167830467224, + "learning_rate": 2.5534199677900618e-05, + "loss": 0.1041, + "step": 88440 + }, + { + "epoch": 3.2142597572498, + "grad_norm": 0.6323724389076233, + "learning_rate": 2.5529011532777407e-05, + "loss": 0.0805, + "step": 88450 + }, + { + "epoch": 3.2146231557525984, + "grad_norm": 1.353440284729004, + "learning_rate": 2.5523823364860978e-05, + "loss": 0.0927, + "step": 88460 + }, + { + "epoch": 3.2149865542553964, + "grad_norm": 0.5883220434188843, + "learning_rate": 2.551863517437486e-05, + "loss": 0.0714, + "step": 88470 + }, + { + "epoch": 3.215349952758195, + "grad_norm": 0.545572817325592, + "learning_rate": 2.551344696154259e-05, + "loss": 0.063, + "step": 88480 + }, + { + "epoch": 3.215713351260993, + "grad_norm": 1.073169469833374, + "learning_rate": 2.550825872658773e-05, + "loss": 0.1216, + "step": 88490 + }, + { + "epoch": 3.2160767497637908, + "grad_norm": 0.45323723554611206, + "learning_rate": 2.5503070469733804e-05, + "loss": 0.0881, + "step": 88500 + }, + { + "epoch": 3.216440148266589, + "grad_norm": 0.47871801257133484, + "learning_rate": 2.5497882191204365e-05, + "loss": 0.1577, + "step": 88510 + }, + { + "epoch": 3.216803546769387, + "grad_norm": 0.21129682660102844, + "learning_rate": 2.5492693891222952e-05, + "loss": 0.0599, + "step": 88520 + }, + { + "epoch": 3.2171669452721856, + "grad_norm": 1.4315932989120483, + "learning_rate": 2.548750557001311e-05, + "loss": 0.1047, + "step": 88530 + }, + { + "epoch": 3.2175303437749836, + "grad_norm": 0.4861602187156677, + "learning_rate": 2.5482317227798393e-05, + "loss": 0.0807, + "step": 88540 + }, + { + "epoch": 3.217893742277782, + "grad_norm": 0.9959172010421753, + "learning_rate": 2.547712886480233e-05, + "loss": 0.0929, + "step": 88550 + }, + { + "epoch": 3.21825714078058, + "grad_norm": 1.7969993352890015, + "learning_rate": 2.5471940481248496e-05, + "loss": 0.0884, + "step": 88560 + }, + { + "epoch": 3.218620539283378, + "grad_norm": 0.49897995591163635, + "learning_rate": 2.5466752077360417e-05, + "loss": 0.069, + "step": 88570 + }, + { + "epoch": 3.2189839377861764, + "grad_norm": 1.5426241159439087, + "learning_rate": 2.5461563653361658e-05, + "loss": 0.0803, + "step": 88580 + }, + { + "epoch": 3.2193473362889744, + "grad_norm": 0.3128204345703125, + "learning_rate": 2.5456375209475765e-05, + "loss": 0.08, + "step": 88590 + }, + { + "epoch": 3.219710734791773, + "grad_norm": 1.4503716230392456, + "learning_rate": 2.5451186745926276e-05, + "loss": 0.0908, + "step": 88600 + }, + { + "epoch": 3.220074133294571, + "grad_norm": 1.7355505228042603, + "learning_rate": 2.5445998262936765e-05, + "loss": 0.0784, + "step": 88610 + }, + { + "epoch": 3.220437531797369, + "grad_norm": 0.8922900557518005, + "learning_rate": 2.5440809760730773e-05, + "loss": 0.1035, + "step": 88620 + }, + { + "epoch": 3.2208009303001672, + "grad_norm": 0.41246405243873596, + "learning_rate": 2.543562123953186e-05, + "loss": 0.0966, + "step": 88630 + }, + { + "epoch": 3.221164328802965, + "grad_norm": 0.5685162544250488, + "learning_rate": 2.5430432699563577e-05, + "loss": 0.0869, + "step": 88640 + }, + { + "epoch": 3.2215277273057636, + "grad_norm": 1.1315709352493286, + "learning_rate": 2.5425244141049477e-05, + "loss": 0.096, + "step": 88650 + }, + { + "epoch": 3.2218911258085616, + "grad_norm": 0.5966361165046692, + "learning_rate": 2.542005556421312e-05, + "loss": 0.0861, + "step": 88660 + }, + { + "epoch": 3.22225452431136, + "grad_norm": 0.6581082344055176, + "learning_rate": 2.541486696927806e-05, + "loss": 0.0604, + "step": 88670 + }, + { + "epoch": 3.222617922814158, + "grad_norm": 0.4752594530582428, + "learning_rate": 2.540967835646787e-05, + "loss": 0.2735, + "step": 88680 + }, + { + "epoch": 3.222981321316956, + "grad_norm": 1.1282252073287964, + "learning_rate": 2.5404489726006085e-05, + "loss": 0.0928, + "step": 88690 + }, + { + "epoch": 3.2233447198197545, + "grad_norm": 0.7483789324760437, + "learning_rate": 2.5399301078116287e-05, + "loss": 0.1104, + "step": 88700 + }, + { + "epoch": 3.2237081183225524, + "grad_norm": 0.338220477104187, + "learning_rate": 2.5394112413022024e-05, + "loss": 0.0564, + "step": 88710 + }, + { + "epoch": 3.224071516825351, + "grad_norm": 0.5026964545249939, + "learning_rate": 2.538892373094685e-05, + "loss": 0.0771, + "step": 88720 + }, + { + "epoch": 3.224434915328149, + "grad_norm": 0.5595789551734924, + "learning_rate": 2.5383735032114348e-05, + "loss": 0.0752, + "step": 88730 + }, + { + "epoch": 3.224798313830947, + "grad_norm": 0.37049752473831177, + "learning_rate": 2.5378546316748066e-05, + "loss": 0.0846, + "step": 88740 + }, + { + "epoch": 3.2251617123337453, + "grad_norm": 0.7598865628242493, + "learning_rate": 2.5373357585071568e-05, + "loss": 0.0657, + "step": 88750 + }, + { + "epoch": 3.2255251108365433, + "grad_norm": 2.472294569015503, + "learning_rate": 2.536816883730842e-05, + "loss": 0.1018, + "step": 88760 + }, + { + "epoch": 3.2258885093393417, + "grad_norm": 0.44734901189804077, + "learning_rate": 2.5362980073682186e-05, + "loss": 0.08, + "step": 88770 + }, + { + "epoch": 3.2262519078421397, + "grad_norm": 0.9893713593482971, + "learning_rate": 2.5357791294416433e-05, + "loss": 0.097, + "step": 88780 + }, + { + "epoch": 3.2266153063449377, + "grad_norm": 0.5891297459602356, + "learning_rate": 2.5352602499734725e-05, + "loss": 0.0738, + "step": 88790 + }, + { + "epoch": 3.226978704847736, + "grad_norm": 1.1289516687393188, + "learning_rate": 2.5347413689860634e-05, + "loss": 0.0851, + "step": 88800 + }, + { + "epoch": 3.226978704847736, + "eval_loss": 0.32641828060150146, + "eval_runtime": 181.2974, + "eval_samples_per_second": 40.894, + "eval_steps_per_second": 5.113, + "eval_wer": 0.13833572349193093, + "step": 88800 + }, + { + "epoch": 3.227342103350534, + "grad_norm": 1.0237905979156494, + "learning_rate": 2.5342224865017715e-05, + "loss": 0.0795, + "step": 88810 + }, + { + "epoch": 3.2277055018533325, + "grad_norm": 1.2821190357208252, + "learning_rate": 2.5337036025429555e-05, + "loss": 0.0745, + "step": 88820 + }, + { + "epoch": 3.2280689003561305, + "grad_norm": 0.4639264941215515, + "learning_rate": 2.5331847171319707e-05, + "loss": 0.0825, + "step": 88830 + }, + { + "epoch": 3.228432298858929, + "grad_norm": 0.4527970254421234, + "learning_rate": 2.5327177190389585e-05, + "loss": 1.9595, + "step": 88840 + }, + { + "epoch": 3.228795697361727, + "grad_norm": 4.547738552093506, + "learning_rate": 2.532198830930447e-05, + "loss": 0.0768, + "step": 88850 + }, + { + "epoch": 3.229159095864525, + "grad_norm": 1.0073570013046265, + "learning_rate": 2.5316799414346026e-05, + "loss": 0.0892, + "step": 88860 + }, + { + "epoch": 3.2295224943673233, + "grad_norm": 0.5932965874671936, + "learning_rate": 2.5311610505737827e-05, + "loss": 0.064, + "step": 88870 + }, + { + "epoch": 3.2298858928701213, + "grad_norm": 0.6178048253059387, + "learning_rate": 2.530642158370343e-05, + "loss": 0.0592, + "step": 88880 + }, + { + "epoch": 3.2302492913729197, + "grad_norm": 0.31980982422828674, + "learning_rate": 2.5301232648466427e-05, + "loss": 0.1194, + "step": 88890 + }, + { + "epoch": 3.2306126898757177, + "grad_norm": 0.910460889339447, + "learning_rate": 2.529604370025037e-05, + "loss": 0.084, + "step": 88900 + }, + { + "epoch": 3.2309760883785157, + "grad_norm": 0.4696938991546631, + "learning_rate": 2.5290854739278857e-05, + "loss": 0.1013, + "step": 88910 + }, + { + "epoch": 3.231339486881314, + "grad_norm": 0.5140429139137268, + "learning_rate": 2.5286184663683343e-05, + "loss": 1.1616, + "step": 88920 + }, + { + "epoch": 3.231702885384112, + "grad_norm": 1.7606545686721802, + "learning_rate": 2.5280995679092382e-05, + "loss": 0.0786, + "step": 88930 + }, + { + "epoch": 3.2320662838869105, + "grad_norm": 1.0249334573745728, + "learning_rate": 2.527580668239431e-05, + "loss": 0.0797, + "step": 88940 + }, + { + "epoch": 3.2324296823897085, + "grad_norm": 0.7938006520271301, + "learning_rate": 2.5270617673812712e-05, + "loss": 0.0956, + "step": 88950 + }, + { + "epoch": 3.232793080892507, + "grad_norm": 0.998557984828949, + "learning_rate": 2.526542865357116e-05, + "loss": 0.0837, + "step": 88960 + }, + { + "epoch": 3.233156479395305, + "grad_norm": 3.8933629989624023, + "learning_rate": 2.5260239621893232e-05, + "loss": 0.0633, + "step": 88970 + }, + { + "epoch": 3.233519877898103, + "grad_norm": 0.6665277481079102, + "learning_rate": 2.525556948378978e-05, + "loss": 2.7581, + "step": 88980 + }, + { + "epoch": 3.2338832764009013, + "grad_norm": 3.3742940425872803, + "learning_rate": 2.52503804309987e-05, + "loss": 0.0782, + "step": 88990 + }, + { + "epoch": 3.2342466749036993, + "grad_norm": 0.37812352180480957, + "learning_rate": 2.524519136741961e-05, + "loss": 0.0937, + "step": 89000 + }, + { + "epoch": 3.2346100734064978, + "grad_norm": 0.42430004477500916, + "learning_rate": 2.5240002293276092e-05, + "loss": 0.067, + "step": 89010 + }, + { + "epoch": 3.2349734719092957, + "grad_norm": 0.669116735458374, + "learning_rate": 2.523481320879174e-05, + "loss": 0.0901, + "step": 89020 + }, + { + "epoch": 3.2353368704120937, + "grad_norm": 0.8977360725402832, + "learning_rate": 2.5229624114190113e-05, + "loss": 0.0802, + "step": 89030 + }, + { + "epoch": 3.235700268914892, + "grad_norm": 0.6342353224754333, + "learning_rate": 2.5224435009694808e-05, + "loss": 0.316, + "step": 89040 + }, + { + "epoch": 3.23606366741769, + "grad_norm": 1.2511061429977417, + "learning_rate": 2.5219245895529385e-05, + "loss": 0.0862, + "step": 89050 + }, + { + "epoch": 3.2364270659204886, + "grad_norm": 0.7103098034858704, + "learning_rate": 2.5214056771917448e-05, + "loss": 0.0743, + "step": 89060 + }, + { + "epoch": 3.2367904644232866, + "grad_norm": 1.1459332704544067, + "learning_rate": 2.5208867639082562e-05, + "loss": 0.0808, + "step": 89070 + }, + { + "epoch": 3.2371538629260845, + "grad_norm": 0.4795287251472473, + "learning_rate": 2.5203678497248314e-05, + "loss": 0.0596, + "step": 89080 + }, + { + "epoch": 3.237517261428883, + "grad_norm": 1.7111159563064575, + "learning_rate": 2.5198489346638282e-05, + "loss": 0.0803, + "step": 89090 + }, + { + "epoch": 3.237880659931681, + "grad_norm": 0.9549298286437988, + "learning_rate": 2.519330018747605e-05, + "loss": 0.0808, + "step": 89100 + }, + { + "epoch": 3.2382440584344794, + "grad_norm": 1.708971381187439, + "learning_rate": 2.5188111019985204e-05, + "loss": 0.072, + "step": 89110 + }, + { + "epoch": 3.2386074569372774, + "grad_norm": 1.0324857234954834, + "learning_rate": 2.5182921844389317e-05, + "loss": 0.0593, + "step": 89120 + }, + { + "epoch": 3.238970855440076, + "grad_norm": 0.7414169311523438, + "learning_rate": 2.517773266091199e-05, + "loss": 0.067, + "step": 89130 + }, + { + "epoch": 3.2393342539428738, + "grad_norm": 0.739587128162384, + "learning_rate": 2.5172543469776783e-05, + "loss": 0.0832, + "step": 89140 + }, + { + "epoch": 3.2396976524456718, + "grad_norm": 0.6406792402267456, + "learning_rate": 2.5167354271207298e-05, + "loss": 0.0713, + "step": 89150 + }, + { + "epoch": 3.24006105094847, + "grad_norm": 0.40510469675064087, + "learning_rate": 2.5162165065427114e-05, + "loss": 0.0698, + "step": 89160 + }, + { + "epoch": 3.240424449451268, + "grad_norm": 0.508594810962677, + "learning_rate": 2.515697585265981e-05, + "loss": 0.0691, + "step": 89170 + }, + { + "epoch": 3.2407878479540666, + "grad_norm": 0.5306017994880676, + "learning_rate": 2.5151786633128983e-05, + "loss": 0.0975, + "step": 89180 + }, + { + "epoch": 3.2411512464568646, + "grad_norm": 1.0172330141067505, + "learning_rate": 2.51465974070582e-05, + "loss": 0.0897, + "step": 89190 + }, + { + "epoch": 3.2415146449596626, + "grad_norm": 1.291803240776062, + "learning_rate": 2.514140817467106e-05, + "loss": 0.0812, + "step": 89200 + }, + { + "epoch": 3.241878043462461, + "grad_norm": 0.5256122946739197, + "learning_rate": 2.513621893619115e-05, + "loss": 0.1297, + "step": 89210 + }, + { + "epoch": 3.242241441965259, + "grad_norm": 1.4141759872436523, + "learning_rate": 2.513102969184204e-05, + "loss": 0.0578, + "step": 89220 + }, + { + "epoch": 3.2426048404680574, + "grad_norm": 0.35552000999450684, + "learning_rate": 2.5125840441847336e-05, + "loss": 0.0774, + "step": 89230 + }, + { + "epoch": 3.2429682389708554, + "grad_norm": 0.5986491441726685, + "learning_rate": 2.512065118643061e-05, + "loss": 0.096, + "step": 89240 + }, + { + "epoch": 3.243331637473654, + "grad_norm": 0.6903038024902344, + "learning_rate": 2.5115461925815458e-05, + "loss": 0.0827, + "step": 89250 + }, + { + "epoch": 3.243695035976452, + "grad_norm": 0.6188151240348816, + "learning_rate": 2.5110272660225454e-05, + "loss": 0.0887, + "step": 89260 + }, + { + "epoch": 3.24405843447925, + "grad_norm": 0.27323612570762634, + "learning_rate": 2.5105083389884205e-05, + "loss": 1.8973, + "step": 89270 + }, + { + "epoch": 3.2444218329820482, + "grad_norm": 0.4321115016937256, + "learning_rate": 2.5099894115015284e-05, + "loss": 0.091, + "step": 89280 + }, + { + "epoch": 3.244785231484846, + "grad_norm": 0.5513198375701904, + "learning_rate": 2.5094704835842276e-05, + "loss": 0.0713, + "step": 89290 + }, + { + "epoch": 3.2451486299876446, + "grad_norm": 0.5108364224433899, + "learning_rate": 2.5089515552588776e-05, + "loss": 0.0746, + "step": 89300 + }, + { + "epoch": 3.2455120284904426, + "grad_norm": 0.5740257501602173, + "learning_rate": 2.5084326265478365e-05, + "loss": 0.0713, + "step": 89310 + }, + { + "epoch": 3.2458754269932406, + "grad_norm": 0.46265411376953125, + "learning_rate": 2.507913697473464e-05, + "loss": 0.0768, + "step": 89320 + }, + { + "epoch": 3.246238825496039, + "grad_norm": 0.573864758014679, + "learning_rate": 2.5073947680581184e-05, + "loss": 0.0928, + "step": 89330 + }, + { + "epoch": 3.246602223998837, + "grad_norm": 0.34501180052757263, + "learning_rate": 2.5068758383241586e-05, + "loss": 0.0843, + "step": 89340 + }, + { + "epoch": 3.2469656225016355, + "grad_norm": 1.3891141414642334, + "learning_rate": 2.5063569082939437e-05, + "loss": 0.0825, + "step": 89350 + }, + { + "epoch": 3.2473290210044334, + "grad_norm": 0.40869995951652527, + "learning_rate": 2.505837977989831e-05, + "loss": 2.1322, + "step": 89360 + }, + { + "epoch": 3.2476924195072314, + "grad_norm": 0.3573366403579712, + "learning_rate": 2.505319047434182e-05, + "loss": 0.0659, + "step": 89370 + }, + { + "epoch": 3.24805581801003, + "grad_norm": 1.208050012588501, + "learning_rate": 2.5048001166493536e-05, + "loss": 0.0684, + "step": 89380 + }, + { + "epoch": 3.248419216512828, + "grad_norm": 2.167421340942383, + "learning_rate": 2.5042811856577054e-05, + "loss": 0.5461, + "step": 89390 + }, + { + "epoch": 3.2487826150156263, + "grad_norm": 0.7320494055747986, + "learning_rate": 2.5037622544815958e-05, + "loss": 0.0968, + "step": 89400 + }, + { + "epoch": 3.2487826150156263, + "eval_loss": 0.28867700695991516, + "eval_runtime": 180.0941, + "eval_samples_per_second": 41.167, + "eval_steps_per_second": 5.147, + "eval_wer": 0.1410223827763356, + "step": 89400 + }, + { + "epoch": 3.2491460135184242, + "grad_norm": 0.4674376845359802, + "learning_rate": 2.5032433231433845e-05, + "loss": 0.1201, + "step": 89410 + }, + { + "epoch": 3.2495094120212227, + "grad_norm": 0.5038551688194275, + "learning_rate": 2.5027243916654296e-05, + "loss": 0.0698, + "step": 89420 + }, + { + "epoch": 3.2498728105240207, + "grad_norm": 0.5582560896873474, + "learning_rate": 2.5022054600700902e-05, + "loss": 0.0666, + "step": 89430 + }, + { + "epoch": 3.2502362090268186, + "grad_norm": 3.2635886669158936, + "learning_rate": 2.501686528379726e-05, + "loss": 0.0858, + "step": 89440 + }, + { + "epoch": 3.250599607529617, + "grad_norm": 6.544740676879883, + "learning_rate": 2.5011675966166947e-05, + "loss": 0.0792, + "step": 89450 + }, + { + "epoch": 3.250963006032415, + "grad_norm": 0.33596140146255493, + "learning_rate": 2.500648664803356e-05, + "loss": 0.0678, + "step": 89460 + }, + { + "epoch": 3.2513264045352135, + "grad_norm": 0.7997293472290039, + "learning_rate": 2.500129732962069e-05, + "loss": 0.0636, + "step": 89470 + }, + { + "epoch": 3.2516898030380115, + "grad_norm": 0.2458629459142685, + "learning_rate": 2.4996108011151922e-05, + "loss": 0.0711, + "step": 89480 + }, + { + "epoch": 3.25205320154081, + "grad_norm": 0.5491876006126404, + "learning_rate": 2.4990918692850834e-05, + "loss": 0.0982, + "step": 89490 + }, + { + "epoch": 3.252416600043608, + "grad_norm": 1.1274678707122803, + "learning_rate": 2.4985729374941038e-05, + "loss": 0.1037, + "step": 89500 + }, + { + "epoch": 3.252779998546406, + "grad_norm": 0.576329231262207, + "learning_rate": 2.4980540057646117e-05, + "loss": 0.0813, + "step": 89510 + }, + { + "epoch": 3.2531433970492043, + "grad_norm": 2.285304069519043, + "learning_rate": 2.497535074118965e-05, + "loss": 0.0752, + "step": 89520 + }, + { + "epoch": 3.2535067955520023, + "grad_norm": 1.1110707521438599, + "learning_rate": 2.4970161425795233e-05, + "loss": 0.0663, + "step": 89530 + }, + { + "epoch": 3.2538701940548007, + "grad_norm": 0.6601670980453491, + "learning_rate": 2.4964972111686443e-05, + "loss": 0.0782, + "step": 89540 + }, + { + "epoch": 3.2542335925575987, + "grad_norm": 1.520925521850586, + "learning_rate": 2.4959782799086897e-05, + "loss": 0.0842, + "step": 89550 + }, + { + "epoch": 3.2545969910603967, + "grad_norm": 0.5269960165023804, + "learning_rate": 2.495459348822016e-05, + "loss": 0.0606, + "step": 89560 + }, + { + "epoch": 3.254960389563195, + "grad_norm": 2.8767483234405518, + "learning_rate": 2.494940417930983e-05, + "loss": 1.3044, + "step": 89570 + }, + { + "epoch": 3.255323788065993, + "grad_norm": 0.3898687958717346, + "learning_rate": 2.4944214872579484e-05, + "loss": 0.1111, + "step": 89580 + }, + { + "epoch": 3.2556871865687915, + "grad_norm": 0.7580005526542664, + "learning_rate": 2.4939025568252732e-05, + "loss": 0.1151, + "step": 89590 + }, + { + "epoch": 3.2560505850715895, + "grad_norm": 1.3324828147888184, + "learning_rate": 2.4933836266553147e-05, + "loss": 0.0821, + "step": 89600 + }, + { + "epoch": 3.2564139835743875, + "grad_norm": 0.6260429620742798, + "learning_rate": 2.4928646967704323e-05, + "loss": 0.0501, + "step": 89610 + }, + { + "epoch": 3.256777382077186, + "grad_norm": 0.3619268238544464, + "learning_rate": 2.492345767192985e-05, + "loss": 0.0667, + "step": 89620 + }, + { + "epoch": 3.257140780579984, + "grad_norm": 1.3483206033706665, + "learning_rate": 2.4918268379453302e-05, + "loss": 0.062, + "step": 89630 + }, + { + "epoch": 3.2575041790827823, + "grad_norm": 31.592674255371094, + "learning_rate": 2.491307909049829e-05, + "loss": 0.2416, + "step": 89640 + }, + { + "epoch": 3.2578675775855803, + "grad_norm": 0.5905751585960388, + "learning_rate": 2.490788980528839e-05, + "loss": 0.0816, + "step": 89650 + }, + { + "epoch": 3.2582309760883783, + "grad_norm": 1.89798903465271, + "learning_rate": 2.4902700524047194e-05, + "loss": 0.0913, + "step": 89660 + }, + { + "epoch": 3.2585943745911767, + "grad_norm": 0.883533239364624, + "learning_rate": 2.489751124699828e-05, + "loss": 0.0588, + "step": 89670 + }, + { + "epoch": 3.2589577730939747, + "grad_norm": 0.48883989453315735, + "learning_rate": 2.4892321974365235e-05, + "loss": 0.0794, + "step": 89680 + }, + { + "epoch": 3.259321171596773, + "grad_norm": 0.4255521595478058, + "learning_rate": 2.4887132706371663e-05, + "loss": 0.0727, + "step": 89690 + }, + { + "epoch": 3.259684570099571, + "grad_norm": 1.9494534730911255, + "learning_rate": 2.488194344324114e-05, + "loss": 0.0798, + "step": 89700 + }, + { + "epoch": 3.260047968602369, + "grad_norm": 0.4608819782733917, + "learning_rate": 2.4876754185197255e-05, + "loss": 0.1128, + "step": 89710 + }, + { + "epoch": 3.2604113671051675, + "grad_norm": 0.2869090139865875, + "learning_rate": 2.487156493246359e-05, + "loss": 0.0729, + "step": 89720 + }, + { + "epoch": 3.2607747656079655, + "grad_norm": 0.4189178943634033, + "learning_rate": 2.4866375685263733e-05, + "loss": 0.0711, + "step": 89730 + }, + { + "epoch": 3.261138164110764, + "grad_norm": 0.8501338958740234, + "learning_rate": 2.4861186443821273e-05, + "loss": 0.0915, + "step": 89740 + }, + { + "epoch": 3.261501562613562, + "grad_norm": 1.5623184442520142, + "learning_rate": 2.48559972083598e-05, + "loss": 0.0871, + "step": 89750 + }, + { + "epoch": 3.2618649611163604, + "grad_norm": 0.3259228765964508, + "learning_rate": 2.4850807979102895e-05, + "loss": 0.0864, + "step": 89760 + }, + { + "epoch": 3.2622283596191584, + "grad_norm": 1.2263481616973877, + "learning_rate": 2.4845618756274147e-05, + "loss": 0.0599, + "step": 89770 + }, + { + "epoch": 3.262591758121957, + "grad_norm": 4.90409517288208, + "learning_rate": 2.484042954009712e-05, + "loss": 0.0895, + "step": 89780 + }, + { + "epoch": 3.2629551566247548, + "grad_norm": 0.4526398479938507, + "learning_rate": 2.483524033079543e-05, + "loss": 0.0734, + "step": 89790 + }, + { + "epoch": 3.2633185551275528, + "grad_norm": 0.7492665648460388, + "learning_rate": 2.483005112859265e-05, + "loss": 0.0904, + "step": 89800 + }, + { + "epoch": 3.263681953630351, + "grad_norm": 0.5449044108390808, + "learning_rate": 2.482486193371236e-05, + "loss": 0.0764, + "step": 89810 + }, + { + "epoch": 3.264045352133149, + "grad_norm": 0.5052310824394226, + "learning_rate": 2.4819672746378137e-05, + "loss": 0.0617, + "step": 89820 + }, + { + "epoch": 3.2644087506359476, + "grad_norm": 0.5267361998558044, + "learning_rate": 2.4814483566813582e-05, + "loss": 0.077, + "step": 89830 + }, + { + "epoch": 3.2647721491387456, + "grad_norm": 1.3752968311309814, + "learning_rate": 2.4809294395242273e-05, + "loss": 0.0719, + "step": 89840 + }, + { + "epoch": 3.2651355476415436, + "grad_norm": 1.6243940591812134, + "learning_rate": 2.4804105231887787e-05, + "loss": 0.1557, + "step": 89850 + }, + { + "epoch": 3.265498946144342, + "grad_norm": 1.946386694908142, + "learning_rate": 2.479891607697371e-05, + "loss": 0.0984, + "step": 89860 + }, + { + "epoch": 3.26586234464714, + "grad_norm": 1.5876291990280151, + "learning_rate": 2.4793726930723615e-05, + "loss": 0.0768, + "step": 89870 + }, + { + "epoch": 3.2662257431499384, + "grad_norm": 0.359464555978775, + "learning_rate": 2.4788537793361104e-05, + "loss": 0.0734, + "step": 89880 + }, + { + "epoch": 3.2665891416527364, + "grad_norm": 1.2184336185455322, + "learning_rate": 2.4783348665109743e-05, + "loss": 0.0963, + "step": 89890 + }, + { + "epoch": 3.2669525401555344, + "grad_norm": 0.5689897537231445, + "learning_rate": 2.477815954619312e-05, + "loss": 0.0818, + "step": 89900 + }, + { + "epoch": 3.267315938658333, + "grad_norm": 0.8023828268051147, + "learning_rate": 2.4773489347334143e-05, + "loss": 0.0947, + "step": 89910 + }, + { + "epoch": 3.267679337161131, + "grad_norm": 0.41300055384635925, + "learning_rate": 2.476830024676948e-05, + "loss": 0.0646, + "step": 89920 + }, + { + "epoch": 3.268042735663929, + "grad_norm": 1.4045326709747314, + "learning_rate": 2.4763111156187922e-05, + "loss": 0.0702, + "step": 89930 + }, + { + "epoch": 3.268406134166727, + "grad_norm": 0.2815437614917755, + "learning_rate": 2.4757922075813077e-05, + "loss": 0.0844, + "step": 89940 + }, + { + "epoch": 3.268769532669525, + "grad_norm": 0.8915033340454102, + "learning_rate": 2.475273300586851e-05, + "loss": 0.1112, + "step": 89950 + }, + { + "epoch": 3.2691329311723236, + "grad_norm": 0.7407945394515991, + "learning_rate": 2.474754394657779e-05, + "loss": 0.0744, + "step": 89960 + }, + { + "epoch": 3.2694963296751216, + "grad_norm": 0.6766940951347351, + "learning_rate": 2.474235489816451e-05, + "loss": 0.771, + "step": 89970 + }, + { + "epoch": 3.26985972817792, + "grad_norm": 0.9781772494316101, + "learning_rate": 2.4737165860852227e-05, + "loss": 0.5104, + "step": 89980 + }, + { + "epoch": 3.270223126680718, + "grad_norm": 0.4817918539047241, + "learning_rate": 2.4731976834864544e-05, + "loss": 0.105, + "step": 89990 + }, + { + "epoch": 3.270586525183516, + "grad_norm": 0.5263279676437378, + "learning_rate": 2.4726787820425023e-05, + "loss": 0.0872, + "step": 90000 + }, + { + "epoch": 3.270586525183516, + "eval_loss": 0.29889926314353943, + "eval_runtime": 179.1872, + "eval_samples_per_second": 41.376, + "eval_steps_per_second": 5.173, + "eval_wer": 0.14040517726505347, + "step": 90000 + }, + { + "epoch": 3.2709499236863144, + "grad_norm": 21.08318328857422, + "learning_rate": 2.4721598817757247e-05, + "loss": 0.3013, + "step": 90010 + }, + { + "epoch": 3.2713133221891124, + "grad_norm": 0.4224644601345062, + "learning_rate": 2.4716409827084783e-05, + "loss": 0.079, + "step": 90020 + }, + { + "epoch": 3.271676720691911, + "grad_norm": 0.4048929214477539, + "learning_rate": 2.4711220848631203e-05, + "loss": 0.0684, + "step": 90030 + }, + { + "epoch": 3.272040119194709, + "grad_norm": 0.522287905216217, + "learning_rate": 2.47060318826201e-05, + "loss": 0.1021, + "step": 90040 + }, + { + "epoch": 3.2724035176975073, + "grad_norm": 1.5194038152694702, + "learning_rate": 2.4700842929275033e-05, + "loss": 0.0878, + "step": 90050 + }, + { + "epoch": 3.2727669162003052, + "grad_norm": 0.3342646062374115, + "learning_rate": 2.4695653988819584e-05, + "loss": 0.0697, + "step": 90060 + }, + { + "epoch": 3.2731303147031037, + "grad_norm": 0.7479965686798096, + "learning_rate": 2.469046506147732e-05, + "loss": 0.0573, + "step": 90070 + }, + { + "epoch": 3.2734937132059017, + "grad_norm": 0.4107944071292877, + "learning_rate": 2.4685276147471804e-05, + "loss": 0.0553, + "step": 90080 + }, + { + "epoch": 3.2738571117086996, + "grad_norm": 0.8151494860649109, + "learning_rate": 2.468008724702663e-05, + "loss": 2.1021, + "step": 90090 + }, + { + "epoch": 3.274220510211498, + "grad_norm": 0.6547772884368896, + "learning_rate": 2.4674898360365357e-05, + "loss": 0.0704, + "step": 90100 + }, + { + "epoch": 3.274583908714296, + "grad_norm": 0.7743812203407288, + "learning_rate": 2.4669709487711562e-05, + "loss": 0.0689, + "step": 90110 + }, + { + "epoch": 3.2749473072170945, + "grad_norm": 0.546514630317688, + "learning_rate": 2.4664520629288795e-05, + "loss": 0.0693, + "step": 90120 + }, + { + "epoch": 3.2753107057198925, + "grad_norm": 1.609788179397583, + "learning_rate": 2.4659331785320652e-05, + "loss": 0.0814, + "step": 90130 + }, + { + "epoch": 3.2756741042226905, + "grad_norm": 0.8801899552345276, + "learning_rate": 2.4654142956030692e-05, + "loss": 0.0745, + "step": 90140 + }, + { + "epoch": 3.276037502725489, + "grad_norm": 0.5829627513885498, + "learning_rate": 2.464895414164248e-05, + "loss": 0.0597, + "step": 90150 + }, + { + "epoch": 3.276400901228287, + "grad_norm": 0.7144574522972107, + "learning_rate": 2.4643765342379584e-05, + "loss": 0.1288, + "step": 90160 + }, + { + "epoch": 3.2767642997310853, + "grad_norm": 0.4121115505695343, + "learning_rate": 2.4638576558465562e-05, + "loss": 0.0911, + "step": 90170 + }, + { + "epoch": 3.2771276982338833, + "grad_norm": 0.4146971106529236, + "learning_rate": 2.4633387790124003e-05, + "loss": 0.066, + "step": 90180 + }, + { + "epoch": 3.2774910967366813, + "grad_norm": 0.6736780405044556, + "learning_rate": 2.4628199037578457e-05, + "loss": 0.0708, + "step": 90190 + }, + { + "epoch": 3.2778544952394797, + "grad_norm": 2.1393494606018066, + "learning_rate": 2.462301030105249e-05, + "loss": 0.0841, + "step": 90200 + }, + { + "epoch": 3.2782178937422777, + "grad_norm": 1.2542799711227417, + "learning_rate": 2.461782158076967e-05, + "loss": 0.0797, + "step": 90210 + }, + { + "epoch": 3.278581292245076, + "grad_norm": 1.3703997135162354, + "learning_rate": 2.461263287695355e-05, + "loss": 0.0578, + "step": 90220 + }, + { + "epoch": 3.278944690747874, + "grad_norm": 0.32917872071266174, + "learning_rate": 2.4607444189827708e-05, + "loss": 0.0609, + "step": 90230 + }, + { + "epoch": 3.279308089250672, + "grad_norm": 0.6270015835762024, + "learning_rate": 2.4602255519615702e-05, + "loss": 0.0925, + "step": 90240 + }, + { + "epoch": 3.2796714877534705, + "grad_norm": 0.564975917339325, + "learning_rate": 2.4597066866541087e-05, + "loss": 0.0892, + "step": 90250 + }, + { + "epoch": 3.2800348862562685, + "grad_norm": 0.6385096311569214, + "learning_rate": 2.4591878230827433e-05, + "loss": 0.0876, + "step": 90260 + }, + { + "epoch": 3.280398284759067, + "grad_norm": 1.597822666168213, + "learning_rate": 2.4586689612698277e-05, + "loss": 0.1932, + "step": 90270 + }, + { + "epoch": 3.280761683261865, + "grad_norm": 0.7112919688224792, + "learning_rate": 2.458150101237721e-05, + "loss": 0.0928, + "step": 90280 + }, + { + "epoch": 3.281125081764663, + "grad_norm": 0.4111541509628296, + "learning_rate": 2.4576312430087772e-05, + "loss": 0.068, + "step": 90290 + }, + { + "epoch": 3.2814884802674613, + "grad_norm": 0.9870650172233582, + "learning_rate": 2.4571123866053525e-05, + "loss": 0.08, + "step": 90300 + }, + { + "epoch": 3.2818518787702593, + "grad_norm": 2.479681968688965, + "learning_rate": 2.4565935320498016e-05, + "loss": 4.4447, + "step": 90310 + }, + { + "epoch": 3.2822152772730577, + "grad_norm": 0.686183512210846, + "learning_rate": 2.456074679364482e-05, + "loss": 0.0719, + "step": 90320 + }, + { + "epoch": 3.2825786757758557, + "grad_norm": 0.5197970271110535, + "learning_rate": 2.455555828571748e-05, + "loss": 0.0548, + "step": 90330 + }, + { + "epoch": 3.282942074278654, + "grad_norm": 1.1354804039001465, + "learning_rate": 2.4550369796939547e-05, + "loss": 0.0908, + "step": 90340 + }, + { + "epoch": 3.283305472781452, + "grad_norm": 0.7994649410247803, + "learning_rate": 2.4545181327534588e-05, + "loss": 0.0862, + "step": 90350 + }, + { + "epoch": 3.2836688712842506, + "grad_norm": 1.2239160537719727, + "learning_rate": 2.4539992877726135e-05, + "loss": 0.2734, + "step": 90360 + }, + { + "epoch": 3.2840322697870485, + "grad_norm": 2.116908311843872, + "learning_rate": 2.4534804447737762e-05, + "loss": 0.0638, + "step": 90370 + }, + { + "epoch": 3.2843956682898465, + "grad_norm": 0.5922090411186218, + "learning_rate": 2.452961603779301e-05, + "loss": 0.0883, + "step": 90380 + }, + { + "epoch": 3.284759066792645, + "grad_norm": 0.7629362940788269, + "learning_rate": 2.4524427648115427e-05, + "loss": 0.07, + "step": 90390 + }, + { + "epoch": 3.285122465295443, + "grad_norm": 0.5463785529136658, + "learning_rate": 2.451923927892857e-05, + "loss": 0.0813, + "step": 90400 + }, + { + "epoch": 3.2854858637982414, + "grad_norm": 0.6801964044570923, + "learning_rate": 2.4514050930455965e-05, + "loss": 0.0673, + "step": 90410 + }, + { + "epoch": 3.2858492623010394, + "grad_norm": 1.022977590560913, + "learning_rate": 2.450886260292119e-05, + "loss": 0.0685, + "step": 90420 + }, + { + "epoch": 3.2862126608038373, + "grad_norm": 0.8831082582473755, + "learning_rate": 2.450367429654778e-05, + "loss": 0.0728, + "step": 90430 + }, + { + "epoch": 3.2865760593066358, + "grad_norm": 0.6400480270385742, + "learning_rate": 2.4498486011559277e-05, + "loss": 0.0967, + "step": 90440 + }, + { + "epoch": 3.2869394578094338, + "grad_norm": 0.8099062442779541, + "learning_rate": 2.4493297748179225e-05, + "loss": 0.066, + "step": 90450 + }, + { + "epoch": 3.287302856312232, + "grad_norm": 0.3613360524177551, + "learning_rate": 2.4488109506631163e-05, + "loss": 0.1143, + "step": 90460 + }, + { + "epoch": 3.28766625481503, + "grad_norm": 0.5503920316696167, + "learning_rate": 2.448292128713865e-05, + "loss": 0.0591, + "step": 90470 + }, + { + "epoch": 3.288029653317828, + "grad_norm": 0.34772297739982605, + "learning_rate": 2.447773308992522e-05, + "loss": 0.0738, + "step": 90480 + }, + { + "epoch": 3.2883930518206266, + "grad_norm": 1.3450576066970825, + "learning_rate": 2.4472544915214414e-05, + "loss": 0.0832, + "step": 90490 + }, + { + "epoch": 3.2887564503234246, + "grad_norm": 0.5817814469337463, + "learning_rate": 2.4467356763229756e-05, + "loss": 0.0962, + "step": 90500 + }, + { + "epoch": 3.289119848826223, + "grad_norm": 0.669734001159668, + "learning_rate": 2.446216863419481e-05, + "loss": 0.0779, + "step": 90510 + }, + { + "epoch": 3.289483247329021, + "grad_norm": 1.3286347389221191, + "learning_rate": 2.445698052833311e-05, + "loss": 0.07, + "step": 90520 + }, + { + "epoch": 3.289846645831819, + "grad_norm": 1.7495746612548828, + "learning_rate": 2.4451792445868186e-05, + "loss": 0.07, + "step": 90530 + }, + { + "epoch": 3.2902100443346174, + "grad_norm": 0.6228033304214478, + "learning_rate": 2.4446604387023572e-05, + "loss": 0.1081, + "step": 90540 + }, + { + "epoch": 3.2905734428374154, + "grad_norm": 0.8681657910346985, + "learning_rate": 2.4441416352022793e-05, + "loss": 0.0826, + "step": 90550 + }, + { + "epoch": 3.290936841340214, + "grad_norm": 1.1657065153121948, + "learning_rate": 2.4436228341089412e-05, + "loss": 0.0801, + "step": 90560 + }, + { + "epoch": 3.291300239843012, + "grad_norm": 1.196022391319275, + "learning_rate": 2.4431040354446944e-05, + "loss": 0.0665, + "step": 90570 + }, + { + "epoch": 3.2916636383458098, + "grad_norm": 0.8635385632514954, + "learning_rate": 2.442585239231892e-05, + "loss": 0.0694, + "step": 90580 + }, + { + "epoch": 3.292027036848608, + "grad_norm": 0.664546549320221, + "learning_rate": 2.442066445492888e-05, + "loss": 0.0938, + "step": 90590 + }, + { + "epoch": 3.292390435351406, + "grad_norm": 0.6976110339164734, + "learning_rate": 2.441547654250033e-05, + "loss": 0.0818, + "step": 90600 + }, + { + "epoch": 3.292390435351406, + "eval_loss": 0.33689337968826294, + "eval_runtime": 179.0908, + "eval_samples_per_second": 41.398, + "eval_steps_per_second": 5.176, + "eval_wer": 0.1381995752173834, + "step": 90600 + }, + { + "epoch": 3.2927538338542046, + "grad_norm": 0.48501139879226685, + "learning_rate": 2.441028865525683e-05, + "loss": 0.0915, + "step": 90610 + }, + { + "epoch": 3.2931172323570026, + "grad_norm": 0.7468106746673584, + "learning_rate": 2.4405100793421892e-05, + "loss": 0.1325, + "step": 90620 + }, + { + "epoch": 3.293480630859801, + "grad_norm": 0.38419654965400696, + "learning_rate": 2.4399912957219048e-05, + "loss": 0.071, + "step": 90630 + }, + { + "epoch": 3.293844029362599, + "grad_norm": 0.44008463621139526, + "learning_rate": 2.4394725146871817e-05, + "loss": 0.0885, + "step": 90640 + }, + { + "epoch": 3.2942074278653974, + "grad_norm": 4.322512149810791, + "learning_rate": 2.4389537362603714e-05, + "loss": 0.0892, + "step": 90650 + }, + { + "epoch": 3.2945708263681954, + "grad_norm": 0.5549167990684509, + "learning_rate": 2.4384349604638285e-05, + "loss": 0.0908, + "step": 90660 + }, + { + "epoch": 3.2949342248709934, + "grad_norm": 0.5810611248016357, + "learning_rate": 2.437916187319904e-05, + "loss": 0.0805, + "step": 90670 + }, + { + "epoch": 3.295297623373792, + "grad_norm": 0.5657238364219666, + "learning_rate": 2.4373974168509507e-05, + "loss": 0.0607, + "step": 90680 + }, + { + "epoch": 3.29566102187659, + "grad_norm": 0.6547468304634094, + "learning_rate": 2.436878649079318e-05, + "loss": 0.0987, + "step": 90690 + }, + { + "epoch": 3.2960244203793883, + "grad_norm": 1.354154109954834, + "learning_rate": 2.4363598840273617e-05, + "loss": 0.0898, + "step": 90700 + }, + { + "epoch": 3.2963878188821862, + "grad_norm": 3.738417387008667, + "learning_rate": 2.4358411217174308e-05, + "loss": 0.0716, + "step": 90710 + }, + { + "epoch": 3.2967512173849842, + "grad_norm": 1.3510026931762695, + "learning_rate": 2.435322362171878e-05, + "loss": 0.0855, + "step": 90720 + }, + { + "epoch": 3.2971146158877827, + "grad_norm": 1.4833030700683594, + "learning_rate": 2.4348036054130545e-05, + "loss": 0.0859, + "step": 90730 + }, + { + "epoch": 3.2974780143905806, + "grad_norm": 0.327749639749527, + "learning_rate": 2.4342848514633107e-05, + "loss": 0.0798, + "step": 90740 + }, + { + "epoch": 3.297841412893379, + "grad_norm": 2.8632781505584717, + "learning_rate": 2.4337661003449998e-05, + "loss": 0.0958, + "step": 90750 + }, + { + "epoch": 3.298204811396177, + "grad_norm": 1.3341703414916992, + "learning_rate": 2.433247352080472e-05, + "loss": 2.2308, + "step": 90760 + }, + { + "epoch": 3.298568209898975, + "grad_norm": 1.0177751779556274, + "learning_rate": 2.4327286066920785e-05, + "loss": 0.0721, + "step": 90770 + }, + { + "epoch": 3.2989316084017735, + "grad_norm": 0.5713348388671875, + "learning_rate": 2.4322098642021697e-05, + "loss": 0.0657, + "step": 90780 + }, + { + "epoch": 3.2992950069045714, + "grad_norm": 0.32323822379112244, + "learning_rate": 2.4316911246330955e-05, + "loss": 0.2474, + "step": 90790 + }, + { + "epoch": 3.29965840540737, + "grad_norm": 0.4968611001968384, + "learning_rate": 2.431172388007209e-05, + "loss": 0.0831, + "step": 90800 + }, + { + "epoch": 3.300021803910168, + "grad_norm": 0.9985103607177734, + "learning_rate": 2.430653654346859e-05, + "loss": 0.0802, + "step": 90810 + }, + { + "epoch": 3.300385202412966, + "grad_norm": 0.5295760631561279, + "learning_rate": 2.430134923674397e-05, + "loss": 0.0702, + "step": 90820 + }, + { + "epoch": 3.3007486009157643, + "grad_norm": 0.6976117491722107, + "learning_rate": 2.429616196012172e-05, + "loss": 0.0797, + "step": 90830 + }, + { + "epoch": 3.3011119994185623, + "grad_norm": 0.5984934568405151, + "learning_rate": 2.4290974713825338e-05, + "loss": 0.3996, + "step": 90840 + }, + { + "epoch": 3.3014753979213607, + "grad_norm": 0.9891178607940674, + "learning_rate": 2.4285787498078343e-05, + "loss": 0.0631, + "step": 90850 + }, + { + "epoch": 3.3018387964241587, + "grad_norm": 0.42125028371810913, + "learning_rate": 2.4280600313104227e-05, + "loss": 0.0814, + "step": 90860 + }, + { + "epoch": 3.3022021949269567, + "grad_norm": 0.25509488582611084, + "learning_rate": 2.427541315912648e-05, + "loss": 0.0648, + "step": 90870 + }, + { + "epoch": 3.302565593429755, + "grad_norm": 0.5609679222106934, + "learning_rate": 2.42702260363686e-05, + "loss": 0.0945, + "step": 90880 + }, + { + "epoch": 3.302928991932553, + "grad_norm": 1.2793158292770386, + "learning_rate": 2.4265038945054076e-05, + "loss": 0.0853, + "step": 90890 + }, + { + "epoch": 3.3032923904353515, + "grad_norm": 0.8435468673706055, + "learning_rate": 2.425985188540642e-05, + "loss": 0.1439, + "step": 90900 + }, + { + "epoch": 3.3036557889381495, + "grad_norm": 1.7698181867599487, + "learning_rate": 2.425466485764911e-05, + "loss": 0.073, + "step": 90910 + }, + { + "epoch": 3.304019187440948, + "grad_norm": 0.8225585222244263, + "learning_rate": 2.4249477862005646e-05, + "loss": 0.0733, + "step": 90920 + }, + { + "epoch": 3.304382585943746, + "grad_norm": 1.2218375205993652, + "learning_rate": 2.4244290898699494e-05, + "loss": 0.0723, + "step": 90930 + }, + { + "epoch": 3.3047459844465443, + "grad_norm": 2.20505952835083, + "learning_rate": 2.423910396795417e-05, + "loss": 0.0815, + "step": 90940 + }, + { + "epoch": 3.3051093829493423, + "grad_norm": 0.4661131501197815, + "learning_rate": 2.423391706999315e-05, + "loss": 0.0872, + "step": 90950 + }, + { + "epoch": 3.3054727814521403, + "grad_norm": 0.6685827970504761, + "learning_rate": 2.4228730205039916e-05, + "loss": 0.066, + "step": 90960 + }, + { + "epoch": 3.3058361799549387, + "grad_norm": 0.89743572473526, + "learning_rate": 2.4223543373317954e-05, + "loss": 0.0665, + "step": 90970 + }, + { + "epoch": 3.3061995784577367, + "grad_norm": 0.9550055265426636, + "learning_rate": 2.4218356575050734e-05, + "loss": 0.0722, + "step": 90980 + }, + { + "epoch": 3.306562976960535, + "grad_norm": 0.4779164791107178, + "learning_rate": 2.421316981046176e-05, + "loss": 0.0724, + "step": 90990 + }, + { + "epoch": 3.306926375463333, + "grad_norm": 1.0296430587768555, + "learning_rate": 2.4207983079774498e-05, + "loss": 0.0718, + "step": 91000 + }, + { + "epoch": 3.307289773966131, + "grad_norm": 0.6730550527572632, + "learning_rate": 2.4202796383212427e-05, + "loss": 0.0622, + "step": 91010 + }, + { + "epoch": 3.3076531724689295, + "grad_norm": 0.5089064240455627, + "learning_rate": 2.4197609720999027e-05, + "loss": 0.056, + "step": 91020 + }, + { + "epoch": 3.3080165709717275, + "grad_norm": 1.3337105512619019, + "learning_rate": 2.4192423093357754e-05, + "loss": 0.0799, + "step": 91030 + }, + { + "epoch": 3.308379969474526, + "grad_norm": 0.7988773584365845, + "learning_rate": 2.418723650051211e-05, + "loss": 0.094, + "step": 91040 + }, + { + "epoch": 3.308743367977324, + "grad_norm": 2.004700183868408, + "learning_rate": 2.4182049942685553e-05, + "loss": 0.073, + "step": 91050 + }, + { + "epoch": 3.309106766480122, + "grad_norm": 0.5091060996055603, + "learning_rate": 2.4176863420101553e-05, + "loss": 0.0869, + "step": 91060 + }, + { + "epoch": 3.3094701649829203, + "grad_norm": 1.5232765674591064, + "learning_rate": 2.417167693298358e-05, + "loss": 0.0926, + "step": 91070 + }, + { + "epoch": 3.3098335634857183, + "grad_norm": 2.1751208305358887, + "learning_rate": 2.416649048155509e-05, + "loss": 0.0679, + "step": 91080 + }, + { + "epoch": 3.3101969619885168, + "grad_norm": 0.7265444397926331, + "learning_rate": 2.416130406603957e-05, + "loss": 0.6392, + "step": 91090 + }, + { + "epoch": 3.3105603604913147, + "grad_norm": 1.931279182434082, + "learning_rate": 2.4156117686660473e-05, + "loss": 0.097, + "step": 91100 + }, + { + "epoch": 3.3109237589941127, + "grad_norm": 0.5885264277458191, + "learning_rate": 2.4150931343641264e-05, + "loss": 0.087, + "step": 91110 + }, + { + "epoch": 3.311287157496911, + "grad_norm": 1.2313237190246582, + "learning_rate": 2.4145745037205388e-05, + "loss": 0.0718, + "step": 91120 + }, + { + "epoch": 3.311650555999709, + "grad_norm": 0.4470736086368561, + "learning_rate": 2.4140558767576336e-05, + "loss": 0.0614, + "step": 91130 + }, + { + "epoch": 3.3120139545025076, + "grad_norm": 0.9219857454299927, + "learning_rate": 2.4135372534977542e-05, + "loss": 0.1004, + "step": 91140 + }, + { + "epoch": 3.3123773530053056, + "grad_norm": 0.5581304430961609, + "learning_rate": 2.4130186339632473e-05, + "loss": 0.0861, + "step": 91150 + }, + { + "epoch": 3.3127407515081035, + "grad_norm": 0.6495917439460754, + "learning_rate": 2.412500018176458e-05, + "loss": 0.0892, + "step": 91160 + }, + { + "epoch": 3.313104150010902, + "grad_norm": 0.8792677521705627, + "learning_rate": 2.4119814061597304e-05, + "loss": 0.0804, + "step": 91170 + }, + { + "epoch": 3.3134675485137, + "grad_norm": 0.7889745235443115, + "learning_rate": 2.411462797935412e-05, + "loss": 0.068, + "step": 91180 + }, + { + "epoch": 3.3138309470164984, + "grad_norm": 0.883449137210846, + "learning_rate": 2.4109441935258465e-05, + "loss": 0.1013, + "step": 91190 + }, + { + "epoch": 3.3141943455192964, + "grad_norm": 0.7440003156661987, + "learning_rate": 2.410425592953379e-05, + "loss": 0.0737, + "step": 91200 + }, + { + "epoch": 3.3141943455192964, + "eval_loss": 0.3343234956264496, + "eval_runtime": 179.7554, + "eval_samples_per_second": 41.245, + "eval_steps_per_second": 5.157, + "eval_wer": 0.13966997658249677, + "step": 91200 + }, + { + "epoch": 3.314557744022095, + "grad_norm": 0.6900414228439331, + "learning_rate": 2.409906996240353e-05, + "loss": 0.0944, + "step": 91210 + }, + { + "epoch": 3.314921142524893, + "grad_norm": 1.4145492315292358, + "learning_rate": 2.4093884034091148e-05, + "loss": 0.0583, + "step": 91220 + }, + { + "epoch": 3.315284541027691, + "grad_norm": 1.502017855644226, + "learning_rate": 2.4088698144820076e-05, + "loss": 0.073, + "step": 91230 + }, + { + "epoch": 3.315647939530489, + "grad_norm": 0.4413808584213257, + "learning_rate": 2.4083512294813765e-05, + "loss": 0.0817, + "step": 91240 + }, + { + "epoch": 3.316011338033287, + "grad_norm": 4.0073957443237305, + "learning_rate": 2.4078326484295642e-05, + "loss": 0.0913, + "step": 91250 + }, + { + "epoch": 3.3163747365360856, + "grad_norm": 1.807396650314331, + "learning_rate": 2.407314071348915e-05, + "loss": 0.0713, + "step": 91260 + }, + { + "epoch": 3.3167381350388836, + "grad_norm": 0.6919573545455933, + "learning_rate": 2.4067954982617726e-05, + "loss": 0.0856, + "step": 91270 + }, + { + "epoch": 3.317101533541682, + "grad_norm": 0.31657874584198, + "learning_rate": 2.4062769291904814e-05, + "loss": 0.0806, + "step": 91280 + }, + { + "epoch": 3.31746493204448, + "grad_norm": 2.0308666229248047, + "learning_rate": 2.4057583641573835e-05, + "loss": 0.0987, + "step": 91290 + }, + { + "epoch": 3.317828330547278, + "grad_norm": 1.023868441581726, + "learning_rate": 2.4052398031848224e-05, + "loss": 0.0692, + "step": 91300 + }, + { + "epoch": 3.3181917290500764, + "grad_norm": 0.5633085370063782, + "learning_rate": 2.4047212462951402e-05, + "loss": 0.0779, + "step": 91310 + }, + { + "epoch": 3.3185551275528744, + "grad_norm": 0.48355531692504883, + "learning_rate": 2.4042026935106812e-05, + "loss": 0.0692, + "step": 91320 + }, + { + "epoch": 3.318918526055673, + "grad_norm": 1.3398791551589966, + "learning_rate": 2.4036841448537876e-05, + "loss": 0.0805, + "step": 91330 + }, + { + "epoch": 3.319281924558471, + "grad_norm": 0.36602282524108887, + "learning_rate": 2.4031656003468016e-05, + "loss": 0.0789, + "step": 91340 + }, + { + "epoch": 3.319645323061269, + "grad_norm": 0.6087961196899414, + "learning_rate": 2.4026470600120643e-05, + "loss": 0.0816, + "step": 91350 + }, + { + "epoch": 3.3200087215640672, + "grad_norm": 0.5523571372032166, + "learning_rate": 2.402128523871919e-05, + "loss": 0.0945, + "step": 91360 + }, + { + "epoch": 3.320372120066865, + "grad_norm": 0.5356813669204712, + "learning_rate": 2.401609991948708e-05, + "loss": 0.0543, + "step": 91370 + }, + { + "epoch": 3.3207355185696636, + "grad_norm": 0.7490220665931702, + "learning_rate": 2.4010914642647725e-05, + "loss": 0.0802, + "step": 91380 + }, + { + "epoch": 3.3210989170724616, + "grad_norm": 0.3810897469520569, + "learning_rate": 2.4005729408424538e-05, + "loss": 0.0986, + "step": 91390 + }, + { + "epoch": 3.3214623155752596, + "grad_norm": 2.465951919555664, + "learning_rate": 2.4000544217040926e-05, + "loss": 0.0977, + "step": 91400 + }, + { + "epoch": 3.321825714078058, + "grad_norm": 0.69112628698349, + "learning_rate": 2.399535906872031e-05, + "loss": 0.0679, + "step": 91410 + }, + { + "epoch": 3.322189112580856, + "grad_norm": 0.594582200050354, + "learning_rate": 2.3990173963686103e-05, + "loss": 0.0622, + "step": 91420 + }, + { + "epoch": 3.3225525110836545, + "grad_norm": 0.5401979088783264, + "learning_rate": 2.3984988902161706e-05, + "loss": 0.0557, + "step": 91430 + }, + { + "epoch": 3.3229159095864524, + "grad_norm": 0.34108448028564453, + "learning_rate": 2.397980388437052e-05, + "loss": 0.0941, + "step": 91440 + }, + { + "epoch": 3.3232793080892504, + "grad_norm": 0.8815538287162781, + "learning_rate": 2.3974618910535958e-05, + "loss": 0.0491, + "step": 91450 + }, + { + "epoch": 3.323642706592049, + "grad_norm": 1.5742990970611572, + "learning_rate": 2.3969433980881417e-05, + "loss": 0.151, + "step": 91460 + }, + { + "epoch": 3.324006105094847, + "grad_norm": 0.6522462368011475, + "learning_rate": 2.3964249095630302e-05, + "loss": 0.0642, + "step": 91470 + }, + { + "epoch": 3.3243695035976453, + "grad_norm": 2.5433623790740967, + "learning_rate": 2.3959064255006012e-05, + "loss": 0.0702, + "step": 91480 + }, + { + "epoch": 3.3247329021004433, + "grad_norm": 0.345841646194458, + "learning_rate": 2.395387945923193e-05, + "loss": 0.1386, + "step": 91490 + }, + { + "epoch": 3.3250963006032417, + "grad_norm": 0.6606214642524719, + "learning_rate": 2.394869470853146e-05, + "loss": 0.3363, + "step": 91500 + }, + { + "epoch": 3.3254596991060397, + "grad_norm": 0.638674795627594, + "learning_rate": 2.3943510003128004e-05, + "loss": 0.0987, + "step": 91510 + }, + { + "epoch": 3.325823097608838, + "grad_norm": 0.4758436381816864, + "learning_rate": 2.393832534324494e-05, + "loss": 0.061, + "step": 91520 + }, + { + "epoch": 3.326186496111636, + "grad_norm": 4.208878040313721, + "learning_rate": 2.393314072910565e-05, + "loss": 0.0707, + "step": 91530 + }, + { + "epoch": 3.326549894614434, + "grad_norm": 0.49438315629959106, + "learning_rate": 2.3927956160933544e-05, + "loss": 0.0792, + "step": 91540 + }, + { + "epoch": 3.3269132931172325, + "grad_norm": 1.3179932832717896, + "learning_rate": 2.3922771638951983e-05, + "loss": 0.0857, + "step": 91550 + }, + { + "epoch": 3.3272766916200305, + "grad_norm": 0.5983903408050537, + "learning_rate": 2.3917587163384365e-05, + "loss": 0.0743, + "step": 91560 + }, + { + "epoch": 3.327640090122829, + "grad_norm": 0.30007684230804443, + "learning_rate": 2.3912402734454063e-05, + "loss": 0.0538, + "step": 91570 + }, + { + "epoch": 3.328003488625627, + "grad_norm": 2.637645959854126, + "learning_rate": 2.3907218352384452e-05, + "loss": 0.066, + "step": 91580 + }, + { + "epoch": 3.328366887128425, + "grad_norm": 0.9720343351364136, + "learning_rate": 2.3902034017398923e-05, + "loss": 0.0899, + "step": 91590 + }, + { + "epoch": 3.3287302856312233, + "grad_norm": 0.5020787715911865, + "learning_rate": 2.3896849729720834e-05, + "loss": 0.0858, + "step": 91600 + }, + { + "epoch": 3.3290936841340213, + "grad_norm": 0.7373344898223877, + "learning_rate": 2.3891665489573573e-05, + "loss": 0.0704, + "step": 91610 + }, + { + "epoch": 3.3294570826368197, + "grad_norm": 0.8118528127670288, + "learning_rate": 2.3886481297180494e-05, + "loss": 0.0735, + "step": 91620 + }, + { + "epoch": 3.3298204811396177, + "grad_norm": 0.37041768431663513, + "learning_rate": 2.388129715276498e-05, + "loss": 0.0747, + "step": 91630 + }, + { + "epoch": 3.3301838796424157, + "grad_norm": 0.9788088798522949, + "learning_rate": 2.3876113056550392e-05, + "loss": 0.0728, + "step": 91640 + }, + { + "epoch": 3.330547278145214, + "grad_norm": 1.582607626914978, + "learning_rate": 2.3870929008760087e-05, + "loss": 0.0798, + "step": 91650 + }, + { + "epoch": 3.330910676648012, + "grad_norm": 4.744373321533203, + "learning_rate": 2.386574500961744e-05, + "loss": 0.0922, + "step": 91660 + }, + { + "epoch": 3.3312740751508105, + "grad_norm": 0.6684284806251526, + "learning_rate": 2.38605610593458e-05, + "loss": 0.0615, + "step": 91670 + }, + { + "epoch": 3.3316374736536085, + "grad_norm": 0.49481749534606934, + "learning_rate": 2.3855377158168535e-05, + "loss": 0.0838, + "step": 91680 + }, + { + "epoch": 3.3320008721564065, + "grad_norm": 0.9517963528633118, + "learning_rate": 2.385019330630899e-05, + "loss": 0.0822, + "step": 91690 + }, + { + "epoch": 3.332364270659205, + "grad_norm": 1.185115098953247, + "learning_rate": 2.384500950399053e-05, + "loss": 0.0902, + "step": 91700 + }, + { + "epoch": 3.332727669162003, + "grad_norm": 0.7998439073562622, + "learning_rate": 2.38398257514365e-05, + "loss": 0.0977, + "step": 91710 + }, + { + "epoch": 3.3330910676648013, + "grad_norm": 0.7033588886260986, + "learning_rate": 2.383464204887025e-05, + "loss": 4.2273, + "step": 91720 + }, + { + "epoch": 3.3334544661675993, + "grad_norm": 0.37441256642341614, + "learning_rate": 2.3829458396515128e-05, + "loss": 0.0806, + "step": 91730 + }, + { + "epoch": 3.3338178646703973, + "grad_norm": 1.9060165882110596, + "learning_rate": 2.3824274794594473e-05, + "loss": 1.8747, + "step": 91740 + }, + { + "epoch": 3.3341812631731957, + "grad_norm": 1.7355316877365112, + "learning_rate": 2.3819091243331643e-05, + "loss": 0.0927, + "step": 91750 + }, + { + "epoch": 3.3345446616759937, + "grad_norm": 0.8641614317893982, + "learning_rate": 2.381390774294996e-05, + "loss": 0.1018, + "step": 91760 + }, + { + "epoch": 3.334908060178792, + "grad_norm": 1.4142506122589111, + "learning_rate": 2.380872429367278e-05, + "loss": 0.0714, + "step": 91770 + }, + { + "epoch": 3.33527145868159, + "grad_norm": 0.9375418424606323, + "learning_rate": 2.3803540895723433e-05, + "loss": 0.0922, + "step": 91780 + }, + { + "epoch": 3.3356348571843886, + "grad_norm": 0.28046151995658875, + "learning_rate": 2.3798357549325245e-05, + "loss": 0.0794, + "step": 91790 + }, + { + "epoch": 3.3359982556871866, + "grad_norm": 0.9596878290176392, + "learning_rate": 2.3793174254701557e-05, + "loss": 0.0864, + "step": 91800 + }, + { + "epoch": 3.3359982556871866, + "eval_loss": 0.3040144741535187, + "eval_runtime": 179.6763, + "eval_samples_per_second": 41.263, + "eval_steps_per_second": 5.159, + "eval_wer": 0.1398333545119538, + "step": 91800 + }, + { + "epoch": 3.336361654189985, + "grad_norm": 1.36545729637146, + "learning_rate": 2.3787991012075697e-05, + "loss": 2.5138, + "step": 91810 + }, + { + "epoch": 3.336725052692783, + "grad_norm": 1.0148299932479858, + "learning_rate": 2.3782807821670993e-05, + "loss": 0.0763, + "step": 91820 + }, + { + "epoch": 3.337088451195581, + "grad_norm": 1.4392248392105103, + "learning_rate": 2.3777624683710768e-05, + "loss": 0.0623, + "step": 91830 + }, + { + "epoch": 3.3374518496983794, + "grad_norm": 0.4060908854007721, + "learning_rate": 2.3772441598418347e-05, + "loss": 0.1011, + "step": 91840 + }, + { + "epoch": 3.3378152482011774, + "grad_norm": 2.786815643310547, + "learning_rate": 2.3767258566017045e-05, + "loss": 0.0724, + "step": 91850 + }, + { + "epoch": 3.338178646703976, + "grad_norm": 0.658647894859314, + "learning_rate": 2.3762075586730194e-05, + "loss": 0.0953, + "step": 91860 + }, + { + "epoch": 3.338542045206774, + "grad_norm": 0.6986158490180969, + "learning_rate": 2.3756892660781096e-05, + "loss": 0.0742, + "step": 91870 + }, + { + "epoch": 3.3389054437095718, + "grad_norm": 0.4815951883792877, + "learning_rate": 2.375170978839307e-05, + "loss": 0.0624, + "step": 91880 + }, + { + "epoch": 3.33926884221237, + "grad_norm": 0.5518103241920471, + "learning_rate": 2.3746526969789432e-05, + "loss": 0.0692, + "step": 91890 + }, + { + "epoch": 3.339632240715168, + "grad_norm": 0.5782762765884399, + "learning_rate": 2.374134420519348e-05, + "loss": 0.0755, + "step": 91900 + }, + { + "epoch": 3.3399956392179666, + "grad_norm": 0.35663267970085144, + "learning_rate": 2.3736161494828535e-05, + "loss": 0.0877, + "step": 91910 + }, + { + "epoch": 3.3403590377207646, + "grad_norm": 0.7183496952056885, + "learning_rate": 2.373097883891789e-05, + "loss": 0.0501, + "step": 91920 + }, + { + "epoch": 3.3407224362235626, + "grad_norm": 0.6491569876670837, + "learning_rate": 2.3725796237684853e-05, + "loss": 0.1178, + "step": 91930 + }, + { + "epoch": 3.341085834726361, + "grad_norm": 0.3948687016963959, + "learning_rate": 2.372061369135272e-05, + "loss": 0.0991, + "step": 91940 + }, + { + "epoch": 3.341449233229159, + "grad_norm": 0.5275573134422302, + "learning_rate": 2.3715431200144793e-05, + "loss": 0.1027, + "step": 91950 + }, + { + "epoch": 3.3418126317319574, + "grad_norm": 0.5675976872444153, + "learning_rate": 2.371024876428437e-05, + "loss": 0.0734, + "step": 91960 + }, + { + "epoch": 3.3421760302347554, + "grad_norm": 2.3037304878234863, + "learning_rate": 2.3705066383994738e-05, + "loss": 0.0692, + "step": 91970 + }, + { + "epoch": 3.3425394287375534, + "grad_norm": 0.8419054746627808, + "learning_rate": 2.369988405949918e-05, + "loss": 0.0693, + "step": 91980 + }, + { + "epoch": 3.342902827240352, + "grad_norm": 0.7216833829879761, + "learning_rate": 2.3694701791020994e-05, + "loss": 0.9264, + "step": 91990 + }, + { + "epoch": 3.34326622574315, + "grad_norm": 0.7542405724525452, + "learning_rate": 2.3689519578783467e-05, + "loss": 0.0725, + "step": 92000 + }, + { + "epoch": 3.3436296242459482, + "grad_norm": 1.4204015731811523, + "learning_rate": 2.368433742300988e-05, + "loss": 0.0826, + "step": 92010 + }, + { + "epoch": 3.343993022748746, + "grad_norm": 3.0590789318084717, + "learning_rate": 2.3679155323923514e-05, + "loss": 0.0686, + "step": 92020 + }, + { + "epoch": 3.344356421251544, + "grad_norm": 0.6067277789115906, + "learning_rate": 2.3673973281747634e-05, + "loss": 0.0887, + "step": 92030 + }, + { + "epoch": 3.3447198197543426, + "grad_norm": 0.8979749083518982, + "learning_rate": 2.3668791296705533e-05, + "loss": 0.0774, + "step": 92040 + }, + { + "epoch": 3.3450832182571406, + "grad_norm": 2.3768551349639893, + "learning_rate": 2.3663609369020484e-05, + "loss": 0.0991, + "step": 92050 + }, + { + "epoch": 3.345446616759939, + "grad_norm": 0.9666934609413147, + "learning_rate": 2.365842749891575e-05, + "loss": 0.0693, + "step": 92060 + }, + { + "epoch": 3.345810015262737, + "grad_norm": 0.48444709181785583, + "learning_rate": 2.3653245686614603e-05, + "loss": 0.0599, + "step": 92070 + }, + { + "epoch": 3.3461734137655355, + "grad_norm": 0.5083462595939636, + "learning_rate": 2.36480639323403e-05, + "loss": 0.0561, + "step": 92080 + }, + { + "epoch": 3.3465368122683334, + "grad_norm": 0.41698160767555237, + "learning_rate": 2.3642882236316115e-05, + "loss": 0.0903, + "step": 92090 + }, + { + "epoch": 3.346900210771132, + "grad_norm": 0.29507341980934143, + "learning_rate": 2.3637700598765313e-05, + "loss": 0.0729, + "step": 92100 + }, + { + "epoch": 3.34726360927393, + "grad_norm": 0.414693146944046, + "learning_rate": 2.3632519019911142e-05, + "loss": 0.1065, + "step": 92110 + }, + { + "epoch": 3.347627007776728, + "grad_norm": 0.46231353282928467, + "learning_rate": 2.3627337499976855e-05, + "loss": 0.5836, + "step": 92120 + }, + { + "epoch": 3.3479904062795263, + "grad_norm": 0.6952545046806335, + "learning_rate": 2.362215603918571e-05, + "loss": 0.0865, + "step": 92130 + }, + { + "epoch": 3.3483538047823243, + "grad_norm": 0.5076987743377686, + "learning_rate": 2.361697463776097e-05, + "loss": 0.0992, + "step": 92140 + }, + { + "epoch": 3.3487172032851227, + "grad_norm": 0.7665526866912842, + "learning_rate": 2.3611793295925865e-05, + "loss": 0.0715, + "step": 92150 + }, + { + "epoch": 3.3490806017879207, + "grad_norm": 0.5041813254356384, + "learning_rate": 2.360661201390365e-05, + "loss": 0.0631, + "step": 92160 + }, + { + "epoch": 3.3494440002907186, + "grad_norm": 0.6250981092453003, + "learning_rate": 2.360143079191756e-05, + "loss": 0.0595, + "step": 92170 + }, + { + "epoch": 3.349807398793517, + "grad_norm": 0.5737594962120056, + "learning_rate": 2.3596249630190846e-05, + "loss": 0.0698, + "step": 92180 + }, + { + "epoch": 3.350170797296315, + "grad_norm": 0.33314749598503113, + "learning_rate": 2.359106852894674e-05, + "loss": 0.058, + "step": 92190 + }, + { + "epoch": 3.3505341957991135, + "grad_norm": 1.2376643419265747, + "learning_rate": 2.3585887488408483e-05, + "loss": 0.0721, + "step": 92200 + }, + { + "epoch": 3.3508975943019115, + "grad_norm": 6.422807216644287, + "learning_rate": 2.35807065087993e-05, + "loss": 0.1657, + "step": 92210 + }, + { + "epoch": 3.3512609928047095, + "grad_norm": 1.1383922100067139, + "learning_rate": 2.357552559034241e-05, + "loss": 0.0738, + "step": 92220 + }, + { + "epoch": 3.351624391307508, + "grad_norm": 0.5397285223007202, + "learning_rate": 2.357034473326107e-05, + "loss": 0.0631, + "step": 92230 + }, + { + "epoch": 3.351987789810306, + "grad_norm": 0.4672096073627472, + "learning_rate": 2.3565163937778485e-05, + "loss": 0.0836, + "step": 92240 + }, + { + "epoch": 3.3523511883131043, + "grad_norm": 0.714462161064148, + "learning_rate": 2.3559983204117886e-05, + "loss": 0.0699, + "step": 92250 + }, + { + "epoch": 3.3527145868159023, + "grad_norm": 0.5036824941635132, + "learning_rate": 2.355480253250248e-05, + "loss": 0.6057, + "step": 92260 + }, + { + "epoch": 3.3530779853187003, + "grad_norm": 0.6620817184448242, + "learning_rate": 2.3549621923155486e-05, + "loss": 0.0665, + "step": 92270 + }, + { + "epoch": 3.3534413838214987, + "grad_norm": 0.5807569622993469, + "learning_rate": 2.354444137630013e-05, + "loss": 0.0691, + "step": 92280 + }, + { + "epoch": 3.3538047823242967, + "grad_norm": 0.5693409442901611, + "learning_rate": 2.3539260892159618e-05, + "loss": 0.0721, + "step": 92290 + }, + { + "epoch": 3.354168180827095, + "grad_norm": 0.7940452098846436, + "learning_rate": 2.3534080470957157e-05, + "loss": 0.0749, + "step": 92300 + }, + { + "epoch": 3.354531579329893, + "grad_norm": 0.3544544577598572, + "learning_rate": 2.352890011291594e-05, + "loss": 0.0782, + "step": 92310 + }, + { + "epoch": 3.354894977832691, + "grad_norm": 0.5041877031326294, + "learning_rate": 2.3523719818259196e-05, + "loss": 0.0592, + "step": 92320 + }, + { + "epoch": 3.3552583763354895, + "grad_norm": 0.4897719919681549, + "learning_rate": 2.3518539587210112e-05, + "loss": 0.0678, + "step": 92330 + }, + { + "epoch": 3.3556217748382875, + "grad_norm": 1.1827727556228638, + "learning_rate": 2.3513359419991884e-05, + "loss": 0.0838, + "step": 92340 + }, + { + "epoch": 3.355985173341086, + "grad_norm": 0.646219789981842, + "learning_rate": 2.3508179316827713e-05, + "loss": 0.068, + "step": 92350 + }, + { + "epoch": 3.356348571843884, + "grad_norm": 0.49982723593711853, + "learning_rate": 2.3502999277940772e-05, + "loss": 0.0703, + "step": 92360 + }, + { + "epoch": 3.3567119703466823, + "grad_norm": 0.8181835412979126, + "learning_rate": 2.3497819303554276e-05, + "loss": 0.0847, + "step": 92370 + }, + { + "epoch": 3.3570753688494803, + "grad_norm": 1.0289931297302246, + "learning_rate": 2.3492639393891408e-05, + "loss": 0.0778, + "step": 92380 + }, + { + "epoch": 3.3574387673522788, + "grad_norm": 0.6994947791099548, + "learning_rate": 2.348745954917534e-05, + "loss": 0.0865, + "step": 92390 + }, + { + "epoch": 3.3578021658550767, + "grad_norm": 0.9630132913589478, + "learning_rate": 2.348227976962926e-05, + "loss": 0.0974, + "step": 92400 + }, + { + "epoch": 3.3578021658550767, + "eval_loss": 0.31169602274894714, + "eval_runtime": 178.7618, + "eval_samples_per_second": 41.474, + "eval_steps_per_second": 5.186, + "eval_wer": 0.14020549312905042, + "step": 92400 + }, + { + "epoch": 3.3581655643578747, + "grad_norm": 0.2899853587150574, + "learning_rate": 2.3477100055476334e-05, + "loss": 0.0749, + "step": 92410 + }, + { + "epoch": 3.358528962860673, + "grad_norm": 0.3742106854915619, + "learning_rate": 2.347192040693976e-05, + "loss": 0.0738, + "step": 92420 + }, + { + "epoch": 3.358892361363471, + "grad_norm": 0.5455346703529358, + "learning_rate": 2.3466740824242695e-05, + "loss": 0.063, + "step": 92430 + }, + { + "epoch": 3.3592557598662696, + "grad_norm": 0.29540014266967773, + "learning_rate": 2.3461561307608315e-05, + "loss": 0.0981, + "step": 92440 + }, + { + "epoch": 3.3596191583690675, + "grad_norm": 1.146060824394226, + "learning_rate": 2.3456381857259785e-05, + "loss": 0.0866, + "step": 92450 + }, + { + "epoch": 3.3599825568718655, + "grad_norm": 0.8182836174964905, + "learning_rate": 2.345120247342026e-05, + "loss": 0.1022, + "step": 92460 + }, + { + "epoch": 3.360345955374664, + "grad_norm": 0.33180689811706543, + "learning_rate": 2.3446023156312915e-05, + "loss": 0.0737, + "step": 92470 + }, + { + "epoch": 3.360709353877462, + "grad_norm": 0.6751521229743958, + "learning_rate": 2.3440843906160907e-05, + "loss": 0.0652, + "step": 92480 + }, + { + "epoch": 3.3610727523802604, + "grad_norm": 0.37743857502937317, + "learning_rate": 2.3435664723187384e-05, + "loss": 0.0711, + "step": 92490 + }, + { + "epoch": 3.3614361508830584, + "grad_norm": 0.6196742057800293, + "learning_rate": 2.3430485607615494e-05, + "loss": 0.0684, + "step": 92500 + }, + { + "epoch": 3.3617995493858563, + "grad_norm": 0.6973705291748047, + "learning_rate": 2.3425306559668404e-05, + "loss": 0.1036, + "step": 92510 + }, + { + "epoch": 3.3621629478886548, + "grad_norm": 0.2943952679634094, + "learning_rate": 2.3420127579569257e-05, + "loss": 0.131, + "step": 92520 + }, + { + "epoch": 3.3625263463914528, + "grad_norm": 0.7704665660858154, + "learning_rate": 2.3414948667541187e-05, + "loss": 0.062, + "step": 92530 + }, + { + "epoch": 3.362889744894251, + "grad_norm": 0.4366964101791382, + "learning_rate": 2.3409769823807337e-05, + "loss": 0.2334, + "step": 92540 + }, + { + "epoch": 3.363253143397049, + "grad_norm": 1.0481702089309692, + "learning_rate": 2.340459104859084e-05, + "loss": 0.0911, + "step": 92550 + }, + { + "epoch": 3.363616541899847, + "grad_norm": 0.21111765503883362, + "learning_rate": 2.339941234211485e-05, + "loss": 0.0729, + "step": 92560 + }, + { + "epoch": 3.3639799404026456, + "grad_norm": 0.42436930537223816, + "learning_rate": 2.3394233704602484e-05, + "loss": 0.6901, + "step": 92570 + }, + { + "epoch": 3.3643433389054436, + "grad_norm": 1.4946538209915161, + "learning_rate": 2.3389055136276874e-05, + "loss": 0.0645, + "step": 92580 + }, + { + "epoch": 3.364706737408242, + "grad_norm": 0.7017802000045776, + "learning_rate": 2.3383876637361148e-05, + "loss": 0.0914, + "step": 92590 + }, + { + "epoch": 3.36507013591104, + "grad_norm": 0.6439974904060364, + "learning_rate": 2.337869820807842e-05, + "loss": 0.0786, + "step": 92600 + }, + { + "epoch": 3.365433534413838, + "grad_norm": 0.9561066627502441, + "learning_rate": 2.337351984865182e-05, + "loss": 0.0977, + "step": 92610 + }, + { + "epoch": 3.3657969329166364, + "grad_norm": 0.6108697652816772, + "learning_rate": 2.336834155930447e-05, + "loss": 0.0608, + "step": 92620 + }, + { + "epoch": 3.3661603314194344, + "grad_norm": 0.5634490251541138, + "learning_rate": 2.3363163340259476e-05, + "loss": 0.2946, + "step": 92630 + }, + { + "epoch": 3.366523729922233, + "grad_norm": 0.5420652031898499, + "learning_rate": 2.335798519173995e-05, + "loss": 0.0797, + "step": 92640 + }, + { + "epoch": 3.366887128425031, + "grad_norm": 0.5687423944473267, + "learning_rate": 2.3352807113968985e-05, + "loss": 0.069, + "step": 92650 + }, + { + "epoch": 3.3672505269278292, + "grad_norm": 0.5029795169830322, + "learning_rate": 2.3347629107169715e-05, + "loss": 0.0894, + "step": 92660 + }, + { + "epoch": 3.367613925430627, + "grad_norm": 0.4098545014858246, + "learning_rate": 2.3342451171565227e-05, + "loss": 0.0689, + "step": 92670 + }, + { + "epoch": 3.3679773239334256, + "grad_norm": 0.4281129539012909, + "learning_rate": 2.333727330737862e-05, + "loss": 0.0883, + "step": 92680 + }, + { + "epoch": 3.3683407224362236, + "grad_norm": 1.2608349323272705, + "learning_rate": 2.333209551483298e-05, + "loss": 0.0812, + "step": 92690 + }, + { + "epoch": 3.3687041209390216, + "grad_norm": 0.6354079842567444, + "learning_rate": 2.332691779415142e-05, + "loss": 0.1057, + "step": 92700 + }, + { + "epoch": 3.36906751944182, + "grad_norm": 0.5533850193023682, + "learning_rate": 2.3321740145557018e-05, + "loss": 0.081, + "step": 92710 + }, + { + "epoch": 3.369430917944618, + "grad_norm": 0.7266316413879395, + "learning_rate": 2.3316562569272865e-05, + "loss": 0.0744, + "step": 92720 + }, + { + "epoch": 3.3697943164474164, + "grad_norm": 0.7804214358329773, + "learning_rate": 2.3311385065522038e-05, + "loss": 0.0609, + "step": 92730 + }, + { + "epoch": 3.3701577149502144, + "grad_norm": 1.9746454954147339, + "learning_rate": 2.330620763452761e-05, + "loss": 0.09, + "step": 92740 + }, + { + "epoch": 3.3705211134530124, + "grad_norm": 0.7240809202194214, + "learning_rate": 2.330103027651268e-05, + "loss": 0.2196, + "step": 92750 + }, + { + "epoch": 3.370884511955811, + "grad_norm": 0.37045255303382874, + "learning_rate": 2.3295852991700314e-05, + "loss": 0.0786, + "step": 92760 + }, + { + "epoch": 3.371247910458609, + "grad_norm": 0.32502445578575134, + "learning_rate": 2.3290675780313577e-05, + "loss": 0.0598, + "step": 92770 + }, + { + "epoch": 3.3716113089614073, + "grad_norm": 0.5612372159957886, + "learning_rate": 2.3285498642575535e-05, + "loss": 0.0646, + "step": 92780 + }, + { + "epoch": 3.3719747074642052, + "grad_norm": 0.3779931664466858, + "learning_rate": 2.328032157870925e-05, + "loss": 0.0859, + "step": 92790 + }, + { + "epoch": 3.3723381059670032, + "grad_norm": 1.5073649883270264, + "learning_rate": 2.3275144588937797e-05, + "loss": 0.0893, + "step": 92800 + }, + { + "epoch": 3.3727015044698017, + "grad_norm": 2.095792293548584, + "learning_rate": 2.3269967673484227e-05, + "loss": 0.0537, + "step": 92810 + }, + { + "epoch": 3.3730649029725996, + "grad_norm": 1.275193452835083, + "learning_rate": 2.32647908325716e-05, + "loss": 0.0813, + "step": 92820 + }, + { + "epoch": 3.373428301475398, + "grad_norm": 0.4080447256565094, + "learning_rate": 2.3259614066422957e-05, + "loss": 0.0716, + "step": 92830 + }, + { + "epoch": 3.373791699978196, + "grad_norm": 0.6313503980636597, + "learning_rate": 2.325443737526134e-05, + "loss": 0.0792, + "step": 92840 + }, + { + "epoch": 3.374155098480994, + "grad_norm": 0.5047944188117981, + "learning_rate": 2.324926075930982e-05, + "loss": 0.0532, + "step": 92850 + }, + { + "epoch": 3.3745184969837925, + "grad_norm": 1.3697469234466553, + "learning_rate": 2.3244084218791422e-05, + "loss": 0.0832, + "step": 92860 + }, + { + "epoch": 3.3748818954865905, + "grad_norm": 0.7875816226005554, + "learning_rate": 2.3238907753929188e-05, + "loss": 0.0736, + "step": 92870 + }, + { + "epoch": 3.375245293989389, + "grad_norm": 0.43678218126296997, + "learning_rate": 2.3233731364946143e-05, + "loss": 0.5897, + "step": 92880 + }, + { + "epoch": 3.375608692492187, + "grad_norm": 0.36059579253196716, + "learning_rate": 2.322855505206534e-05, + "loss": 0.1002, + "step": 92890 + }, + { + "epoch": 3.375972090994985, + "grad_norm": 0.7359516620635986, + "learning_rate": 2.3223378815509795e-05, + "loss": 0.1078, + "step": 92900 + }, + { + "epoch": 3.3763354894977833, + "grad_norm": 0.4600794017314911, + "learning_rate": 2.3218202655502538e-05, + "loss": 0.0678, + "step": 92910 + }, + { + "epoch": 3.3766988880005813, + "grad_norm": 0.5198982357978821, + "learning_rate": 2.321302657226659e-05, + "loss": 0.0595, + "step": 92920 + }, + { + "epoch": 3.3770622865033797, + "grad_norm": 1.273722529411316, + "learning_rate": 2.320785056602495e-05, + "loss": 0.0675, + "step": 92930 + }, + { + "epoch": 3.3774256850061777, + "grad_norm": 0.33449608087539673, + "learning_rate": 2.3202674637000675e-05, + "loss": 0.0971, + "step": 92940 + }, + { + "epoch": 3.377789083508976, + "grad_norm": 0.9987308382987976, + "learning_rate": 2.3197498785416746e-05, + "loss": 0.127, + "step": 92950 + }, + { + "epoch": 3.378152482011774, + "grad_norm": 0.3726591467857361, + "learning_rate": 2.3192323011496186e-05, + "loss": 0.0732, + "step": 92960 + }, + { + "epoch": 3.3785158805145725, + "grad_norm": 1.3593406677246094, + "learning_rate": 2.3187147315461994e-05, + "loss": 0.0619, + "step": 92970 + }, + { + "epoch": 3.3788792790173705, + "grad_norm": 0.44492968916893005, + "learning_rate": 2.3181971697537165e-05, + "loss": 0.0628, + "step": 92980 + }, + { + "epoch": 3.3792426775201685, + "grad_norm": 0.7739204168319702, + "learning_rate": 2.3176796157944713e-05, + "loss": 0.0881, + "step": 92990 + }, + { + "epoch": 3.379606076022967, + "grad_norm": 0.6815133690834045, + "learning_rate": 2.317162069690763e-05, + "loss": 0.0798, + "step": 93000 + }, + { + "epoch": 3.379606076022967, + "eval_loss": 0.3195069134235382, + "eval_runtime": 179.6087, + "eval_samples_per_second": 41.279, + "eval_steps_per_second": 5.161, + "eval_wer": 0.13676548005881606, + "step": 93000 + }, + { + "epoch": 3.379969474525765, + "grad_norm": 0.5755239725112915, + "learning_rate": 2.316644531464891e-05, + "loss": 0.072, + "step": 93010 + }, + { + "epoch": 3.3803328730285633, + "grad_norm": 0.6259827017784119, + "learning_rate": 2.3161270011391535e-05, + "loss": 0.0585, + "step": 93020 + }, + { + "epoch": 3.3806962715313613, + "grad_norm": 0.35530751943588257, + "learning_rate": 2.315609478735848e-05, + "loss": 0.0735, + "step": 93030 + }, + { + "epoch": 3.3810596700341593, + "grad_norm": 0.5004699230194092, + "learning_rate": 2.3150919642772752e-05, + "loss": 0.0884, + "step": 93040 + }, + { + "epoch": 3.3814230685369577, + "grad_norm": 0.8541852235794067, + "learning_rate": 2.3145744577857316e-05, + "loss": 0.0844, + "step": 93050 + }, + { + "epoch": 3.3817864670397557, + "grad_norm": 1.4642248153686523, + "learning_rate": 2.314056959283515e-05, + "loss": 0.0759, + "step": 93060 + }, + { + "epoch": 3.382149865542554, + "grad_norm": 1.696931004524231, + "learning_rate": 2.3135394687929225e-05, + "loss": 0.0605, + "step": 93070 + }, + { + "epoch": 3.382513264045352, + "grad_norm": 2.729449987411499, + "learning_rate": 2.31302198633625e-05, + "loss": 0.068, + "step": 93080 + }, + { + "epoch": 3.38287666254815, + "grad_norm": 0.6647607684135437, + "learning_rate": 2.3125045119357953e-05, + "loss": 0.0832, + "step": 93090 + }, + { + "epoch": 3.3832400610509485, + "grad_norm": 0.4802834093570709, + "learning_rate": 2.3119870456138545e-05, + "loss": 0.0764, + "step": 93100 + }, + { + "epoch": 3.3836034595537465, + "grad_norm": 1.4223417043685913, + "learning_rate": 2.311469587392723e-05, + "loss": 0.0824, + "step": 93110 + }, + { + "epoch": 3.383966858056545, + "grad_norm": 1.8741366863250732, + "learning_rate": 2.310952137294695e-05, + "loss": 0.0715, + "step": 93120 + }, + { + "epoch": 3.384330256559343, + "grad_norm": 0.6077579855918884, + "learning_rate": 2.3104346953420676e-05, + "loss": 0.0657, + "step": 93130 + }, + { + "epoch": 3.384693655062141, + "grad_norm": 0.47796136140823364, + "learning_rate": 2.3099172615571353e-05, + "loss": 0.0814, + "step": 93140 + }, + { + "epoch": 3.3850570535649394, + "grad_norm": 1.0871938467025757, + "learning_rate": 2.3093998359621916e-05, + "loss": 0.0979, + "step": 93150 + }, + { + "epoch": 3.3854204520677373, + "grad_norm": 0.46470001339912415, + "learning_rate": 2.3088824185795305e-05, + "loss": 0.0796, + "step": 93160 + }, + { + "epoch": 3.3857838505705358, + "grad_norm": 0.7592546343803406, + "learning_rate": 2.3083650094314453e-05, + "loss": 0.0859, + "step": 93170 + }, + { + "epoch": 3.3861472490733338, + "grad_norm": 0.29902932047843933, + "learning_rate": 2.307847608540231e-05, + "loss": 0.0703, + "step": 93180 + }, + { + "epoch": 3.3865106475761317, + "grad_norm": 0.5462153553962708, + "learning_rate": 2.307330215928179e-05, + "loss": 0.069, + "step": 93190 + }, + { + "epoch": 3.38687404607893, + "grad_norm": 1.0044230222702026, + "learning_rate": 2.3068128316175834e-05, + "loss": 0.0714, + "step": 93200 + }, + { + "epoch": 3.387237444581728, + "grad_norm": 0.9057084918022156, + "learning_rate": 2.306295455630735e-05, + "loss": 0.0883, + "step": 93210 + }, + { + "epoch": 3.3876008430845266, + "grad_norm": 0.3063741624355316, + "learning_rate": 2.3057780879899252e-05, + "loss": 0.0672, + "step": 93220 + }, + { + "epoch": 3.3879642415873246, + "grad_norm": 0.4650433361530304, + "learning_rate": 2.3052607287174475e-05, + "loss": 0.0631, + "step": 93230 + }, + { + "epoch": 3.388327640090123, + "grad_norm": 0.5927343368530273, + "learning_rate": 2.3047433778355925e-05, + "loss": 0.1448, + "step": 93240 + }, + { + "epoch": 3.388691038592921, + "grad_norm": 0.8953503370285034, + "learning_rate": 2.3042260353666503e-05, + "loss": 0.0677, + "step": 93250 + }, + { + "epoch": 3.3890544370957194, + "grad_norm": 1.3331018686294556, + "learning_rate": 2.303708701332912e-05, + "loss": 0.0651, + "step": 93260 + }, + { + "epoch": 3.3894178355985174, + "grad_norm": 0.5606054067611694, + "learning_rate": 2.303191375756666e-05, + "loss": 0.1045, + "step": 93270 + }, + { + "epoch": 3.3897812341013154, + "grad_norm": 0.4888154864311218, + "learning_rate": 2.3026740586602043e-05, + "loss": 0.0742, + "step": 93280 + }, + { + "epoch": 3.390144632604114, + "grad_norm": 0.45617663860321045, + "learning_rate": 2.3021567500658156e-05, + "loss": 0.0822, + "step": 93290 + }, + { + "epoch": 3.390508031106912, + "grad_norm": 0.9321984052658081, + "learning_rate": 2.3016394499957886e-05, + "loss": 0.0726, + "step": 93300 + }, + { + "epoch": 3.39087142960971, + "grad_norm": 0.3022707402706146, + "learning_rate": 2.3011221584724108e-05, + "loss": 0.0992, + "step": 93310 + }, + { + "epoch": 3.391234828112508, + "grad_norm": 1.342934250831604, + "learning_rate": 2.3006048755179723e-05, + "loss": 0.0897, + "step": 93320 + }, + { + "epoch": 3.391598226615306, + "grad_norm": 0.9461100101470947, + "learning_rate": 2.3000876011547607e-05, + "loss": 0.0695, + "step": 93330 + }, + { + "epoch": 3.3919616251181046, + "grad_norm": 0.7840179204940796, + "learning_rate": 2.299570335405063e-05, + "loss": 0.0822, + "step": 93340 + }, + { + "epoch": 3.3923250236209026, + "grad_norm": 0.6755959391593933, + "learning_rate": 2.2990530782911664e-05, + "loss": 0.1182, + "step": 93350 + }, + { + "epoch": 3.392688422123701, + "grad_norm": 0.8153521418571472, + "learning_rate": 2.2985358298353566e-05, + "loss": 0.0852, + "step": 93360 + }, + { + "epoch": 3.393051820626499, + "grad_norm": 0.6111595630645752, + "learning_rate": 2.2980185900599222e-05, + "loss": 0.0635, + "step": 93370 + }, + { + "epoch": 3.393415219129297, + "grad_norm": 0.3639895021915436, + "learning_rate": 2.297501358987148e-05, + "loss": 0.1101, + "step": 93380 + }, + { + "epoch": 3.3937786176320954, + "grad_norm": 0.7763181924819946, + "learning_rate": 2.2969841366393195e-05, + "loss": 0.088, + "step": 93390 + }, + { + "epoch": 3.3941420161348934, + "grad_norm": 1.595831274986267, + "learning_rate": 2.2964669230387228e-05, + "loss": 0.1059, + "step": 93400 + }, + { + "epoch": 3.394505414637692, + "grad_norm": 0.5875428915023804, + "learning_rate": 2.2959497182076408e-05, + "loss": 0.0743, + "step": 93410 + }, + { + "epoch": 3.39486881314049, + "grad_norm": 1.4419046640396118, + "learning_rate": 2.2954325221683606e-05, + "loss": 0.085, + "step": 93420 + }, + { + "epoch": 3.395232211643288, + "grad_norm": 0.7197487354278564, + "learning_rate": 2.294915334943165e-05, + "loss": 0.0828, + "step": 93430 + }, + { + "epoch": 3.3955956101460862, + "grad_norm": 0.403689444065094, + "learning_rate": 2.294398156554338e-05, + "loss": 0.0969, + "step": 93440 + }, + { + "epoch": 3.3959590086488842, + "grad_norm": 0.3563007712364197, + "learning_rate": 2.2938809870241632e-05, + "loss": 0.1025, + "step": 93450 + }, + { + "epoch": 3.3963224071516827, + "grad_norm": 0.41774362325668335, + "learning_rate": 2.2933638263749218e-05, + "loss": 0.09, + "step": 93460 + }, + { + "epoch": 3.3966858056544806, + "grad_norm": 1.4661532640457153, + "learning_rate": 2.2928466746288993e-05, + "loss": 0.5754, + "step": 93470 + }, + { + "epoch": 3.3970492041572786, + "grad_norm": 0.6082340478897095, + "learning_rate": 2.2923295318083766e-05, + "loss": 0.0627, + "step": 93480 + }, + { + "epoch": 3.397412602660077, + "grad_norm": 1.2264482975006104, + "learning_rate": 2.2918123979356353e-05, + "loss": 0.0981, + "step": 93490 + }, + { + "epoch": 3.397776001162875, + "grad_norm": 1.3761318922042847, + "learning_rate": 2.2912952730329555e-05, + "loss": 0.0723, + "step": 93500 + }, + { + "epoch": 3.3981393996656735, + "grad_norm": 0.6967355608940125, + "learning_rate": 2.290778157122622e-05, + "loss": 0.0643, + "step": 93510 + }, + { + "epoch": 3.3985027981684715, + "grad_norm": 0.5545636415481567, + "learning_rate": 2.2902610502269122e-05, + "loss": 0.0619, + "step": 93520 + }, + { + "epoch": 3.39886619667127, + "grad_norm": 0.4898998737335205, + "learning_rate": 2.289743952368108e-05, + "loss": 1.7187, + "step": 93530 + }, + { + "epoch": 3.399229595174068, + "grad_norm": 0.593694806098938, + "learning_rate": 2.2892268635684885e-05, + "loss": 0.0942, + "step": 93540 + }, + { + "epoch": 3.3995929936768663, + "grad_norm": 0.8465686440467834, + "learning_rate": 2.2887097838503327e-05, + "loss": 0.0883, + "step": 93550 + }, + { + "epoch": 3.3999563921796643, + "grad_norm": 0.9347935318946838, + "learning_rate": 2.2881927132359214e-05, + "loss": 0.0872, + "step": 93560 + }, + { + "epoch": 3.4003197906824623, + "grad_norm": 0.5704132914543152, + "learning_rate": 2.287675651747533e-05, + "loss": 1.4816, + "step": 93570 + }, + { + "epoch": 3.4006831891852607, + "grad_norm": 0.25210240483283997, + "learning_rate": 2.287158599407445e-05, + "loss": 0.0696, + "step": 93580 + }, + { + "epoch": 3.4010465876880587, + "grad_norm": 0.7095610499382019, + "learning_rate": 2.2866415562379356e-05, + "loss": 0.0818, + "step": 93590 + }, + { + "epoch": 3.401409986190857, + "grad_norm": 1.3713339567184448, + "learning_rate": 2.2861245222612812e-05, + "loss": 0.088, + "step": 93600 + }, + { + "epoch": 3.401409986190857, + "eval_loss": 0.29974231123924255, + "eval_runtime": 178.5228, + "eval_samples_per_second": 41.53, + "eval_steps_per_second": 5.193, + "eval_wer": 0.13912538348430664, + "step": 93600 + }, + { + "epoch": 3.401773384693655, + "grad_norm": 1.1637344360351562, + "learning_rate": 2.285659199560597e-05, + "loss": 3.4301, + "step": 93610 + }, + { + "epoch": 3.402136783196453, + "grad_norm": 0.5053747296333313, + "learning_rate": 2.285142183111744e-05, + "loss": 0.0693, + "step": 93620 + }, + { + "epoch": 3.4025001816992515, + "grad_norm": 0.740875780582428, + "learning_rate": 2.2846251759203496e-05, + "loss": 0.0629, + "step": 93630 + }, + { + "epoch": 3.4028635802020495, + "grad_norm": 5.731196880340576, + "learning_rate": 2.2841081780086904e-05, + "loss": 0.0998, + "step": 93640 + }, + { + "epoch": 3.403226978704848, + "grad_norm": 0.7567720413208008, + "learning_rate": 2.2835911893990414e-05, + "loss": 0.0844, + "step": 93650 + }, + { + "epoch": 3.403590377207646, + "grad_norm": 0.4909075200557709, + "learning_rate": 2.283074210113677e-05, + "loss": 0.0724, + "step": 93660 + }, + { + "epoch": 3.403953775710444, + "grad_norm": 0.5206305980682373, + "learning_rate": 2.282557240174874e-05, + "loss": 0.0583, + "step": 93670 + }, + { + "epoch": 3.4043171742132423, + "grad_norm": 0.6271891593933105, + "learning_rate": 2.2820402796049063e-05, + "loss": 0.0776, + "step": 93680 + }, + { + "epoch": 3.4046805727160403, + "grad_norm": 0.6527193784713745, + "learning_rate": 2.281523328426047e-05, + "loss": 0.085, + "step": 93690 + }, + { + "epoch": 3.4050439712188387, + "grad_norm": 0.8757163882255554, + "learning_rate": 2.2810063866605706e-05, + "loss": 0.0707, + "step": 93700 + }, + { + "epoch": 3.4054073697216367, + "grad_norm": 0.8993749022483826, + "learning_rate": 2.280489454330748e-05, + "loss": 0.0713, + "step": 93710 + }, + { + "epoch": 3.4057707682244347, + "grad_norm": 0.611003577709198, + "learning_rate": 2.2799725314588555e-05, + "loss": 0.0635, + "step": 93720 + }, + { + "epoch": 3.406134166727233, + "grad_norm": 0.4539841413497925, + "learning_rate": 2.2794556180671636e-05, + "loss": 0.0718, + "step": 93730 + }, + { + "epoch": 3.406497565230031, + "grad_norm": 0.2736055850982666, + "learning_rate": 2.2789387141779445e-05, + "loss": 0.1861, + "step": 93740 + }, + { + "epoch": 3.4068609637328295, + "grad_norm": 0.6625291109085083, + "learning_rate": 2.2784218198134695e-05, + "loss": 0.107, + "step": 93750 + }, + { + "epoch": 3.4072243622356275, + "grad_norm": 0.49704796075820923, + "learning_rate": 2.277904934996009e-05, + "loss": 0.0621, + "step": 93760 + }, + { + "epoch": 3.4075877607384255, + "grad_norm": 0.471886545419693, + "learning_rate": 2.2773880597478356e-05, + "loss": 0.1414, + "step": 93770 + }, + { + "epoch": 3.407951159241224, + "grad_norm": 0.4752335548400879, + "learning_rate": 2.2768711940912185e-05, + "loss": 0.0657, + "step": 93780 + }, + { + "epoch": 3.408314557744022, + "grad_norm": 0.6662150025367737, + "learning_rate": 2.276354338048428e-05, + "loss": 0.0933, + "step": 93790 + }, + { + "epoch": 3.4086779562468204, + "grad_norm": 0.9255740642547607, + "learning_rate": 2.275837491641732e-05, + "loss": 0.1099, + "step": 93800 + }, + { + "epoch": 3.4090413547496183, + "grad_norm": 0.515019953250885, + "learning_rate": 2.2753206548934024e-05, + "loss": 0.0861, + "step": 93810 + }, + { + "epoch": 3.4094047532524168, + "grad_norm": 0.33219701051712036, + "learning_rate": 2.2748038278257063e-05, + "loss": 0.0842, + "step": 93820 + }, + { + "epoch": 3.4097681517552147, + "grad_norm": 0.40631362795829773, + "learning_rate": 2.2742870104609114e-05, + "loss": 0.0673, + "step": 93830 + }, + { + "epoch": 3.410131550258013, + "grad_norm": 0.4353393316268921, + "learning_rate": 2.2737702028212868e-05, + "loss": 0.0768, + "step": 93840 + }, + { + "epoch": 3.410494948760811, + "grad_norm": 1.10258150100708, + "learning_rate": 2.273253404929098e-05, + "loss": 0.0884, + "step": 93850 + }, + { + "epoch": 3.410858347263609, + "grad_norm": 3.386838912963867, + "learning_rate": 2.2727366168066142e-05, + "loss": 0.1114, + "step": 93860 + }, + { + "epoch": 3.4112217457664076, + "grad_norm": 1.1398248672485352, + "learning_rate": 2.2722198384761008e-05, + "loss": 0.0541, + "step": 93870 + }, + { + "epoch": 3.4115851442692056, + "grad_norm": 0.950499951839447, + "learning_rate": 2.2717030699598245e-05, + "loss": 0.0961, + "step": 93880 + }, + { + "epoch": 3.411948542772004, + "grad_norm": 0.48193359375, + "learning_rate": 2.2711863112800506e-05, + "loss": 0.4101, + "step": 93890 + }, + { + "epoch": 3.412311941274802, + "grad_norm": 0.827944815158844, + "learning_rate": 2.270669562459043e-05, + "loss": 0.0864, + "step": 93900 + }, + { + "epoch": 3.4126753397776, + "grad_norm": 1.7731389999389648, + "learning_rate": 2.270152823519069e-05, + "loss": 0.1077, + "step": 93910 + }, + { + "epoch": 3.4130387382803984, + "grad_norm": 0.750033438205719, + "learning_rate": 2.2696360944823923e-05, + "loss": 0.1013, + "step": 93920 + }, + { + "epoch": 3.4134021367831964, + "grad_norm": 1.206369161605835, + "learning_rate": 2.269119375371277e-05, + "loss": 0.3487, + "step": 93930 + }, + { + "epoch": 3.413765535285995, + "grad_norm": 0.38662195205688477, + "learning_rate": 2.2686026662079858e-05, + "loss": 0.1012, + "step": 93940 + }, + { + "epoch": 3.414128933788793, + "grad_norm": 0.5544074177742004, + "learning_rate": 2.2680859670147815e-05, + "loss": 0.0925, + "step": 93950 + }, + { + "epoch": 3.4144923322915908, + "grad_norm": 1.074537754058838, + "learning_rate": 2.267569277813929e-05, + "loss": 0.0875, + "step": 93960 + }, + { + "epoch": 3.414855730794389, + "grad_norm": 3.1776864528656006, + "learning_rate": 2.267052598627689e-05, + "loss": 3.4631, + "step": 93970 + }, + { + "epoch": 3.415219129297187, + "grad_norm": 0.5976073741912842, + "learning_rate": 2.266535929478324e-05, + "loss": 0.0741, + "step": 93980 + }, + { + "epoch": 3.4155825277999856, + "grad_norm": 0.507327675819397, + "learning_rate": 2.2660192703880935e-05, + "loss": 0.0834, + "step": 93990 + }, + { + "epoch": 3.4159459263027836, + "grad_norm": 0.46804341673851013, + "learning_rate": 2.2655026213792617e-05, + "loss": 0.0912, + "step": 94000 + }, + { + "epoch": 3.4163093248055816, + "grad_norm": 0.6629424095153809, + "learning_rate": 2.2649859824740876e-05, + "loss": 0.0829, + "step": 94010 + }, + { + "epoch": 3.41667272330838, + "grad_norm": 1.2816437482833862, + "learning_rate": 2.2644693536948315e-05, + "loss": 0.0895, + "step": 94020 + }, + { + "epoch": 3.417036121811178, + "grad_norm": 0.32198429107666016, + "learning_rate": 2.2639527350637525e-05, + "loss": 0.0852, + "step": 94030 + }, + { + "epoch": 3.4173995203139764, + "grad_norm": 0.49124446511268616, + "learning_rate": 2.263436126603109e-05, + "loss": 0.5835, + "step": 94040 + }, + { + "epoch": 3.4177629188167744, + "grad_norm": 0.45547664165496826, + "learning_rate": 2.262919528335163e-05, + "loss": 0.0716, + "step": 94050 + }, + { + "epoch": 3.4181263173195724, + "grad_norm": 0.4709664285182953, + "learning_rate": 2.2624029402821705e-05, + "loss": 0.0837, + "step": 94060 + }, + { + "epoch": 3.418489715822371, + "grad_norm": 0.8313547372817993, + "learning_rate": 2.2618863624663898e-05, + "loss": 0.0755, + "step": 94070 + }, + { + "epoch": 3.418853114325169, + "grad_norm": 0.8527863025665283, + "learning_rate": 2.2613697949100782e-05, + "loss": 0.0617, + "step": 94080 + }, + { + "epoch": 3.4192165128279672, + "grad_norm": 0.39365309476852417, + "learning_rate": 2.2608532376354932e-05, + "loss": 0.08, + "step": 94090 + }, + { + "epoch": 3.419579911330765, + "grad_norm": 0.9611566662788391, + "learning_rate": 2.2603366906648916e-05, + "loss": 0.0964, + "step": 94100 + }, + { + "epoch": 3.4199433098335636, + "grad_norm": 0.5890967845916748, + "learning_rate": 2.2598201540205294e-05, + "loss": 0.0627, + "step": 94110 + }, + { + "epoch": 3.4203067083363616, + "grad_norm": 0.24214434623718262, + "learning_rate": 2.259303627724662e-05, + "loss": 0.0614, + "step": 94120 + }, + { + "epoch": 3.42067010683916, + "grad_norm": 0.6111648678779602, + "learning_rate": 2.2587871117995445e-05, + "loss": 0.0712, + "step": 94130 + }, + { + "epoch": 3.421033505341958, + "grad_norm": 3.44565486907959, + "learning_rate": 2.2582706062674325e-05, + "loss": 0.6448, + "step": 94140 + }, + { + "epoch": 3.421396903844756, + "grad_norm": 2.6556754112243652, + "learning_rate": 2.25775411115058e-05, + "loss": 0.0893, + "step": 94150 + }, + { + "epoch": 3.4217603023475545, + "grad_norm": 1.2639325857162476, + "learning_rate": 2.257237626471241e-05, + "loss": 0.0819, + "step": 94160 + }, + { + "epoch": 3.4221237008503524, + "grad_norm": 0.7145587801933289, + "learning_rate": 2.2567211522516685e-05, + "loss": 0.0752, + "step": 94170 + }, + { + "epoch": 3.422487099353151, + "grad_norm": 1.2810157537460327, + "learning_rate": 2.2562046885141167e-05, + "loss": 0.0675, + "step": 94180 + }, + { + "epoch": 3.422850497855949, + "grad_norm": 0.4140676259994507, + "learning_rate": 2.2556882352808367e-05, + "loss": 0.0819, + "step": 94190 + }, + { + "epoch": 3.423213896358747, + "grad_norm": 0.9687098860740662, + "learning_rate": 2.2551717925740817e-05, + "loss": 0.0944, + "step": 94200 + }, + { + "epoch": 3.423213896358747, + "eval_loss": 0.31988459825515747, + "eval_runtime": 180.0602, + "eval_samples_per_second": 41.175, + "eval_steps_per_second": 5.148, + "eval_wer": 0.1369107048850001, + "step": 94200 + }, + { + "epoch": 3.4235772948615453, + "grad_norm": 0.8577378392219543, + "learning_rate": 2.2546553604161032e-05, + "loss": 0.0767, + "step": 94210 + }, + { + "epoch": 3.4239406933643433, + "grad_norm": 0.5929591655731201, + "learning_rate": 2.254138938829152e-05, + "loss": 0.0623, + "step": 94220 + }, + { + "epoch": 3.4243040918671417, + "grad_norm": 0.5392001867294312, + "learning_rate": 2.2536225278354787e-05, + "loss": 0.0848, + "step": 94230 + }, + { + "epoch": 3.4246674903699397, + "grad_norm": 0.5557697415351868, + "learning_rate": 2.253106127457335e-05, + "loss": 0.0784, + "step": 94240 + }, + { + "epoch": 3.4250308888727377, + "grad_norm": 0.5030058026313782, + "learning_rate": 2.2525897377169696e-05, + "loss": 0.0786, + "step": 94250 + }, + { + "epoch": 3.425394287375536, + "grad_norm": 1.1300536394119263, + "learning_rate": 2.2520733586366323e-05, + "loss": 0.0868, + "step": 94260 + }, + { + "epoch": 3.425757685878334, + "grad_norm": 0.6011260747909546, + "learning_rate": 2.2515569902385714e-05, + "loss": 0.0811, + "step": 94270 + }, + { + "epoch": 3.4261210843811325, + "grad_norm": 0.9162232279777527, + "learning_rate": 2.2510406325450357e-05, + "loss": 0.0749, + "step": 94280 + }, + { + "epoch": 3.4264844828839305, + "grad_norm": 1.2079869508743286, + "learning_rate": 2.2505242855782737e-05, + "loss": 0.0844, + "step": 94290 + }, + { + "epoch": 3.4268478813867285, + "grad_norm": 0.703209638595581, + "learning_rate": 2.2500079493605327e-05, + "loss": 0.0921, + "step": 94300 + }, + { + "epoch": 3.427211279889527, + "grad_norm": 1.7158406972885132, + "learning_rate": 2.249491623914059e-05, + "loss": 0.0942, + "step": 94310 + }, + { + "epoch": 3.427574678392325, + "grad_norm": 0.5088964700698853, + "learning_rate": 2.248975309261101e-05, + "loss": 0.0584, + "step": 94320 + }, + { + "epoch": 3.4279380768951233, + "grad_norm": 0.3864693343639374, + "learning_rate": 2.2484590054239024e-05, + "loss": 0.0579, + "step": 94330 + }, + { + "epoch": 3.4283014753979213, + "grad_norm": 0.4104454517364502, + "learning_rate": 2.2479427124247117e-05, + "loss": 0.1607, + "step": 94340 + }, + { + "epoch": 3.4286648739007197, + "grad_norm": 1.0903159379959106, + "learning_rate": 2.247426430285772e-05, + "loss": 0.0783, + "step": 94350 + }, + { + "epoch": 3.4290282724035177, + "grad_norm": 0.8514654636383057, + "learning_rate": 2.2469101590293284e-05, + "loss": 0.065, + "step": 94360 + }, + { + "epoch": 3.4293916709063157, + "grad_norm": 0.6110685467720032, + "learning_rate": 2.246393898677626e-05, + "loss": 0.0635, + "step": 94370 + }, + { + "epoch": 3.429755069409114, + "grad_norm": 0.38304954767227173, + "learning_rate": 2.245877649252908e-05, + "loss": 0.069, + "step": 94380 + }, + { + "epoch": 3.430118467911912, + "grad_norm": 1.2440117597579956, + "learning_rate": 2.245361410777418e-05, + "loss": 0.1056, + "step": 94390 + }, + { + "epoch": 3.4304818664147105, + "grad_norm": 0.7492786645889282, + "learning_rate": 2.2448451832733987e-05, + "loss": 0.0809, + "step": 94400 + }, + { + "epoch": 3.4308452649175085, + "grad_norm": 0.4058247208595276, + "learning_rate": 2.244328966763093e-05, + "loss": 0.0934, + "step": 94410 + }, + { + "epoch": 3.431208663420307, + "grad_norm": 1.7672019004821777, + "learning_rate": 2.243812761268742e-05, + "loss": 0.0696, + "step": 94420 + }, + { + "epoch": 3.431572061923105, + "grad_norm": 0.4631694555282593, + "learning_rate": 2.2432965668125878e-05, + "loss": 2.0515, + "step": 94430 + }, + { + "epoch": 3.431935460425903, + "grad_norm": 1.0094584226608276, + "learning_rate": 2.2427803834168716e-05, + "loss": 0.0686, + "step": 94440 + }, + { + "epoch": 3.4322988589287013, + "grad_norm": 0.7469279766082764, + "learning_rate": 2.2422642111038328e-05, + "loss": 0.0753, + "step": 94450 + }, + { + "epoch": 3.4326622574314993, + "grad_norm": 0.5456721186637878, + "learning_rate": 2.2417480498957126e-05, + "loss": 0.1019, + "step": 94460 + }, + { + "epoch": 3.4330256559342978, + "grad_norm": 2.7943344116210938, + "learning_rate": 2.2412318998147492e-05, + "loss": 0.0705, + "step": 94470 + }, + { + "epoch": 3.4333890544370957, + "grad_norm": 5.648090839385986, + "learning_rate": 2.2407157608831836e-05, + "loss": 0.0853, + "step": 94480 + }, + { + "epoch": 3.4337524529398937, + "grad_norm": 0.7561296224594116, + "learning_rate": 2.2401996331232528e-05, + "loss": 0.0796, + "step": 94490 + }, + { + "epoch": 3.434115851442692, + "grad_norm": 1.078397274017334, + "learning_rate": 2.2396835165571954e-05, + "loss": 0.0706, + "step": 94500 + }, + { + "epoch": 3.43447924994549, + "grad_norm": 0.31419476866722107, + "learning_rate": 2.2391674112072498e-05, + "loss": 0.0871, + "step": 94510 + }, + { + "epoch": 3.4348426484482886, + "grad_norm": 0.4185982644557953, + "learning_rate": 2.2386513170956513e-05, + "loss": 0.0643, + "step": 94520 + }, + { + "epoch": 3.4352060469510866, + "grad_norm": 0.8538812398910522, + "learning_rate": 2.2381352342446385e-05, + "loss": 0.0627, + "step": 94530 + }, + { + "epoch": 3.4355694454538845, + "grad_norm": 0.42258143424987793, + "learning_rate": 2.2376191626764462e-05, + "loss": 0.0774, + "step": 94540 + }, + { + "epoch": 3.435932843956683, + "grad_norm": 0.7472050786018372, + "learning_rate": 2.237103102413311e-05, + "loss": 0.0717, + "step": 94550 + }, + { + "epoch": 3.436296242459481, + "grad_norm": 1.003833293914795, + "learning_rate": 2.2365870534774678e-05, + "loss": 0.1762, + "step": 94560 + }, + { + "epoch": 3.4366596409622794, + "grad_norm": 3.8594932556152344, + "learning_rate": 2.2360710158911507e-05, + "loss": 0.0672, + "step": 94570 + }, + { + "epoch": 3.4370230394650774, + "grad_norm": 2.909346103668213, + "learning_rate": 2.235554989676595e-05, + "loss": 0.0676, + "step": 94580 + }, + { + "epoch": 3.4373864379678754, + "grad_norm": 0.6960200667381287, + "learning_rate": 2.235038974856033e-05, + "loss": 0.0823, + "step": 94590 + }, + { + "epoch": 3.437749836470674, + "grad_norm": 0.5080627202987671, + "learning_rate": 2.2345229714516998e-05, + "loss": 0.1036, + "step": 94600 + }, + { + "epoch": 3.4381132349734718, + "grad_norm": 0.4168925881385803, + "learning_rate": 2.2340069794858267e-05, + "loss": 0.0925, + "step": 94610 + }, + { + "epoch": 3.43847663347627, + "grad_norm": 0.32866325974464417, + "learning_rate": 2.233490998980647e-05, + "loss": 0.0729, + "step": 94620 + }, + { + "epoch": 3.438840031979068, + "grad_norm": 0.5904275178909302, + "learning_rate": 2.2329750299583913e-05, + "loss": 0.2434, + "step": 94630 + }, + { + "epoch": 3.4392034304818666, + "grad_norm": 0.8439253568649292, + "learning_rate": 2.232459072441292e-05, + "loss": 0.0738, + "step": 94640 + }, + { + "epoch": 3.4395668289846646, + "grad_norm": 2.668860912322998, + "learning_rate": 2.2319431264515792e-05, + "loss": 0.0934, + "step": 94650 + }, + { + "epoch": 3.4399302274874626, + "grad_norm": 0.3184053301811218, + "learning_rate": 2.231427192011483e-05, + "loss": 0.0986, + "step": 94660 + }, + { + "epoch": 3.440293625990261, + "grad_norm": 1.6480865478515625, + "learning_rate": 2.2309112691432337e-05, + "loss": 3.1534, + "step": 94670 + }, + { + "epoch": 3.440657024493059, + "grad_norm": 1.2210397720336914, + "learning_rate": 2.2303953578690602e-05, + "loss": 0.0899, + "step": 94680 + }, + { + "epoch": 3.4410204229958574, + "grad_norm": 0.8659685254096985, + "learning_rate": 2.2298794582111922e-05, + "loss": 0.0755, + "step": 94690 + }, + { + "epoch": 3.4413838214986554, + "grad_norm": 1.4027395248413086, + "learning_rate": 2.229363570191857e-05, + "loss": 0.0773, + "step": 94700 + }, + { + "epoch": 3.441747220001454, + "grad_norm": 0.47845137119293213, + "learning_rate": 2.228847693833282e-05, + "loss": 0.0872, + "step": 94710 + }, + { + "epoch": 3.442110618504252, + "grad_norm": 1.3525196313858032, + "learning_rate": 2.228331829157695e-05, + "loss": 0.0653, + "step": 94720 + }, + { + "epoch": 3.44247401700705, + "grad_norm": 1.1353908777236938, + "learning_rate": 2.2278159761873235e-05, + "loss": 0.0758, + "step": 94730 + }, + { + "epoch": 3.4428374155098482, + "grad_norm": 1.6163307428359985, + "learning_rate": 2.2273001349443935e-05, + "loss": 0.8463, + "step": 94740 + }, + { + "epoch": 3.443200814012646, + "grad_norm": 0.5957239866256714, + "learning_rate": 2.22678430545113e-05, + "loss": 0.0654, + "step": 94750 + }, + { + "epoch": 3.4435642125154446, + "grad_norm": 1.3498693704605103, + "learning_rate": 2.2262684877297586e-05, + "loss": 0.0716, + "step": 94760 + }, + { + "epoch": 3.4439276110182426, + "grad_norm": 4.1495208740234375, + "learning_rate": 2.2257526818025036e-05, + "loss": 0.0578, + "step": 94770 + }, + { + "epoch": 3.4442910095210406, + "grad_norm": 0.5842284560203552, + "learning_rate": 2.2252368876915903e-05, + "loss": 0.0851, + "step": 94780 + }, + { + "epoch": 3.444654408023839, + "grad_norm": 0.6782490015029907, + "learning_rate": 2.2247211054192425e-05, + "loss": 0.0831, + "step": 94790 + }, + { + "epoch": 3.445017806526637, + "grad_norm": 1.1757545471191406, + "learning_rate": 2.224205335007682e-05, + "loss": 0.0995, + "step": 94800 + }, + { + "epoch": 3.445017806526637, + "eval_loss": 0.31772205233573914, + "eval_runtime": 178.9226, + "eval_samples_per_second": 41.437, + "eval_steps_per_second": 5.181, + "eval_wer": 0.1385807903861165, + "step": 94800 + }, + { + "epoch": 3.4453812050294355, + "grad_norm": 2.117018461227417, + "learning_rate": 2.223689576479132e-05, + "loss": 0.0569, + "step": 94810 + }, + { + "epoch": 3.4457446035322334, + "grad_norm": 0.7748499512672424, + "learning_rate": 2.2231738298558158e-05, + "loss": 0.0602, + "step": 94820 + }, + { + "epoch": 3.4461080020350314, + "grad_norm": 0.43736356496810913, + "learning_rate": 2.2226580951599544e-05, + "loss": 0.0952, + "step": 94830 + }, + { + "epoch": 3.44647140053783, + "grad_norm": 0.4574269950389862, + "learning_rate": 2.222142372413769e-05, + "loss": 0.07, + "step": 94840 + }, + { + "epoch": 3.446834799040628, + "grad_norm": 0.883139431476593, + "learning_rate": 2.2216266616394793e-05, + "loss": 0.089, + "step": 94850 + }, + { + "epoch": 3.4471981975434263, + "grad_norm": 0.8173096179962158, + "learning_rate": 2.2211109628593067e-05, + "loss": 0.1002, + "step": 94860 + }, + { + "epoch": 3.4475615960462243, + "grad_norm": 0.5318263173103333, + "learning_rate": 2.2205952760954704e-05, + "loss": 0.0586, + "step": 94870 + }, + { + "epoch": 3.4479249945490222, + "grad_norm": 0.18389153480529785, + "learning_rate": 2.2200796013701898e-05, + "loss": 0.0589, + "step": 94880 + }, + { + "epoch": 3.4482883930518207, + "grad_norm": 0.47492220997810364, + "learning_rate": 2.2195639387056833e-05, + "loss": 0.6377, + "step": 94890 + }, + { + "epoch": 3.4486517915546187, + "grad_norm": 0.3797650635242462, + "learning_rate": 2.219048288124168e-05, + "loss": 0.086, + "step": 94900 + }, + { + "epoch": 3.449015190057417, + "grad_norm": 1.8727638721466064, + "learning_rate": 2.218532649647863e-05, + "loss": 0.0876, + "step": 94910 + }, + { + "epoch": 3.449378588560215, + "grad_norm": 1.5461180210113525, + "learning_rate": 2.218017023298985e-05, + "loss": 0.0739, + "step": 94920 + }, + { + "epoch": 3.4497419870630135, + "grad_norm": 0.7635065317153931, + "learning_rate": 2.2175014090997497e-05, + "loss": 0.0724, + "step": 94930 + }, + { + "epoch": 3.4501053855658115, + "grad_norm": 0.9960238337516785, + "learning_rate": 2.216985807072374e-05, + "loss": 0.1, + "step": 94940 + }, + { + "epoch": 3.4504687840686095, + "grad_norm": 0.8764038681983948, + "learning_rate": 2.2164702172390717e-05, + "loss": 0.0761, + "step": 94950 + }, + { + "epoch": 3.450832182571408, + "grad_norm": 0.46660447120666504, + "learning_rate": 2.21595463962206e-05, + "loss": 0.0609, + "step": 94960 + }, + { + "epoch": 3.451195581074206, + "grad_norm": 0.7150638103485107, + "learning_rate": 2.215439074243552e-05, + "loss": 0.0697, + "step": 94970 + }, + { + "epoch": 3.4515589795770043, + "grad_norm": 1.3236603736877441, + "learning_rate": 2.2149235211257624e-05, + "loss": 0.0812, + "step": 94980 + }, + { + "epoch": 3.4519223780798023, + "grad_norm": 0.8594760894775391, + "learning_rate": 2.214407980290903e-05, + "loss": 0.0899, + "step": 94990 + }, + { + "epoch": 3.4522857765826007, + "grad_norm": 0.6767681837081909, + "learning_rate": 2.2138924517611874e-05, + "loss": 0.0807, + "step": 95000 + }, + { + "epoch": 3.4526491750853987, + "grad_norm": 0.4284761846065521, + "learning_rate": 2.213376935558829e-05, + "loss": 0.0719, + "step": 95010 + }, + { + "epoch": 3.4530125735881967, + "grad_norm": 0.35260239243507385, + "learning_rate": 2.2128614317060385e-05, + "loss": 0.0685, + "step": 95020 + }, + { + "epoch": 3.453375972090995, + "grad_norm": 0.7802332043647766, + "learning_rate": 2.2123459402250275e-05, + "loss": 0.0772, + "step": 95030 + }, + { + "epoch": 3.453739370593793, + "grad_norm": 0.5842748284339905, + "learning_rate": 2.211830461138005e-05, + "loss": 0.0744, + "step": 95040 + }, + { + "epoch": 3.4541027690965915, + "grad_norm": 0.6803625822067261, + "learning_rate": 2.2113149944671842e-05, + "loss": 2.0802, + "step": 95050 + }, + { + "epoch": 3.4544661675993895, + "grad_norm": 3.472691774368286, + "learning_rate": 2.2107995402347726e-05, + "loss": 0.1059, + "step": 95060 + }, + { + "epoch": 3.4548295661021875, + "grad_norm": 0.8646115660667419, + "learning_rate": 2.21028409846298e-05, + "loss": 0.0745, + "step": 95070 + }, + { + "epoch": 3.455192964604986, + "grad_norm": 0.9967368245124817, + "learning_rate": 2.2097686691740148e-05, + "loss": 0.0703, + "step": 95080 + }, + { + "epoch": 3.455556363107784, + "grad_norm": 3.0647103786468506, + "learning_rate": 2.2092532523900842e-05, + "loss": 0.0838, + "step": 95090 + }, + { + "epoch": 3.4559197616105823, + "grad_norm": 1.3772906064987183, + "learning_rate": 2.208737848133397e-05, + "loss": 0.1081, + "step": 95100 + }, + { + "epoch": 3.4562831601133803, + "grad_norm": 0.9632165431976318, + "learning_rate": 2.20822245642616e-05, + "loss": 0.0736, + "step": 95110 + }, + { + "epoch": 3.4566465586161783, + "grad_norm": 0.6966424584388733, + "learning_rate": 2.207707077290579e-05, + "loss": 0.0656, + "step": 95120 + }, + { + "epoch": 3.4570099571189767, + "grad_norm": 0.45801427960395813, + "learning_rate": 2.2071917107488604e-05, + "loss": 0.0616, + "step": 95130 + }, + { + "epoch": 3.4573733556217747, + "grad_norm": 0.4734851121902466, + "learning_rate": 2.206676356823208e-05, + "loss": 0.0897, + "step": 95140 + }, + { + "epoch": 3.457736754124573, + "grad_norm": 0.842993438243866, + "learning_rate": 2.2061610155358287e-05, + "loss": 0.0648, + "step": 95150 + }, + { + "epoch": 3.458100152627371, + "grad_norm": 4.05435848236084, + "learning_rate": 2.2056456869089256e-05, + "loss": 0.0896, + "step": 95160 + }, + { + "epoch": 3.458463551130169, + "grad_norm": 0.8322821855545044, + "learning_rate": 2.2051303709647027e-05, + "loss": 0.07, + "step": 95170 + }, + { + "epoch": 3.4588269496329676, + "grad_norm": 0.5541922450065613, + "learning_rate": 2.2046150677253618e-05, + "loss": 0.0852, + "step": 95180 + }, + { + "epoch": 3.4591903481357655, + "grad_norm": 0.5772917866706848, + "learning_rate": 2.2040997772131077e-05, + "loss": 0.0837, + "step": 95190 + }, + { + "epoch": 3.459553746638564, + "grad_norm": 0.5851882100105286, + "learning_rate": 2.2035844994501418e-05, + "loss": 0.089, + "step": 95200 + }, + { + "epoch": 3.459917145141362, + "grad_norm": 1.0712413787841797, + "learning_rate": 2.2030692344586647e-05, + "loss": 0.1188, + "step": 95210 + }, + { + "epoch": 3.4602805436441604, + "grad_norm": 0.4828985333442688, + "learning_rate": 2.202553982260878e-05, + "loss": 0.0737, + "step": 95220 + }, + { + "epoch": 3.4606439421469584, + "grad_norm": 0.3994056284427643, + "learning_rate": 2.2020387428789807e-05, + "loss": 0.0601, + "step": 95230 + }, + { + "epoch": 3.4610073406497563, + "grad_norm": 0.8096089363098145, + "learning_rate": 2.201523516335175e-05, + "loss": 0.081, + "step": 95240 + }, + { + "epoch": 3.4613707391525548, + "grad_norm": 1.1060764789581299, + "learning_rate": 2.2010083026516588e-05, + "loss": 0.0827, + "step": 95250 + }, + { + "epoch": 3.4617341376553528, + "grad_norm": 0.3895101547241211, + "learning_rate": 2.2004931018506313e-05, + "loss": 0.0759, + "step": 95260 + }, + { + "epoch": 3.462097536158151, + "grad_norm": 0.4792887568473816, + "learning_rate": 2.1999779139542903e-05, + "loss": 0.0839, + "step": 95270 + }, + { + "epoch": 3.462460934660949, + "grad_norm": 0.8363034129142761, + "learning_rate": 2.1994627389848325e-05, + "loss": 0.0668, + "step": 95280 + }, + { + "epoch": 3.4628243331637476, + "grad_norm": 0.4887206256389618, + "learning_rate": 2.198947576964457e-05, + "loss": 0.0653, + "step": 95290 + }, + { + "epoch": 3.4631877316665456, + "grad_norm": 4.43966817855835, + "learning_rate": 2.198432427915359e-05, + "loss": 0.113, + "step": 95300 + }, + { + "epoch": 3.4635511301693436, + "grad_norm": 0.7550996541976929, + "learning_rate": 2.197917291859735e-05, + "loss": 0.1061, + "step": 95310 + }, + { + "epoch": 3.463914528672142, + "grad_norm": 0.64354407787323, + "learning_rate": 2.1974021688197797e-05, + "loss": 0.0585, + "step": 95320 + }, + { + "epoch": 3.46427792717494, + "grad_norm": 1.6076833009719849, + "learning_rate": 2.1968870588176877e-05, + "loss": 0.0663, + "step": 95330 + }, + { + "epoch": 3.4646413256777384, + "grad_norm": 0.5022009015083313, + "learning_rate": 2.1963719618756548e-05, + "loss": 0.1249, + "step": 95340 + }, + { + "epoch": 3.4650047241805364, + "grad_norm": 0.5584505796432495, + "learning_rate": 2.1958568780158736e-05, + "loss": 0.082, + "step": 95350 + }, + { + "epoch": 3.4653681226833344, + "grad_norm": 1.0235668420791626, + "learning_rate": 2.1953418072605375e-05, + "loss": 0.079, + "step": 95360 + }, + { + "epoch": 3.465731521186133, + "grad_norm": 1.6643403768539429, + "learning_rate": 2.194826749631839e-05, + "loss": 0.0644, + "step": 95370 + }, + { + "epoch": 3.466094919688931, + "grad_norm": 0.6343129873275757, + "learning_rate": 2.1943117051519688e-05, + "loss": 0.0642, + "step": 95380 + }, + { + "epoch": 3.4664583181917292, + "grad_norm": 0.3787631392478943, + "learning_rate": 2.193796673843121e-05, + "loss": 0.0764, + "step": 95390 + }, + { + "epoch": 3.466821716694527, + "grad_norm": 0.6121103763580322, + "learning_rate": 2.1932816557274846e-05, + "loss": 0.1018, + "step": 95400 + }, + { + "epoch": 3.466821716694527, + "eval_loss": 0.32295721769332886, + "eval_runtime": 179.2759, + "eval_samples_per_second": 41.355, + "eval_steps_per_second": 5.171, + "eval_wer": 0.13823588142392942, + "step": 95400 + }, + { + "epoch": 3.467185115197325, + "grad_norm": 0.6954236626625061, + "learning_rate": 2.1927666508272505e-05, + "loss": 0.0636, + "step": 95410 + }, + { + "epoch": 3.4675485137001236, + "grad_norm": 1.4941720962524414, + "learning_rate": 2.1922516591646072e-05, + "loss": 0.0725, + "step": 95420 + }, + { + "epoch": 3.4679119122029216, + "grad_norm": 0.487307071685791, + "learning_rate": 2.1917366807617463e-05, + "loss": 0.0668, + "step": 95430 + }, + { + "epoch": 3.46827531070572, + "grad_norm": 0.6700599193572998, + "learning_rate": 2.1912217156408547e-05, + "loss": 0.0679, + "step": 95440 + }, + { + "epoch": 3.468638709208518, + "grad_norm": 0.6767346858978271, + "learning_rate": 2.1907067638241208e-05, + "loss": 0.0813, + "step": 95450 + }, + { + "epoch": 3.469002107711316, + "grad_norm": 2.5367612838745117, + "learning_rate": 2.190191825333732e-05, + "loss": 0.1077, + "step": 95460 + }, + { + "epoch": 3.4693655062141144, + "grad_norm": 0.5402595400810242, + "learning_rate": 2.1896769001918742e-05, + "loss": 0.073, + "step": 95470 + }, + { + "epoch": 3.4697289047169124, + "grad_norm": 0.6329225301742554, + "learning_rate": 2.1891619884207354e-05, + "loss": 0.085, + "step": 95480 + }, + { + "epoch": 3.470092303219711, + "grad_norm": 0.5831683874130249, + "learning_rate": 2.1886470900425008e-05, + "loss": 0.0892, + "step": 95490 + }, + { + "epoch": 3.470455701722509, + "grad_norm": 2.0767431259155273, + "learning_rate": 2.188132205079355e-05, + "loss": 0.0894, + "step": 95500 + }, + { + "epoch": 3.4708191002253073, + "grad_norm": 0.6538608074188232, + "learning_rate": 2.1876173335534835e-05, + "loss": 0.0625, + "step": 95510 + }, + { + "epoch": 3.4711824987281052, + "grad_norm": 2.3269765377044678, + "learning_rate": 2.1871024754870677e-05, + "loss": 0.066, + "step": 95520 + }, + { + "epoch": 3.4715458972309032, + "grad_norm": 0.8417708873748779, + "learning_rate": 2.1865876309022947e-05, + "loss": 0.0716, + "step": 95530 + }, + { + "epoch": 3.4719092957337017, + "grad_norm": 0.220072403550148, + "learning_rate": 2.186072799821345e-05, + "loss": 0.0811, + "step": 95540 + }, + { + "epoch": 3.4722726942364996, + "grad_norm": 0.7579740285873413, + "learning_rate": 2.185557982266402e-05, + "loss": 0.0944, + "step": 95550 + }, + { + "epoch": 3.472636092739298, + "grad_norm": 0.6758162379264832, + "learning_rate": 2.1850431782596466e-05, + "loss": 0.0807, + "step": 95560 + }, + { + "epoch": 3.472999491242096, + "grad_norm": 1.1941717863082886, + "learning_rate": 2.1845283878232585e-05, + "loss": 0.0713, + "step": 95570 + }, + { + "epoch": 3.4733628897448945, + "grad_norm": 1.0327314138412476, + "learning_rate": 2.1840136109794213e-05, + "loss": 0.094, + "step": 95580 + }, + { + "epoch": 3.4737262882476925, + "grad_norm": 0.4671393036842346, + "learning_rate": 2.183498847750313e-05, + "loss": 0.074, + "step": 95590 + }, + { + "epoch": 3.4740896867504905, + "grad_norm": 0.4938909113407135, + "learning_rate": 2.1829840981581134e-05, + "loss": 0.0846, + "step": 95600 + }, + { + "epoch": 3.474453085253289, + "grad_norm": 1.201877474784851, + "learning_rate": 2.182469362225e-05, + "loss": 0.0852, + "step": 95610 + }, + { + "epoch": 3.474816483756087, + "grad_norm": 0.8695741295814514, + "learning_rate": 2.181954639973153e-05, + "loss": 0.0635, + "step": 95620 + }, + { + "epoch": 3.4751798822588853, + "grad_norm": 0.5013503432273865, + "learning_rate": 2.1814399314247492e-05, + "loss": 0.0752, + "step": 95630 + }, + { + "epoch": 3.4755432807616833, + "grad_norm": 0.45891714096069336, + "learning_rate": 2.180925236601965e-05, + "loss": 0.0795, + "step": 95640 + }, + { + "epoch": 3.4759066792644813, + "grad_norm": 0.9783682823181152, + "learning_rate": 2.1804105555269772e-05, + "loss": 0.0782, + "step": 95650 + }, + { + "epoch": 3.4762700777672797, + "grad_norm": 0.3729395270347595, + "learning_rate": 2.17989588822196e-05, + "loss": 0.0787, + "step": 95660 + }, + { + "epoch": 3.4766334762700777, + "grad_norm": 0.9214646220207214, + "learning_rate": 2.1793812347090918e-05, + "loss": 0.0526, + "step": 95670 + }, + { + "epoch": 3.476996874772876, + "grad_norm": 0.4130672216415405, + "learning_rate": 2.178866595010545e-05, + "loss": 0.0806, + "step": 95680 + }, + { + "epoch": 3.477360273275674, + "grad_norm": 0.38258814811706543, + "learning_rate": 2.178351969148494e-05, + "loss": 0.08, + "step": 95690 + }, + { + "epoch": 3.477723671778472, + "grad_norm": 0.7837095260620117, + "learning_rate": 2.1778373571451124e-05, + "loss": 0.187, + "step": 95700 + }, + { + "epoch": 3.4780870702812705, + "grad_norm": 1.1877186298370361, + "learning_rate": 2.177322759022572e-05, + "loss": 0.0982, + "step": 95710 + }, + { + "epoch": 3.4784504687840685, + "grad_norm": 0.8703027367591858, + "learning_rate": 2.1768081748030463e-05, + "loss": 0.066, + "step": 95720 + }, + { + "epoch": 3.478813867286867, + "grad_norm": 0.47968795895576477, + "learning_rate": 2.176293604508707e-05, + "loss": 0.0758, + "step": 95730 + }, + { + "epoch": 3.479177265789665, + "grad_norm": 0.5117019414901733, + "learning_rate": 2.175779048161724e-05, + "loss": 0.0838, + "step": 95740 + }, + { + "epoch": 3.479540664292463, + "grad_norm": 0.738413393497467, + "learning_rate": 2.1752645057842686e-05, + "loss": 0.1082, + "step": 95750 + }, + { + "epoch": 3.4799040627952613, + "grad_norm": 1.208146333694458, + "learning_rate": 2.1747499773985092e-05, + "loss": 0.0773, + "step": 95760 + }, + { + "epoch": 3.4802674612980593, + "grad_norm": 0.4587624967098236, + "learning_rate": 2.1742354630266172e-05, + "loss": 0.8829, + "step": 95770 + }, + { + "epoch": 3.4806308598008577, + "grad_norm": 0.5824334025382996, + "learning_rate": 2.1737209626907594e-05, + "loss": 0.0732, + "step": 95780 + }, + { + "epoch": 3.4809942583036557, + "grad_norm": 0.649512767791748, + "learning_rate": 2.173206476413105e-05, + "loss": 0.0883, + "step": 95790 + }, + { + "epoch": 3.481357656806454, + "grad_norm": 1.6176427602767944, + "learning_rate": 2.1726920042158194e-05, + "loss": 0.1347, + "step": 95800 + }, + { + "epoch": 3.481721055309252, + "grad_norm": 0.5751357674598694, + "learning_rate": 2.1721775461210718e-05, + "loss": 0.0736, + "step": 95810 + }, + { + "epoch": 3.48208445381205, + "grad_norm": 0.701714813709259, + "learning_rate": 2.171663102151028e-05, + "loss": 0.0742, + "step": 95820 + }, + { + "epoch": 3.4824478523148485, + "grad_norm": 0.4845195710659027, + "learning_rate": 2.1711486723278522e-05, + "loss": 0.0668, + "step": 95830 + }, + { + "epoch": 3.4828112508176465, + "grad_norm": 0.7643341422080994, + "learning_rate": 2.1706342566737105e-05, + "loss": 0.0767, + "step": 95840 + }, + { + "epoch": 3.483174649320445, + "grad_norm": 0.4030478894710541, + "learning_rate": 2.170119855210765e-05, + "loss": 0.0836, + "step": 95850 + }, + { + "epoch": 3.483538047823243, + "grad_norm": 0.9113463163375854, + "learning_rate": 2.1696054679611828e-05, + "loss": 0.1111, + "step": 95860 + }, + { + "epoch": 3.4839014463260414, + "grad_norm": 0.46975961327552795, + "learning_rate": 2.1690910949471255e-05, + "loss": 0.0592, + "step": 95870 + }, + { + "epoch": 3.4842648448288394, + "grad_norm": 0.4419771134853363, + "learning_rate": 2.1685767361907554e-05, + "loss": 0.0996, + "step": 95880 + }, + { + "epoch": 3.4846282433316373, + "grad_norm": 0.6320810914039612, + "learning_rate": 2.168062391714235e-05, + "loss": 0.1194, + "step": 95890 + }, + { + "epoch": 3.4849916418344358, + "grad_norm": 0.3942996859550476, + "learning_rate": 2.1675480615397234e-05, + "loss": 0.0825, + "step": 95900 + }, + { + "epoch": 3.4853550403372338, + "grad_norm": 0.3566824197769165, + "learning_rate": 2.167033745689384e-05, + "loss": 0.0666, + "step": 95910 + }, + { + "epoch": 3.485718438840032, + "grad_norm": 0.48564252257347107, + "learning_rate": 2.1665194441853765e-05, + "loss": 0.0687, + "step": 95920 + }, + { + "epoch": 3.48608183734283, + "grad_norm": 0.3420655429363251, + "learning_rate": 2.166005157049859e-05, + "loss": 0.0627, + "step": 95930 + }, + { + "epoch": 3.486445235845628, + "grad_norm": 0.3921029567718506, + "learning_rate": 2.165490884304991e-05, + "loss": 0.0621, + "step": 95940 + }, + { + "epoch": 3.4868086343484266, + "grad_norm": 0.609154224395752, + "learning_rate": 2.1649766259729298e-05, + "loss": 0.0788, + "step": 95950 + }, + { + "epoch": 3.4871720328512246, + "grad_norm": 0.4822045564651489, + "learning_rate": 2.1644623820758347e-05, + "loss": 0.086, + "step": 95960 + }, + { + "epoch": 3.487535431354023, + "grad_norm": 0.9078019261360168, + "learning_rate": 2.1639481526358616e-05, + "loss": 0.0632, + "step": 95970 + }, + { + "epoch": 3.487898829856821, + "grad_norm": 0.43268144130706787, + "learning_rate": 2.163433937675167e-05, + "loss": 0.704, + "step": 95980 + }, + { + "epoch": 3.488262228359619, + "grad_norm": 0.4717707633972168, + "learning_rate": 2.1629197372159055e-05, + "loss": 0.0839, + "step": 95990 + }, + { + "epoch": 3.4886256268624174, + "grad_norm": 1.3348411321640015, + "learning_rate": 2.162405551280234e-05, + "loss": 0.0634, + "step": 96000 + }, + { + "epoch": 3.4886256268624174, + "eval_loss": 0.3231545090675354, + "eval_runtime": 179.599, + "eval_samples_per_second": 41.281, + "eval_steps_per_second": 5.161, + "eval_wer": 0.13457803111441902, + "step": 96000 + }, + { + "epoch": 3.4889890253652154, + "grad_norm": 0.6468439698219299, + "learning_rate": 2.1618913798903064e-05, + "loss": 0.0726, + "step": 96010 + }, + { + "epoch": 3.489352423868014, + "grad_norm": 0.6067370772361755, + "learning_rate": 2.1613772230682762e-05, + "loss": 0.0662, + "step": 96020 + }, + { + "epoch": 3.489715822370812, + "grad_norm": 4.149559020996094, + "learning_rate": 2.1608630808362966e-05, + "loss": 0.0685, + "step": 96030 + }, + { + "epoch": 3.4900792208736098, + "grad_norm": 0.5562649369239807, + "learning_rate": 2.1603489532165194e-05, + "loss": 0.0805, + "step": 96040 + }, + { + "epoch": 3.490442619376408, + "grad_norm": 1.4775289297103882, + "learning_rate": 2.159834840231098e-05, + "loss": 0.0917, + "step": 96050 + }, + { + "epoch": 3.490806017879206, + "grad_norm": 0.4842393100261688, + "learning_rate": 2.1593207419021833e-05, + "loss": 0.0956, + "step": 96060 + }, + { + "epoch": 3.4911694163820046, + "grad_norm": 0.5330924391746521, + "learning_rate": 2.1588066582519257e-05, + "loss": 1.4342, + "step": 96070 + }, + { + "epoch": 3.4915328148848026, + "grad_norm": 0.2691187858581543, + "learning_rate": 2.158292589302475e-05, + "loss": 0.0785, + "step": 96080 + }, + { + "epoch": 3.491896213387601, + "grad_norm": 0.42408475279808044, + "learning_rate": 2.15777853507598e-05, + "loss": 0.0903, + "step": 96090 + }, + { + "epoch": 3.492259611890399, + "grad_norm": 2.1866910457611084, + "learning_rate": 2.157264495594591e-05, + "loss": 0.0759, + "step": 96100 + }, + { + "epoch": 3.492623010393197, + "grad_norm": 0.2777433693408966, + "learning_rate": 2.1567504708804557e-05, + "loss": 0.097, + "step": 96110 + }, + { + "epoch": 3.4929864088959954, + "grad_norm": 0.4376242756843567, + "learning_rate": 2.156236460955721e-05, + "loss": 0.0691, + "step": 96120 + }, + { + "epoch": 3.4933498073987934, + "grad_norm": 0.6688746213912964, + "learning_rate": 2.1557224658425347e-05, + "loss": 0.1149, + "step": 96130 + }, + { + "epoch": 3.493713205901592, + "grad_norm": 1.0745078325271606, + "learning_rate": 2.155208485563041e-05, + "loss": 0.0955, + "step": 96140 + }, + { + "epoch": 3.49407660440439, + "grad_norm": 2.5494561195373535, + "learning_rate": 2.154694520139388e-05, + "loss": 0.1092, + "step": 96150 + }, + { + "epoch": 3.4944400029071883, + "grad_norm": 0.681524932384491, + "learning_rate": 2.1541805695937192e-05, + "loss": 0.0966, + "step": 96160 + }, + { + "epoch": 3.4948034014099862, + "grad_norm": 0.8694483637809753, + "learning_rate": 2.1536666339481797e-05, + "loss": 0.2408, + "step": 96170 + }, + { + "epoch": 3.4951667999127842, + "grad_norm": 0.36922356486320496, + "learning_rate": 2.1531527132249113e-05, + "loss": 0.0785, + "step": 96180 + }, + { + "epoch": 3.4955301984155827, + "grad_norm": 0.3221088945865631, + "learning_rate": 2.1526388074460594e-05, + "loss": 0.0954, + "step": 96190 + }, + { + "epoch": 3.4958935969183806, + "grad_norm": 0.868403434753418, + "learning_rate": 2.1521249166337658e-05, + "loss": 0.086, + "step": 96200 + }, + { + "epoch": 3.496256995421179, + "grad_norm": 0.6299740076065063, + "learning_rate": 2.1516110408101714e-05, + "loss": 0.0829, + "step": 96210 + }, + { + "epoch": 3.496620393923977, + "grad_norm": 0.744305431842804, + "learning_rate": 2.1510971799974177e-05, + "loss": 0.0669, + "step": 96220 + }, + { + "epoch": 3.496983792426775, + "grad_norm": 0.9115591049194336, + "learning_rate": 2.1505833342176442e-05, + "loss": 0.0635, + "step": 96230 + }, + { + "epoch": 3.4973471909295735, + "grad_norm": 0.4782477617263794, + "learning_rate": 2.1500695034929926e-05, + "loss": 0.0879, + "step": 96240 + }, + { + "epoch": 3.4977105894323715, + "grad_norm": 0.6979982852935791, + "learning_rate": 2.1495556878456014e-05, + "loss": 0.0725, + "step": 96250 + }, + { + "epoch": 3.49807398793517, + "grad_norm": 0.4281218945980072, + "learning_rate": 2.1490932666723033e-05, + "loss": 3.0583, + "step": 96260 + }, + { + "epoch": 3.498437386437968, + "grad_norm": 2.243551254272461, + "learning_rate": 2.148579479732697e-05, + "loss": 0.0783, + "step": 96270 + }, + { + "epoch": 3.498800784940766, + "grad_norm": 0.8817722797393799, + "learning_rate": 2.1480657079345505e-05, + "loss": 0.0782, + "step": 96280 + }, + { + "epoch": 3.4991641834435643, + "grad_norm": 0.6434484124183655, + "learning_rate": 2.1475519513000002e-05, + "loss": 0.0924, + "step": 96290 + }, + { + "epoch": 3.4995275819463623, + "grad_norm": 0.7872079014778137, + "learning_rate": 2.1470382098511813e-05, + "loss": 0.0852, + "step": 96300 + }, + { + "epoch": 3.4998909804491607, + "grad_norm": 1.4359321594238281, + "learning_rate": 2.1465244836102312e-05, + "loss": 0.0759, + "step": 96310 + }, + { + "epoch": 3.5002543789519587, + "grad_norm": 0.307167112827301, + "learning_rate": 2.1460107725992838e-05, + "loss": 0.0755, + "step": 96320 + }, + { + "epoch": 3.5006177774547567, + "grad_norm": 0.6518979072570801, + "learning_rate": 2.1454970768404724e-05, + "loss": 0.063, + "step": 96330 + }, + { + "epoch": 3.500981175957555, + "grad_norm": 1.06710684299469, + "learning_rate": 2.1449833963559293e-05, + "loss": 0.0851, + "step": 96340 + }, + { + "epoch": 3.501344574460353, + "grad_norm": 0.6126148104667664, + "learning_rate": 2.14446973116779e-05, + "loss": 0.0888, + "step": 96350 + }, + { + "epoch": 3.5017079729631515, + "grad_norm": 0.7071236968040466, + "learning_rate": 2.1439560812981848e-05, + "loss": 0.1182, + "step": 96360 + }, + { + "epoch": 3.5020713714659495, + "grad_norm": 2.222172737121582, + "learning_rate": 2.1434424467692455e-05, + "loss": 0.1948, + "step": 96370 + }, + { + "epoch": 3.5024347699687475, + "grad_norm": 1.0301228761672974, + "learning_rate": 2.142928827603102e-05, + "loss": 0.0754, + "step": 96380 + }, + { + "epoch": 3.502798168471546, + "grad_norm": 1.2635794878005981, + "learning_rate": 2.142415223821884e-05, + "loss": 0.0892, + "step": 96390 + }, + { + "epoch": 3.5031615669743443, + "grad_norm": 0.6022728681564331, + "learning_rate": 2.141901635447723e-05, + "loss": 0.0853, + "step": 96400 + }, + { + "epoch": 3.5035249654771423, + "grad_norm": 1.4563450813293457, + "learning_rate": 2.1413880625027462e-05, + "loss": 0.1804, + "step": 96410 + }, + { + "epoch": 3.5038883639799403, + "grad_norm": 0.5189762711524963, + "learning_rate": 2.140874505009082e-05, + "loss": 0.068, + "step": 96420 + }, + { + "epoch": 3.5042517624827387, + "grad_norm": 1.153496503829956, + "learning_rate": 2.1403609629888578e-05, + "loss": 0.1057, + "step": 96430 + }, + { + "epoch": 3.5046151609855367, + "grad_norm": 0.8835933208465576, + "learning_rate": 2.139847436464199e-05, + "loss": 0.0967, + "step": 96440 + }, + { + "epoch": 3.504978559488335, + "grad_norm": 0.9221778512001038, + "learning_rate": 2.1393339254572334e-05, + "loss": 0.0876, + "step": 96450 + }, + { + "epoch": 3.505341957991133, + "grad_norm": 0.32772889733314514, + "learning_rate": 2.138820429990086e-05, + "loss": 0.0628, + "step": 96460 + }, + { + "epoch": 3.505705356493931, + "grad_norm": 0.482295960187912, + "learning_rate": 2.138306950084881e-05, + "loss": 0.0768, + "step": 96470 + }, + { + "epoch": 3.5060687549967295, + "grad_norm": 0.7036164999008179, + "learning_rate": 2.1377934857637427e-05, + "loss": 0.0804, + "step": 96480 + }, + { + "epoch": 3.5064321534995275, + "grad_norm": 0.6191168427467346, + "learning_rate": 2.137280037048793e-05, + "loss": 0.0966, + "step": 96490 + }, + { + "epoch": 3.506795552002326, + "grad_norm": 1.3983656167984009, + "learning_rate": 2.1367666039621575e-05, + "loss": 0.0961, + "step": 96500 + }, + { + "epoch": 3.507158950505124, + "grad_norm": 0.36156126856803894, + "learning_rate": 2.1362531865259564e-05, + "loss": 0.1172, + "step": 96510 + }, + { + "epoch": 3.507522349007922, + "grad_norm": 0.7854031324386597, + "learning_rate": 2.135739784762311e-05, + "loss": 0.0675, + "step": 96520 + }, + { + "epoch": 3.5078857475107204, + "grad_norm": 1.4320755004882812, + "learning_rate": 2.135226398693342e-05, + "loss": 0.0677, + "step": 96530 + }, + { + "epoch": 3.5082491460135183, + "grad_norm": 1.0837007761001587, + "learning_rate": 2.13471302834117e-05, + "loss": 0.064, + "step": 96540 + }, + { + "epoch": 3.5086125445163168, + "grad_norm": 2.1308250427246094, + "learning_rate": 2.134199673727914e-05, + "loss": 0.094, + "step": 96550 + }, + { + "epoch": 3.5089759430191148, + "grad_norm": 0.3453007638454437, + "learning_rate": 2.1336863348756927e-05, + "loss": 0.1269, + "step": 96560 + }, + { + "epoch": 3.5093393415219127, + "grad_norm": 0.2881056070327759, + "learning_rate": 2.133173011806624e-05, + "loss": 0.1384, + "step": 96570 + }, + { + "epoch": 3.509702740024711, + "grad_norm": 1.0903687477111816, + "learning_rate": 2.1326597045428236e-05, + "loss": 0.0777, + "step": 96580 + }, + { + "epoch": 3.510066138527509, + "grad_norm": 0.44503089785575867, + "learning_rate": 2.1321464131064105e-05, + "loss": 0.0847, + "step": 96590 + }, + { + "epoch": 3.5104295370303076, + "grad_norm": 0.562317430973053, + "learning_rate": 2.1316331375195002e-05, + "loss": 0.0654, + "step": 96600 + }, + { + "epoch": 3.5104295370303076, + "eval_loss": 0.3297887444496155, + "eval_runtime": 180.5411, + "eval_samples_per_second": 41.065, + "eval_steps_per_second": 5.135, + "eval_wer": 0.13554922213952475, + "step": 96600 + }, + { + "epoch": 3.5107929355331056, + "grad_norm": 0.9230484962463379, + "learning_rate": 2.131119877804207e-05, + "loss": 0.0732, + "step": 96610 + }, + { + "epoch": 3.5111563340359035, + "grad_norm": 4.310789585113525, + "learning_rate": 2.1306066339826457e-05, + "loss": 0.1152, + "step": 96620 + }, + { + "epoch": 3.511519732538702, + "grad_norm": 0.3380495011806488, + "learning_rate": 2.1300934060769296e-05, + "loss": 0.2172, + "step": 96630 + }, + { + "epoch": 3.5118831310415, + "grad_norm": 0.524796187877655, + "learning_rate": 2.129580194109173e-05, + "loss": 0.0933, + "step": 96640 + }, + { + "epoch": 3.5122465295442984, + "grad_norm": 0.6239128708839417, + "learning_rate": 2.1290669981014882e-05, + "loss": 0.0738, + "step": 96650 + }, + { + "epoch": 3.5126099280470964, + "grad_norm": 0.976793646812439, + "learning_rate": 2.128553818075987e-05, + "loss": 0.0777, + "step": 96660 + }, + { + "epoch": 3.5129733265498944, + "grad_norm": 1.0099037885665894, + "learning_rate": 2.1280406540547794e-05, + "loss": 0.0661, + "step": 96670 + }, + { + "epoch": 3.513336725052693, + "grad_norm": 0.6158185601234436, + "learning_rate": 2.127527506059976e-05, + "loss": 0.0782, + "step": 96680 + }, + { + "epoch": 3.513700123555491, + "grad_norm": 0.28984469175338745, + "learning_rate": 2.1270143741136884e-05, + "loss": 0.0849, + "step": 96690 + }, + { + "epoch": 3.514063522058289, + "grad_norm": 1.0383470058441162, + "learning_rate": 2.126501258238024e-05, + "loss": 0.0993, + "step": 96700 + }, + { + "epoch": 3.514426920561087, + "grad_norm": 0.32447656989097595, + "learning_rate": 2.1259881584550912e-05, + "loss": 0.0801, + "step": 96710 + }, + { + "epoch": 3.5147903190638856, + "grad_norm": 0.6273922920227051, + "learning_rate": 2.1254750747869972e-05, + "loss": 0.0771, + "step": 96720 + }, + { + "epoch": 3.5151537175666836, + "grad_norm": 0.7431948781013489, + "learning_rate": 2.1249620072558508e-05, + "loss": 0.09, + "step": 96730 + }, + { + "epoch": 3.515517116069482, + "grad_norm": 0.4066786468029022, + "learning_rate": 2.124448955883757e-05, + "loss": 0.0772, + "step": 96740 + }, + { + "epoch": 3.51588051457228, + "grad_norm": 0.31640565395355225, + "learning_rate": 2.1239359206928214e-05, + "loss": 0.0722, + "step": 96750 + }, + { + "epoch": 3.516243913075078, + "grad_norm": 0.4770644009113312, + "learning_rate": 2.1234229017051488e-05, + "loss": 0.0742, + "step": 96760 + }, + { + "epoch": 3.5166073115778764, + "grad_norm": 0.25661101937294006, + "learning_rate": 2.122909898942843e-05, + "loss": 2.1078, + "step": 96770 + }, + { + "epoch": 3.5169707100806744, + "grad_norm": 0.7563055157661438, + "learning_rate": 2.1223969124280086e-05, + "loss": 0.0827, + "step": 96780 + }, + { + "epoch": 3.517334108583473, + "grad_norm": 0.690726637840271, + "learning_rate": 2.1218839421827474e-05, + "loss": 0.0743, + "step": 96790 + }, + { + "epoch": 3.517697507086271, + "grad_norm": 1.4046658277511597, + "learning_rate": 2.1213709882291623e-05, + "loss": 0.0789, + "step": 96800 + }, + { + "epoch": 3.518060905589069, + "grad_norm": 0.5457620620727539, + "learning_rate": 2.1208580505893542e-05, + "loss": 0.1015, + "step": 96810 + }, + { + "epoch": 3.5184243040918672, + "grad_norm": 1.5679082870483398, + "learning_rate": 2.1203451292854222e-05, + "loss": 0.0695, + "step": 96820 + }, + { + "epoch": 3.5187877025946652, + "grad_norm": 0.6426934003829956, + "learning_rate": 2.119832224339469e-05, + "loss": 0.0706, + "step": 96830 + }, + { + "epoch": 3.5191511010974637, + "grad_norm": 2.433039665222168, + "learning_rate": 2.119319335773593e-05, + "loss": 0.1022, + "step": 96840 + }, + { + "epoch": 3.5195144996002616, + "grad_norm": 1.6734727621078491, + "learning_rate": 2.118806463609892e-05, + "loss": 0.0889, + "step": 96850 + }, + { + "epoch": 3.5198778981030596, + "grad_norm": 0.5065173506736755, + "learning_rate": 2.1182936078704636e-05, + "loss": 0.1207, + "step": 96860 + }, + { + "epoch": 3.520241296605858, + "grad_norm": 0.8844788670539856, + "learning_rate": 2.117780768577406e-05, + "loss": 0.0761, + "step": 96870 + }, + { + "epoch": 3.520604695108656, + "grad_norm": 0.48743927478790283, + "learning_rate": 2.117267945752815e-05, + "loss": 0.0719, + "step": 96880 + }, + { + "epoch": 3.5209680936114545, + "grad_norm": 0.8191256523132324, + "learning_rate": 2.116755139418787e-05, + "loss": 0.0796, + "step": 96890 + }, + { + "epoch": 3.5213314921142524, + "grad_norm": 0.6994876265525818, + "learning_rate": 2.116242349597416e-05, + "loss": 0.0961, + "step": 96900 + }, + { + "epoch": 3.5216948906170504, + "grad_norm": 0.2623302638530731, + "learning_rate": 2.115729576310796e-05, + "loss": 0.0902, + "step": 96910 + }, + { + "epoch": 3.522058289119849, + "grad_norm": 0.44012176990509033, + "learning_rate": 2.1152168195810222e-05, + "loss": 0.0695, + "step": 96920 + }, + { + "epoch": 3.522421687622647, + "grad_norm": 0.39194273948669434, + "learning_rate": 2.114704079430187e-05, + "loss": 0.0626, + "step": 96930 + }, + { + "epoch": 3.5227850861254453, + "grad_norm": 2.427734851837158, + "learning_rate": 2.1141913558803818e-05, + "loss": 0.084, + "step": 96940 + }, + { + "epoch": 3.5231484846282433, + "grad_norm": 0.9978201985359192, + "learning_rate": 2.1136786489536975e-05, + "loss": 0.1378, + "step": 96950 + }, + { + "epoch": 3.5235118831310412, + "grad_norm": 0.9334393739700317, + "learning_rate": 2.113165958672226e-05, + "loss": 0.0942, + "step": 96960 + }, + { + "epoch": 3.5238752816338397, + "grad_norm": 0.5911192893981934, + "learning_rate": 2.1126532850580578e-05, + "loss": 0.0585, + "step": 96970 + }, + { + "epoch": 3.524238680136638, + "grad_norm": 0.35171112418174744, + "learning_rate": 2.112140628133281e-05, + "loss": 0.0747, + "step": 96980 + }, + { + "epoch": 3.524602078639436, + "grad_norm": 0.4741590917110443, + "learning_rate": 2.1116279879199846e-05, + "loss": 0.1048, + "step": 96990 + }, + { + "epoch": 3.524965477142234, + "grad_norm": 4.185072422027588, + "learning_rate": 2.1111153644402558e-05, + "loss": 0.095, + "step": 97000 + }, + { + "epoch": 3.5253288756450325, + "grad_norm": 0.4775453209877014, + "learning_rate": 2.110602757716182e-05, + "loss": 0.0866, + "step": 97010 + }, + { + "epoch": 3.5256922741478305, + "grad_norm": 4.56588077545166, + "learning_rate": 2.110090167769851e-05, + "loss": 0.0696, + "step": 97020 + }, + { + "epoch": 3.526055672650629, + "grad_norm": 0.5117329359054565, + "learning_rate": 2.109577594623347e-05, + "loss": 0.0655, + "step": 97030 + }, + { + "epoch": 3.526419071153427, + "grad_norm": 0.45523038506507874, + "learning_rate": 2.109065038298755e-05, + "loss": 0.0838, + "step": 97040 + }, + { + "epoch": 3.526782469656225, + "grad_norm": 0.7840531468391418, + "learning_rate": 2.10855249881816e-05, + "loss": 0.0915, + "step": 97050 + }, + { + "epoch": 3.5271458681590233, + "grad_norm": 0.42048537731170654, + "learning_rate": 2.1080399762036447e-05, + "loss": 0.0882, + "step": 97060 + }, + { + "epoch": 3.5275092666618213, + "grad_norm": 0.36128684878349304, + "learning_rate": 2.1075274704772924e-05, + "loss": 0.0609, + "step": 97070 + }, + { + "epoch": 3.5278726651646197, + "grad_norm": 0.4942743182182312, + "learning_rate": 2.107014981661185e-05, + "loss": 0.0883, + "step": 97080 + }, + { + "epoch": 3.5282360636674177, + "grad_norm": 0.442184716463089, + "learning_rate": 2.1065025097774034e-05, + "loss": 0.1055, + "step": 97090 + }, + { + "epoch": 3.5285994621702157, + "grad_norm": 0.8446380496025085, + "learning_rate": 2.105990054848029e-05, + "loss": 0.069, + "step": 97100 + }, + { + "epoch": 3.528962860673014, + "grad_norm": 0.5443778038024902, + "learning_rate": 2.1054776168951412e-05, + "loss": 0.0868, + "step": 97110 + }, + { + "epoch": 3.529326259175812, + "grad_norm": 0.6392617225646973, + "learning_rate": 2.1049651959408194e-05, + "loss": 0.0639, + "step": 97120 + }, + { + "epoch": 3.5296896576786105, + "grad_norm": 0.7572323083877563, + "learning_rate": 2.104452792007141e-05, + "loss": 0.0894, + "step": 97130 + }, + { + "epoch": 3.5300530561814085, + "grad_norm": 1.0398154258728027, + "learning_rate": 2.1039404051161852e-05, + "loss": 0.0756, + "step": 97140 + }, + { + "epoch": 3.5304164546842065, + "grad_norm": 1.265731930732727, + "learning_rate": 2.1034280352900277e-05, + "loss": 0.0822, + "step": 97150 + }, + { + "epoch": 3.530779853187005, + "grad_norm": 0.5231419205665588, + "learning_rate": 2.1029156825507453e-05, + "loss": 0.0723, + "step": 97160 + }, + { + "epoch": 3.531143251689803, + "grad_norm": 0.6905182600021362, + "learning_rate": 2.1024033469204134e-05, + "loss": 0.0611, + "step": 97170 + }, + { + "epoch": 3.5315066501926013, + "grad_norm": 0.5370905995368958, + "learning_rate": 2.1018910284211067e-05, + "loss": 0.0803, + "step": 97180 + }, + { + "epoch": 3.5318700486953993, + "grad_norm": 0.41777387261390686, + "learning_rate": 2.1013787270748992e-05, + "loss": 0.0868, + "step": 97190 + }, + { + "epoch": 3.5322334471981973, + "grad_norm": 0.6612346172332764, + "learning_rate": 2.1008664429038633e-05, + "loss": 0.0969, + "step": 97200 + }, + { + "epoch": 3.5322334471981973, + "eval_loss": 0.33734017610549927, + "eval_runtime": 179.3542, + "eval_samples_per_second": 41.337, + "eval_steps_per_second": 5.169, + "eval_wer": 0.13551291593297873, + "step": 97200 + }, + { + "epoch": 3.5325968457009957, + "grad_norm": 0.5762473940849304, + "learning_rate": 2.1003541759300732e-05, + "loss": 0.0925, + "step": 97210 + }, + { + "epoch": 3.5329602442037937, + "grad_norm": 1.3973640203475952, + "learning_rate": 2.0998419261755994e-05, + "loss": 0.1158, + "step": 97220 + }, + { + "epoch": 3.533323642706592, + "grad_norm": 0.452146977186203, + "learning_rate": 2.099329693662513e-05, + "loss": 0.0726, + "step": 97230 + }, + { + "epoch": 3.53368704120939, + "grad_norm": 0.8005980253219604, + "learning_rate": 2.0988174784128847e-05, + "loss": 0.0796, + "step": 97240 + }, + { + "epoch": 3.534050439712188, + "grad_norm": 0.8147309422492981, + "learning_rate": 2.0983052804487835e-05, + "loss": 0.0779, + "step": 97250 + }, + { + "epoch": 3.5344138382149866, + "grad_norm": 1.6006520986557007, + "learning_rate": 2.097793099792279e-05, + "loss": 0.0699, + "step": 97260 + }, + { + "epoch": 3.534777236717785, + "grad_norm": 0.44036349654197693, + "learning_rate": 2.0972809364654384e-05, + "loss": 0.0663, + "step": 97270 + }, + { + "epoch": 3.535140635220583, + "grad_norm": 0.8019761443138123, + "learning_rate": 2.0967687904903296e-05, + "loss": 0.0705, + "step": 97280 + }, + { + "epoch": 3.535504033723381, + "grad_norm": 0.49069860577583313, + "learning_rate": 2.0962566618890188e-05, + "loss": 0.0941, + "step": 97290 + }, + { + "epoch": 3.5358674322261794, + "grad_norm": 0.7364464998245239, + "learning_rate": 2.095744550683572e-05, + "loss": 0.0856, + "step": 97300 + }, + { + "epoch": 3.5362308307289774, + "grad_norm": 0.6838924288749695, + "learning_rate": 2.0952324568960543e-05, + "loss": 0.0649, + "step": 97310 + }, + { + "epoch": 3.536594229231776, + "grad_norm": 0.5033947229385376, + "learning_rate": 2.0947203805485293e-05, + "loss": 0.0916, + "step": 97320 + }, + { + "epoch": 3.536957627734574, + "grad_norm": 1.3875548839569092, + "learning_rate": 2.0942083216630622e-05, + "loss": 0.0615, + "step": 97330 + }, + { + "epoch": 3.5373210262373718, + "grad_norm": 0.6745208501815796, + "learning_rate": 2.0936962802617137e-05, + "loss": 0.077, + "step": 97340 + }, + { + "epoch": 3.53768442474017, + "grad_norm": 0.5911176800727844, + "learning_rate": 2.093184256366547e-05, + "loss": 0.0725, + "step": 97350 + }, + { + "epoch": 3.538047823242968, + "grad_norm": 0.6544129848480225, + "learning_rate": 2.092672249999623e-05, + "loss": 0.1027, + "step": 97360 + }, + { + "epoch": 3.5384112217457666, + "grad_norm": 0.6527793407440186, + "learning_rate": 2.0921602611830036e-05, + "loss": 0.0601, + "step": 97370 + }, + { + "epoch": 3.5387746202485646, + "grad_norm": 0.569724977016449, + "learning_rate": 2.0916482899387467e-05, + "loss": 0.0709, + "step": 97380 + }, + { + "epoch": 3.5391380187513626, + "grad_norm": 0.6393773555755615, + "learning_rate": 2.0911363362889118e-05, + "loss": 0.0999, + "step": 97390 + }, + { + "epoch": 3.539501417254161, + "grad_norm": 1.7416220903396606, + "learning_rate": 2.090624400255558e-05, + "loss": 0.0772, + "step": 97400 + }, + { + "epoch": 3.539864815756959, + "grad_norm": 0.6363007426261902, + "learning_rate": 2.0901124818607417e-05, + "loss": 0.0923, + "step": 97410 + }, + { + "epoch": 3.5402282142597574, + "grad_norm": 0.5386870503425598, + "learning_rate": 2.0896005811265207e-05, + "loss": 0.0625, + "step": 97420 + }, + { + "epoch": 3.5405916127625554, + "grad_norm": 1.5430821180343628, + "learning_rate": 2.0890886980749504e-05, + "loss": 0.0777, + "step": 97430 + }, + { + "epoch": 3.5409550112653534, + "grad_norm": 0.41363903880119324, + "learning_rate": 2.0885768327280854e-05, + "loss": 0.1062, + "step": 97440 + }, + { + "epoch": 3.541318409768152, + "grad_norm": 0.7380490303039551, + "learning_rate": 2.0880649851079812e-05, + "loss": 0.0607, + "step": 97450 + }, + { + "epoch": 3.54168180827095, + "grad_norm": 0.5600608587265015, + "learning_rate": 2.0875531552366914e-05, + "loss": 0.0785, + "step": 97460 + }, + { + "epoch": 3.5420452067737482, + "grad_norm": 0.46273377537727356, + "learning_rate": 2.0870413431362687e-05, + "loss": 0.0725, + "step": 97470 + }, + { + "epoch": 3.542408605276546, + "grad_norm": 1.1162970066070557, + "learning_rate": 2.0865295488287655e-05, + "loss": 0.0599, + "step": 97480 + }, + { + "epoch": 3.542772003779344, + "grad_norm": 0.5625380277633667, + "learning_rate": 2.0860177723362316e-05, + "loss": 0.0767, + "step": 97490 + }, + { + "epoch": 3.5431354022821426, + "grad_norm": 0.7391917109489441, + "learning_rate": 2.08550601368072e-05, + "loss": 0.0871, + "step": 97500 + }, + { + "epoch": 3.5434988007849406, + "grad_norm": 0.8282499313354492, + "learning_rate": 2.084994272884279e-05, + "loss": 0.0997, + "step": 97510 + }, + { + "epoch": 3.543862199287739, + "grad_norm": 1.1234526634216309, + "learning_rate": 2.0844825499689587e-05, + "loss": 0.068, + "step": 97520 + }, + { + "epoch": 3.544225597790537, + "grad_norm": 0.3851841986179352, + "learning_rate": 2.083970844956806e-05, + "loss": 0.1154, + "step": 97530 + }, + { + "epoch": 3.544588996293335, + "grad_norm": 0.4419216811656952, + "learning_rate": 2.0834591578698704e-05, + "loss": 0.0946, + "step": 97540 + }, + { + "epoch": 3.5449523947961334, + "grad_norm": 0.4050745368003845, + "learning_rate": 2.082947488730197e-05, + "loss": 0.0904, + "step": 97550 + }, + { + "epoch": 3.545315793298932, + "grad_norm": 0.9602497220039368, + "learning_rate": 2.082435837559833e-05, + "loss": 0.0892, + "step": 97560 + }, + { + "epoch": 3.54567919180173, + "grad_norm": 1.1554557085037231, + "learning_rate": 2.081924204380823e-05, + "loss": 0.0786, + "step": 97570 + }, + { + "epoch": 3.546042590304528, + "grad_norm": 0.33205777406692505, + "learning_rate": 2.0814125892152105e-05, + "loss": 0.0993, + "step": 97580 + }, + { + "epoch": 3.5464059888073263, + "grad_norm": 0.7094134092330933, + "learning_rate": 2.080900992085041e-05, + "loss": 0.0809, + "step": 97590 + }, + { + "epoch": 3.5467693873101243, + "grad_norm": 0.7614325284957886, + "learning_rate": 2.0803894130123562e-05, + "loss": 0.0984, + "step": 97600 + }, + { + "epoch": 3.5471327858129227, + "grad_norm": 0.18374527990818024, + "learning_rate": 2.0798778520191994e-05, + "loss": 0.1204, + "step": 97610 + }, + { + "epoch": 3.5474961843157207, + "grad_norm": 0.5296422243118286, + "learning_rate": 2.0793663091276107e-05, + "loss": 0.0515, + "step": 97620 + }, + { + "epoch": 3.5478595828185187, + "grad_norm": 0.4739711880683899, + "learning_rate": 2.078854784359631e-05, + "loss": 0.0678, + "step": 97630 + }, + { + "epoch": 3.548222981321317, + "grad_norm": 0.4815096855163574, + "learning_rate": 2.0783432777373e-05, + "loss": 0.0897, + "step": 97640 + }, + { + "epoch": 3.548586379824115, + "grad_norm": 0.6116788387298584, + "learning_rate": 2.0778317892826574e-05, + "loss": 0.0893, + "step": 97650 + }, + { + "epoch": 3.5489497783269135, + "grad_norm": 1.2628870010375977, + "learning_rate": 2.077320319017741e-05, + "loss": 0.1013, + "step": 97660 + }, + { + "epoch": 3.5493131768297115, + "grad_norm": 0.7226377129554749, + "learning_rate": 2.076808866964588e-05, + "loss": 0.0703, + "step": 97670 + }, + { + "epoch": 3.5496765753325095, + "grad_norm": 0.8184316158294678, + "learning_rate": 2.0762974331452344e-05, + "loss": 0.0705, + "step": 97680 + }, + { + "epoch": 3.550039973835308, + "grad_norm": 0.7743292450904846, + "learning_rate": 2.0757860175817176e-05, + "loss": 0.0931, + "step": 97690 + }, + { + "epoch": 3.550403372338106, + "grad_norm": 0.8815683722496033, + "learning_rate": 2.075274620296072e-05, + "loss": 0.0708, + "step": 97700 + }, + { + "epoch": 3.5507667708409043, + "grad_norm": 0.478040486574173, + "learning_rate": 2.074763241310332e-05, + "loss": 0.0812, + "step": 97710 + }, + { + "epoch": 3.5511301693437023, + "grad_norm": 0.7928186058998108, + "learning_rate": 2.0742518806465306e-05, + "loss": 0.0594, + "step": 97720 + }, + { + "epoch": 3.5514935678465003, + "grad_norm": 0.3057189881801605, + "learning_rate": 2.0737405383267002e-05, + "loss": 0.0703, + "step": 97730 + }, + { + "epoch": 3.5518569663492987, + "grad_norm": 0.7220034599304199, + "learning_rate": 2.0732292143728744e-05, + "loss": 0.0692, + "step": 97740 + }, + { + "epoch": 3.5522203648520967, + "grad_norm": 0.40750908851623535, + "learning_rate": 2.0727179088070833e-05, + "loss": 0.0778, + "step": 97750 + }, + { + "epoch": 3.552583763354895, + "grad_norm": 0.542911171913147, + "learning_rate": 2.072206621651357e-05, + "loss": 0.084, + "step": 97760 + }, + { + "epoch": 3.552947161857693, + "grad_norm": 0.37515130639076233, + "learning_rate": 2.0716953529277244e-05, + "loss": 0.175, + "step": 97770 + }, + { + "epoch": 3.553310560360491, + "grad_norm": 0.7778531908988953, + "learning_rate": 2.071184102658216e-05, + "loss": 0.0599, + "step": 97780 + }, + { + "epoch": 3.5536739588632895, + "grad_norm": 0.4933672249317169, + "learning_rate": 2.0706728708648593e-05, + "loss": 0.0913, + "step": 97790 + }, + { + "epoch": 3.5540373573660875, + "grad_norm": 0.4018378257751465, + "learning_rate": 2.0701616575696807e-05, + "loss": 0.1055, + "step": 97800 + }, + { + "epoch": 3.5540373573660875, + "eval_loss": 0.3385712802410126, + "eval_runtime": 179.9463, + "eval_samples_per_second": 41.201, + "eval_steps_per_second": 5.152, + "eval_wer": 0.13599397316971337, + "step": 97800 + }, + { + "epoch": 3.554400755868886, + "grad_norm": 1.089429497718811, + "learning_rate": 2.069650462794707e-05, + "loss": 0.0872, + "step": 97810 + }, + { + "epoch": 3.554764154371684, + "grad_norm": 0.2929953932762146, + "learning_rate": 2.0691392865619623e-05, + "loss": 0.0634, + "step": 97820 + }, + { + "epoch": 3.555127552874482, + "grad_norm": 2.5961267948150635, + "learning_rate": 2.0686281288934743e-05, + "loss": 0.0939, + "step": 97830 + }, + { + "epoch": 3.5554909513772803, + "grad_norm": 0.6162799596786499, + "learning_rate": 2.0681169898112652e-05, + "loss": 0.0877, + "step": 97840 + }, + { + "epoch": 3.5558543498800788, + "grad_norm": 0.6476810574531555, + "learning_rate": 2.0676058693373583e-05, + "loss": 0.0798, + "step": 97850 + }, + { + "epoch": 3.5562177483828767, + "grad_norm": 0.8159734010696411, + "learning_rate": 2.067094767493776e-05, + "loss": 0.0762, + "step": 97860 + }, + { + "epoch": 3.5565811468856747, + "grad_norm": 4.349702835083008, + "learning_rate": 2.0665836843025387e-05, + "loss": 0.0897, + "step": 97870 + }, + { + "epoch": 3.556944545388473, + "grad_norm": 0.616278886795044, + "learning_rate": 2.0660726197856696e-05, + "loss": 0.0996, + "step": 97880 + }, + { + "epoch": 3.557307943891271, + "grad_norm": 0.6084387302398682, + "learning_rate": 2.0655615739651874e-05, + "loss": 0.0876, + "step": 97890 + }, + { + "epoch": 3.5576713423940696, + "grad_norm": 0.9170116186141968, + "learning_rate": 2.0650505468631114e-05, + "loss": 0.0824, + "step": 97900 + }, + { + "epoch": 3.5580347408968676, + "grad_norm": 1.841464638710022, + "learning_rate": 2.0645395385014584e-05, + "loss": 0.0863, + "step": 97910 + }, + { + "epoch": 3.5583981393996655, + "grad_norm": 1.3778067827224731, + "learning_rate": 2.0640285489022483e-05, + "loss": 0.072, + "step": 97920 + }, + { + "epoch": 3.558761537902464, + "grad_norm": 0.49323418736457825, + "learning_rate": 2.063517578087497e-05, + "loss": 0.0877, + "step": 97930 + }, + { + "epoch": 3.559124936405262, + "grad_norm": 1.1919158697128296, + "learning_rate": 2.0630066260792206e-05, + "loss": 0.0773, + "step": 97940 + }, + { + "epoch": 3.5594883349080604, + "grad_norm": 0.5721442103385925, + "learning_rate": 2.0624956928994335e-05, + "loss": 0.0723, + "step": 97950 + }, + { + "epoch": 3.5598517334108584, + "grad_norm": 0.7183822989463806, + "learning_rate": 2.0619847785701494e-05, + "loss": 0.0705, + "step": 97960 + }, + { + "epoch": 3.5602151319136564, + "grad_norm": 0.5326651334762573, + "learning_rate": 2.0614738831133836e-05, + "loss": 0.0699, + "step": 97970 + }, + { + "epoch": 3.560578530416455, + "grad_norm": 0.797173023223877, + "learning_rate": 2.0609630065511482e-05, + "loss": 0.0937, + "step": 97980 + }, + { + "epoch": 3.5609419289192528, + "grad_norm": 0.49510565400123596, + "learning_rate": 2.0604521489054547e-05, + "loss": 0.077, + "step": 97990 + }, + { + "epoch": 3.561305327422051, + "grad_norm": 0.6226716041564941, + "learning_rate": 2.059941310198314e-05, + "loss": 0.0858, + "step": 98000 + }, + { + "epoch": 3.561668725924849, + "grad_norm": 0.6301719546318054, + "learning_rate": 2.0594304904517355e-05, + "loss": 0.1003, + "step": 98010 + }, + { + "epoch": 3.562032124427647, + "grad_norm": 0.9622916579246521, + "learning_rate": 2.0589196896877304e-05, + "loss": 0.0663, + "step": 98020 + }, + { + "epoch": 3.5623955229304456, + "grad_norm": 0.38021838665008545, + "learning_rate": 2.058408907928307e-05, + "loss": 0.0638, + "step": 98030 + }, + { + "epoch": 3.5627589214332436, + "grad_norm": 0.8715338706970215, + "learning_rate": 2.0578981451954723e-05, + "loss": 0.1114, + "step": 98040 + }, + { + "epoch": 3.563122319936042, + "grad_norm": 0.40252813696861267, + "learning_rate": 2.0573874015112337e-05, + "loss": 0.0942, + "step": 98050 + }, + { + "epoch": 3.56348571843884, + "grad_norm": 0.9255901575088501, + "learning_rate": 2.056876676897596e-05, + "loss": 0.074, + "step": 98060 + }, + { + "epoch": 3.563849116941638, + "grad_norm": 0.5384101867675781, + "learning_rate": 2.0563659713765664e-05, + "loss": 0.0983, + "step": 98070 + }, + { + "epoch": 3.5642125154444364, + "grad_norm": 0.4538724422454834, + "learning_rate": 2.055855284970149e-05, + "loss": 0.0769, + "step": 98080 + }, + { + "epoch": 3.5645759139472344, + "grad_norm": 0.8953961133956909, + "learning_rate": 2.055344617700347e-05, + "loss": 0.0899, + "step": 98090 + }, + { + "epoch": 3.564939312450033, + "grad_norm": 2.060249090194702, + "learning_rate": 2.0548339695891625e-05, + "loss": 0.081, + "step": 98100 + }, + { + "epoch": 3.565302710952831, + "grad_norm": 0.4483121335506439, + "learning_rate": 2.054323340658599e-05, + "loss": 0.0894, + "step": 98110 + }, + { + "epoch": 3.565666109455629, + "grad_norm": 0.5069551467895508, + "learning_rate": 2.0538127309306572e-05, + "loss": 0.0892, + "step": 98120 + }, + { + "epoch": 3.566029507958427, + "grad_norm": 1.5760865211486816, + "learning_rate": 2.0533021404273375e-05, + "loss": 0.0611, + "step": 98130 + }, + { + "epoch": 3.5663929064612256, + "grad_norm": 0.7342379093170166, + "learning_rate": 2.0527915691706388e-05, + "loss": 0.0638, + "step": 98140 + }, + { + "epoch": 3.5667563049640236, + "grad_norm": 1.9149831533432007, + "learning_rate": 2.0522810171825597e-05, + "loss": 0.0889, + "step": 98150 + }, + { + "epoch": 3.5671197034668216, + "grad_norm": 0.7659269571304321, + "learning_rate": 2.0517704844850993e-05, + "loss": 0.0989, + "step": 98160 + }, + { + "epoch": 3.56748310196962, + "grad_norm": 0.696357250213623, + "learning_rate": 2.051259971100254e-05, + "loss": 0.0673, + "step": 98170 + }, + { + "epoch": 3.567846500472418, + "grad_norm": 0.9041746258735657, + "learning_rate": 2.0507494770500197e-05, + "loss": 0.0644, + "step": 98180 + }, + { + "epoch": 3.5682098989752165, + "grad_norm": 0.9114351868629456, + "learning_rate": 2.0502390023563923e-05, + "loss": 0.0838, + "step": 98190 + }, + { + "epoch": 3.5685732974780144, + "grad_norm": 3.253389596939087, + "learning_rate": 2.0497285470413645e-05, + "loss": 0.0796, + "step": 98200 + }, + { + "epoch": 3.5689366959808124, + "grad_norm": 0.4176378548145294, + "learning_rate": 2.0492181111269333e-05, + "loss": 0.0853, + "step": 98210 + }, + { + "epoch": 3.569300094483611, + "grad_norm": 0.5694284439086914, + "learning_rate": 2.048707694635089e-05, + "loss": 0.0584, + "step": 98220 + }, + { + "epoch": 3.569663492986409, + "grad_norm": 1.451493740081787, + "learning_rate": 2.048197297587825e-05, + "loss": 0.0784, + "step": 98230 + }, + { + "epoch": 3.5700268914892073, + "grad_norm": 0.7314192652702332, + "learning_rate": 2.047686920007132e-05, + "loss": 0.0838, + "step": 98240 + }, + { + "epoch": 3.5703902899920053, + "grad_norm": 1.8838427066802979, + "learning_rate": 2.0471765619149992e-05, + "loss": 0.0795, + "step": 98250 + }, + { + "epoch": 3.5707536884948032, + "grad_norm": 1.1557743549346924, + "learning_rate": 2.0466662233334176e-05, + "loss": 0.0831, + "step": 98260 + }, + { + "epoch": 3.5711170869976017, + "grad_norm": 0.4788287580013275, + "learning_rate": 2.0461559042843762e-05, + "loss": 0.0695, + "step": 98270 + }, + { + "epoch": 3.5714804855003996, + "grad_norm": 0.6254255771636963, + "learning_rate": 2.0456456047898618e-05, + "loss": 0.0753, + "step": 98280 + }, + { + "epoch": 3.571843884003198, + "grad_norm": 0.5113767981529236, + "learning_rate": 2.0451353248718608e-05, + "loss": 0.2545, + "step": 98290 + }, + { + "epoch": 3.572207282505996, + "grad_norm": 5.397027015686035, + "learning_rate": 2.044625064552361e-05, + "loss": 0.124, + "step": 98300 + }, + { + "epoch": 3.572570681008794, + "grad_norm": 1.188666820526123, + "learning_rate": 2.044114823853347e-05, + "loss": 0.0784, + "step": 98310 + }, + { + "epoch": 3.5729340795115925, + "grad_norm": 0.7919589281082153, + "learning_rate": 2.0436046027968033e-05, + "loss": 0.0778, + "step": 98320 + }, + { + "epoch": 3.5732974780143905, + "grad_norm": 0.7725494503974915, + "learning_rate": 2.0430944014047135e-05, + "loss": 0.078, + "step": 98330 + }, + { + "epoch": 3.573660876517189, + "grad_norm": 0.5569599866867065, + "learning_rate": 2.0425842196990592e-05, + "loss": 0.0849, + "step": 98340 + }, + { + "epoch": 3.574024275019987, + "grad_norm": 0.9411738514900208, + "learning_rate": 2.0420740577018243e-05, + "loss": 0.1068, + "step": 98350 + }, + { + "epoch": 3.574387673522785, + "grad_norm": 0.9715979695320129, + "learning_rate": 2.0415639154349893e-05, + "loss": 0.0867, + "step": 98360 + }, + { + "epoch": 3.5747510720255833, + "grad_norm": 0.4651619493961334, + "learning_rate": 2.0410537929205336e-05, + "loss": 0.0678, + "step": 98370 + }, + { + "epoch": 3.5751144705283813, + "grad_norm": 0.8822535872459412, + "learning_rate": 2.040543690180437e-05, + "loss": 0.073, + "step": 98380 + }, + { + "epoch": 3.5754778690311797, + "grad_norm": 2.6987640857696533, + "learning_rate": 2.0400336072366772e-05, + "loss": 0.0967, + "step": 98390 + }, + { + "epoch": 3.5758412675339777, + "grad_norm": 3.3761913776397705, + "learning_rate": 2.0395235441112336e-05, + "loss": 0.0978, + "step": 98400 + }, + { + "epoch": 3.5758412675339777, + "eval_loss": 0.32311710715293884, + "eval_runtime": 179.7355, + "eval_samples_per_second": 41.249, + "eval_steps_per_second": 5.158, + "eval_wer": 0.1381451159075644, + "step": 98400 + }, + { + "epoch": 3.5762046660367757, + "grad_norm": 0.4446313977241516, + "learning_rate": 2.0390135008260823e-05, + "loss": 0.5498, + "step": 98410 + }, + { + "epoch": 3.576568064539574, + "grad_norm": 2.1820120811462402, + "learning_rate": 2.038503477403199e-05, + "loss": 0.0726, + "step": 98420 + }, + { + "epoch": 3.5769314630423725, + "grad_norm": 0.36170488595962524, + "learning_rate": 2.037993473864559e-05, + "loss": 0.0754, + "step": 98430 + }, + { + "epoch": 3.5772948615451705, + "grad_norm": 0.7031393051147461, + "learning_rate": 2.0374834902321352e-05, + "loss": 0.083, + "step": 98440 + }, + { + "epoch": 3.5776582600479685, + "grad_norm": 3.428339719772339, + "learning_rate": 2.036973526527903e-05, + "loss": 0.0938, + "step": 98450 + }, + { + "epoch": 3.578021658550767, + "grad_norm": 0.41565191745758057, + "learning_rate": 2.0364635827738347e-05, + "loss": 0.0878, + "step": 98460 + }, + { + "epoch": 3.578385057053565, + "grad_norm": 0.4806919991970062, + "learning_rate": 2.0359536589919005e-05, + "loss": 0.2698, + "step": 98470 + }, + { + "epoch": 3.5787484555563633, + "grad_norm": 12.289422988891602, + "learning_rate": 2.0354437552040717e-05, + "loss": 0.2066, + "step": 98480 + }, + { + "epoch": 3.5791118540591613, + "grad_norm": 0.4719155430793762, + "learning_rate": 2.0349848589081458e-05, + "loss": 0.3243, + "step": 98490 + }, + { + "epoch": 3.5794752525619593, + "grad_norm": 1.4685314893722534, + "learning_rate": 2.034474993169644e-05, + "loss": 0.0745, + "step": 98500 + }, + { + "epoch": 3.5798386510647577, + "grad_norm": 0.3435138165950775, + "learning_rate": 2.0339651474889595e-05, + "loss": 0.0915, + "step": 98510 + }, + { + "epoch": 3.5802020495675557, + "grad_norm": 1.1693735122680664, + "learning_rate": 2.033455321888059e-05, + "loss": 0.0781, + "step": 98520 + }, + { + "epoch": 3.580565448070354, + "grad_norm": 0.24628905951976776, + "learning_rate": 2.0329455163889084e-05, + "loss": 0.063, + "step": 98530 + }, + { + "epoch": 3.580928846573152, + "grad_norm": 1.4354606866836548, + "learning_rate": 2.0324357310134738e-05, + "loss": 0.1105, + "step": 98540 + }, + { + "epoch": 3.58129224507595, + "grad_norm": 0.526882529258728, + "learning_rate": 2.0319259657837186e-05, + "loss": 0.0967, + "step": 98550 + }, + { + "epoch": 3.5816556435787485, + "grad_norm": 0.3548150360584259, + "learning_rate": 2.0314162207216096e-05, + "loss": 0.0647, + "step": 98560 + }, + { + "epoch": 3.5820190420815465, + "grad_norm": 0.8922990560531616, + "learning_rate": 2.0309064958491082e-05, + "loss": 0.07, + "step": 98570 + }, + { + "epoch": 3.582382440584345, + "grad_norm": 0.615703284740448, + "learning_rate": 2.030396791188177e-05, + "loss": 0.0667, + "step": 98580 + }, + { + "epoch": 3.582745839087143, + "grad_norm": 0.4474499225616455, + "learning_rate": 2.0298871067607768e-05, + "loss": 0.0679, + "step": 98590 + }, + { + "epoch": 3.583109237589941, + "grad_norm": 0.8109280467033386, + "learning_rate": 2.029377442588868e-05, + "loss": 0.0848, + "step": 98600 + }, + { + "epoch": 3.5834726360927394, + "grad_norm": 0.7447373270988464, + "learning_rate": 2.0288677986944116e-05, + "loss": 0.0649, + "step": 98610 + }, + { + "epoch": 3.5838360345955373, + "grad_norm": 0.4216397702693939, + "learning_rate": 2.0283581750993653e-05, + "loss": 0.0683, + "step": 98620 + }, + { + "epoch": 3.5841994330983358, + "grad_norm": 0.39569661021232605, + "learning_rate": 2.0278485718256873e-05, + "loss": 0.0643, + "step": 98630 + }, + { + "epoch": 3.5845628316011338, + "grad_norm": 0.3963533043861389, + "learning_rate": 2.027338988895333e-05, + "loss": 0.077, + "step": 98640 + }, + { + "epoch": 3.5849262301039317, + "grad_norm": 1.1114723682403564, + "learning_rate": 2.0268294263302615e-05, + "loss": 0.0787, + "step": 98650 + }, + { + "epoch": 3.58528962860673, + "grad_norm": 0.5580173134803772, + "learning_rate": 2.0263198841524262e-05, + "loss": 0.118, + "step": 98660 + }, + { + "epoch": 3.585653027109528, + "grad_norm": 0.6341946721076965, + "learning_rate": 2.0258103623837817e-05, + "loss": 0.0648, + "step": 98670 + }, + { + "epoch": 3.5860164256123266, + "grad_norm": 0.7503349184989929, + "learning_rate": 2.0253008610462818e-05, + "loss": 0.0822, + "step": 98680 + }, + { + "epoch": 3.5863798241151246, + "grad_norm": 0.47050052881240845, + "learning_rate": 2.0247913801618773e-05, + "loss": 0.1365, + "step": 98690 + }, + { + "epoch": 3.5867432226179226, + "grad_norm": 0.7919335961341858, + "learning_rate": 2.0242819197525228e-05, + "loss": 0.0775, + "step": 98700 + }, + { + "epoch": 3.587106621120721, + "grad_norm": 0.4918152093887329, + "learning_rate": 2.023772479840168e-05, + "loss": 0.0759, + "step": 98710 + }, + { + "epoch": 3.5874700196235194, + "grad_norm": 1.0589570999145508, + "learning_rate": 2.0232630604467623e-05, + "loss": 0.1025, + "step": 98720 + }, + { + "epoch": 3.5878334181263174, + "grad_norm": 0.4748883843421936, + "learning_rate": 2.022753661594255e-05, + "loss": 0.0647, + "step": 98730 + }, + { + "epoch": 3.5881968166291154, + "grad_norm": 0.5574440360069275, + "learning_rate": 2.0222442833045936e-05, + "loss": 0.0877, + "step": 98740 + }, + { + "epoch": 3.588560215131914, + "grad_norm": 1.1840908527374268, + "learning_rate": 2.021734925599727e-05, + "loss": 0.0612, + "step": 98750 + }, + { + "epoch": 3.588923613634712, + "grad_norm": 0.5361800789833069, + "learning_rate": 2.021225588501601e-05, + "loss": 0.0797, + "step": 98760 + }, + { + "epoch": 3.5892870121375102, + "grad_norm": 1.2590301036834717, + "learning_rate": 2.0207162720321604e-05, + "loss": 0.0819, + "step": 98770 + }, + { + "epoch": 3.589650410640308, + "grad_norm": 0.4360603392124176, + "learning_rate": 2.0202069762133506e-05, + "loss": 0.0844, + "step": 98780 + }, + { + "epoch": 3.590013809143106, + "grad_norm": 0.6287516951560974, + "learning_rate": 2.019697701067114e-05, + "loss": 0.0888, + "step": 98790 + }, + { + "epoch": 3.5903772076459046, + "grad_norm": 0.5893504023551941, + "learning_rate": 2.019188446615396e-05, + "loss": 0.0795, + "step": 98800 + }, + { + "epoch": 3.5907406061487026, + "grad_norm": 1.4572880268096924, + "learning_rate": 2.0186792128801368e-05, + "loss": 0.0922, + "step": 98810 + }, + { + "epoch": 3.591104004651501, + "grad_norm": 0.6769400835037231, + "learning_rate": 2.018169999883278e-05, + "loss": 0.062, + "step": 98820 + }, + { + "epoch": 3.591467403154299, + "grad_norm": 1.1272927522659302, + "learning_rate": 2.017660807646758e-05, + "loss": 0.0594, + "step": 98830 + }, + { + "epoch": 3.591830801657097, + "grad_norm": 3.9643101692199707, + "learning_rate": 2.017151636192519e-05, + "loss": 0.0732, + "step": 98840 + }, + { + "epoch": 3.5921942001598954, + "grad_norm": 0.613264262676239, + "learning_rate": 2.016642485542498e-05, + "loss": 0.0745, + "step": 98850 + }, + { + "epoch": 3.5925575986626934, + "grad_norm": 0.725082516670227, + "learning_rate": 2.0161333557186326e-05, + "loss": 0.0739, + "step": 98860 + }, + { + "epoch": 3.592920997165492, + "grad_norm": 0.7394050359725952, + "learning_rate": 2.0156242467428593e-05, + "loss": 0.0617, + "step": 98870 + }, + { + "epoch": 3.59328439566829, + "grad_norm": 0.339306116104126, + "learning_rate": 2.0151151586371125e-05, + "loss": 0.0641, + "step": 98880 + }, + { + "epoch": 3.593647794171088, + "grad_norm": 0.6532690525054932, + "learning_rate": 2.0146060914233296e-05, + "loss": 0.0702, + "step": 98890 + }, + { + "epoch": 3.5940111926738862, + "grad_norm": 2.9444310665130615, + "learning_rate": 2.014097045123443e-05, + "loss": 0.0673, + "step": 98900 + }, + { + "epoch": 3.5943745911766842, + "grad_norm": 0.4563717544078827, + "learning_rate": 2.013588019759386e-05, + "loss": 0.0695, + "step": 98910 + }, + { + "epoch": 3.5947379896794827, + "grad_norm": 0.5301656126976013, + "learning_rate": 2.0130790153530905e-05, + "loss": 0.0769, + "step": 98920 + }, + { + "epoch": 3.5951013881822806, + "grad_norm": 0.7959988713264465, + "learning_rate": 2.0125700319264867e-05, + "loss": 0.0784, + "step": 98930 + }, + { + "epoch": 3.5954647866850786, + "grad_norm": 0.7127143740653992, + "learning_rate": 2.012061069501507e-05, + "loss": 0.1098, + "step": 98940 + }, + { + "epoch": 3.595828185187877, + "grad_norm": 1.249788761138916, + "learning_rate": 2.0115521281000797e-05, + "loss": 0.0763, + "step": 98950 + }, + { + "epoch": 3.596191583690675, + "grad_norm": 0.8544566631317139, + "learning_rate": 2.0110432077441333e-05, + "loss": 0.0813, + "step": 98960 + }, + { + "epoch": 3.5965549821934735, + "grad_norm": 0.741630494594574, + "learning_rate": 2.0105343084555955e-05, + "loss": 0.0738, + "step": 98970 + }, + { + "epoch": 3.5969183806962715, + "grad_norm": 0.9736884832382202, + "learning_rate": 2.0100254302563915e-05, + "loss": 0.0734, + "step": 98980 + }, + { + "epoch": 3.5972817791990694, + "grad_norm": 0.22562777996063232, + "learning_rate": 2.0095165731684496e-05, + "loss": 0.0766, + "step": 98990 + }, + { + "epoch": 3.597645177701868, + "grad_norm": 0.6679416298866272, + "learning_rate": 2.0090077372136935e-05, + "loss": 0.0986, + "step": 99000 + }, + { + "epoch": 3.597645177701868, + "eval_loss": 0.32793328166007996, + "eval_runtime": 180.4126, + "eval_samples_per_second": 41.095, + "eval_steps_per_second": 5.138, + "eval_wer": 0.13713761867591265, + "step": 99000 + }, + { + "epoch": 3.5980085762046663, + "grad_norm": 0.5008822083473206, + "learning_rate": 2.0084989224140468e-05, + "loss": 0.0577, + "step": 99010 + }, + { + "epoch": 3.5983719747074643, + "grad_norm": 0.7887293696403503, + "learning_rate": 2.0079901287914322e-05, + "loss": 0.0555, + "step": 99020 + }, + { + "epoch": 3.5987353732102623, + "grad_norm": 0.7174037098884583, + "learning_rate": 2.007481356367773e-05, + "loss": 0.0947, + "step": 99030 + }, + { + "epoch": 3.5990987717130607, + "grad_norm": 0.5506502389907837, + "learning_rate": 2.0069726051649897e-05, + "loss": 0.0828, + "step": 99040 + }, + { + "epoch": 3.5994621702158587, + "grad_norm": 0.6919708251953125, + "learning_rate": 2.006463875205003e-05, + "loss": 0.1141, + "step": 99050 + }, + { + "epoch": 3.599825568718657, + "grad_norm": 0.7304664254188538, + "learning_rate": 2.0059551665097314e-05, + "loss": 1.5067, + "step": 99060 + }, + { + "epoch": 3.600188967221455, + "grad_norm": 0.3626577854156494, + "learning_rate": 2.005446479101093e-05, + "loss": 0.0599, + "step": 99070 + }, + { + "epoch": 3.600552365724253, + "grad_norm": 1.0457093715667725, + "learning_rate": 2.0049378130010075e-05, + "loss": 0.0706, + "step": 99080 + }, + { + "epoch": 3.6009157642270515, + "grad_norm": 0.2686658203601837, + "learning_rate": 2.0044291682313905e-05, + "loss": 0.0958, + "step": 99090 + }, + { + "epoch": 3.6012791627298495, + "grad_norm": 0.9484358429908752, + "learning_rate": 2.0039205448141568e-05, + "loss": 0.1046, + "step": 99100 + }, + { + "epoch": 3.601642561232648, + "grad_norm": 0.34404778480529785, + "learning_rate": 2.0034119427712218e-05, + "loss": 0.0979, + "step": 99110 + }, + { + "epoch": 3.602005959735446, + "grad_norm": 0.6967700719833374, + "learning_rate": 2.0029033621244983e-05, + "loss": 0.0728, + "step": 99120 + }, + { + "epoch": 3.602369358238244, + "grad_norm": 0.640446662902832, + "learning_rate": 2.0023948028959017e-05, + "loss": 0.0701, + "step": 99130 + }, + { + "epoch": 3.6027327567410423, + "grad_norm": 1.3062965869903564, + "learning_rate": 2.001886265107342e-05, + "loss": 0.0921, + "step": 99140 + }, + { + "epoch": 3.6030961552438403, + "grad_norm": 0.7615834474563599, + "learning_rate": 2.0013777487807313e-05, + "loss": 0.076, + "step": 99150 + }, + { + "epoch": 3.6034595537466387, + "grad_norm": 0.9553613662719727, + "learning_rate": 2.0008692539379788e-05, + "loss": 0.0751, + "step": 99160 + }, + { + "epoch": 3.6038229522494367, + "grad_norm": 1.0260523557662964, + "learning_rate": 2.0003607806009937e-05, + "loss": 0.0673, + "step": 99170 + }, + { + "epoch": 3.6041863507522347, + "grad_norm": 1.555732250213623, + "learning_rate": 1.9998523287916858e-05, + "loss": 0.0793, + "step": 99180 + }, + { + "epoch": 3.604549749255033, + "grad_norm": 0.5698230862617493, + "learning_rate": 1.9993438985319612e-05, + "loss": 0.0815, + "step": 99190 + }, + { + "epoch": 3.604913147757831, + "grad_norm": 0.8293182849884033, + "learning_rate": 1.998835489843727e-05, + "loss": 0.0706, + "step": 99200 + }, + { + "epoch": 3.6052765462606295, + "grad_norm": 0.5732713341712952, + "learning_rate": 1.998327102748887e-05, + "loss": 0.0754, + "step": 99210 + }, + { + "epoch": 3.6056399447634275, + "grad_norm": 1.5586737394332886, + "learning_rate": 1.9978187372693486e-05, + "loss": 0.3522, + "step": 99220 + }, + { + "epoch": 3.6060033432662255, + "grad_norm": 5.692126750946045, + "learning_rate": 1.9973103934270136e-05, + "loss": 0.0782, + "step": 99230 + }, + { + "epoch": 3.606366741769024, + "grad_norm": 0.30950793623924255, + "learning_rate": 1.9968020712437857e-05, + "loss": 0.0866, + "step": 99240 + }, + { + "epoch": 3.606730140271822, + "grad_norm": 1.8191814422607422, + "learning_rate": 1.996293770741566e-05, + "loss": 0.0855, + "step": 99250 + }, + { + "epoch": 3.6070935387746204, + "grad_norm": 0.44492724537849426, + "learning_rate": 1.9957854919422543e-05, + "loss": 0.0804, + "step": 99260 + }, + { + "epoch": 3.6074569372774183, + "grad_norm": 0.49994924664497375, + "learning_rate": 1.9952772348677528e-05, + "loss": 0.069, + "step": 99270 + }, + { + "epoch": 3.6078203357802163, + "grad_norm": 0.6917654275894165, + "learning_rate": 1.99476899953996e-05, + "loss": 0.0745, + "step": 99280 + }, + { + "epoch": 3.6081837342830148, + "grad_norm": 1.5276329517364502, + "learning_rate": 1.9942607859807726e-05, + "loss": 0.0987, + "step": 99290 + }, + { + "epoch": 3.608547132785813, + "grad_norm": 1.3370615243911743, + "learning_rate": 1.9937525942120888e-05, + "loss": 0.0785, + "step": 99300 + }, + { + "epoch": 3.608910531288611, + "grad_norm": 0.6237464547157288, + "learning_rate": 1.9932444242558035e-05, + "loss": 0.0708, + "step": 99310 + }, + { + "epoch": 3.609273929791409, + "grad_norm": 1.2243361473083496, + "learning_rate": 1.992736276133814e-05, + "loss": 0.0606, + "step": 99320 + }, + { + "epoch": 3.6096373282942076, + "grad_norm": 0.5042082667350769, + "learning_rate": 1.9922281498680133e-05, + "loss": 0.0696, + "step": 99330 + }, + { + "epoch": 3.6100007267970056, + "grad_norm": 0.6634204387664795, + "learning_rate": 1.9917200454802953e-05, + "loss": 0.099, + "step": 99340 + }, + { + "epoch": 3.610364125299804, + "grad_norm": 0.39905115962028503, + "learning_rate": 1.991211962992552e-05, + "loss": 0.0799, + "step": 99350 + }, + { + "epoch": 3.610727523802602, + "grad_norm": 0.49072784185409546, + "learning_rate": 1.990703902426674e-05, + "loss": 0.1372, + "step": 99360 + }, + { + "epoch": 3.6110909223054, + "grad_norm": 0.8174235820770264, + "learning_rate": 1.990195863804553e-05, + "loss": 0.0647, + "step": 99370 + }, + { + "epoch": 3.6114543208081984, + "grad_norm": 0.634876549243927, + "learning_rate": 1.9896878471480794e-05, + "loss": 0.0704, + "step": 99380 + }, + { + "epoch": 3.6118177193109964, + "grad_norm": 0.5161920189857483, + "learning_rate": 1.98917985247914e-05, + "loss": 0.0912, + "step": 99390 + }, + { + "epoch": 3.612181117813795, + "grad_norm": 2.626404047012329, + "learning_rate": 1.9886718798196226e-05, + "loss": 0.0667, + "step": 99400 + }, + { + "epoch": 3.612544516316593, + "grad_norm": 0.4185205101966858, + "learning_rate": 1.9881639291914157e-05, + "loss": 0.0703, + "step": 99410 + }, + { + "epoch": 3.6129079148193908, + "grad_norm": 1.610190510749817, + "learning_rate": 1.9876560006164034e-05, + "loss": 0.067, + "step": 99420 + }, + { + "epoch": 3.613271313322189, + "grad_norm": 0.5030075311660767, + "learning_rate": 1.9871480941164718e-05, + "loss": 0.0706, + "step": 99430 + }, + { + "epoch": 3.613634711824987, + "grad_norm": 0.35228344798088074, + "learning_rate": 1.986640209713504e-05, + "loss": 0.095, + "step": 99440 + }, + { + "epoch": 3.6139981103277856, + "grad_norm": 0.5087105631828308, + "learning_rate": 1.9861323474293817e-05, + "loss": 0.0804, + "step": 99450 + }, + { + "epoch": 3.6143615088305836, + "grad_norm": 0.3755999505519867, + "learning_rate": 1.985624507285989e-05, + "loss": 0.0772, + "step": 99460 + }, + { + "epoch": 3.6147249073333816, + "grad_norm": 0.841137707233429, + "learning_rate": 1.985116689305207e-05, + "loss": 0.0665, + "step": 99470 + }, + { + "epoch": 3.61508830583618, + "grad_norm": 0.819675862789154, + "learning_rate": 1.984608893508914e-05, + "loss": 0.0654, + "step": 99480 + }, + { + "epoch": 3.615451704338978, + "grad_norm": 0.50279700756073, + "learning_rate": 1.9841011199189903e-05, + "loss": 0.0903, + "step": 99490 + }, + { + "epoch": 3.6158151028417764, + "grad_norm": 1.3066468238830566, + "learning_rate": 1.983593368557313e-05, + "loss": 0.0659, + "step": 99500 + }, + { + "epoch": 3.6161785013445744, + "grad_norm": 3.0848491191864014, + "learning_rate": 1.983085639445761e-05, + "loss": 0.0895, + "step": 99510 + }, + { + "epoch": 3.6165418998473724, + "grad_norm": 0.4366300106048584, + "learning_rate": 1.9825779326062092e-05, + "loss": 0.0701, + "step": 99520 + }, + { + "epoch": 3.616905298350171, + "grad_norm": 0.8729182481765747, + "learning_rate": 1.982070248060534e-05, + "loss": 0.0652, + "step": 99530 + }, + { + "epoch": 3.617268696852969, + "grad_norm": 0.6263926029205322, + "learning_rate": 1.9815625858306087e-05, + "loss": 0.0866, + "step": 99540 + }, + { + "epoch": 3.6176320953557672, + "grad_norm": 1.8468258380889893, + "learning_rate": 1.9810549459383057e-05, + "loss": 0.0881, + "step": 99550 + }, + { + "epoch": 3.6179954938585652, + "grad_norm": 0.606265127658844, + "learning_rate": 1.9805473284054997e-05, + "loss": 0.0915, + "step": 99560 + }, + { + "epoch": 3.618358892361363, + "grad_norm": 1.2118183374404907, + "learning_rate": 1.9800397332540615e-05, + "loss": 0.059, + "step": 99570 + }, + { + "epoch": 3.6187222908641616, + "grad_norm": 0.6607059240341187, + "learning_rate": 1.979532160505861e-05, + "loss": 0.0682, + "step": 99580 + }, + { + "epoch": 3.61908568936696, + "grad_norm": 0.6374445557594299, + "learning_rate": 1.979024610182767e-05, + "loss": 0.0748, + "step": 99590 + }, + { + "epoch": 3.619449087869758, + "grad_norm": 0.6254763007164001, + "learning_rate": 1.9785170823066492e-05, + "loss": 0.0805, + "step": 99600 + }, + { + "epoch": 3.619449087869758, + "eval_loss": 0.33375027775764465, + "eval_runtime": 179.686, + "eval_samples_per_second": 41.261, + "eval_steps_per_second": 5.159, + "eval_wer": 0.13485940421515058, + "step": 99600 + }, + { + "epoch": 3.619812486372556, + "grad_norm": 0.8105804324150085, + "learning_rate": 1.9780095768993756e-05, + "loss": 0.0794, + "step": 99610 + }, + { + "epoch": 3.6201758848753545, + "grad_norm": 1.2891360521316528, + "learning_rate": 1.9775020939828118e-05, + "loss": 0.0743, + "step": 99620 + }, + { + "epoch": 3.6205392833781525, + "grad_norm": 0.528218686580658, + "learning_rate": 1.9769946335788236e-05, + "loss": 0.0694, + "step": 99630 + }, + { + "epoch": 3.620902681880951, + "grad_norm": 0.47785595059394836, + "learning_rate": 1.976487195709275e-05, + "loss": 0.1074, + "step": 99640 + }, + { + "epoch": 3.621266080383749, + "grad_norm": 0.8402767181396484, + "learning_rate": 1.9759797803960318e-05, + "loss": 0.0795, + "step": 99650 + }, + { + "epoch": 3.621629478886547, + "grad_norm": 1.007688045501709, + "learning_rate": 1.9754723876609548e-05, + "loss": 0.0964, + "step": 99660 + }, + { + "epoch": 3.6219928773893453, + "grad_norm": 0.2740893065929413, + "learning_rate": 1.9749650175259067e-05, + "loss": 0.051, + "step": 99670 + }, + { + "epoch": 3.6223562758921433, + "grad_norm": 0.38753753900527954, + "learning_rate": 1.974457670012747e-05, + "loss": 0.0649, + "step": 99680 + }, + { + "epoch": 3.6227196743949417, + "grad_norm": 0.4678163230419159, + "learning_rate": 1.973950345143337e-05, + "loss": 0.094, + "step": 99690 + }, + { + "epoch": 3.6230830728977397, + "grad_norm": 1.056203842163086, + "learning_rate": 1.973443042939535e-05, + "loss": 0.3395, + "step": 99700 + }, + { + "epoch": 3.6234464714005377, + "grad_norm": 1.3664277791976929, + "learning_rate": 1.972935763423199e-05, + "loss": 0.0842, + "step": 99710 + }, + { + "epoch": 3.623809869903336, + "grad_norm": 0.618270218372345, + "learning_rate": 1.9724285066161858e-05, + "loss": 0.0642, + "step": 99720 + }, + { + "epoch": 3.624173268406134, + "grad_norm": 0.3474178910255432, + "learning_rate": 1.97192127254035e-05, + "loss": 0.0583, + "step": 99730 + }, + { + "epoch": 3.6245366669089325, + "grad_norm": 0.9433966875076294, + "learning_rate": 1.9714140612175483e-05, + "loss": 0.0691, + "step": 99740 + }, + { + "epoch": 3.6249000654117305, + "grad_norm": 0.7541018128395081, + "learning_rate": 1.9709068726696342e-05, + "loss": 0.0867, + "step": 99750 + }, + { + "epoch": 3.6252634639145285, + "grad_norm": 1.1116639375686646, + "learning_rate": 1.9703997069184606e-05, + "loss": 0.0827, + "step": 99760 + }, + { + "epoch": 3.625626862417327, + "grad_norm": 0.5351355671882629, + "learning_rate": 1.9698925639858792e-05, + "loss": 0.0689, + "step": 99770 + }, + { + "epoch": 3.625990260920125, + "grad_norm": 0.40793484449386597, + "learning_rate": 1.9693854438937405e-05, + "loss": 0.0565, + "step": 99780 + }, + { + "epoch": 3.6263536594229233, + "grad_norm": 0.4486261308193207, + "learning_rate": 1.9688783466638952e-05, + "loss": 0.099, + "step": 99790 + }, + { + "epoch": 3.6267170579257213, + "grad_norm": 0.5843884944915771, + "learning_rate": 1.9683712723181926e-05, + "loss": 0.0825, + "step": 99800 + }, + { + "epoch": 3.6270804564285193, + "grad_norm": 0.680952250957489, + "learning_rate": 1.9678642208784805e-05, + "loss": 0.0683, + "step": 99810 + }, + { + "epoch": 3.6274438549313177, + "grad_norm": 1.9215220212936401, + "learning_rate": 1.9673571923666052e-05, + "loss": 0.0576, + "step": 99820 + }, + { + "epoch": 3.6278072534341157, + "grad_norm": 0.8826823830604553, + "learning_rate": 1.9668501868044134e-05, + "loss": 0.0615, + "step": 99830 + }, + { + "epoch": 3.628170651936914, + "grad_norm": 8.166322708129883, + "learning_rate": 1.9663432042137507e-05, + "loss": 0.0749, + "step": 99840 + }, + { + "epoch": 3.628534050439712, + "grad_norm": 0.97735595703125, + "learning_rate": 1.96583624461646e-05, + "loss": 0.0721, + "step": 99850 + }, + { + "epoch": 3.62889744894251, + "grad_norm": 0.35416311025619507, + "learning_rate": 1.9653293080343858e-05, + "loss": 0.0927, + "step": 99860 + }, + { + "epoch": 3.6292608474453085, + "grad_norm": 1.6705694198608398, + "learning_rate": 1.964822394489368e-05, + "loss": 0.074, + "step": 99870 + }, + { + "epoch": 3.629624245948107, + "grad_norm": 0.5998109579086304, + "learning_rate": 1.9643155040032497e-05, + "loss": 0.0535, + "step": 99880 + }, + { + "epoch": 3.629987644450905, + "grad_norm": 0.28862881660461426, + "learning_rate": 1.9638086365978707e-05, + "loss": 0.0688, + "step": 99890 + }, + { + "epoch": 3.630351042953703, + "grad_norm": 1.4020724296569824, + "learning_rate": 1.9633017922950697e-05, + "loss": 0.0843, + "step": 99900 + }, + { + "epoch": 3.6307144414565014, + "grad_norm": 0.4853419363498688, + "learning_rate": 1.9627949711166843e-05, + "loss": 0.0813, + "step": 99910 + }, + { + "epoch": 3.6310778399592993, + "grad_norm": 0.5583473443984985, + "learning_rate": 1.9622881730845525e-05, + "loss": 0.0519, + "step": 99920 + }, + { + "epoch": 3.6314412384620978, + "grad_norm": 0.5846819281578064, + "learning_rate": 1.9617813982205104e-05, + "loss": 0.0698, + "step": 99930 + }, + { + "epoch": 3.6318046369648957, + "grad_norm": 0.44811734557151794, + "learning_rate": 1.9612746465463926e-05, + "loss": 0.0849, + "step": 99940 + }, + { + "epoch": 3.6321680354676937, + "grad_norm": 1.8960832357406616, + "learning_rate": 1.960767918084034e-05, + "loss": 0.0892, + "step": 99950 + }, + { + "epoch": 3.632531433970492, + "grad_norm": 0.5286456942558289, + "learning_rate": 1.9602612128552666e-05, + "loss": 0.0574, + "step": 99960 + }, + { + "epoch": 3.63289483247329, + "grad_norm": 0.5947690010070801, + "learning_rate": 1.9597545308819234e-05, + "loss": 0.059, + "step": 99970 + }, + { + "epoch": 3.6332582309760886, + "grad_norm": 0.42291057109832764, + "learning_rate": 1.959247872185835e-05, + "loss": 0.0977, + "step": 99980 + }, + { + "epoch": 3.6336216294788866, + "grad_norm": 0.7012550234794617, + "learning_rate": 1.958741236788832e-05, + "loss": 0.0865, + "step": 99990 + }, + { + "epoch": 3.6339850279816845, + "grad_norm": 4.052799224853516, + "learning_rate": 1.9582346247127432e-05, + "loss": 0.0951, + "step": 100000 + }, + { + "epoch": 3.634348426484483, + "grad_norm": 10.946352005004883, + "learning_rate": 1.957728035979397e-05, + "loss": 0.0716, + "step": 100010 + }, + { + "epoch": 3.634711824987281, + "grad_norm": 0.9583892822265625, + "learning_rate": 1.95722147061062e-05, + "loss": 0.0574, + "step": 100020 + }, + { + "epoch": 3.6350752234900794, + "grad_norm": 0.34174227714538574, + "learning_rate": 1.956714928628239e-05, + "loss": 0.0717, + "step": 100030 + }, + { + "epoch": 3.6354386219928774, + "grad_norm": 6.283267021179199, + "learning_rate": 1.9562084100540788e-05, + "loss": 0.0757, + "step": 100040 + }, + { + "epoch": 3.6358020204956754, + "grad_norm": 0.6548503041267395, + "learning_rate": 1.955701914909963e-05, + "loss": 0.0991, + "step": 100050 + }, + { + "epoch": 3.636165418998474, + "grad_norm": 0.3819758892059326, + "learning_rate": 1.9551954432177154e-05, + "loss": 0.0704, + "step": 100060 + }, + { + "epoch": 3.6365288175012718, + "grad_norm": 0.41873475909233093, + "learning_rate": 1.9546889949991575e-05, + "loss": 0.0723, + "step": 100070 + }, + { + "epoch": 3.63689221600407, + "grad_norm": 0.7295028567314148, + "learning_rate": 1.9541825702761107e-05, + "loss": 0.0635, + "step": 100080 + }, + { + "epoch": 3.637255614506868, + "grad_norm": 0.32320845127105713, + "learning_rate": 1.953676169070395e-05, + "loss": 0.0882, + "step": 100090 + }, + { + "epoch": 3.637619013009666, + "grad_norm": 0.6093827486038208, + "learning_rate": 1.9531697914038288e-05, + "loss": 0.0623, + "step": 100100 + }, + { + "epoch": 3.6379824115124646, + "grad_norm": 1.0530484914779663, + "learning_rate": 1.9526634372982315e-05, + "loss": 0.0793, + "step": 100110 + }, + { + "epoch": 3.6383458100152626, + "grad_norm": 0.6837037205696106, + "learning_rate": 1.9521571067754186e-05, + "loss": 0.0512, + "step": 100120 + }, + { + "epoch": 3.638709208518061, + "grad_norm": 0.4363226890563965, + "learning_rate": 1.951650799857207e-05, + "loss": 0.0645, + "step": 100130 + }, + { + "epoch": 3.639072607020859, + "grad_norm": 1.5349054336547852, + "learning_rate": 1.951144516565411e-05, + "loss": 0.0782, + "step": 100140 + }, + { + "epoch": 3.639436005523657, + "grad_norm": 1.7200794219970703, + "learning_rate": 1.9506382569218457e-05, + "loss": 0.0911, + "step": 100150 + }, + { + "epoch": 3.6397994040264554, + "grad_norm": 0.8230735659599304, + "learning_rate": 1.9501320209483232e-05, + "loss": 0.0666, + "step": 100160 + }, + { + "epoch": 3.640162802529254, + "grad_norm": 0.5745123028755188, + "learning_rate": 1.9496258086666548e-05, + "loss": 0.0625, + "step": 100170 + }, + { + "epoch": 3.640526201032052, + "grad_norm": 0.4190816283226013, + "learning_rate": 1.9491196200986525e-05, + "loss": 0.1627, + "step": 100180 + }, + { + "epoch": 3.64088959953485, + "grad_norm": 0.6358737349510193, + "learning_rate": 1.9486134552661252e-05, + "loss": 0.0775, + "step": 100190 + }, + { + "epoch": 3.6412529980376482, + "grad_norm": 0.5560312271118164, + "learning_rate": 1.9481073141908832e-05, + "loss": 0.068, + "step": 100200 + }, + { + "epoch": 3.6412529980376482, + "eval_loss": 0.32526150345802307, + "eval_runtime": 179.1108, + "eval_samples_per_second": 41.393, + "eval_steps_per_second": 5.176, + "eval_wer": 0.1345326483562365, + "step": 100200 + }, + { + "epoch": 3.641616396540446, + "grad_norm": 1.3325546979904175, + "learning_rate": 1.947601196894733e-05, + "loss": 0.0575, + "step": 100210 + }, + { + "epoch": 3.6419797950432446, + "grad_norm": 1.013014554977417, + "learning_rate": 1.9470951033994817e-05, + "loss": 0.0757, + "step": 100220 + }, + { + "epoch": 3.6423431935460426, + "grad_norm": 0.4979040026664734, + "learning_rate": 1.9465890337269345e-05, + "loss": 0.0626, + "step": 100230 + }, + { + "epoch": 3.6427065920488406, + "grad_norm": 0.3621061444282532, + "learning_rate": 1.9460829878988977e-05, + "loss": 0.0812, + "step": 100240 + }, + { + "epoch": 3.643069990551639, + "grad_norm": 1.418254017829895, + "learning_rate": 1.9455769659371744e-05, + "loss": 0.0961, + "step": 100250 + }, + { + "epoch": 3.643433389054437, + "grad_norm": 1.291218638420105, + "learning_rate": 1.945070967863566e-05, + "loss": 0.0642, + "step": 100260 + }, + { + "epoch": 3.6437967875572355, + "grad_norm": 0.4502682387828827, + "learning_rate": 1.944564993699876e-05, + "loss": 0.0581, + "step": 100270 + }, + { + "epoch": 3.6441601860600334, + "grad_norm": 1.5867079496383667, + "learning_rate": 1.9440590434679034e-05, + "loss": 0.078, + "step": 100280 + }, + { + "epoch": 3.6445235845628314, + "grad_norm": 1.330061912536621, + "learning_rate": 1.9435531171894493e-05, + "loss": 0.0625, + "step": 100290 + }, + { + "epoch": 3.64488698306563, + "grad_norm": 2.1027393341064453, + "learning_rate": 1.9430472148863113e-05, + "loss": 0.0961, + "step": 100300 + }, + { + "epoch": 3.645250381568428, + "grad_norm": 0.29562556743621826, + "learning_rate": 1.9425413365802865e-05, + "loss": 0.0594, + "step": 100310 + }, + { + "epoch": 3.6456137800712263, + "grad_norm": 0.3071857988834381, + "learning_rate": 1.9420354822931725e-05, + "loss": 0.062, + "step": 100320 + }, + { + "epoch": 3.6459771785740243, + "grad_norm": 0.3967355489730835, + "learning_rate": 1.9415296520467647e-05, + "loss": 0.0722, + "step": 100330 + }, + { + "epoch": 3.6463405770768222, + "grad_norm": 0.3051000237464905, + "learning_rate": 1.941023845862857e-05, + "loss": 0.0878, + "step": 100340 + }, + { + "epoch": 3.6467039755796207, + "grad_norm": 0.5483397245407104, + "learning_rate": 1.940518063763243e-05, + "loss": 0.0832, + "step": 100350 + }, + { + "epoch": 3.6470673740824187, + "grad_norm": 0.9624475836753845, + "learning_rate": 1.9400123057697147e-05, + "loss": 0.0844, + "step": 100360 + }, + { + "epoch": 3.647430772585217, + "grad_norm": 1.777334213256836, + "learning_rate": 1.9395065719040635e-05, + "loss": 0.0706, + "step": 100370 + }, + { + "epoch": 3.647794171088015, + "grad_norm": 0.42129790782928467, + "learning_rate": 1.9390008621880806e-05, + "loss": 0.0468, + "step": 100380 + }, + { + "epoch": 3.648157569590813, + "grad_norm": 0.38181936740875244, + "learning_rate": 1.938495176643554e-05, + "loss": 0.0748, + "step": 100390 + }, + { + "epoch": 3.6485209680936115, + "grad_norm": 0.6296722888946533, + "learning_rate": 1.9379895152922722e-05, + "loss": 0.0855, + "step": 100400 + }, + { + "epoch": 3.6488843665964095, + "grad_norm": 0.39220812916755676, + "learning_rate": 1.9374838781560228e-05, + "loss": 0.1038, + "step": 100410 + }, + { + "epoch": 3.649247765099208, + "grad_norm": 0.5395467281341553, + "learning_rate": 1.936978265256592e-05, + "loss": 0.0801, + "step": 100420 + }, + { + "epoch": 3.649611163602006, + "grad_norm": 0.5562071800231934, + "learning_rate": 1.9364726766157644e-05, + "loss": 0.0648, + "step": 100430 + }, + { + "epoch": 3.649974562104804, + "grad_norm": 0.5015496611595154, + "learning_rate": 1.935967112255324e-05, + "loss": 0.0943, + "step": 100440 + }, + { + "epoch": 3.6503379606076023, + "grad_norm": 0.5984257459640503, + "learning_rate": 1.935461572197054e-05, + "loss": 0.0724, + "step": 100450 + }, + { + "epoch": 3.6507013591104007, + "grad_norm": 0.3558928370475769, + "learning_rate": 1.9349560564627354e-05, + "loss": 4.4573, + "step": 100460 + }, + { + "epoch": 3.6510647576131987, + "grad_norm": 0.37172460556030273, + "learning_rate": 1.9344505650741506e-05, + "loss": 0.0577, + "step": 100470 + }, + { + "epoch": 3.6514281561159967, + "grad_norm": 0.4635004997253418, + "learning_rate": 1.933945098053079e-05, + "loss": 0.0586, + "step": 100480 + }, + { + "epoch": 3.651791554618795, + "grad_norm": 0.6180506944656372, + "learning_rate": 1.933439655421299e-05, + "loss": 0.0936, + "step": 100490 + }, + { + "epoch": 3.652154953121593, + "grad_norm": 0.4719576835632324, + "learning_rate": 1.932934237200588e-05, + "loss": 0.0786, + "step": 100500 + }, + { + "epoch": 3.6525183516243915, + "grad_norm": 0.47542062401771545, + "learning_rate": 1.932428843412723e-05, + "loss": 0.0847, + "step": 100510 + }, + { + "epoch": 3.6528817501271895, + "grad_norm": 0.40580594539642334, + "learning_rate": 1.93192347407948e-05, + "loss": 0.1121, + "step": 100520 + }, + { + "epoch": 3.6532451486299875, + "grad_norm": 0.6533267498016357, + "learning_rate": 1.9314181292226337e-05, + "loss": 0.0856, + "step": 100530 + }, + { + "epoch": 3.653608547132786, + "grad_norm": 0.41226625442504883, + "learning_rate": 1.930912808863957e-05, + "loss": 0.0721, + "step": 100540 + }, + { + "epoch": 3.653971945635584, + "grad_norm": 0.9230170249938965, + "learning_rate": 1.9304075130252212e-05, + "loss": 0.0788, + "step": 100550 + }, + { + "epoch": 3.6543353441383823, + "grad_norm": 1.469427227973938, + "learning_rate": 1.9299022417282006e-05, + "loss": 0.0899, + "step": 100560 + }, + { + "epoch": 3.6546987426411803, + "grad_norm": 0.38428354263305664, + "learning_rate": 1.9293969949946638e-05, + "loss": 0.0586, + "step": 100570 + }, + { + "epoch": 3.6550621411439783, + "grad_norm": 1.4046002626419067, + "learning_rate": 1.9288917728463802e-05, + "loss": 0.0668, + "step": 100580 + }, + { + "epoch": 3.6554255396467767, + "grad_norm": 0.8734479546546936, + "learning_rate": 1.9283865753051177e-05, + "loss": 0.0682, + "step": 100590 + }, + { + "epoch": 3.6557889381495747, + "grad_norm": 1.0090571641921997, + "learning_rate": 1.927881402392644e-05, + "loss": 0.0827, + "step": 100600 + }, + { + "epoch": 3.656152336652373, + "grad_norm": 0.4134766459465027, + "learning_rate": 1.927376254130725e-05, + "loss": 0.0897, + "step": 100610 + }, + { + "epoch": 3.656515735155171, + "grad_norm": 0.5063420534133911, + "learning_rate": 1.9268711305411262e-05, + "loss": 0.0894, + "step": 100620 + }, + { + "epoch": 3.656879133657969, + "grad_norm": 0.6011788845062256, + "learning_rate": 1.9263660316456114e-05, + "loss": 0.0595, + "step": 100630 + }, + { + "epoch": 3.6572425321607676, + "grad_norm": 0.7085416316986084, + "learning_rate": 1.925860957465942e-05, + "loss": 0.0748, + "step": 100640 + }, + { + "epoch": 3.6576059306635655, + "grad_norm": 0.7953318357467651, + "learning_rate": 1.925355908023882e-05, + "loss": 0.0856, + "step": 100650 + }, + { + "epoch": 3.657969329166364, + "grad_norm": 1.6228654384613037, + "learning_rate": 1.924901384694669e-05, + "loss": 3.1994, + "step": 100660 + }, + { + "epoch": 3.658332727669162, + "grad_norm": 0.5507588982582092, + "learning_rate": 1.9243963823140153e-05, + "loss": 0.0693, + "step": 100670 + }, + { + "epoch": 3.65869612617196, + "grad_norm": 0.5793731212615967, + "learning_rate": 1.9238914047340737e-05, + "loss": 0.0565, + "step": 100680 + }, + { + "epoch": 3.6590595246747584, + "grad_norm": 3.112278699874878, + "learning_rate": 1.9233864519766014e-05, + "loss": 0.0867, + "step": 100690 + }, + { + "epoch": 3.6594229231775564, + "grad_norm": 1.09683096408844, + "learning_rate": 1.922881524063356e-05, + "loss": 0.0739, + "step": 100700 + }, + { + "epoch": 3.659786321680355, + "grad_norm": 0.35297101736068726, + "learning_rate": 1.9223766210160906e-05, + "loss": 0.0809, + "step": 100710 + }, + { + "epoch": 3.6601497201831528, + "grad_norm": 0.3563065230846405, + "learning_rate": 1.9218717428565626e-05, + "loss": 0.0656, + "step": 100720 + }, + { + "epoch": 3.6605131186859508, + "grad_norm": 0.7128033638000488, + "learning_rate": 1.9213668896065246e-05, + "loss": 0.0731, + "step": 100730 + }, + { + "epoch": 3.660876517188749, + "grad_norm": 0.4847666919231415, + "learning_rate": 1.920862061287728e-05, + "loss": 0.0789, + "step": 100740 + }, + { + "epoch": 3.6612399156915476, + "grad_norm": 0.5890012383460999, + "learning_rate": 1.9203572579219246e-05, + "loss": 0.0941, + "step": 100750 + }, + { + "epoch": 3.6616033141943456, + "grad_norm": 0.40128976106643677, + "learning_rate": 1.919852479530864e-05, + "loss": 0.091, + "step": 100760 + }, + { + "epoch": 3.6619667126971436, + "grad_norm": 0.9814205169677734, + "learning_rate": 1.919347726136297e-05, + "loss": 0.0666, + "step": 100770 + }, + { + "epoch": 3.662330111199942, + "grad_norm": 0.8055763244628906, + "learning_rate": 1.9188429977599705e-05, + "loss": 0.7617, + "step": 100780 + }, + { + "epoch": 3.66269350970274, + "grad_norm": 0.596468985080719, + "learning_rate": 1.918338294423631e-05, + "loss": 1.6764, + "step": 100790 + }, + { + "epoch": 3.6630569082055384, + "grad_norm": 0.9450867772102356, + "learning_rate": 1.9178336161490244e-05, + "loss": 0.082, + "step": 100800 + }, + { + "epoch": 3.6630569082055384, + "eval_loss": 0.2825222611427307, + "eval_runtime": 179.407, + "eval_samples_per_second": 41.325, + "eval_steps_per_second": 5.167, + "eval_wer": 0.13438742353005245, + "step": 100800 + }, + { + "epoch": 3.6634203067083364, + "grad_norm": 2.9087324142456055, + "learning_rate": 1.917328962957896e-05, + "loss": 0.0853, + "step": 100810 + }, + { + "epoch": 3.6637837052111344, + "grad_norm": 0.3305929899215698, + "learning_rate": 1.9168243348719898e-05, + "loss": 0.0967, + "step": 100820 + }, + { + "epoch": 3.664147103713933, + "grad_norm": 1.6349554061889648, + "learning_rate": 1.9163197319130486e-05, + "loss": 0.0557, + "step": 100830 + }, + { + "epoch": 3.664510502216731, + "grad_norm": 1.9938950538635254, + "learning_rate": 1.915815154102813e-05, + "loss": 0.193, + "step": 100840 + }, + { + "epoch": 3.6648739007195292, + "grad_norm": 0.6496366262435913, + "learning_rate": 1.915310601463023e-05, + "loss": 0.0769, + "step": 100850 + }, + { + "epoch": 3.665237299222327, + "grad_norm": 0.5455463528633118, + "learning_rate": 1.91480607401542e-05, + "loss": 0.0835, + "step": 100860 + }, + { + "epoch": 3.665600697725125, + "grad_norm": 0.6552872657775879, + "learning_rate": 1.9143015717817408e-05, + "loss": 0.0774, + "step": 100870 + }, + { + "epoch": 3.6659640962279236, + "grad_norm": 0.6207099556922913, + "learning_rate": 1.913797094783723e-05, + "loss": 0.0613, + "step": 100880 + }, + { + "epoch": 3.6663274947307216, + "grad_norm": 1.0876959562301636, + "learning_rate": 1.913292643043103e-05, + "loss": 0.0825, + "step": 100890 + }, + { + "epoch": 3.66669089323352, + "grad_norm": 0.5958231687545776, + "learning_rate": 1.912788216581614e-05, + "loss": 0.0801, + "step": 100900 + }, + { + "epoch": 3.667054291736318, + "grad_norm": 0.7565116882324219, + "learning_rate": 1.912283815420993e-05, + "loss": 0.0776, + "step": 100910 + }, + { + "epoch": 3.667417690239116, + "grad_norm": 0.23336388170719147, + "learning_rate": 1.9117794395829706e-05, + "loss": 0.0467, + "step": 100920 + }, + { + "epoch": 3.6677810887419144, + "grad_norm": 0.23995588719844818, + "learning_rate": 1.91127508908928e-05, + "loss": 0.0567, + "step": 100930 + }, + { + "epoch": 3.6681444872447124, + "grad_norm": 5.810134410858154, + "learning_rate": 1.9107707639616495e-05, + "loss": 0.0745, + "step": 100940 + }, + { + "epoch": 3.668507885747511, + "grad_norm": 0.9874204993247986, + "learning_rate": 1.9102664642218118e-05, + "loss": 0.0621, + "step": 100950 + }, + { + "epoch": 3.668871284250309, + "grad_norm": 0.8151770830154419, + "learning_rate": 1.9097621898914937e-05, + "loss": 0.0711, + "step": 100960 + }, + { + "epoch": 3.669234682753107, + "grad_norm": 13.424245834350586, + "learning_rate": 1.9092579409924227e-05, + "loss": 0.6562, + "step": 100970 + }, + { + "epoch": 3.6695980812559053, + "grad_norm": 0.8328826427459717, + "learning_rate": 1.9087537175463252e-05, + "loss": 0.0722, + "step": 100980 + }, + { + "epoch": 3.6699614797587032, + "grad_norm": 0.9819945096969604, + "learning_rate": 1.9082495195749252e-05, + "loss": 0.0812, + "step": 100990 + }, + { + "epoch": 3.6703248782615017, + "grad_norm": 1.7970198392868042, + "learning_rate": 1.907745347099949e-05, + "loss": 0.1063, + "step": 101000 + }, + { + "epoch": 3.6706882767642997, + "grad_norm": 2.1204395294189453, + "learning_rate": 1.9072412001431188e-05, + "loss": 0.0787, + "step": 101010 + }, + { + "epoch": 3.6710516752670976, + "grad_norm": 1.9938024282455444, + "learning_rate": 1.906737078726156e-05, + "loss": 0.0694, + "step": 101020 + }, + { + "epoch": 3.671415073769896, + "grad_norm": 0.5936083793640137, + "learning_rate": 1.9062329828707818e-05, + "loss": 0.0937, + "step": 101030 + }, + { + "epoch": 3.6717784722726945, + "grad_norm": 0.3774571716785431, + "learning_rate": 1.9057289125987143e-05, + "loss": 0.0657, + "step": 101040 + }, + { + "epoch": 3.6721418707754925, + "grad_norm": 0.6404164433479309, + "learning_rate": 1.905224867931675e-05, + "loss": 0.0831, + "step": 101050 + }, + { + "epoch": 3.6725052692782905, + "grad_norm": 2.3458011150360107, + "learning_rate": 1.90472084889138e-05, + "loss": 0.0729, + "step": 101060 + }, + { + "epoch": 3.672868667781089, + "grad_norm": 0.458993136882782, + "learning_rate": 1.9042168554995453e-05, + "loss": 0.065, + "step": 101070 + }, + { + "epoch": 3.673232066283887, + "grad_norm": 0.4776252210140228, + "learning_rate": 1.9037128877778865e-05, + "loss": 0.0655, + "step": 101080 + }, + { + "epoch": 3.6735954647866853, + "grad_norm": 3.986689567565918, + "learning_rate": 1.903208945748117e-05, + "loss": 0.1049, + "step": 101090 + }, + { + "epoch": 3.6739588632894833, + "grad_norm": 0.9909849762916565, + "learning_rate": 1.9027050294319513e-05, + "loss": 0.0937, + "step": 101100 + }, + { + "epoch": 3.6743222617922813, + "grad_norm": 0.7600964903831482, + "learning_rate": 1.902201138851101e-05, + "loss": 0.0981, + "step": 101110 + }, + { + "epoch": 3.6746856602950797, + "grad_norm": 0.587383508682251, + "learning_rate": 1.9016972740272763e-05, + "loss": 0.0635, + "step": 101120 + }, + { + "epoch": 3.6750490587978777, + "grad_norm": 0.8672456741333008, + "learning_rate": 1.901193434982187e-05, + "loss": 0.073, + "step": 101130 + }, + { + "epoch": 3.675412457300676, + "grad_norm": 1.177628517150879, + "learning_rate": 1.9006896217375426e-05, + "loss": 0.0805, + "step": 101140 + }, + { + "epoch": 3.675775855803474, + "grad_norm": 0.7139699459075928, + "learning_rate": 1.9001858343150496e-05, + "loss": 0.0717, + "step": 101150 + }, + { + "epoch": 3.676139254306272, + "grad_norm": 3.405705213546753, + "learning_rate": 1.8996820727364155e-05, + "loss": 0.1095, + "step": 101160 + }, + { + "epoch": 3.6765026528090705, + "grad_norm": 22.62385368347168, + "learning_rate": 1.899178337023345e-05, + "loss": 0.1336, + "step": 101170 + }, + { + "epoch": 3.6768660513118685, + "grad_norm": 0.628099799156189, + "learning_rate": 1.8986746271975406e-05, + "loss": 0.0731, + "step": 101180 + }, + { + "epoch": 3.677229449814667, + "grad_norm": 0.5328086018562317, + "learning_rate": 1.8981709432807086e-05, + "loss": 0.1353, + "step": 101190 + }, + { + "epoch": 3.677592848317465, + "grad_norm": 0.4889640212059021, + "learning_rate": 1.897667285294549e-05, + "loss": 0.0941, + "step": 101200 + }, + { + "epoch": 3.677956246820263, + "grad_norm": 0.6223835945129395, + "learning_rate": 1.8971636532607627e-05, + "loss": 0.0895, + "step": 101210 + }, + { + "epoch": 3.6783196453230613, + "grad_norm": 0.3453806936740875, + "learning_rate": 1.8966600472010505e-05, + "loss": 0.0618, + "step": 101220 + }, + { + "epoch": 3.6786830438258593, + "grad_norm": 0.47201159596443176, + "learning_rate": 1.8961564671371084e-05, + "loss": 0.0565, + "step": 101230 + }, + { + "epoch": 3.6790464423286577, + "grad_norm": 0.44613829255104065, + "learning_rate": 1.895652913090637e-05, + "loss": 0.0892, + "step": 101240 + }, + { + "epoch": 3.6794098408314557, + "grad_norm": 1.7807142734527588, + "learning_rate": 1.8951493850833314e-05, + "loss": 0.0829, + "step": 101250 + }, + { + "epoch": 3.6797732393342537, + "grad_norm": 0.4978749752044678, + "learning_rate": 1.8946458831368866e-05, + "loss": 0.0808, + "step": 101260 + }, + { + "epoch": 3.680136637837052, + "grad_norm": 0.5074789524078369, + "learning_rate": 1.894142407272997e-05, + "loss": 0.0831, + "step": 101270 + }, + { + "epoch": 3.68050003633985, + "grad_norm": 0.3504372537136078, + "learning_rate": 1.893638957513354e-05, + "loss": 0.2878, + "step": 101280 + }, + { + "epoch": 3.6808634348426486, + "grad_norm": 0.5575164556503296, + "learning_rate": 1.8931355338796523e-05, + "loss": 0.0873, + "step": 101290 + }, + { + "epoch": 3.6812268333454465, + "grad_norm": 1.6178854703903198, + "learning_rate": 1.892632136393581e-05, + "loss": 0.0886, + "step": 101300 + }, + { + "epoch": 3.6815902318482445, + "grad_norm": 1.254417896270752, + "learning_rate": 1.89212876507683e-05, + "loss": 0.0666, + "step": 101310 + }, + { + "epoch": 3.681953630351043, + "grad_norm": 0.5396857857704163, + "learning_rate": 1.8916254199510867e-05, + "loss": 0.0623, + "step": 101320 + }, + { + "epoch": 3.6823170288538414, + "grad_norm": 0.29756027460098267, + "learning_rate": 1.8911221010380403e-05, + "loss": 0.0715, + "step": 101330 + }, + { + "epoch": 3.6826804273566394, + "grad_norm": 0.4690750539302826, + "learning_rate": 1.8906188083593762e-05, + "loss": 1.6117, + "step": 101340 + }, + { + "epoch": 3.6830438258594373, + "grad_norm": 3.301415205001831, + "learning_rate": 1.8901155419367796e-05, + "loss": 0.1075, + "step": 101350 + }, + { + "epoch": 3.6834072243622358, + "grad_norm": 0.5322970151901245, + "learning_rate": 1.8896123017919344e-05, + "loss": 0.0663, + "step": 101360 + }, + { + "epoch": 3.6837706228650338, + "grad_norm": 0.7713887691497803, + "learning_rate": 1.889109087946522e-05, + "loss": 0.0798, + "step": 101370 + }, + { + "epoch": 3.684134021367832, + "grad_norm": 0.5864616632461548, + "learning_rate": 1.8886059004222266e-05, + "loss": 0.0631, + "step": 101380 + }, + { + "epoch": 3.68449741987063, + "grad_norm": 0.5340792536735535, + "learning_rate": 1.888102739240728e-05, + "loss": 0.0708, + "step": 101390 + }, + { + "epoch": 3.684860818373428, + "grad_norm": 0.6674026250839233, + "learning_rate": 1.8875996044237047e-05, + "loss": 0.0886, + "step": 101400 + }, + { + "epoch": 3.684860818373428, + "eval_loss": 0.32165661454200745, + "eval_runtime": 180.5952, + "eval_samples_per_second": 41.053, + "eval_steps_per_second": 5.133, + "eval_wer": 0.13550383938134225, + "step": 101400 + }, + { + "epoch": 3.6852242168762266, + "grad_norm": 2.9790070056915283, + "learning_rate": 1.887096495992836e-05, + "loss": 0.1033, + "step": 101410 + }, + { + "epoch": 3.6855876153790246, + "grad_norm": 177.49661254882812, + "learning_rate": 1.886593413969797e-05, + "loss": 2.0711, + "step": 101420 + }, + { + "epoch": 3.685951013881823, + "grad_norm": 1.7257159948349, + "learning_rate": 1.8860903583762665e-05, + "loss": 0.0661, + "step": 101430 + }, + { + "epoch": 3.686314412384621, + "grad_norm": 0.6517038941383362, + "learning_rate": 1.885587329233918e-05, + "loss": 0.1329, + "step": 101440 + }, + { + "epoch": 3.686677810887419, + "grad_norm": 1.2107021808624268, + "learning_rate": 1.885084326564426e-05, + "loss": 0.0769, + "step": 101450 + }, + { + "epoch": 3.6870412093902174, + "grad_norm": 0.4862198829650879, + "learning_rate": 1.8845813503894622e-05, + "loss": 0.0795, + "step": 101460 + }, + { + "epoch": 3.6874046078930154, + "grad_norm": 0.766268253326416, + "learning_rate": 1.884078400730697e-05, + "loss": 0.0593, + "step": 101470 + }, + { + "epoch": 3.687768006395814, + "grad_norm": 0.39750218391418457, + "learning_rate": 1.8835754776098035e-05, + "loss": 0.0729, + "step": 101480 + }, + { + "epoch": 3.688131404898612, + "grad_norm": 0.4829258322715759, + "learning_rate": 1.8830725810484493e-05, + "loss": 0.0722, + "step": 101490 + }, + { + "epoch": 3.68849480340141, + "grad_norm": 0.6579453349113464, + "learning_rate": 1.8825697110683025e-05, + "loss": 0.0968, + "step": 101500 + }, + { + "epoch": 3.688858201904208, + "grad_norm": 0.4035875201225281, + "learning_rate": 1.882066867691029e-05, + "loss": 0.0727, + "step": 101510 + }, + { + "epoch": 3.689221600407006, + "grad_norm": 0.5288979411125183, + "learning_rate": 1.8815640509382964e-05, + "loss": 0.0677, + "step": 101520 + }, + { + "epoch": 3.6895849989098046, + "grad_norm": 1.3554776906967163, + "learning_rate": 1.881061260831769e-05, + "loss": 0.069, + "step": 101530 + }, + { + "epoch": 3.6899483974126026, + "grad_norm": 1.4026339054107666, + "learning_rate": 1.880558497393109e-05, + "loss": 0.0837, + "step": 101540 + }, + { + "epoch": 3.6903117959154006, + "grad_norm": 0.4800674319267273, + "learning_rate": 1.8800557606439798e-05, + "loss": 0.067, + "step": 101550 + }, + { + "epoch": 3.690675194418199, + "grad_norm": 0.9763014912605286, + "learning_rate": 1.879553050606041e-05, + "loss": 0.0759, + "step": 101560 + }, + { + "epoch": 3.691038592920997, + "grad_norm": 0.470887690782547, + "learning_rate": 1.8790503673009548e-05, + "loss": 1.3916, + "step": 101570 + }, + { + "epoch": 3.6914019914237954, + "grad_norm": 1.4094375371932983, + "learning_rate": 1.8785477107503784e-05, + "loss": 0.1743, + "step": 101580 + }, + { + "epoch": 3.6917653899265934, + "grad_norm": 0.4633733928203583, + "learning_rate": 1.8780450809759707e-05, + "loss": 0.0662, + "step": 101590 + }, + { + "epoch": 3.6921287884293914, + "grad_norm": 0.913625180721283, + "learning_rate": 1.8775424779993873e-05, + "loss": 0.0774, + "step": 101600 + }, + { + "epoch": 3.69249218693219, + "grad_norm": 0.5545070171356201, + "learning_rate": 1.8770399018422824e-05, + "loss": 0.0655, + "step": 101610 + }, + { + "epoch": 3.6928555854349883, + "grad_norm": 0.33811673521995544, + "learning_rate": 1.876537352526313e-05, + "loss": 0.0594, + "step": 101620 + }, + { + "epoch": 3.6932189839377862, + "grad_norm": 0.8966468572616577, + "learning_rate": 1.8760348300731308e-05, + "loss": 0.0891, + "step": 101630 + }, + { + "epoch": 3.6935823824405842, + "grad_norm": 0.5035248398780823, + "learning_rate": 1.8755323345043878e-05, + "loss": 0.0728, + "step": 101640 + }, + { + "epoch": 3.6939457809433827, + "grad_norm": 0.8029829263687134, + "learning_rate": 1.8750298658417345e-05, + "loss": 0.0732, + "step": 101650 + }, + { + "epoch": 3.6943091794461806, + "grad_norm": 0.46018436551094055, + "learning_rate": 1.8745274241068196e-05, + "loss": 0.068, + "step": 101660 + }, + { + "epoch": 3.694672577948979, + "grad_norm": 0.6424596905708313, + "learning_rate": 1.8740250093212934e-05, + "loss": 0.0622, + "step": 101670 + }, + { + "epoch": 3.695035976451777, + "grad_norm": 0.5092839002609253, + "learning_rate": 1.8735226215068026e-05, + "loss": 0.0755, + "step": 101680 + }, + { + "epoch": 3.695399374954575, + "grad_norm": 0.8991031050682068, + "learning_rate": 1.8730202606849933e-05, + "loss": 0.089, + "step": 101690 + }, + { + "epoch": 3.6957627734573735, + "grad_norm": 1.3124310970306396, + "learning_rate": 1.8725179268775088e-05, + "loss": 0.0809, + "step": 101700 + }, + { + "epoch": 3.6961261719601715, + "grad_norm": 0.49164462089538574, + "learning_rate": 1.872015620105995e-05, + "loss": 0.0835, + "step": 101710 + }, + { + "epoch": 3.69648957046297, + "grad_norm": 0.4127480089664459, + "learning_rate": 1.8715133403920942e-05, + "loss": 0.064, + "step": 101720 + }, + { + "epoch": 3.696852968965768, + "grad_norm": 0.4940035939216614, + "learning_rate": 1.871011087757447e-05, + "loss": 0.067, + "step": 101730 + }, + { + "epoch": 3.697216367468566, + "grad_norm": 0.678022563457489, + "learning_rate": 1.8705088622236944e-05, + "loss": 0.084, + "step": 101740 + }, + { + "epoch": 3.6975797659713643, + "grad_norm": 0.5053865909576416, + "learning_rate": 1.870006663812474e-05, + "loss": 0.0637, + "step": 101750 + }, + { + "epoch": 3.6979431644741623, + "grad_norm": 0.49947378039360046, + "learning_rate": 1.869504492545426e-05, + "loss": 0.0501, + "step": 101760 + }, + { + "epoch": 3.6983065629769607, + "grad_norm": 0.5758054256439209, + "learning_rate": 1.869002348444186e-05, + "loss": 0.0615, + "step": 101770 + }, + { + "epoch": 3.6986699614797587, + "grad_norm": 0.6957302689552307, + "learning_rate": 1.8685002315303902e-05, + "loss": 0.0997, + "step": 101780 + }, + { + "epoch": 3.6990333599825567, + "grad_norm": 0.32749390602111816, + "learning_rate": 1.867998141825672e-05, + "loss": 0.0809, + "step": 101790 + }, + { + "epoch": 3.699396758485355, + "grad_norm": 0.7871354818344116, + "learning_rate": 1.8674960793516644e-05, + "loss": 0.077, + "step": 101800 + }, + { + "epoch": 3.699760156988153, + "grad_norm": 0.7597861289978027, + "learning_rate": 1.8669940441300013e-05, + "loss": 0.0736, + "step": 101810 + }, + { + "epoch": 3.7001235554909515, + "grad_norm": 0.5100244879722595, + "learning_rate": 1.8664920361823123e-05, + "loss": 0.0686, + "step": 101820 + }, + { + "epoch": 3.7004869539937495, + "grad_norm": 0.7316411733627319, + "learning_rate": 1.865990055530228e-05, + "loss": 0.0686, + "step": 101830 + }, + { + "epoch": 3.7008503524965475, + "grad_norm": 0.4531911313533783, + "learning_rate": 1.865488102195376e-05, + "loss": 0.0706, + "step": 101840 + }, + { + "epoch": 3.701213750999346, + "grad_norm": 0.6008373498916626, + "learning_rate": 1.864986176199383e-05, + "loss": 0.0756, + "step": 101850 + }, + { + "epoch": 3.701577149502144, + "grad_norm": 1.114786982536316, + "learning_rate": 1.8644842775638776e-05, + "loss": 0.1311, + "step": 101860 + }, + { + "epoch": 3.7019405480049423, + "grad_norm": 0.8214989304542542, + "learning_rate": 1.8639824063104832e-05, + "loss": 0.0723, + "step": 101870 + }, + { + "epoch": 3.7023039465077403, + "grad_norm": 2.9424381256103516, + "learning_rate": 1.863480562460824e-05, + "loss": 0.0718, + "step": 101880 + }, + { + "epoch": 3.7026673450105383, + "grad_norm": 1.3206162452697754, + "learning_rate": 1.862978746036523e-05, + "loss": 0.07, + "step": 101890 + }, + { + "epoch": 3.7030307435133367, + "grad_norm": 1.1394771337509155, + "learning_rate": 1.8624769570592e-05, + "loss": 0.1062, + "step": 101900 + }, + { + "epoch": 3.703394142016135, + "grad_norm": 0.38469168543815613, + "learning_rate": 1.8619751955504776e-05, + "loss": 0.0607, + "step": 101910 + }, + { + "epoch": 3.703757540518933, + "grad_norm": 0.4007944166660309, + "learning_rate": 1.861473461531974e-05, + "loss": 0.0679, + "step": 101920 + }, + { + "epoch": 3.704120939021731, + "grad_norm": 0.5937279462814331, + "learning_rate": 1.860971755025307e-05, + "loss": 0.0654, + "step": 101930 + }, + { + "epoch": 3.7044843375245295, + "grad_norm": 1.9605783224105835, + "learning_rate": 1.860470076052092e-05, + "loss": 0.078, + "step": 101940 + }, + { + "epoch": 3.7048477360273275, + "grad_norm": 0.9154660701751709, + "learning_rate": 1.859968424633948e-05, + "loss": 0.0746, + "step": 101950 + }, + { + "epoch": 3.705211134530126, + "grad_norm": 1.3195884227752686, + "learning_rate": 1.8594668007924863e-05, + "loss": 0.0618, + "step": 101960 + }, + { + "epoch": 3.705574533032924, + "grad_norm": 0.41499805450439453, + "learning_rate": 1.8589652045493216e-05, + "loss": 1.6398, + "step": 101970 + }, + { + "epoch": 3.705937931535722, + "grad_norm": 0.4965570569038391, + "learning_rate": 1.8584636359260656e-05, + "loss": 0.0857, + "step": 101980 + }, + { + "epoch": 3.7063013300385204, + "grad_norm": 0.46944138407707214, + "learning_rate": 1.8579620949443275e-05, + "loss": 0.089, + "step": 101990 + }, + { + "epoch": 3.7066647285413183, + "grad_norm": 0.49834463000297546, + "learning_rate": 1.8574605816257195e-05, + "loss": 0.075, + "step": 102000 + }, + { + "epoch": 3.7066647285413183, + "eval_loss": 0.3085034489631653, + "eval_runtime": 179.7736, + "eval_samples_per_second": 41.241, + "eval_steps_per_second": 5.156, + "eval_wer": 0.1340878973260479, + "step": 102000 + }, + { + "epoch": 3.7070281270441168, + "grad_norm": 2.115856647491455, + "learning_rate": 1.856959095991849e-05, + "loss": 0.0773, + "step": 102010 + }, + { + "epoch": 3.7073915255469148, + "grad_norm": 0.5380850434303284, + "learning_rate": 1.856457638064323e-05, + "loss": 0.0582, + "step": 102020 + }, + { + "epoch": 3.7077549240497127, + "grad_norm": 0.6383968591690063, + "learning_rate": 1.8559562078647477e-05, + "loss": 0.1424, + "step": 102030 + }, + { + "epoch": 3.708118322552511, + "grad_norm": 0.5777453184127808, + "learning_rate": 1.855454805414727e-05, + "loss": 0.0883, + "step": 102040 + }, + { + "epoch": 3.708481721055309, + "grad_norm": 0.47622185945510864, + "learning_rate": 1.8549534307358663e-05, + "loss": 0.0826, + "step": 102050 + }, + { + "epoch": 3.7088451195581076, + "grad_norm": 0.34270182251930237, + "learning_rate": 1.854452083849767e-05, + "loss": 0.0669, + "step": 102060 + }, + { + "epoch": 3.7092085180609056, + "grad_norm": 0.6671618223190308, + "learning_rate": 1.853950764778031e-05, + "loss": 0.0541, + "step": 102070 + }, + { + "epoch": 3.7095719165637036, + "grad_norm": 8.398327827453613, + "learning_rate": 1.8534494735422574e-05, + "loss": 0.0621, + "step": 102080 + }, + { + "epoch": 3.709935315066502, + "grad_norm": 0.7380484342575073, + "learning_rate": 1.852948210164045e-05, + "loss": 0.0945, + "step": 102090 + }, + { + "epoch": 3.7102987135693, + "grad_norm": 0.9097635746002197, + "learning_rate": 1.8524469746649925e-05, + "loss": 0.0949, + "step": 102100 + }, + { + "epoch": 3.7106621120720984, + "grad_norm": 0.8075299263000488, + "learning_rate": 1.8519457670666962e-05, + "loss": 0.0888, + "step": 102110 + }, + { + "epoch": 3.7110255105748964, + "grad_norm": 0.42995816469192505, + "learning_rate": 1.851444587390751e-05, + "loss": 0.0541, + "step": 102120 + }, + { + "epoch": 3.7113889090776944, + "grad_norm": 0.4821558892726898, + "learning_rate": 1.85094343565875e-05, + "loss": 0.0657, + "step": 102130 + }, + { + "epoch": 3.711752307580493, + "grad_norm": 0.7254829406738281, + "learning_rate": 1.850442311892288e-05, + "loss": 0.1247, + "step": 102140 + }, + { + "epoch": 3.712115706083291, + "grad_norm": 0.9396657943725586, + "learning_rate": 1.8499412161129554e-05, + "loss": 0.0759, + "step": 102150 + }, + { + "epoch": 3.712479104586089, + "grad_norm": 0.4930213391780853, + "learning_rate": 1.849440148342343e-05, + "loss": 0.0564, + "step": 102160 + }, + { + "epoch": 3.712842503088887, + "grad_norm": 1.0215650796890259, + "learning_rate": 1.8489391086020402e-05, + "loss": 0.0667, + "step": 102170 + }, + { + "epoch": 3.713205901591685, + "grad_norm": 0.5984035730361938, + "learning_rate": 1.8484380969136332e-05, + "loss": 0.0703, + "step": 102180 + }, + { + "epoch": 3.7135693000944836, + "grad_norm": 0.41362717747688293, + "learning_rate": 1.8479371132987116e-05, + "loss": 0.0955, + "step": 102190 + }, + { + "epoch": 3.713932698597282, + "grad_norm": 0.6265028119087219, + "learning_rate": 1.84743615777886e-05, + "loss": 0.0789, + "step": 102200 + }, + { + "epoch": 3.71429609710008, + "grad_norm": 0.8152180910110474, + "learning_rate": 1.8469352303756625e-05, + "loss": 0.0844, + "step": 102210 + }, + { + "epoch": 3.714659495602878, + "grad_norm": 5.7836480140686035, + "learning_rate": 1.846434331110702e-05, + "loss": 0.0633, + "step": 102220 + }, + { + "epoch": 3.7150228941056764, + "grad_norm": 0.7578589916229248, + "learning_rate": 1.84593346000556e-05, + "loss": 0.0684, + "step": 102230 + }, + { + "epoch": 3.7153862926084744, + "grad_norm": 0.5012345314025879, + "learning_rate": 1.845432617081819e-05, + "loss": 2.2437, + "step": 102240 + }, + { + "epoch": 3.715749691111273, + "grad_norm": 4.744391918182373, + "learning_rate": 1.8449318023610575e-05, + "loss": 0.0819, + "step": 102250 + }, + { + "epoch": 3.716113089614071, + "grad_norm": 0.4452058672904968, + "learning_rate": 1.8444310158648535e-05, + "loss": 0.0791, + "step": 102260 + }, + { + "epoch": 3.716476488116869, + "grad_norm": 6.5998005867004395, + "learning_rate": 1.843930257614785e-05, + "loss": 0.1203, + "step": 102270 + }, + { + "epoch": 3.7168398866196672, + "grad_norm": 0.35066086053848267, + "learning_rate": 1.8434295276324265e-05, + "loss": 0.0677, + "step": 102280 + }, + { + "epoch": 3.7172032851224652, + "grad_norm": 0.43617117404937744, + "learning_rate": 1.8429288259393544e-05, + "loss": 0.0723, + "step": 102290 + }, + { + "epoch": 3.7175666836252637, + "grad_norm": 0.6377655267715454, + "learning_rate": 1.842428152557141e-05, + "loss": 0.0611, + "step": 102300 + }, + { + "epoch": 3.7179300821280616, + "grad_norm": 0.7913844585418701, + "learning_rate": 1.8419275075073594e-05, + "loss": 0.0635, + "step": 102310 + }, + { + "epoch": 3.7182934806308596, + "grad_norm": 0.437308132648468, + "learning_rate": 1.8414268908115786e-05, + "loss": 0.0659, + "step": 102320 + }, + { + "epoch": 3.718656879133658, + "grad_norm": 1.292069673538208, + "learning_rate": 1.840926302491371e-05, + "loss": 0.0669, + "step": 102330 + }, + { + "epoch": 3.719020277636456, + "grad_norm": 0.46069836616516113, + "learning_rate": 1.840425742568304e-05, + "loss": 0.0696, + "step": 102340 + }, + { + "epoch": 3.7193836761392545, + "grad_norm": 0.9697392582893372, + "learning_rate": 1.8399252110639454e-05, + "loss": 0.0721, + "step": 102350 + }, + { + "epoch": 3.7197470746420525, + "grad_norm": 1.3439652919769287, + "learning_rate": 1.8394247079998605e-05, + "loss": 0.0859, + "step": 102360 + }, + { + "epoch": 3.7201104731448504, + "grad_norm": 0.5652872920036316, + "learning_rate": 1.8389242333976138e-05, + "loss": 0.05, + "step": 102370 + }, + { + "epoch": 3.720473871647649, + "grad_norm": 0.6099680662155151, + "learning_rate": 1.8384237872787706e-05, + "loss": 0.069, + "step": 102380 + }, + { + "epoch": 3.720837270150447, + "grad_norm": 0.27559173107147217, + "learning_rate": 1.8379233696648928e-05, + "loss": 0.0795, + "step": 102390 + }, + { + "epoch": 3.7212006686532453, + "grad_norm": 0.7850374579429626, + "learning_rate": 1.8374229805775413e-05, + "loss": 0.1171, + "step": 102400 + }, + { + "epoch": 3.7215640671560433, + "grad_norm": 0.4163167476654053, + "learning_rate": 1.8369226200382755e-05, + "loss": 0.087, + "step": 102410 + }, + { + "epoch": 3.7219274656588412, + "grad_norm": 1.2209895849227905, + "learning_rate": 1.8364222880686545e-05, + "loss": 0.0764, + "step": 102420 + }, + { + "epoch": 3.7222908641616397, + "grad_norm": 0.7018761038780212, + "learning_rate": 1.8359219846902366e-05, + "loss": 0.065, + "step": 102430 + }, + { + "epoch": 3.7226542626644377, + "grad_norm": 0.4590131342411041, + "learning_rate": 1.8354217099245777e-05, + "loss": 0.0735, + "step": 102440 + }, + { + "epoch": 3.723017661167236, + "grad_norm": 1.2598764896392822, + "learning_rate": 1.8349214637932326e-05, + "loss": 0.0809, + "step": 102450 + }, + { + "epoch": 3.723381059670034, + "grad_norm": 0.47641921043395996, + "learning_rate": 1.834421246317755e-05, + "loss": 0.0901, + "step": 102460 + }, + { + "epoch": 3.723744458172832, + "grad_norm": 1.0995975732803345, + "learning_rate": 1.833921057519698e-05, + "loss": 0.0662, + "step": 102470 + }, + { + "epoch": 3.7241078566756305, + "grad_norm": 0.47749069333076477, + "learning_rate": 1.833420897420613e-05, + "loss": 0.0719, + "step": 102480 + }, + { + "epoch": 3.724471255178429, + "grad_norm": 0.9003120064735413, + "learning_rate": 1.8329207660420496e-05, + "loss": 0.0911, + "step": 102490 + }, + { + "epoch": 3.724834653681227, + "grad_norm": 0.9296249151229858, + "learning_rate": 1.832420663405557e-05, + "loss": 0.0935, + "step": 102500 + }, + { + "epoch": 3.725198052184025, + "grad_norm": 0.7710517644882202, + "learning_rate": 1.8319205895326818e-05, + "loss": 0.0562, + "step": 102510 + }, + { + "epoch": 3.7255614506868233, + "grad_norm": 0.6956592798233032, + "learning_rate": 1.8314205444449726e-05, + "loss": 0.0859, + "step": 102520 + }, + { + "epoch": 3.7259248491896213, + "grad_norm": 0.7365669012069702, + "learning_rate": 1.830920528163973e-05, + "loss": 0.0645, + "step": 102530 + }, + { + "epoch": 3.7262882476924197, + "grad_norm": 0.9889929294586182, + "learning_rate": 1.8304205407112275e-05, + "loss": 0.0862, + "step": 102540 + }, + { + "epoch": 3.7266516461952177, + "grad_norm": 1.0486135482788086, + "learning_rate": 1.8299205821082778e-05, + "loss": 0.0692, + "step": 102550 + }, + { + "epoch": 3.7270150446980157, + "grad_norm": 0.7183421850204468, + "learning_rate": 1.829420652376666e-05, + "loss": 0.0811, + "step": 102560 + }, + { + "epoch": 3.727378443200814, + "grad_norm": 1.0006262063980103, + "learning_rate": 1.828920751537933e-05, + "loss": 0.0704, + "step": 102570 + }, + { + "epoch": 3.727741841703612, + "grad_norm": 0.4710160195827484, + "learning_rate": 1.8284208796136173e-05, + "loss": 0.0728, + "step": 102580 + }, + { + "epoch": 3.7281052402064105, + "grad_norm": 0.4841473698616028, + "learning_rate": 1.8279210366252564e-05, + "loss": 0.1235, + "step": 102590 + }, + { + "epoch": 3.7284686387092085, + "grad_norm": 0.28182855248451233, + "learning_rate": 1.8274212225943858e-05, + "loss": 0.0631, + "step": 102600 + }, + { + "epoch": 3.7284686387092085, + "eval_loss": 0.32021304965019226, + "eval_runtime": 179.5125, + "eval_samples_per_second": 41.301, + "eval_steps_per_second": 5.164, + "eval_wer": 0.1356490642075263, + "step": 102600 + }, + { + "epoch": 3.7288320372120065, + "grad_norm": 0.28042423725128174, + "learning_rate": 1.8269214375425422e-05, + "loss": 0.0669, + "step": 102610 + }, + { + "epoch": 3.729195435714805, + "grad_norm": 0.36679309606552124, + "learning_rate": 1.8264216814912595e-05, + "loss": 0.0702, + "step": 102620 + }, + { + "epoch": 3.729558834217603, + "grad_norm": 0.6412705779075623, + "learning_rate": 1.82592195446207e-05, + "loss": 0.0774, + "step": 102630 + }, + { + "epoch": 3.7299222327204014, + "grad_norm": 0.603082537651062, + "learning_rate": 1.8254222564765044e-05, + "loss": 0.1122, + "step": 102640 + }, + { + "epoch": 3.7302856312231993, + "grad_norm": 0.7889009714126587, + "learning_rate": 1.824922587556094e-05, + "loss": 0.0914, + "step": 102650 + }, + { + "epoch": 3.7306490297259973, + "grad_norm": 0.38122794032096863, + "learning_rate": 1.8244229477223668e-05, + "loss": 0.082, + "step": 102660 + }, + { + "epoch": 3.7310124282287958, + "grad_norm": 1.1918739080429077, + "learning_rate": 1.8239732967589197e-05, + "loss": 2.188, + "step": 102670 + }, + { + "epoch": 3.7313758267315937, + "grad_norm": 0.9827843308448792, + "learning_rate": 1.8234737122492e-05, + "loss": 0.0793, + "step": 102680 + }, + { + "epoch": 3.731739225234392, + "grad_norm": 9.220224380493164, + "learning_rate": 1.822974156888591e-05, + "loss": 0.0989, + "step": 102690 + }, + { + "epoch": 3.73210262373719, + "grad_norm": 0.5240766406059265, + "learning_rate": 1.822474630698617e-05, + "loss": 0.0771, + "step": 102700 + }, + { + "epoch": 3.732466022239988, + "grad_norm": 0.627837061882019, + "learning_rate": 1.8219751337008003e-05, + "loss": 0.0755, + "step": 102710 + }, + { + "epoch": 3.7328294207427866, + "grad_norm": 0.41630762815475464, + "learning_rate": 1.8214756659166617e-05, + "loss": 0.074, + "step": 102720 + }, + { + "epoch": 3.7331928192455845, + "grad_norm": 0.7459368109703064, + "learning_rate": 1.8209762273677232e-05, + "loss": 0.06, + "step": 102730 + }, + { + "epoch": 3.733556217748383, + "grad_norm": 0.3518989384174347, + "learning_rate": 1.8204768180755037e-05, + "loss": 0.0606, + "step": 102740 + }, + { + "epoch": 3.733919616251181, + "grad_norm": 1.498246669769287, + "learning_rate": 1.8199774380615197e-05, + "loss": 0.0735, + "step": 102750 + }, + { + "epoch": 3.734283014753979, + "grad_norm": 0.4231000244617462, + "learning_rate": 1.8194780873472883e-05, + "loss": 0.0825, + "step": 102760 + }, + { + "epoch": 3.7346464132567774, + "grad_norm": 0.4876510798931122, + "learning_rate": 1.8189787659543246e-05, + "loss": 0.061, + "step": 102770 + }, + { + "epoch": 3.735009811759576, + "grad_norm": 9.027270317077637, + "learning_rate": 1.8184794739041433e-05, + "loss": 0.0627, + "step": 102780 + }, + { + "epoch": 3.735373210262374, + "grad_norm": 0.3482457995414734, + "learning_rate": 1.817980211218257e-05, + "loss": 0.0852, + "step": 102790 + }, + { + "epoch": 3.7357366087651718, + "grad_norm": 0.5797934532165527, + "learning_rate": 1.817480977918176e-05, + "loss": 0.075, + "step": 102800 + }, + { + "epoch": 3.73610000726797, + "grad_norm": 0.5175044536590576, + "learning_rate": 1.8169817740254114e-05, + "loss": 0.088, + "step": 102810 + }, + { + "epoch": 3.736463405770768, + "grad_norm": 0.40952855348587036, + "learning_rate": 1.8164825995614714e-05, + "loss": 0.0663, + "step": 102820 + }, + { + "epoch": 3.7368268042735666, + "grad_norm": 0.47273626923561096, + "learning_rate": 1.8159834545478655e-05, + "loss": 0.0657, + "step": 102830 + }, + { + "epoch": 3.7371902027763646, + "grad_norm": 0.6765505075454712, + "learning_rate": 1.815484339006098e-05, + "loss": 0.088, + "step": 102840 + }, + { + "epoch": 3.7375536012791626, + "grad_norm": 0.7039837837219238, + "learning_rate": 1.814985252957675e-05, + "loss": 0.0837, + "step": 102850 + }, + { + "epoch": 3.737916999781961, + "grad_norm": 0.7344921231269836, + "learning_rate": 1.8144861964240995e-05, + "loss": 0.0831, + "step": 102860 + }, + { + "epoch": 3.738280398284759, + "grad_norm": 0.5294970870018005, + "learning_rate": 1.8139871694268756e-05, + "loss": 0.0656, + "step": 102870 + }, + { + "epoch": 3.7386437967875574, + "grad_norm": 0.4704716205596924, + "learning_rate": 1.813488171987504e-05, + "loss": 0.0612, + "step": 102880 + }, + { + "epoch": 3.7390071952903554, + "grad_norm": 0.43470290303230286, + "learning_rate": 1.812989204127484e-05, + "loss": 0.0932, + "step": 102890 + }, + { + "epoch": 3.7393705937931534, + "grad_norm": 0.9852955341339111, + "learning_rate": 1.8124902658683146e-05, + "loss": 0.0947, + "step": 102900 + }, + { + "epoch": 3.739733992295952, + "grad_norm": 0.6351022720336914, + "learning_rate": 1.8119913572314932e-05, + "loss": 0.0969, + "step": 102910 + }, + { + "epoch": 3.74009739079875, + "grad_norm": 0.5832617282867432, + "learning_rate": 1.8114924782385167e-05, + "loss": 0.0584, + "step": 102920 + }, + { + "epoch": 3.7404607893015482, + "grad_norm": 0.47710007429122925, + "learning_rate": 1.81099362891088e-05, + "loss": 0.0694, + "step": 102930 + }, + { + "epoch": 3.7408241878043462, + "grad_norm": 0.515385091304779, + "learning_rate": 1.8104948092700758e-05, + "loss": 0.1376, + "step": 102940 + }, + { + "epoch": 3.741187586307144, + "grad_norm": 2.539031505584717, + "learning_rate": 1.809996019337597e-05, + "loss": 0.0742, + "step": 102950 + }, + { + "epoch": 3.7415509848099426, + "grad_norm": 0.30518245697021484, + "learning_rate": 1.8094972591349346e-05, + "loss": 0.0718, + "step": 102960 + }, + { + "epoch": 3.7419143833127406, + "grad_norm": 1.3002036809921265, + "learning_rate": 1.808998528683579e-05, + "loss": 1.3903, + "step": 102970 + }, + { + "epoch": 3.742277781815539, + "grad_norm": 0.6815840005874634, + "learning_rate": 1.8084998280050182e-05, + "loss": 0.0831, + "step": 102980 + }, + { + "epoch": 3.742641180318337, + "grad_norm": 0.44951331615448, + "learning_rate": 1.8080011571207388e-05, + "loss": 0.073, + "step": 102990 + }, + { + "epoch": 3.743004578821135, + "grad_norm": 1.890199899673462, + "learning_rate": 1.807502516052228e-05, + "loss": 0.0831, + "step": 103000 + }, + { + "epoch": 3.7433679773239334, + "grad_norm": 0.29908934235572815, + "learning_rate": 1.807003904820969e-05, + "loss": 0.0641, + "step": 103010 + }, + { + "epoch": 3.7437313758267314, + "grad_norm": 0.28492602705955505, + "learning_rate": 1.8065053234484472e-05, + "loss": 0.097, + "step": 103020 + }, + { + "epoch": 3.74409477432953, + "grad_norm": 0.9672560095787048, + "learning_rate": 1.8060067719561434e-05, + "loss": 0.0678, + "step": 103030 + }, + { + "epoch": 3.744458172832328, + "grad_norm": 0.7627213597297668, + "learning_rate": 1.8055082503655376e-05, + "loss": 0.08, + "step": 103040 + }, + { + "epoch": 3.744821571335126, + "grad_norm": 2.1501963138580322, + "learning_rate": 1.8050097586981107e-05, + "loss": 0.0728, + "step": 103050 + }, + { + "epoch": 3.7451849698379243, + "grad_norm": 0.8454054594039917, + "learning_rate": 1.804511296975341e-05, + "loss": 0.0683, + "step": 103060 + }, + { + "epoch": 3.7455483683407227, + "grad_norm": 0.34756171703338623, + "learning_rate": 1.8040128652187048e-05, + "loss": 0.0547, + "step": 103070 + }, + { + "epoch": 3.7459117668435207, + "grad_norm": 0.21398131549358368, + "learning_rate": 1.8035144634496775e-05, + "loss": 0.0477, + "step": 103080 + }, + { + "epoch": 3.7462751653463187, + "grad_norm": 0.5252348780632019, + "learning_rate": 1.8030160916897342e-05, + "loss": 0.0801, + "step": 103090 + }, + { + "epoch": 3.746638563849117, + "grad_norm": 1.0118563175201416, + "learning_rate": 1.8025177499603473e-05, + "loss": 0.0721, + "step": 103100 + }, + { + "epoch": 3.747001962351915, + "grad_norm": 0.5007623434066772, + "learning_rate": 1.8020194382829894e-05, + "loss": 0.0779, + "step": 103110 + }, + { + "epoch": 3.7473653608547135, + "grad_norm": 0.41904526948928833, + "learning_rate": 1.8015211566791304e-05, + "loss": 0.0623, + "step": 103120 + }, + { + "epoch": 3.7477287593575115, + "grad_norm": 0.6131216883659363, + "learning_rate": 1.801022905170239e-05, + "loss": 0.0882, + "step": 103130 + }, + { + "epoch": 3.7480921578603095, + "grad_norm": 0.7774443626403809, + "learning_rate": 1.8005246837777846e-05, + "loss": 0.0896, + "step": 103140 + }, + { + "epoch": 3.748455556363108, + "grad_norm": 0.6011605858802795, + "learning_rate": 1.800026492523232e-05, + "loss": 0.08, + "step": 103150 + }, + { + "epoch": 3.748818954865906, + "grad_norm": 0.2599460184574127, + "learning_rate": 1.7995283314280476e-05, + "loss": 0.0764, + "step": 103160 + }, + { + "epoch": 3.7491823533687043, + "grad_norm": 0.8700913786888123, + "learning_rate": 1.7990302005136948e-05, + "loss": 0.0803, + "step": 103170 + }, + { + "epoch": 3.7495457518715023, + "grad_norm": 0.8427462577819824, + "learning_rate": 1.798532099801637e-05, + "loss": 0.0783, + "step": 103180 + }, + { + "epoch": 3.7499091503743003, + "grad_norm": 0.859573483467102, + "learning_rate": 1.7980340293133353e-05, + "loss": 0.0608, + "step": 103190 + }, + { + "epoch": 3.7502725488770987, + "grad_norm": 0.4894687533378601, + "learning_rate": 1.7975359890702492e-05, + "loss": 0.0746, + "step": 103200 + }, + { + "epoch": 3.7502725488770987, + "eval_loss": 0.30459001660346985, + "eval_runtime": 180.7543, + "eval_samples_per_second": 41.017, + "eval_steps_per_second": 5.129, + "eval_wer": 0.1349229400766061, + "step": 103200 + }, + { + "epoch": 3.7506359473798967, + "grad_norm": 1.1439019441604614, + "learning_rate": 1.7970379790938386e-05, + "loss": 0.1182, + "step": 103210 + }, + { + "epoch": 3.750999345882695, + "grad_norm": 1.0490964651107788, + "learning_rate": 1.79653999940556e-05, + "loss": 0.0761, + "step": 103220 + }, + { + "epoch": 3.751362744385493, + "grad_norm": 0.7302677631378174, + "learning_rate": 1.79604205002687e-05, + "loss": 0.0691, + "step": 103230 + }, + { + "epoch": 3.751726142888291, + "grad_norm": 0.3179365396499634, + "learning_rate": 1.7955441309792227e-05, + "loss": 0.074, + "step": 103240 + }, + { + "epoch": 3.7520895413910895, + "grad_norm": 0.6117727160453796, + "learning_rate": 1.795046242284073e-05, + "loss": 0.0933, + "step": 103250 + }, + { + "epoch": 3.7524529398938875, + "grad_norm": 0.4880678355693817, + "learning_rate": 1.794548383962872e-05, + "loss": 0.3885, + "step": 103260 + }, + { + "epoch": 3.752816338396686, + "grad_norm": 2.7460379600524902, + "learning_rate": 1.794050556037072e-05, + "loss": 3.1444, + "step": 103270 + }, + { + "epoch": 3.753179736899484, + "grad_norm": 0.6626706719398499, + "learning_rate": 1.7935527585281215e-05, + "loss": 0.0568, + "step": 103280 + }, + { + "epoch": 3.753543135402282, + "grad_norm": 0.7829678654670715, + "learning_rate": 1.7930549914574685e-05, + "loss": 0.0743, + "step": 103290 + }, + { + "epoch": 3.7539065339050803, + "grad_norm": 0.5516233444213867, + "learning_rate": 1.792557254846561e-05, + "loss": 0.0805, + "step": 103300 + }, + { + "epoch": 3.7542699324078783, + "grad_norm": 3.4377355575561523, + "learning_rate": 1.792059548716844e-05, + "loss": 0.0762, + "step": 103310 + }, + { + "epoch": 3.7546333309106767, + "grad_norm": 0.40759000182151794, + "learning_rate": 1.7915618730897626e-05, + "loss": 0.0551, + "step": 103320 + }, + { + "epoch": 3.7549967294134747, + "grad_norm": 0.7524077892303467, + "learning_rate": 1.7910642279867596e-05, + "loss": 0.067, + "step": 103330 + }, + { + "epoch": 3.7553601279162727, + "grad_norm": 0.8002333641052246, + "learning_rate": 1.7905666134292758e-05, + "loss": 0.0857, + "step": 103340 + }, + { + "epoch": 3.755723526419071, + "grad_norm": 0.4075934588909149, + "learning_rate": 1.7900690294387533e-05, + "loss": 0.0805, + "step": 103350 + }, + { + "epoch": 3.7560869249218696, + "grad_norm": 0.3919731080532074, + "learning_rate": 1.78957147603663e-05, + "loss": 0.099, + "step": 103360 + }, + { + "epoch": 3.7564503234246676, + "grad_norm": 0.8194761872291565, + "learning_rate": 1.789073953244344e-05, + "loss": 0.4237, + "step": 103370 + }, + { + "epoch": 3.7568137219274655, + "grad_norm": 4.275974750518799, + "learning_rate": 1.7885764610833323e-05, + "loss": 0.0658, + "step": 103380 + }, + { + "epoch": 3.757177120430264, + "grad_norm": 0.3424472510814667, + "learning_rate": 1.7880789995750293e-05, + "loss": 0.0618, + "step": 103390 + }, + { + "epoch": 3.757540518933062, + "grad_norm": 0.4911600947380066, + "learning_rate": 1.7875815687408687e-05, + "loss": 0.0692, + "step": 103400 + }, + { + "epoch": 3.7579039174358604, + "grad_norm": 0.9533910155296326, + "learning_rate": 1.7870841686022844e-05, + "loss": 0.0818, + "step": 103410 + }, + { + "epoch": 3.7582673159386584, + "grad_norm": 0.4925590455532074, + "learning_rate": 1.7865867991807064e-05, + "loss": 0.0706, + "step": 103420 + }, + { + "epoch": 3.7586307144414564, + "grad_norm": 0.3848567008972168, + "learning_rate": 1.7860894604975648e-05, + "loss": 0.0679, + "step": 103430 + }, + { + "epoch": 3.758994112944255, + "grad_norm": 1.7480194568634033, + "learning_rate": 1.785592152574288e-05, + "loss": 0.0985, + "step": 103440 + }, + { + "epoch": 3.7593575114470528, + "grad_norm": 1.4022555351257324, + "learning_rate": 1.7850948754323036e-05, + "loss": 0.0773, + "step": 103450 + }, + { + "epoch": 3.759720909949851, + "grad_norm": 0.5998416543006897, + "learning_rate": 1.784597629093038e-05, + "loss": 0.2167, + "step": 103460 + }, + { + "epoch": 3.760084308452649, + "grad_norm": 0.5429103374481201, + "learning_rate": 1.784100413577915e-05, + "loss": 0.0695, + "step": 103470 + }, + { + "epoch": 3.760447706955447, + "grad_norm": 0.43059512972831726, + "learning_rate": 1.783603228908357e-05, + "loss": 0.0541, + "step": 103480 + }, + { + "epoch": 3.7608111054582456, + "grad_norm": 0.7966387271881104, + "learning_rate": 1.7831060751057877e-05, + "loss": 0.0789, + "step": 103490 + }, + { + "epoch": 3.7611745039610436, + "grad_norm": 0.5265412330627441, + "learning_rate": 1.7826089521916266e-05, + "loss": 0.061, + "step": 103500 + }, + { + "epoch": 3.761537902463842, + "grad_norm": 0.580176055431366, + "learning_rate": 1.782111860187294e-05, + "loss": 0.066, + "step": 103510 + }, + { + "epoch": 3.76190130096664, + "grad_norm": 0.7160188555717468, + "learning_rate": 1.7816147991142067e-05, + "loss": 3.6468, + "step": 103520 + }, + { + "epoch": 3.762264699469438, + "grad_norm": 0.5515186786651611, + "learning_rate": 1.7811177689937813e-05, + "loss": 0.0793, + "step": 103530 + }, + { + "epoch": 3.7626280979722364, + "grad_norm": 0.7371792197227478, + "learning_rate": 1.7806207698474334e-05, + "loss": 0.074, + "step": 103540 + }, + { + "epoch": 3.7629914964750344, + "grad_norm": 0.7654315233230591, + "learning_rate": 1.7801238016965774e-05, + "loss": 0.0752, + "step": 103550 + }, + { + "epoch": 3.763354894977833, + "grad_norm": 0.42323166131973267, + "learning_rate": 1.7796268645626256e-05, + "loss": 0.1029, + "step": 103560 + }, + { + "epoch": 3.763718293480631, + "grad_norm": 1.1199790239334106, + "learning_rate": 1.779129958466989e-05, + "loss": 0.196, + "step": 103570 + }, + { + "epoch": 3.764081691983429, + "grad_norm": 0.9370298981666565, + "learning_rate": 1.7786330834310765e-05, + "loss": 0.0776, + "step": 103580 + }, + { + "epoch": 3.764445090486227, + "grad_norm": 0.6853455901145935, + "learning_rate": 1.778136239476299e-05, + "loss": 0.0691, + "step": 103590 + }, + { + "epoch": 3.764808488989025, + "grad_norm": 1.1451141834259033, + "learning_rate": 1.7776394266240624e-05, + "loss": 0.079, + "step": 103600 + }, + { + "epoch": 3.7651718874918236, + "grad_norm": 0.9805148243904114, + "learning_rate": 1.777142644895773e-05, + "loss": 0.0742, + "step": 103610 + }, + { + "epoch": 3.7655352859946216, + "grad_norm": 0.812449038028717, + "learning_rate": 1.7766458943128346e-05, + "loss": 0.0709, + "step": 103620 + }, + { + "epoch": 3.7658986844974196, + "grad_norm": 1.055368185043335, + "learning_rate": 1.7761491748966506e-05, + "loss": 0.0553, + "step": 103630 + }, + { + "epoch": 3.766262083000218, + "grad_norm": 0.5963478684425354, + "learning_rate": 1.775652486668624e-05, + "loss": 0.6731, + "step": 103640 + }, + { + "epoch": 3.7666254815030165, + "grad_norm": 0.4923159182071686, + "learning_rate": 1.775155829650154e-05, + "loss": 0.0761, + "step": 103650 + }, + { + "epoch": 3.7669888800058144, + "grad_norm": 0.3637178838253021, + "learning_rate": 1.774659203862641e-05, + "loss": 0.0936, + "step": 103660 + }, + { + "epoch": 3.7673522785086124, + "grad_norm": 0.23796427249908447, + "learning_rate": 1.7741626093274808e-05, + "loss": 1.0844, + "step": 103670 + }, + { + "epoch": 3.767715677011411, + "grad_norm": 0.570115327835083, + "learning_rate": 1.773666046066072e-05, + "loss": 0.0716, + "step": 103680 + }, + { + "epoch": 3.768079075514209, + "grad_norm": 0.40989992022514343, + "learning_rate": 1.7731695140998095e-05, + "loss": 0.1496, + "step": 103690 + }, + { + "epoch": 3.7684424740170073, + "grad_norm": 0.913547933101654, + "learning_rate": 1.7726730134500863e-05, + "loss": 0.0984, + "step": 103700 + }, + { + "epoch": 3.7688058725198053, + "grad_norm": 0.6644873023033142, + "learning_rate": 1.7721765441382948e-05, + "loss": 0.0709, + "step": 103710 + }, + { + "epoch": 3.7691692710226032, + "grad_norm": 16.15460777282715, + "learning_rate": 1.7716801061858256e-05, + "loss": 0.0726, + "step": 103720 + }, + { + "epoch": 3.7695326695254017, + "grad_norm": 1.16048002243042, + "learning_rate": 1.7711836996140704e-05, + "loss": 0.0689, + "step": 103730 + }, + { + "epoch": 3.7698960680281997, + "grad_norm": 0.36787500977516174, + "learning_rate": 1.7706873244444165e-05, + "loss": 0.0709, + "step": 103740 + }, + { + "epoch": 3.770259466530998, + "grad_norm": 0.9788756966590881, + "learning_rate": 1.7701909806982507e-05, + "loss": 0.075, + "step": 103750 + }, + { + "epoch": 3.770622865033796, + "grad_norm": 0.5091611742973328, + "learning_rate": 1.769694668396959e-05, + "loss": 0.0679, + "step": 103760 + }, + { + "epoch": 3.770986263536594, + "grad_norm": 0.774710476398468, + "learning_rate": 1.7691983875619245e-05, + "loss": 0.0626, + "step": 103770 + }, + { + "epoch": 3.7713496620393925, + "grad_norm": 1.0591776371002197, + "learning_rate": 1.768702138214532e-05, + "loss": 0.0847, + "step": 103780 + }, + { + "epoch": 3.7717130605421905, + "grad_norm": 0.4821354150772095, + "learning_rate": 1.7682059203761632e-05, + "loss": 0.0793, + "step": 103790 + }, + { + "epoch": 3.772076459044989, + "grad_norm": 1.042035460472107, + "learning_rate": 1.767709734068197e-05, + "loss": 0.0684, + "step": 103800 + }, + { + "epoch": 3.772076459044989, + "eval_loss": 0.31211939454078674, + "eval_runtime": 179.5485, + "eval_samples_per_second": 41.292, + "eval_steps_per_second": 5.163, + "eval_wer": 0.13473233249223954, + "step": 103800 + }, + { + "epoch": 3.772439857547787, + "grad_norm": 0.6240308880805969, + "learning_rate": 1.767213579312012e-05, + "loss": 0.0686, + "step": 103810 + }, + { + "epoch": 3.772803256050585, + "grad_norm": 0.6886245012283325, + "learning_rate": 1.7667174561289874e-05, + "loss": 0.172, + "step": 103820 + }, + { + "epoch": 3.7731666545533833, + "grad_norm": 0.6289244294166565, + "learning_rate": 1.7662213645404985e-05, + "loss": 0.3868, + "step": 103830 + }, + { + "epoch": 3.7735300530561813, + "grad_norm": 0.2528345584869385, + "learning_rate": 1.7657253045679205e-05, + "loss": 0.0879, + "step": 103840 + }, + { + "epoch": 3.7738934515589797, + "grad_norm": 5.790124893188477, + "learning_rate": 1.7652292762326266e-05, + "loss": 0.0851, + "step": 103850 + }, + { + "epoch": 3.7742568500617777, + "grad_norm": 0.524517834186554, + "learning_rate": 1.764733279555988e-05, + "loss": 0.0537, + "step": 103860 + }, + { + "epoch": 3.7746202485645757, + "grad_norm": 0.41325438022613525, + "learning_rate": 1.7642373145593764e-05, + "loss": 0.0695, + "step": 103870 + }, + { + "epoch": 3.774983647067374, + "grad_norm": 0.5350490212440491, + "learning_rate": 1.763741381264162e-05, + "loss": 0.0674, + "step": 103880 + }, + { + "epoch": 3.775347045570172, + "grad_norm": 0.4079577326774597, + "learning_rate": 1.7632454796917117e-05, + "loss": 0.0725, + "step": 103890 + }, + { + "epoch": 3.7757104440729705, + "grad_norm": 1.083164095878601, + "learning_rate": 1.7627496098633923e-05, + "loss": 0.0943, + "step": 103900 + }, + { + "epoch": 3.7760738425757685, + "grad_norm": 0.525883138179779, + "learning_rate": 1.7622537718005676e-05, + "loss": 0.0774, + "step": 103910 + }, + { + "epoch": 3.7764372410785665, + "grad_norm": 0.420608788728714, + "learning_rate": 1.7617579655246048e-05, + "loss": 0.0661, + "step": 103920 + }, + { + "epoch": 3.776800639581365, + "grad_norm": 0.5199810862541199, + "learning_rate": 1.761262191056864e-05, + "loss": 0.0948, + "step": 103930 + }, + { + "epoch": 3.7771640380841633, + "grad_norm": 0.519557535648346, + "learning_rate": 1.760766448418707e-05, + "loss": 0.0978, + "step": 103940 + }, + { + "epoch": 3.7775274365869613, + "grad_norm": 0.6157034635543823, + "learning_rate": 1.7602707376314935e-05, + "loss": 0.0882, + "step": 103950 + }, + { + "epoch": 3.7778908350897593, + "grad_norm": 0.5788136720657349, + "learning_rate": 1.7597750587165813e-05, + "loss": 0.0687, + "step": 103960 + }, + { + "epoch": 3.7782542335925577, + "grad_norm": 3.0995774269104004, + "learning_rate": 1.7592794116953287e-05, + "loss": 0.6063, + "step": 103970 + }, + { + "epoch": 3.7786176320953557, + "grad_norm": 0.4705333411693573, + "learning_rate": 1.7587837965890907e-05, + "loss": 0.0759, + "step": 103980 + }, + { + "epoch": 3.778981030598154, + "grad_norm": 0.5289357304573059, + "learning_rate": 1.758288213419222e-05, + "loss": 0.07, + "step": 103990 + }, + { + "epoch": 3.779344429100952, + "grad_norm": 0.28470751643180847, + "learning_rate": 1.7577926622070752e-05, + "loss": 0.0824, + "step": 104000 + }, + { + "epoch": 3.77970782760375, + "grad_norm": 0.4704053997993469, + "learning_rate": 1.7572971429740004e-05, + "loss": 0.0679, + "step": 104010 + }, + { + "epoch": 3.7800712261065486, + "grad_norm": 0.5059223771095276, + "learning_rate": 1.7568016557413503e-05, + "loss": 0.0598, + "step": 104020 + }, + { + "epoch": 3.7804346246093465, + "grad_norm": 0.2848690450191498, + "learning_rate": 1.7563062005304724e-05, + "loss": 0.0811, + "step": 104030 + }, + { + "epoch": 3.780798023112145, + "grad_norm": 0.5668039917945862, + "learning_rate": 1.7558107773627147e-05, + "loss": 0.106, + "step": 104040 + }, + { + "epoch": 3.781161421614943, + "grad_norm": 0.8382745981216431, + "learning_rate": 1.7553153862594214e-05, + "loss": 0.0748, + "step": 104050 + }, + { + "epoch": 3.781524820117741, + "grad_norm": 0.817533552646637, + "learning_rate": 1.75482002724194e-05, + "loss": 0.0579, + "step": 104060 + }, + { + "epoch": 3.7818882186205394, + "grad_norm": 0.26132848858833313, + "learning_rate": 1.7543247003316117e-05, + "loss": 0.0842, + "step": 104070 + }, + { + "epoch": 3.7822516171233374, + "grad_norm": 0.4925542175769806, + "learning_rate": 1.7538294055497793e-05, + "loss": 1.3918, + "step": 104080 + }, + { + "epoch": 3.782615015626136, + "grad_norm": 0.36757928133010864, + "learning_rate": 1.753334142917783e-05, + "loss": 0.0696, + "step": 104090 + }, + { + "epoch": 3.7829784141289338, + "grad_norm": 1.2747372388839722, + "learning_rate": 1.7528389124569605e-05, + "loss": 0.0939, + "step": 104100 + }, + { + "epoch": 3.7833418126317317, + "grad_norm": 0.5699681639671326, + "learning_rate": 1.7523437141886516e-05, + "loss": 0.0728, + "step": 104110 + }, + { + "epoch": 3.78370521113453, + "grad_norm": 0.7893559336662292, + "learning_rate": 1.7518485481341926e-05, + "loss": 0.0609, + "step": 104120 + }, + { + "epoch": 3.784068609637328, + "grad_norm": 1.0749348402023315, + "learning_rate": 1.7513534143149175e-05, + "loss": 0.0692, + "step": 104130 + }, + { + "epoch": 3.7844320081401266, + "grad_norm": 0.6943231225013733, + "learning_rate": 1.75085831275216e-05, + "loss": 0.0766, + "step": 104140 + }, + { + "epoch": 3.7847954066429246, + "grad_norm": 0.32822078466415405, + "learning_rate": 1.750363243467251e-05, + "loss": 0.1134, + "step": 104150 + }, + { + "epoch": 3.7851588051457226, + "grad_norm": 1.1365320682525635, + "learning_rate": 1.7498682064815242e-05, + "loss": 0.0622, + "step": 104160 + }, + { + "epoch": 3.785522203648521, + "grad_norm": 1.4524558782577515, + "learning_rate": 1.749373201816307e-05, + "loss": 0.0624, + "step": 104170 + }, + { + "epoch": 3.785885602151319, + "grad_norm": 0.39508798718452454, + "learning_rate": 1.7488782294929278e-05, + "loss": 0.0728, + "step": 104180 + }, + { + "epoch": 3.7862490006541174, + "grad_norm": 0.8721691966056824, + "learning_rate": 1.7483832895327135e-05, + "loss": 0.2174, + "step": 104190 + }, + { + "epoch": 3.7866123991569154, + "grad_norm": 2.2220089435577393, + "learning_rate": 1.747888381956988e-05, + "loss": 0.0782, + "step": 104200 + }, + { + "epoch": 3.7869757976597134, + "grad_norm": 0.5237187147140503, + "learning_rate": 1.7473935067870766e-05, + "loss": 0.0985, + "step": 104210 + }, + { + "epoch": 3.787339196162512, + "grad_norm": 0.4212065041065216, + "learning_rate": 1.7468986640443017e-05, + "loss": 0.0634, + "step": 104220 + }, + { + "epoch": 3.7877025946653102, + "grad_norm": 0.6862966418266296, + "learning_rate": 1.746403853749984e-05, + "loss": 0.0768, + "step": 104230 + }, + { + "epoch": 3.788065993168108, + "grad_norm": 0.4138953685760498, + "learning_rate": 1.7459090759254414e-05, + "loss": 0.0554, + "step": 104240 + }, + { + "epoch": 3.788429391670906, + "grad_norm": 1.6833606958389282, + "learning_rate": 1.745414330591995e-05, + "loss": 0.0804, + "step": 104250 + }, + { + "epoch": 3.7887927901737046, + "grad_norm": 0.5321451425552368, + "learning_rate": 1.7449196177709597e-05, + "loss": 0.099, + "step": 104260 + }, + { + "epoch": 3.7891561886765026, + "grad_norm": 0.4269891381263733, + "learning_rate": 1.744424937483652e-05, + "loss": 0.0539, + "step": 104270 + }, + { + "epoch": 3.789519587179301, + "grad_norm": 0.49251261353492737, + "learning_rate": 1.7439302897513854e-05, + "loss": 0.0691, + "step": 104280 + }, + { + "epoch": 3.789882985682099, + "grad_norm": 0.3544449508190155, + "learning_rate": 1.7434356745954717e-05, + "loss": 0.2799, + "step": 104290 + }, + { + "epoch": 3.790246384184897, + "grad_norm": 0.3938431143760681, + "learning_rate": 1.7429410920372235e-05, + "loss": 0.0788, + "step": 104300 + }, + { + "epoch": 3.7906097826876954, + "grad_norm": 1.1323349475860596, + "learning_rate": 1.74244654209795e-05, + "loss": 0.0604, + "step": 104310 + }, + { + "epoch": 3.7909731811904934, + "grad_norm": 0.40305644273757935, + "learning_rate": 1.74195202479896e-05, + "loss": 0.05, + "step": 104320 + }, + { + "epoch": 3.791336579693292, + "grad_norm": 2.940786600112915, + "learning_rate": 1.74145754016156e-05, + "loss": 0.0701, + "step": 104330 + }, + { + "epoch": 3.79169997819609, + "grad_norm": 0.7965475916862488, + "learning_rate": 1.7409630882070542e-05, + "loss": 0.0764, + "step": 104340 + }, + { + "epoch": 3.792063376698888, + "grad_norm": 0.395450234413147, + "learning_rate": 1.7404686689567498e-05, + "loss": 0.0675, + "step": 104350 + }, + { + "epoch": 3.7924267752016863, + "grad_norm": 0.5075439214706421, + "learning_rate": 1.7399742824319478e-05, + "loss": 0.1068, + "step": 104360 + }, + { + "epoch": 3.7927901737044842, + "grad_norm": 0.41297322511672974, + "learning_rate": 1.7394799286539498e-05, + "loss": 0.0562, + "step": 104370 + }, + { + "epoch": 3.7931535722072827, + "grad_norm": 0.254867821931839, + "learning_rate": 1.7389856076440557e-05, + "loss": 0.0703, + "step": 104380 + }, + { + "epoch": 3.7935169707100806, + "grad_norm": 0.4929332435131073, + "learning_rate": 1.7384913194235635e-05, + "loss": 0.0724, + "step": 104390 + }, + { + "epoch": 3.7938803692128786, + "grad_norm": 0.49922120571136475, + "learning_rate": 1.7379970640137717e-05, + "loss": 0.0715, + "step": 104400 + }, + { + "epoch": 3.7938803692128786, + "eval_loss": 0.3130161762237549, + "eval_runtime": 178.8829, + "eval_samples_per_second": 41.446, + "eval_steps_per_second": 5.182, + "eval_wer": 0.13230889320529343, + "step": 104400 + }, + { + "epoch": 3.794243767715677, + "grad_norm": 0.5942332744598389, + "learning_rate": 1.737502841435975e-05, + "loss": 0.0644, + "step": 104410 + }, + { + "epoch": 3.794607166218475, + "grad_norm": 0.33601608872413635, + "learning_rate": 1.7370086517114678e-05, + "loss": 0.061, + "step": 104420 + }, + { + "epoch": 3.7949705647212735, + "grad_norm": 0.44274547696113586, + "learning_rate": 1.736514494861542e-05, + "loss": 0.0816, + "step": 104430 + }, + { + "epoch": 3.7953339632240715, + "grad_norm": 1.315964698791504, + "learning_rate": 1.7360203709074914e-05, + "loss": 0.0883, + "step": 104440 + }, + { + "epoch": 3.7956973617268694, + "grad_norm": 0.5077027678489685, + "learning_rate": 1.735526279870605e-05, + "loss": 0.0776, + "step": 104450 + }, + { + "epoch": 3.796060760229668, + "grad_norm": 0.806336522102356, + "learning_rate": 1.735032221772171e-05, + "loss": 0.0767, + "step": 104460 + }, + { + "epoch": 3.796424158732466, + "grad_norm": 2.48112416267395, + "learning_rate": 1.7345381966334766e-05, + "loss": 0.0613, + "step": 104470 + }, + { + "epoch": 3.7967875572352643, + "grad_norm": 1.1071819067001343, + "learning_rate": 1.7340442044758067e-05, + "loss": 0.0574, + "step": 104480 + }, + { + "epoch": 3.7971509557380623, + "grad_norm": 0.4046606123447418, + "learning_rate": 1.733550245320448e-05, + "loss": 0.0775, + "step": 104490 + }, + { + "epoch": 3.7975143542408603, + "grad_norm": 0.5835363864898682, + "learning_rate": 1.7330563191886822e-05, + "loss": 0.0818, + "step": 104500 + }, + { + "epoch": 3.7978777527436587, + "grad_norm": 1.6785948276519775, + "learning_rate": 1.732562426101791e-05, + "loss": 0.091, + "step": 104510 + }, + { + "epoch": 3.798241151246457, + "grad_norm": 0.5676856637001038, + "learning_rate": 1.732068566081054e-05, + "loss": 0.0638, + "step": 104520 + }, + { + "epoch": 3.798604549749255, + "grad_norm": 0.5159793496131897, + "learning_rate": 1.731574739147749e-05, + "loss": 0.069, + "step": 104530 + }, + { + "epoch": 3.798967948252053, + "grad_norm": 0.44450074434280396, + "learning_rate": 1.7310809453231557e-05, + "loss": 0.0721, + "step": 104540 + }, + { + "epoch": 3.7993313467548515, + "grad_norm": 1.7773900032043457, + "learning_rate": 1.730587184628549e-05, + "loss": 0.0927, + "step": 104550 + }, + { + "epoch": 3.7996947452576495, + "grad_norm": 0.33800819516181946, + "learning_rate": 1.7300934570852022e-05, + "loss": 0.0798, + "step": 104560 + }, + { + "epoch": 3.800058143760448, + "grad_norm": 0.5945661067962646, + "learning_rate": 1.7295997627143892e-05, + "loss": 0.063, + "step": 104570 + }, + { + "epoch": 3.800421542263246, + "grad_norm": 1.0598748922348022, + "learning_rate": 1.7291061015373805e-05, + "loss": 0.0708, + "step": 104580 + }, + { + "epoch": 3.800784940766044, + "grad_norm": 0.390257328748703, + "learning_rate": 1.728612473575448e-05, + "loss": 0.0649, + "step": 104590 + }, + { + "epoch": 3.8011483392688423, + "grad_norm": 0.650588870048523, + "learning_rate": 1.7281188788498593e-05, + "loss": 0.0786, + "step": 104600 + }, + { + "epoch": 3.8015117377716403, + "grad_norm": 0.6513156294822693, + "learning_rate": 1.727625317381882e-05, + "loss": 0.2863, + "step": 104610 + }, + { + "epoch": 3.8018751362744387, + "grad_norm": 0.31333038210868835, + "learning_rate": 1.7271317891927806e-05, + "loss": 0.0651, + "step": 104620 + }, + { + "epoch": 3.8022385347772367, + "grad_norm": 0.7151166200637817, + "learning_rate": 1.726638294303821e-05, + "loss": 0.072, + "step": 104630 + }, + { + "epoch": 3.8026019332800347, + "grad_norm": 2.873300552368164, + "learning_rate": 1.7261448327362664e-05, + "loss": 0.0924, + "step": 104640 + }, + { + "epoch": 3.802965331782833, + "grad_norm": 0.9299377202987671, + "learning_rate": 1.7256514045113776e-05, + "loss": 0.1467, + "step": 104650 + }, + { + "epoch": 3.803328730285631, + "grad_norm": 0.6155283451080322, + "learning_rate": 1.7251580096504142e-05, + "loss": 0.0617, + "step": 104660 + }, + { + "epoch": 3.8036921287884295, + "grad_norm": 0.7554726600646973, + "learning_rate": 1.724664648174634e-05, + "loss": 0.056, + "step": 104670 + }, + { + "epoch": 3.8040555272912275, + "grad_norm": 0.45988065004348755, + "learning_rate": 1.7241713201052974e-05, + "loss": 0.0613, + "step": 104680 + }, + { + "epoch": 3.8044189257940255, + "grad_norm": 0.6679671406745911, + "learning_rate": 1.7236780254636575e-05, + "loss": 0.0661, + "step": 104690 + }, + { + "epoch": 3.804782324296824, + "grad_norm": 0.6426312327384949, + "learning_rate": 1.7231847642709693e-05, + "loss": 0.0669, + "step": 104700 + }, + { + "epoch": 3.805145722799622, + "grad_norm": 4.563010215759277, + "learning_rate": 1.7226915365484858e-05, + "loss": 0.0891, + "step": 104710 + }, + { + "epoch": 3.8055091213024204, + "grad_norm": 5.822086334228516, + "learning_rate": 1.7221983423174576e-05, + "loss": 0.0657, + "step": 104720 + }, + { + "epoch": 3.8058725198052183, + "grad_norm": 0.4104626476764679, + "learning_rate": 1.721705181599136e-05, + "loss": 0.0737, + "step": 104730 + }, + { + "epoch": 3.8062359183080163, + "grad_norm": 0.41885101795196533, + "learning_rate": 1.7212120544147693e-05, + "loss": 0.0615, + "step": 104740 + }, + { + "epoch": 3.8065993168108148, + "grad_norm": 0.6531028151512146, + "learning_rate": 1.720718960785604e-05, + "loss": 0.094, + "step": 104750 + }, + { + "epoch": 3.8069627153136127, + "grad_norm": 0.4141846001148224, + "learning_rate": 1.7202259007328862e-05, + "loss": 0.0677, + "step": 104760 + }, + { + "epoch": 3.807326113816411, + "grad_norm": 0.2788487672805786, + "learning_rate": 1.7197328742778586e-05, + "loss": 0.0592, + "step": 104770 + }, + { + "epoch": 3.807689512319209, + "grad_norm": 0.95374596118927, + "learning_rate": 1.7192398814417665e-05, + "loss": 0.0557, + "step": 104780 + }, + { + "epoch": 3.808052910822007, + "grad_norm": 0.4431002140045166, + "learning_rate": 1.7187469222458495e-05, + "loss": 0.1018, + "step": 104790 + }, + { + "epoch": 3.8084163093248056, + "grad_norm": 0.7210586071014404, + "learning_rate": 1.7182539967113488e-05, + "loss": 0.0753, + "step": 104800 + }, + { + "epoch": 3.808779707827604, + "grad_norm": 1.0989420413970947, + "learning_rate": 1.7177611048594998e-05, + "loss": 0.0632, + "step": 104810 + }, + { + "epoch": 3.809143106330402, + "grad_norm": 0.3087826073169708, + "learning_rate": 1.7172682467115434e-05, + "loss": 0.0489, + "step": 104820 + }, + { + "epoch": 3.8095065048332, + "grad_norm": 0.5821923017501831, + "learning_rate": 1.716775422288713e-05, + "loss": 0.0744, + "step": 104830 + }, + { + "epoch": 3.8098699033359984, + "grad_norm": 0.464653879404068, + "learning_rate": 1.7162826316122432e-05, + "loss": 0.0724, + "step": 104840 + }, + { + "epoch": 3.8102333018387964, + "grad_norm": 1.4593892097473145, + "learning_rate": 1.715789874703366e-05, + "loss": 0.0721, + "step": 104850 + }, + { + "epoch": 3.810596700341595, + "grad_norm": 0.3369029462337494, + "learning_rate": 1.7152971515833118e-05, + "loss": 0.0905, + "step": 104860 + }, + { + "epoch": 3.810960098844393, + "grad_norm": 0.6023349761962891, + "learning_rate": 1.7148044622733127e-05, + "loss": 0.0635, + "step": 104870 + }, + { + "epoch": 3.811323497347191, + "grad_norm": 0.5245524048805237, + "learning_rate": 1.7143118067945955e-05, + "loss": 0.0542, + "step": 104880 + }, + { + "epoch": 3.811686895849989, + "grad_norm": 0.5203279256820679, + "learning_rate": 1.713819185168387e-05, + "loss": 0.0914, + "step": 104890 + }, + { + "epoch": 3.812050294352787, + "grad_norm": 1.1707444190979004, + "learning_rate": 1.7133265974159123e-05, + "loss": 0.0817, + "step": 104900 + }, + { + "epoch": 3.8124136928555856, + "grad_norm": 0.41992756724357605, + "learning_rate": 1.7128340435583948e-05, + "loss": 0.075, + "step": 104910 + }, + { + "epoch": 3.8127770913583836, + "grad_norm": 0.31755873560905457, + "learning_rate": 1.7123415236170587e-05, + "loss": 0.057, + "step": 104920 + }, + { + "epoch": 3.8131404898611816, + "grad_norm": 0.6827322244644165, + "learning_rate": 1.7118490376131236e-05, + "loss": 0.0622, + "step": 104930 + }, + { + "epoch": 3.81350388836398, + "grad_norm": 0.27975374460220337, + "learning_rate": 1.7113565855678093e-05, + "loss": 0.0671, + "step": 104940 + }, + { + "epoch": 3.813867286866778, + "grad_norm": 0.5157541632652283, + "learning_rate": 1.710864167502334e-05, + "loss": 0.0853, + "step": 104950 + }, + { + "epoch": 3.8142306853695764, + "grad_norm": 0.5502781271934509, + "learning_rate": 1.7103717834379126e-05, + "loss": 0.0899, + "step": 104960 + }, + { + "epoch": 3.8145940838723744, + "grad_norm": 0.4045618176460266, + "learning_rate": 1.7098794333957627e-05, + "loss": 0.0714, + "step": 104970 + }, + { + "epoch": 3.8149574823751724, + "grad_norm": 1.508198857307434, + "learning_rate": 1.709387117397097e-05, + "loss": 0.0804, + "step": 104980 + }, + { + "epoch": 3.815320880877971, + "grad_norm": 0.34709009528160095, + "learning_rate": 1.7088948354631268e-05, + "loss": 0.0769, + "step": 104990 + }, + { + "epoch": 3.815684279380769, + "grad_norm": 1.4935734272003174, + "learning_rate": 1.708402587615063e-05, + "loss": 0.1259, + "step": 105000 + }, + { + "epoch": 3.815684279380769, + "eval_loss": 0.31589025259017944, + "eval_runtime": 181.3291, + "eval_samples_per_second": 40.887, + "eval_steps_per_second": 5.112, + "eval_wer": 0.1329533283714851, + "step": 105000 + }, + { + "epoch": 3.8160476778835672, + "grad_norm": 0.5566298365592957, + "learning_rate": 1.707910373874116e-05, + "loss": 0.0703, + "step": 105010 + }, + { + "epoch": 3.8164110763863652, + "grad_norm": 0.5731711983680725, + "learning_rate": 1.7074181942614925e-05, + "loss": 0.0697, + "step": 105020 + }, + { + "epoch": 3.816774474889163, + "grad_norm": 0.6845974922180176, + "learning_rate": 1.706926048798399e-05, + "loss": 0.0658, + "step": 105030 + }, + { + "epoch": 3.8171378733919616, + "grad_norm": 0.4797564744949341, + "learning_rate": 1.7064339375060407e-05, + "loss": 0.0656, + "step": 105040 + }, + { + "epoch": 3.8175012718947596, + "grad_norm": 0.47920140624046326, + "learning_rate": 1.705941860405619e-05, + "loss": 0.0717, + "step": 105050 + }, + { + "epoch": 3.817864670397558, + "grad_norm": 1.682005524635315, + "learning_rate": 1.7054498175183387e-05, + "loss": 0.0719, + "step": 105060 + }, + { + "epoch": 3.818228068900356, + "grad_norm": Infinity, + "learning_rate": 1.7050070081895424e-05, + "loss": 3.2264, + "step": 105070 + }, + { + "epoch": 3.818591467403154, + "grad_norm": 0.9053909182548523, + "learning_rate": 1.704515030365634e-05, + "loss": 0.0653, + "step": 105080 + }, + { + "epoch": 3.8189548659059525, + "grad_norm": 0.34650978446006775, + "learning_rate": 1.704023086816342e-05, + "loss": 0.0794, + "step": 105090 + }, + { + "epoch": 3.819318264408751, + "grad_norm": 0.4872235357761383, + "learning_rate": 1.7035311775628635e-05, + "loss": 0.0793, + "step": 105100 + }, + { + "epoch": 3.819681662911549, + "grad_norm": 0.5745834708213806, + "learning_rate": 1.7030393026263923e-05, + "loss": 0.0899, + "step": 105110 + }, + { + "epoch": 3.820045061414347, + "grad_norm": 0.28083306550979614, + "learning_rate": 1.7025474620281215e-05, + "loss": 0.4912, + "step": 105120 + }, + { + "epoch": 3.8204084599171453, + "grad_norm": 0.37169963121414185, + "learning_rate": 1.702055655789244e-05, + "loss": 0.0659, + "step": 105130 + }, + { + "epoch": 3.8207718584199433, + "grad_norm": 0.39560073614120483, + "learning_rate": 1.7015638839309488e-05, + "loss": 0.992, + "step": 105140 + }, + { + "epoch": 3.8211352569227417, + "grad_norm": 1.3541598320007324, + "learning_rate": 1.7010721464744254e-05, + "loss": 0.0931, + "step": 105150 + }, + { + "epoch": 3.8214986554255397, + "grad_norm": 0.5384494662284851, + "learning_rate": 1.7005804434408596e-05, + "loss": 0.068, + "step": 105160 + }, + { + "epoch": 3.8218620539283377, + "grad_norm": 0.4317835569381714, + "learning_rate": 1.7000887748514392e-05, + "loss": 0.0526, + "step": 105170 + }, + { + "epoch": 3.822225452431136, + "grad_norm": 2.2331621646881104, + "learning_rate": 1.6995971407273474e-05, + "loss": 0.0795, + "step": 105180 + }, + { + "epoch": 3.822588850933934, + "grad_norm": 0.9505332112312317, + "learning_rate": 1.6991055410897666e-05, + "loss": 0.0659, + "step": 105190 + }, + { + "epoch": 3.8229522494367325, + "grad_norm": 0.5199359655380249, + "learning_rate": 1.698613975959879e-05, + "loss": 0.0803, + "step": 105200 + }, + { + "epoch": 3.8233156479395305, + "grad_norm": 0.3324550688266754, + "learning_rate": 1.6981224453588625e-05, + "loss": 0.0634, + "step": 105210 + }, + { + "epoch": 3.8236790464423285, + "grad_norm": 1.3727059364318848, + "learning_rate": 1.697630949307898e-05, + "loss": 0.0668, + "step": 105220 + }, + { + "epoch": 3.824042444945127, + "grad_norm": 21.728702545166016, + "learning_rate": 1.6971394878281604e-05, + "loss": 0.2376, + "step": 105230 + }, + { + "epoch": 3.824405843447925, + "grad_norm": 0.6887884140014648, + "learning_rate": 1.6966480609408265e-05, + "loss": 0.0708, + "step": 105240 + }, + { + "epoch": 3.8247692419507233, + "grad_norm": 0.7199526429176331, + "learning_rate": 1.696156668667069e-05, + "loss": 0.0734, + "step": 105250 + }, + { + "epoch": 3.8251326404535213, + "grad_norm": 0.6712095141410828, + "learning_rate": 1.695665311028059e-05, + "loss": 0.0976, + "step": 105260 + }, + { + "epoch": 3.8254960389563193, + "grad_norm": 0.45200514793395996, + "learning_rate": 1.6951739880449707e-05, + "loss": 0.0668, + "step": 105270 + }, + { + "epoch": 3.8258594374591177, + "grad_norm": 0.4408622980117798, + "learning_rate": 1.6946826997389714e-05, + "loss": 0.0662, + "step": 105280 + }, + { + "epoch": 3.8262228359619157, + "grad_norm": 0.3657882511615753, + "learning_rate": 1.6941914461312293e-05, + "loss": 0.1123, + "step": 105290 + }, + { + "epoch": 3.826586234464714, + "grad_norm": 0.49692997336387634, + "learning_rate": 1.6937002272429102e-05, + "loss": 0.078, + "step": 105300 + }, + { + "epoch": 3.826949632967512, + "grad_norm": 1.2416878938674927, + "learning_rate": 1.6932090430951793e-05, + "loss": 0.0857, + "step": 105310 + }, + { + "epoch": 3.82731303147031, + "grad_norm": 0.4481440782546997, + "learning_rate": 1.692717893709201e-05, + "loss": 0.0482, + "step": 105320 + }, + { + "epoch": 3.8276764299731085, + "grad_norm": 4.277026176452637, + "learning_rate": 1.6922267791061358e-05, + "loss": 0.0658, + "step": 105330 + }, + { + "epoch": 3.8280398284759065, + "grad_norm": 0.5307076573371887, + "learning_rate": 1.6917356993071447e-05, + "loss": 0.09, + "step": 105340 + }, + { + "epoch": 3.828403226978705, + "grad_norm": 0.5222869515419006, + "learning_rate": 1.6912446543333858e-05, + "loss": 0.0694, + "step": 105350 + }, + { + "epoch": 3.828766625481503, + "grad_norm": 0.5144734382629395, + "learning_rate": 1.6907536442060185e-05, + "loss": 0.0826, + "step": 105360 + }, + { + "epoch": 3.829130023984301, + "grad_norm": 1.0012280941009521, + "learning_rate": 1.690262668946197e-05, + "loss": 0.0629, + "step": 105370 + }, + { + "epoch": 3.8294934224870993, + "grad_norm": 0.7172144651412964, + "learning_rate": 1.6897717285750758e-05, + "loss": 0.0787, + "step": 105380 + }, + { + "epoch": 3.8298568209898978, + "grad_norm": 0.3552602529525757, + "learning_rate": 1.6892808231138087e-05, + "loss": 0.0678, + "step": 105390 + }, + { + "epoch": 3.8302202194926958, + "grad_norm": 0.5939834713935852, + "learning_rate": 1.6887899525835447e-05, + "loss": 0.0892, + "step": 105400 + }, + { + "epoch": 3.8305836179954937, + "grad_norm": 0.5008695721626282, + "learning_rate": 1.6882991170054362e-05, + "loss": 0.0528, + "step": 105410 + }, + { + "epoch": 3.830947016498292, + "grad_norm": 0.30899590253829956, + "learning_rate": 1.6878083164006314e-05, + "loss": 0.0618, + "step": 105420 + }, + { + "epoch": 3.83131041500109, + "grad_norm": 0.811284601688385, + "learning_rate": 1.6873175507902762e-05, + "loss": 0.0621, + "step": 105430 + }, + { + "epoch": 3.8316738135038886, + "grad_norm": 0.5650566220283508, + "learning_rate": 1.6868268201955164e-05, + "loss": 0.073, + "step": 105440 + }, + { + "epoch": 3.8320372120066866, + "grad_norm": 3.2447309494018555, + "learning_rate": 1.6863361246374944e-05, + "loss": 0.0765, + "step": 105450 + }, + { + "epoch": 3.8324006105094846, + "grad_norm": 0.9772164821624756, + "learning_rate": 1.6858454641373543e-05, + "loss": 0.0875, + "step": 105460 + }, + { + "epoch": 3.832764009012283, + "grad_norm": 2.7476863861083984, + "learning_rate": 1.685354838716237e-05, + "loss": 0.8768, + "step": 105470 + }, + { + "epoch": 3.833127407515081, + "grad_norm": 2.6818318367004395, + "learning_rate": 1.6848642483952808e-05, + "loss": 0.07, + "step": 105480 + }, + { + "epoch": 3.8334908060178794, + "grad_norm": 2.635261058807373, + "learning_rate": 1.6843736931956238e-05, + "loss": 0.0747, + "step": 105490 + }, + { + "epoch": 3.8338542045206774, + "grad_norm": 0.7043896913528442, + "learning_rate": 1.6838831731384022e-05, + "loss": 0.0884, + "step": 105500 + }, + { + "epoch": 3.8342176030234754, + "grad_norm": 0.5598475337028503, + "learning_rate": 1.6833926882447516e-05, + "loss": 0.0857, + "step": 105510 + }, + { + "epoch": 3.834581001526274, + "grad_norm": 0.49555504322052, + "learning_rate": 1.682902238535804e-05, + "loss": 0.0676, + "step": 105520 + }, + { + "epoch": 3.8349444000290718, + "grad_norm": 0.46926450729370117, + "learning_rate": 1.682411824032692e-05, + "loss": 0.0518, + "step": 105530 + }, + { + "epoch": 3.83530779853187, + "grad_norm": 2.069505214691162, + "learning_rate": 1.6819214447565445e-05, + "loss": 0.0886, + "step": 105540 + }, + { + "epoch": 3.835671197034668, + "grad_norm": 0.7095978856086731, + "learning_rate": 1.6814311007284923e-05, + "loss": 0.066, + "step": 105550 + }, + { + "epoch": 3.836034595537466, + "grad_norm": 0.3672688603401184, + "learning_rate": 1.6809407919696615e-05, + "loss": 0.101, + "step": 105560 + }, + { + "epoch": 3.8363979940402646, + "grad_norm": 0.9248769879341125, + "learning_rate": 1.6804505185011777e-05, + "loss": 0.0519, + "step": 105570 + }, + { + "epoch": 3.8367613925430626, + "grad_norm": 0.9654824733734131, + "learning_rate": 1.679960280344165e-05, + "loss": 0.0796, + "step": 105580 + }, + { + "epoch": 3.837124791045861, + "grad_norm": 0.5376684069633484, + "learning_rate": 1.6794700775197452e-05, + "loss": 0.0733, + "step": 105590 + }, + { + "epoch": 3.837488189548659, + "grad_norm": 0.3716341555118561, + "learning_rate": 1.6789799100490414e-05, + "loss": 0.0693, + "step": 105600 + }, + { + "epoch": 3.837488189548659, + "eval_loss": 0.31987106800079346, + "eval_runtime": 181.8851, + "eval_samples_per_second": 40.762, + "eval_steps_per_second": 5.097, + "eval_wer": 0.13323470147221667, + "step": 105600 + }, + { + "epoch": 3.837851588051457, + "grad_norm": 0.5739371180534363, + "learning_rate": 1.678489777953172e-05, + "loss": 0.0888, + "step": 105610 + }, + { + "epoch": 3.8382149865542554, + "grad_norm": 0.628527045249939, + "learning_rate": 1.677999681253255e-05, + "loss": 0.6623, + "step": 105620 + }, + { + "epoch": 3.8385783850570534, + "grad_norm": 0.6783026456832886, + "learning_rate": 1.6775096199704067e-05, + "loss": 0.0822, + "step": 105630 + }, + { + "epoch": 3.838941783559852, + "grad_norm": 0.5286380052566528, + "learning_rate": 1.6770195941257425e-05, + "loss": 0.0997, + "step": 105640 + }, + { + "epoch": 3.83930518206265, + "grad_norm": 0.7992109656333923, + "learning_rate": 1.676529603740376e-05, + "loss": 0.0738, + "step": 105650 + }, + { + "epoch": 3.839668580565448, + "grad_norm": 0.4643116593360901, + "learning_rate": 1.6760396488354195e-05, + "loss": 0.056, + "step": 105660 + }, + { + "epoch": 3.8400319790682462, + "grad_norm": 0.5640047192573547, + "learning_rate": 1.6755497294319823e-05, + "loss": 0.0651, + "step": 105670 + }, + { + "epoch": 3.8403953775710447, + "grad_norm": 0.4988393485546112, + "learning_rate": 1.6750598455511737e-05, + "loss": 0.0622, + "step": 105680 + }, + { + "epoch": 3.8407587760738426, + "grad_norm": 0.24571850895881653, + "learning_rate": 1.674569997214101e-05, + "loss": 3.0376, + "step": 105690 + }, + { + "epoch": 3.8411221745766406, + "grad_norm": 0.3480076193809509, + "learning_rate": 1.674080184441871e-05, + "loss": 0.07, + "step": 105700 + }, + { + "epoch": 3.841485573079439, + "grad_norm": 0.33364272117614746, + "learning_rate": 1.6735904072555868e-05, + "loss": 0.0919, + "step": 105710 + }, + { + "epoch": 3.841848971582237, + "grad_norm": 0.29854241013526917, + "learning_rate": 1.6731006656763515e-05, + "loss": 0.0564, + "step": 105720 + }, + { + "epoch": 3.8422123700850355, + "grad_norm": 0.4374145269393921, + "learning_rate": 1.6726109597252662e-05, + "loss": 0.0678, + "step": 105730 + }, + { + "epoch": 3.8425757685878335, + "grad_norm": 0.3668364882469177, + "learning_rate": 1.6721212894234314e-05, + "loss": 0.0832, + "step": 105740 + }, + { + "epoch": 3.8429391670906314, + "grad_norm": 0.8300511837005615, + "learning_rate": 1.671631654791945e-05, + "loss": 0.0963, + "step": 105750 + }, + { + "epoch": 3.84330256559343, + "grad_norm": 0.600064754486084, + "learning_rate": 1.6711420558519026e-05, + "loss": 0.0829, + "step": 105760 + }, + { + "epoch": 3.843665964096228, + "grad_norm": 0.42504894733428955, + "learning_rate": 1.6706524926243995e-05, + "loss": 0.0541, + "step": 105770 + }, + { + "epoch": 3.8440293625990263, + "grad_norm": 1.8978909254074097, + "learning_rate": 1.6701629651305296e-05, + "loss": 0.071, + "step": 105780 + }, + { + "epoch": 3.8443927611018243, + "grad_norm": 0.44248759746551514, + "learning_rate": 1.6696734733913857e-05, + "loss": 0.1478, + "step": 105790 + }, + { + "epoch": 3.8447561596046222, + "grad_norm": 0.6377494931221008, + "learning_rate": 1.6691840174280577e-05, + "loss": 0.0822, + "step": 105800 + }, + { + "epoch": 3.8451195581074207, + "grad_norm": 0.37662044167518616, + "learning_rate": 1.6686945972616336e-05, + "loss": 0.1136, + "step": 105810 + }, + { + "epoch": 3.8454829566102187, + "grad_norm": 0.3601333796977997, + "learning_rate": 1.668205212913202e-05, + "loss": 0.063, + "step": 105820 + }, + { + "epoch": 3.845846355113017, + "grad_norm": 0.36584463715553284, + "learning_rate": 1.6677158644038478e-05, + "loss": 0.0632, + "step": 105830 + }, + { + "epoch": 3.846209753615815, + "grad_norm": 0.5580528378486633, + "learning_rate": 1.667226551754656e-05, + "loss": 0.0957, + "step": 105840 + }, + { + "epoch": 3.846573152118613, + "grad_norm": 1.119971752166748, + "learning_rate": 1.6667372749867093e-05, + "loss": 0.0798, + "step": 105850 + }, + { + "epoch": 3.8469365506214115, + "grad_norm": 0.424441397190094, + "learning_rate": 1.6662480341210882e-05, + "loss": 0.072, + "step": 105860 + }, + { + "epoch": 3.8472999491242095, + "grad_norm": 0.39438381791114807, + "learning_rate": 1.6657588291788734e-05, + "loss": 0.1601, + "step": 105870 + }, + { + "epoch": 3.847663347627008, + "grad_norm": 0.9732398390769958, + "learning_rate": 1.6652696601811417e-05, + "loss": 0.0615, + "step": 105880 + }, + { + "epoch": 3.848026746129806, + "grad_norm": 0.6014724969863892, + "learning_rate": 1.664780527148971e-05, + "loss": 0.067, + "step": 105890 + }, + { + "epoch": 3.848390144632604, + "grad_norm": 0.38137149810791016, + "learning_rate": 1.6642914301034355e-05, + "loss": 0.0779, + "step": 105900 + }, + { + "epoch": 3.8487535431354023, + "grad_norm": 0.6532992720603943, + "learning_rate": 1.663802369065608e-05, + "loss": 0.0636, + "step": 105910 + }, + { + "epoch": 3.8491169416382003, + "grad_norm": 0.22969412803649902, + "learning_rate": 1.663313344056562e-05, + "loss": 0.0629, + "step": 105920 + }, + { + "epoch": 3.8494803401409987, + "grad_norm": 0.3547755777835846, + "learning_rate": 1.662824355097367e-05, + "loss": 0.0715, + "step": 105930 + }, + { + "epoch": 3.8498437386437967, + "grad_norm": 1.1977945566177368, + "learning_rate": 1.662335402209092e-05, + "loss": 0.1126, + "step": 105940 + }, + { + "epoch": 3.8502071371465947, + "grad_norm": 1.9513736963272095, + "learning_rate": 1.6618464854128036e-05, + "loss": 0.1062, + "step": 105950 + }, + { + "epoch": 3.850570535649393, + "grad_norm": 0.7033438682556152, + "learning_rate": 1.6613576047295688e-05, + "loss": 0.0653, + "step": 105960 + }, + { + "epoch": 3.8509339341521915, + "grad_norm": 0.6035280227661133, + "learning_rate": 1.66086876018045e-05, + "loss": 0.0624, + "step": 105970 + }, + { + "epoch": 3.8512973326549895, + "grad_norm": 0.5035017132759094, + "learning_rate": 1.660379951786511e-05, + "loss": 0.0756, + "step": 105980 + }, + { + "epoch": 3.8516607311577875, + "grad_norm": 0.5310548543930054, + "learning_rate": 1.6598911795688132e-05, + "loss": 0.0717, + "step": 105990 + }, + { + "epoch": 3.852024129660586, + "grad_norm": 0.5817539691925049, + "learning_rate": 1.6594024435484144e-05, + "loss": 0.0751, + "step": 106000 + }, + { + "epoch": 3.852387528163384, + "grad_norm": 0.823477566242218, + "learning_rate": 1.658913743746374e-05, + "loss": 0.097, + "step": 106010 + }, + { + "epoch": 3.8527509266661824, + "grad_norm": 0.6416204571723938, + "learning_rate": 1.658425080183747e-05, + "loss": 0.0612, + "step": 106020 + }, + { + "epoch": 3.8531143251689803, + "grad_norm": 7.8174920082092285, + "learning_rate": 1.65793645288159e-05, + "loss": 0.0709, + "step": 106030 + }, + { + "epoch": 3.8534777236717783, + "grad_norm": 0.37136542797088623, + "learning_rate": 1.6574478618609546e-05, + "loss": 0.075, + "step": 106040 + }, + { + "epoch": 3.8538411221745767, + "grad_norm": 0.7363283634185791, + "learning_rate": 1.6569593071428932e-05, + "loss": 0.0781, + "step": 106050 + }, + { + "epoch": 3.8542045206773747, + "grad_norm": 0.7914659976959229, + "learning_rate": 1.656470788748456e-05, + "loss": 0.0683, + "step": 106060 + }, + { + "epoch": 3.854567919180173, + "grad_norm": 0.710452675819397, + "learning_rate": 1.6559823066986906e-05, + "loss": 0.059, + "step": 106070 + }, + { + "epoch": 3.854931317682971, + "grad_norm": 0.8524606227874756, + "learning_rate": 1.655493861014645e-05, + "loss": 0.0528, + "step": 106080 + }, + { + "epoch": 3.855294716185769, + "grad_norm": 1.270829677581787, + "learning_rate": 1.6550054517173635e-05, + "loss": 0.0747, + "step": 106090 + }, + { + "epoch": 3.8556581146885676, + "grad_norm": 0.5435279011726379, + "learning_rate": 1.6545170788278913e-05, + "loss": 0.0787, + "step": 106100 + }, + { + "epoch": 3.8560215131913655, + "grad_norm": 0.6725544333457947, + "learning_rate": 1.65402874236727e-05, + "loss": 0.074, + "step": 106110 + }, + { + "epoch": 3.856384911694164, + "grad_norm": 0.43053969740867615, + "learning_rate": 1.6535404423565397e-05, + "loss": 0.0678, + "step": 106120 + }, + { + "epoch": 3.856748310196962, + "grad_norm": 0.36162063479423523, + "learning_rate": 1.65305217881674e-05, + "loss": 0.062, + "step": 106130 + }, + { + "epoch": 3.85711170869976, + "grad_norm": 2.1975929737091064, + "learning_rate": 1.6525639517689088e-05, + "loss": 0.0719, + "step": 106140 + }, + { + "epoch": 3.8574751072025584, + "grad_norm": 0.450427770614624, + "learning_rate": 1.652075761234082e-05, + "loss": 0.0682, + "step": 106150 + }, + { + "epoch": 3.8578385057053564, + "grad_norm": 1.4081052541732788, + "learning_rate": 1.6515876072332934e-05, + "loss": 0.06, + "step": 106160 + }, + { + "epoch": 3.858201904208155, + "grad_norm": 0.28008484840393066, + "learning_rate": 1.6510994897875763e-05, + "loss": 0.0616, + "step": 106170 + }, + { + "epoch": 3.8585653027109528, + "grad_norm": 0.6576961874961853, + "learning_rate": 1.6506114089179612e-05, + "loss": 0.0617, + "step": 106180 + }, + { + "epoch": 3.8589287012137508, + "grad_norm": 0.5949859023094177, + "learning_rate": 1.650123364645479e-05, + "loss": 0.0767, + "step": 106190 + }, + { + "epoch": 3.859292099716549, + "grad_norm": 1.566115379333496, + "learning_rate": 1.6496353569911575e-05, + "loss": 0.1152, + "step": 106200 + }, + { + "epoch": 3.859292099716549, + "eval_loss": 0.3167436122894287, + "eval_runtime": 180.8794, + "eval_samples_per_second": 40.989, + "eval_steps_per_second": 5.125, + "eval_wer": 0.1313286256285512, + "step": 106200 + }, + { + "epoch": 3.8596554982193476, + "grad_norm": 0.697790801525116, + "learning_rate": 1.649147385976022e-05, + "loss": 0.0746, + "step": 106210 + }, + { + "epoch": 3.8600188967221456, + "grad_norm": 0.2491450309753418, + "learning_rate": 1.648659451621098e-05, + "loss": 0.0578, + "step": 106220 + }, + { + "epoch": 3.8603822952249436, + "grad_norm": 0.42509350180625916, + "learning_rate": 1.64817155394741e-05, + "loss": 0.0646, + "step": 106230 + }, + { + "epoch": 3.8607456937277416, + "grad_norm": 1.1668282747268677, + "learning_rate": 1.647683692975979e-05, + "loss": 0.0992, + "step": 106240 + }, + { + "epoch": 3.86110909223054, + "grad_norm": 0.8720428347587585, + "learning_rate": 1.647195868727825e-05, + "loss": 0.0714, + "step": 106250 + }, + { + "epoch": 3.8614724907333384, + "grad_norm": 0.5526779294013977, + "learning_rate": 1.6467080812239662e-05, + "loss": 0.0523, + "step": 106260 + }, + { + "epoch": 3.8618358892361364, + "grad_norm": 0.6438971161842346, + "learning_rate": 1.6462203304854203e-05, + "loss": 0.0666, + "step": 106270 + }, + { + "epoch": 3.8621992877389344, + "grad_norm": 0.5303975343704224, + "learning_rate": 1.645732616533203e-05, + "loss": 0.0634, + "step": 106280 + }, + { + "epoch": 3.862562686241733, + "grad_norm": 0.9776999354362488, + "learning_rate": 1.6452449393883276e-05, + "loss": 0.0791, + "step": 106290 + }, + { + "epoch": 3.862926084744531, + "grad_norm": 0.5289604663848877, + "learning_rate": 1.6447572990718068e-05, + "loss": 0.1386, + "step": 106300 + }, + { + "epoch": 3.8632894832473292, + "grad_norm": 0.6309967637062073, + "learning_rate": 1.6442696956046504e-05, + "loss": 0.1688, + "step": 106310 + }, + { + "epoch": 3.863652881750127, + "grad_norm": 0.8625107407569885, + "learning_rate": 1.6437821290078682e-05, + "loss": 0.0569, + "step": 106320 + }, + { + "epoch": 3.864016280252925, + "grad_norm": 0.4633709788322449, + "learning_rate": 1.643294599302468e-05, + "loss": 0.0567, + "step": 106330 + }, + { + "epoch": 3.8643796787557236, + "grad_norm": 0.47958311438560486, + "learning_rate": 1.6428071065094553e-05, + "loss": 0.2378, + "step": 106340 + }, + { + "epoch": 3.8647430772585216, + "grad_norm": 0.53351891040802, + "learning_rate": 1.6423196506498338e-05, + "loss": 0.1388, + "step": 106350 + }, + { + "epoch": 3.86510647576132, + "grad_norm": 0.5696703791618347, + "learning_rate": 1.6418322317446073e-05, + "loss": 0.0815, + "step": 106360 + }, + { + "epoch": 3.865469874264118, + "grad_norm": 0.8141573667526245, + "learning_rate": 1.641344849814777e-05, + "loss": 0.1241, + "step": 106370 + }, + { + "epoch": 3.865833272766916, + "grad_norm": 2.059314012527466, + "learning_rate": 1.6408575048813424e-05, + "loss": 0.0726, + "step": 106380 + }, + { + "epoch": 3.8661966712697144, + "grad_norm": 0.37247422337532043, + "learning_rate": 1.6403701969653004e-05, + "loss": 0.9905, + "step": 106390 + }, + { + "epoch": 3.8665600697725124, + "grad_norm": 2.2825708389282227, + "learning_rate": 1.639882926087648e-05, + "loss": 0.0743, + "step": 106400 + }, + { + "epoch": 3.866923468275311, + "grad_norm": 0.778655469417572, + "learning_rate": 1.6393956922693798e-05, + "loss": 0.0703, + "step": 106410 + }, + { + "epoch": 3.867286866778109, + "grad_norm": 1.8675559759140015, + "learning_rate": 1.6389084955314896e-05, + "loss": 0.0579, + "step": 106420 + }, + { + "epoch": 3.867650265280907, + "grad_norm": 0.796132504940033, + "learning_rate": 1.6384213358949683e-05, + "loss": 0.0465, + "step": 106430 + }, + { + "epoch": 3.8680136637837053, + "grad_norm": 1.6710090637207031, + "learning_rate": 1.6379342133808067e-05, + "loss": 0.0879, + "step": 106440 + }, + { + "epoch": 3.8683770622865032, + "grad_norm": 0.64317786693573, + "learning_rate": 1.6374471280099912e-05, + "loss": 0.0945, + "step": 106450 + }, + { + "epoch": 3.8687404607893017, + "grad_norm": 1.1464306116104126, + "learning_rate": 1.6369600798035113e-05, + "loss": 0.1468, + "step": 106460 + }, + { + "epoch": 3.8691038592920997, + "grad_norm": 4.935044765472412, + "learning_rate": 1.636473068782351e-05, + "loss": 0.0602, + "step": 106470 + }, + { + "epoch": 3.8694672577948976, + "grad_norm": 0.9155591726303101, + "learning_rate": 1.6359860949674932e-05, + "loss": 0.0722, + "step": 106480 + }, + { + "epoch": 3.869830656297696, + "grad_norm": 0.3697691261768341, + "learning_rate": 1.6354991583799208e-05, + "loss": 0.0762, + "step": 106490 + }, + { + "epoch": 3.8701940548004945, + "grad_norm": 0.9484128952026367, + "learning_rate": 1.6350122590406126e-05, + "loss": 0.0743, + "step": 106500 + }, + { + "epoch": 3.8705574533032925, + "grad_norm": 0.4434047043323517, + "learning_rate": 1.6345253969705492e-05, + "loss": 0.1289, + "step": 106510 + }, + { + "epoch": 3.8709208518060905, + "grad_norm": 0.3249851167201996, + "learning_rate": 1.6340385721907075e-05, + "loss": 0.0621, + "step": 106520 + }, + { + "epoch": 3.8712842503088885, + "grad_norm": 1.056702733039856, + "learning_rate": 1.6335517847220626e-05, + "loss": 0.0587, + "step": 106530 + }, + { + "epoch": 3.871647648811687, + "grad_norm": 0.3898318111896515, + "learning_rate": 1.6330650345855874e-05, + "loss": 0.0668, + "step": 106540 + }, + { + "epoch": 3.8720110473144853, + "grad_norm": 0.44665372371673584, + "learning_rate": 1.6325783218022563e-05, + "loss": 0.0572, + "step": 106550 + }, + { + "epoch": 3.8723744458172833, + "grad_norm": 0.4088903069496155, + "learning_rate": 1.632091646393039e-05, + "loss": 0.0907, + "step": 106560 + }, + { + "epoch": 3.8727378443200813, + "grad_norm": 0.8101043105125427, + "learning_rate": 1.6316050083789046e-05, + "loss": 0.0548, + "step": 106570 + }, + { + "epoch": 3.8731012428228797, + "grad_norm": 0.627242386341095, + "learning_rate": 1.6311184077808206e-05, + "loss": 0.2322, + "step": 106580 + }, + { + "epoch": 3.8734646413256777, + "grad_norm": 0.3941401243209839, + "learning_rate": 1.6306318446197518e-05, + "loss": 0.074, + "step": 106590 + }, + { + "epoch": 3.873828039828476, + "grad_norm": 4.295115947723389, + "learning_rate": 1.630145318916665e-05, + "loss": 0.0713, + "step": 106600 + }, + { + "epoch": 3.874191438331274, + "grad_norm": 0.5540897846221924, + "learning_rate": 1.629658830692521e-05, + "loss": 0.0737, + "step": 106610 + }, + { + "epoch": 3.874554836834072, + "grad_norm": 0.6114850044250488, + "learning_rate": 1.6291723799682818e-05, + "loss": 0.1015, + "step": 106620 + }, + { + "epoch": 3.8749182353368705, + "grad_norm": 0.686089038848877, + "learning_rate": 1.6286859667649058e-05, + "loss": 0.0661, + "step": 106630 + }, + { + "epoch": 3.8752816338396685, + "grad_norm": 0.49293363094329834, + "learning_rate": 1.6281995911033507e-05, + "loss": 0.076, + "step": 106640 + }, + { + "epoch": 3.875645032342467, + "grad_norm": 0.6190850734710693, + "learning_rate": 1.6277132530045742e-05, + "loss": 0.0892, + "step": 106650 + }, + { + "epoch": 3.876008430845265, + "grad_norm": 0.4621674418449402, + "learning_rate": 1.62722695248953e-05, + "loss": 0.0678, + "step": 106660 + }, + { + "epoch": 3.876371829348063, + "grad_norm": 1.242116093635559, + "learning_rate": 1.626740689579171e-05, + "loss": 0.0694, + "step": 106670 + }, + { + "epoch": 3.8767352278508613, + "grad_norm": 0.4658359885215759, + "learning_rate": 1.6262544642944488e-05, + "loss": 0.0659, + "step": 106680 + }, + { + "epoch": 3.8770986263536593, + "grad_norm": 0.6650833487510681, + "learning_rate": 1.625768276656312e-05, + "loss": 0.0776, + "step": 106690 + }, + { + "epoch": 3.8774620248564577, + "grad_norm": 0.6864154934883118, + "learning_rate": 1.6252821266857105e-05, + "loss": 0.0891, + "step": 106700 + }, + { + "epoch": 3.8778254233592557, + "grad_norm": 0.6746088862419128, + "learning_rate": 1.62479601440359e-05, + "loss": 0.1, + "step": 106710 + }, + { + "epoch": 3.8781888218620537, + "grad_norm": 0.5241110920906067, + "learning_rate": 1.6243099398308954e-05, + "loss": 0.0599, + "step": 106720 + }, + { + "epoch": 3.878552220364852, + "grad_norm": 0.4247439205646515, + "learning_rate": 1.6238239029885684e-05, + "loss": 0.0843, + "step": 106730 + }, + { + "epoch": 3.87891561886765, + "grad_norm": 0.42593395709991455, + "learning_rate": 1.6233379038975532e-05, + "loss": 0.0776, + "step": 106740 + }, + { + "epoch": 3.8792790173704486, + "grad_norm": 0.4933978021144867, + "learning_rate": 1.6228519425787885e-05, + "loss": 0.0887, + "step": 106750 + }, + { + "epoch": 3.8796424158732465, + "grad_norm": 39.70121383666992, + "learning_rate": 1.6223660190532126e-05, + "loss": 0.776, + "step": 106760 + }, + { + "epoch": 3.8800058143760445, + "grad_norm": 1.380768895149231, + "learning_rate": 1.6218801333417624e-05, + "loss": 0.0576, + "step": 106770 + }, + { + "epoch": 3.880369212878843, + "grad_norm": 0.2609730064868927, + "learning_rate": 1.621394285465372e-05, + "loss": 0.0631, + "step": 106780 + }, + { + "epoch": 3.8807326113816414, + "grad_norm": 0.5191308856010437, + "learning_rate": 1.6209084754449766e-05, + "loss": 0.0902, + "step": 106790 + }, + { + "epoch": 3.8810960098844394, + "grad_norm": 0.930590033531189, + "learning_rate": 1.620422703301507e-05, + "loss": 0.0548, + "step": 106800 + }, + { + "epoch": 3.8810960098844394, + "eval_loss": 0.30408763885498047, + "eval_runtime": 179.2538, + "eval_samples_per_second": 41.36, + "eval_steps_per_second": 5.171, + "eval_wer": 0.13078403253036106, + "step": 106800 + }, + { + "epoch": 3.8814594083872374, + "grad_norm": 0.5123357176780701, + "learning_rate": 1.619936969055894e-05, + "loss": 0.0778, + "step": 106810 + }, + { + "epoch": 3.8818228068900353, + "grad_norm": 0.5172662138938904, + "learning_rate": 1.6194512727290656e-05, + "loss": 0.0528, + "step": 106820 + }, + { + "epoch": 3.8821862053928338, + "grad_norm": 1.0481631755828857, + "learning_rate": 1.6189656143419474e-05, + "loss": 0.0652, + "step": 106830 + }, + { + "epoch": 3.882549603895632, + "grad_norm": 0.29324159026145935, + "learning_rate": 1.6184799939154677e-05, + "loss": 0.083, + "step": 106840 + }, + { + "epoch": 3.88291300239843, + "grad_norm": 0.7054247260093689, + "learning_rate": 1.6179944114705484e-05, + "loss": 0.0834, + "step": 106850 + }, + { + "epoch": 3.883276400901228, + "grad_norm": 0.34651482105255127, + "learning_rate": 1.617508867028112e-05, + "loss": 0.0605, + "step": 106860 + }, + { + "epoch": 3.8836397994040266, + "grad_norm": 2.1791582107543945, + "learning_rate": 1.6170233606090783e-05, + "loss": 0.0598, + "step": 106870 + }, + { + "epoch": 3.8840031979068246, + "grad_norm": 0.725145161151886, + "learning_rate": 1.6165378922343652e-05, + "loss": 0.086, + "step": 106880 + }, + { + "epoch": 3.884366596409623, + "grad_norm": 0.8774747848510742, + "learning_rate": 1.616052461924892e-05, + "loss": 0.0777, + "step": 106890 + }, + { + "epoch": 3.884729994912421, + "grad_norm": 0.5606204867362976, + "learning_rate": 1.6155670697015735e-05, + "loss": 0.0838, + "step": 106900 + }, + { + "epoch": 3.885093393415219, + "grad_norm": 0.44490596652030945, + "learning_rate": 1.615081715585323e-05, + "loss": 0.0733, + "step": 106910 + }, + { + "epoch": 3.8854567919180174, + "grad_norm": 0.3232552707195282, + "learning_rate": 1.6145963995970514e-05, + "loss": 0.0528, + "step": 106920 + }, + { + "epoch": 3.8858201904208154, + "grad_norm": 0.4871584475040436, + "learning_rate": 1.614111121757672e-05, + "loss": 0.1162, + "step": 106930 + }, + { + "epoch": 3.886183588923614, + "grad_norm": 2.606072425842285, + "learning_rate": 1.6136258820880925e-05, + "loss": 0.0982, + "step": 106940 + }, + { + "epoch": 3.886546987426412, + "grad_norm": 0.6759468913078308, + "learning_rate": 1.61314068060922e-05, + "loss": 0.0551, + "step": 106950 + }, + { + "epoch": 3.88691038592921, + "grad_norm": 0.5082545876502991, + "learning_rate": 1.61265551734196e-05, + "loss": 0.1194, + "step": 106960 + }, + { + "epoch": 3.887273784432008, + "grad_norm": 0.4217374920845032, + "learning_rate": 1.6121703923072155e-05, + "loss": 0.0739, + "step": 106970 + }, + { + "epoch": 3.887637182934806, + "grad_norm": 0.2575652003288269, + "learning_rate": 1.611685305525891e-05, + "loss": 0.0517, + "step": 106980 + }, + { + "epoch": 3.8880005814376046, + "grad_norm": 0.5304332375526428, + "learning_rate": 1.611200257018886e-05, + "loss": 0.0997, + "step": 106990 + }, + { + "epoch": 3.8883639799404026, + "grad_norm": 1.2850308418273926, + "learning_rate": 1.6107152468070995e-05, + "loss": 0.0848, + "step": 107000 + }, + { + "epoch": 3.8887273784432006, + "grad_norm": 0.8286998867988586, + "learning_rate": 1.610230274911429e-05, + "loss": 0.06, + "step": 107010 + }, + { + "epoch": 3.889090776945999, + "grad_norm": 0.36516815423965454, + "learning_rate": 1.609745341352769e-05, + "loss": 0.0546, + "step": 107020 + }, + { + "epoch": 3.889454175448797, + "grad_norm": 0.608978271484375, + "learning_rate": 1.6092604461520162e-05, + "loss": 0.0591, + "step": 107030 + }, + { + "epoch": 3.8898175739515954, + "grad_norm": 0.40146422386169434, + "learning_rate": 1.608775589330061e-05, + "loss": 0.0586, + "step": 107040 + }, + { + "epoch": 3.8901809724543934, + "grad_norm": 0.7387737035751343, + "learning_rate": 1.608290770907795e-05, + "loss": 0.0826, + "step": 107050 + }, + { + "epoch": 3.8905443709571914, + "grad_norm": 1.0020533800125122, + "learning_rate": 1.6078059909061067e-05, + "loss": 0.0709, + "step": 107060 + }, + { + "epoch": 3.89090776945999, + "grad_norm": 0.44272226095199585, + "learning_rate": 1.6073212493458827e-05, + "loss": 0.0711, + "step": 107070 + }, + { + "epoch": 3.8912711679627883, + "grad_norm": 0.39536282420158386, + "learning_rate": 1.6068365462480112e-05, + "loss": 0.059, + "step": 107080 + }, + { + "epoch": 3.8916345664655863, + "grad_norm": 1.6497310400009155, + "learning_rate": 1.606351881633375e-05, + "loss": 0.0642, + "step": 107090 + }, + { + "epoch": 3.8919979649683842, + "grad_norm": 0.7479108572006226, + "learning_rate": 1.6058672555228565e-05, + "loss": 0.0902, + "step": 107100 + }, + { + "epoch": 3.8923613634711822, + "grad_norm": 0.6676745414733887, + "learning_rate": 1.6053826679373356e-05, + "loss": 0.0975, + "step": 107110 + }, + { + "epoch": 3.8927247619739807, + "grad_norm": 0.6554991602897644, + "learning_rate": 1.6048981188976936e-05, + "loss": 0.0753, + "step": 107120 + }, + { + "epoch": 3.893088160476779, + "grad_norm": 0.561316728591919, + "learning_rate": 1.6044136084248068e-05, + "loss": 0.0874, + "step": 107130 + }, + { + "epoch": 3.893451558979577, + "grad_norm": 0.5027711987495422, + "learning_rate": 1.6039291365395515e-05, + "loss": 0.0706, + "step": 107140 + }, + { + "epoch": 3.893814957482375, + "grad_norm": 0.5242792963981628, + "learning_rate": 1.603444703262801e-05, + "loss": 0.0705, + "step": 107150 + }, + { + "epoch": 3.8941783559851735, + "grad_norm": 0.5882894992828369, + "learning_rate": 1.6029603086154274e-05, + "loss": 0.0854, + "step": 107160 + }, + { + "epoch": 3.8945417544879715, + "grad_norm": 0.5304105281829834, + "learning_rate": 1.6024759526183037e-05, + "loss": 0.0564, + "step": 107170 + }, + { + "epoch": 3.89490515299077, + "grad_norm": 0.9514649510383606, + "learning_rate": 1.6019916352922972e-05, + "loss": 0.0565, + "step": 107180 + }, + { + "epoch": 3.895268551493568, + "grad_norm": 0.5723317861557007, + "learning_rate": 1.6015073566582762e-05, + "loss": 0.0756, + "step": 107190 + }, + { + "epoch": 3.895631949996366, + "grad_norm": 1.066159963607788, + "learning_rate": 1.6010231167371067e-05, + "loss": 0.0668, + "step": 107200 + }, + { + "epoch": 3.8959953484991643, + "grad_norm": 0.6977314949035645, + "learning_rate": 1.6005389155496514e-05, + "loss": 0.063, + "step": 107210 + }, + { + "epoch": 3.8963587470019623, + "grad_norm": 0.9089870452880859, + "learning_rate": 1.6000547531167747e-05, + "loss": 0.0545, + "step": 107220 + }, + { + "epoch": 3.8967221455047607, + "grad_norm": 0.4446127116680145, + "learning_rate": 1.599570629459337e-05, + "loss": 0.0672, + "step": 107230 + }, + { + "epoch": 3.8970855440075587, + "grad_norm": 0.4407578408718109, + "learning_rate": 1.599086544598197e-05, + "loss": 0.0931, + "step": 107240 + }, + { + "epoch": 3.8974489425103567, + "grad_norm": 0.5468510389328003, + "learning_rate": 1.5986024985542124e-05, + "loss": 0.0712, + "step": 107250 + }, + { + "epoch": 3.897812341013155, + "grad_norm": 0.7128602266311646, + "learning_rate": 1.598118491348238e-05, + "loss": 0.0582, + "step": 107260 + }, + { + "epoch": 3.898175739515953, + "grad_norm": 0.6812794804573059, + "learning_rate": 1.5976345230011297e-05, + "loss": 0.0638, + "step": 107270 + }, + { + "epoch": 3.8985391380187515, + "grad_norm": 0.9153865575790405, + "learning_rate": 1.597150593533739e-05, + "loss": 0.0765, + "step": 107280 + }, + { + "epoch": 3.8989025365215495, + "grad_norm": 0.4191397428512573, + "learning_rate": 1.5966667029669173e-05, + "loss": 0.1032, + "step": 107290 + }, + { + "epoch": 3.8992659350243475, + "grad_norm": 0.9470223784446716, + "learning_rate": 1.5961828513215132e-05, + "loss": 0.0772, + "step": 107300 + }, + { + "epoch": 3.899629333527146, + "grad_norm": 0.8003381490707397, + "learning_rate": 1.595699038618373e-05, + "loss": 0.0638, + "step": 107310 + }, + { + "epoch": 3.899992732029944, + "grad_norm": 0.5758501887321472, + "learning_rate": 1.5952152648783448e-05, + "loss": 0.0702, + "step": 107320 + }, + { + "epoch": 3.9003561305327423, + "grad_norm": 0.4874951243400574, + "learning_rate": 1.5947315301222717e-05, + "loss": 0.0698, + "step": 107330 + }, + { + "epoch": 3.9007195290355403, + "grad_norm": 0.9432708024978638, + "learning_rate": 1.594247834370996e-05, + "loss": 0.0931, + "step": 107340 + }, + { + "epoch": 3.9010829275383383, + "grad_norm": 6.735447883605957, + "learning_rate": 1.5937641776453578e-05, + "loss": 0.0929, + "step": 107350 + }, + { + "epoch": 3.9014463260411367, + "grad_norm": 0.32913991808891296, + "learning_rate": 1.593280559966197e-05, + "loss": 0.0863, + "step": 107360 + }, + { + "epoch": 3.901809724543935, + "grad_norm": 0.65277099609375, + "learning_rate": 1.5927969813543513e-05, + "loss": 0.0631, + "step": 107370 + }, + { + "epoch": 3.902173123046733, + "grad_norm": 0.7276453375816345, + "learning_rate": 1.5923134418306557e-05, + "loss": 1.8576, + "step": 107380 + }, + { + "epoch": 3.902536521549531, + "grad_norm": 0.7184520959854126, + "learning_rate": 1.5918299414159443e-05, + "loss": 0.0671, + "step": 107390 + }, + { + "epoch": 3.902899920052329, + "grad_norm": 0.7107725143432617, + "learning_rate": 1.5913464801310486e-05, + "loss": 0.0847, + "step": 107400 + }, + { + "epoch": 3.902899920052329, + "eval_loss": 0.30393534898757935, + "eval_runtime": 181.5459, + "eval_samples_per_second": 40.838, + "eval_steps_per_second": 5.106, + "eval_wer": 0.13077495597872457, + "step": 107400 + }, + { + "epoch": 3.9032633185551275, + "grad_norm": 0.760789692401886, + "learning_rate": 1.590863057996801e-05, + "loss": 0.0683, + "step": 107410 + }, + { + "epoch": 3.903626717057926, + "grad_norm": 0.39104652404785156, + "learning_rate": 1.5903796750340295e-05, + "loss": 0.0493, + "step": 107420 + }, + { + "epoch": 3.903990115560724, + "grad_norm": 1.022646188735962, + "learning_rate": 1.5898963312635612e-05, + "loss": 0.0675, + "step": 107430 + }, + { + "epoch": 3.904353514063522, + "grad_norm": 0.4232107400894165, + "learning_rate": 1.5894130267062217e-05, + "loss": 0.0684, + "step": 107440 + }, + { + "epoch": 3.9047169125663204, + "grad_norm": 1.4065017700195312, + "learning_rate": 1.5889297613828347e-05, + "loss": 1.4241, + "step": 107450 + }, + { + "epoch": 3.9050803110691183, + "grad_norm": 1.4978172779083252, + "learning_rate": 1.588446535314223e-05, + "loss": 0.079, + "step": 107460 + }, + { + "epoch": 3.9054437095719168, + "grad_norm": 0.40406903624534607, + "learning_rate": 1.587963348521207e-05, + "loss": 0.0671, + "step": 107470 + }, + { + "epoch": 3.9058071080747148, + "grad_norm": 0.4887799918651581, + "learning_rate": 1.5874802010246054e-05, + "loss": 0.0739, + "step": 107480 + }, + { + "epoch": 3.9061705065775127, + "grad_norm": 1.1040798425674438, + "learning_rate": 1.586997092845235e-05, + "loss": 0.0894, + "step": 107490 + }, + { + "epoch": 3.906533905080311, + "grad_norm": 1.5164659023284912, + "learning_rate": 1.5865140240039106e-05, + "loss": 0.0769, + "step": 107500 + }, + { + "epoch": 3.906897303583109, + "grad_norm": 0.4174286425113678, + "learning_rate": 1.5860309945214473e-05, + "loss": 0.0785, + "step": 107510 + }, + { + "epoch": 3.9072607020859076, + "grad_norm": 0.3692898750305176, + "learning_rate": 1.585548004418657e-05, + "loss": 0.0511, + "step": 107520 + }, + { + "epoch": 3.9076241005887056, + "grad_norm": 0.49770957231521606, + "learning_rate": 1.5850650537163494e-05, + "loss": 0.0643, + "step": 107530 + }, + { + "epoch": 3.9079874990915036, + "grad_norm": 0.4804151654243469, + "learning_rate": 1.5845821424353323e-05, + "loss": 0.0757, + "step": 107540 + }, + { + "epoch": 3.908350897594302, + "grad_norm": 0.7496044635772705, + "learning_rate": 1.5840992705964148e-05, + "loss": 0.0656, + "step": 107550 + }, + { + "epoch": 3.9087142960971, + "grad_norm": 0.6202625632286072, + "learning_rate": 1.5836164382204004e-05, + "loss": 0.062, + "step": 107560 + }, + { + "epoch": 3.9090776945998984, + "grad_norm": 0.4673279821872711, + "learning_rate": 1.5831336453280937e-05, + "loss": 0.0669, + "step": 107570 + }, + { + "epoch": 3.9094410931026964, + "grad_norm": 0.4209801256656647, + "learning_rate": 1.5826508919402958e-05, + "loss": 0.0724, + "step": 107580 + }, + { + "epoch": 3.9098044916054944, + "grad_norm": 0.6488276124000549, + "learning_rate": 1.5821681780778057e-05, + "loss": 0.0854, + "step": 107590 + }, + { + "epoch": 3.910167890108293, + "grad_norm": 1.0254498720169067, + "learning_rate": 1.5816855037614248e-05, + "loss": 0.08, + "step": 107600 + }, + { + "epoch": 3.910531288611091, + "grad_norm": 0.4866830110549927, + "learning_rate": 1.581202869011948e-05, + "loss": 0.0676, + "step": 107610 + }, + { + "epoch": 3.910894687113889, + "grad_norm": 0.3314000368118286, + "learning_rate": 1.5807202738501703e-05, + "loss": 0.0454, + "step": 107620 + }, + { + "epoch": 3.911258085616687, + "grad_norm": 0.7425368428230286, + "learning_rate": 1.580237718296885e-05, + "loss": 0.0688, + "step": 107630 + }, + { + "epoch": 3.911621484119485, + "grad_norm": 1.5754629373550415, + "learning_rate": 1.5797552023728833e-05, + "loss": 0.0629, + "step": 107640 + }, + { + "epoch": 3.9119848826222836, + "grad_norm": 1.2293038368225098, + "learning_rate": 1.5792727260989566e-05, + "loss": 0.1056, + "step": 107650 + }, + { + "epoch": 3.912348281125082, + "grad_norm": 0.7234967947006226, + "learning_rate": 1.5787902894958916e-05, + "loss": 0.09, + "step": 107660 + }, + { + "epoch": 3.91271167962788, + "grad_norm": 1.6464751958847046, + "learning_rate": 1.578307892584476e-05, + "loss": 0.0716, + "step": 107670 + }, + { + "epoch": 3.913075078130678, + "grad_norm": 0.325550377368927, + "learning_rate": 1.5778255353854937e-05, + "loss": 0.0843, + "step": 107680 + }, + { + "epoch": 3.913438476633476, + "grad_norm": 0.40090569853782654, + "learning_rate": 1.5773432179197272e-05, + "loss": 0.0599, + "step": 107690 + }, + { + "epoch": 3.9138018751362744, + "grad_norm": 0.4380953907966614, + "learning_rate": 1.5768609402079592e-05, + "loss": 0.0877, + "step": 107700 + }, + { + "epoch": 3.914165273639073, + "grad_norm": 0.5776761174201965, + "learning_rate": 1.5763787022709693e-05, + "loss": 0.0736, + "step": 107710 + }, + { + "epoch": 3.914528672141871, + "grad_norm": 0.5864748954772949, + "learning_rate": 1.5758965041295343e-05, + "loss": 0.0563, + "step": 107720 + }, + { + "epoch": 3.914892070644669, + "grad_norm": 0.4065738320350647, + "learning_rate": 1.57541434580443e-05, + "loss": 0.0739, + "step": 107730 + }, + { + "epoch": 3.9152554691474672, + "grad_norm": 0.3938973546028137, + "learning_rate": 1.5749322273164336e-05, + "loss": 0.0883, + "step": 107740 + }, + { + "epoch": 3.9156188676502652, + "grad_norm": 0.42485731840133667, + "learning_rate": 1.574450148686315e-05, + "loss": 0.0824, + "step": 107750 + }, + { + "epoch": 3.9159822661530637, + "grad_norm": 1.9941436052322388, + "learning_rate": 1.5739681099348473e-05, + "loss": 0.0731, + "step": 107760 + }, + { + "epoch": 3.9163456646558616, + "grad_norm": 0.6175404787063599, + "learning_rate": 1.5734861110827987e-05, + "loss": 0.0537, + "step": 107770 + }, + { + "epoch": 3.9167090631586596, + "grad_norm": 0.7640911340713501, + "learning_rate": 1.573004152150936e-05, + "loss": 0.0635, + "step": 107780 + }, + { + "epoch": 3.917072461661458, + "grad_norm": 0.23279604315757751, + "learning_rate": 1.572522233160027e-05, + "loss": 0.0634, + "step": 107790 + }, + { + "epoch": 3.917435860164256, + "grad_norm": 0.3372284770011902, + "learning_rate": 1.572040354130835e-05, + "loss": 0.0966, + "step": 107800 + }, + { + "epoch": 3.9177992586670545, + "grad_norm": 0.8758069276809692, + "learning_rate": 1.571558515084122e-05, + "loss": 0.06, + "step": 107810 + }, + { + "epoch": 3.9181626571698525, + "grad_norm": 0.29386308789253235, + "learning_rate": 1.57107671604065e-05, + "loss": 0.0593, + "step": 107820 + }, + { + "epoch": 3.9185260556726504, + "grad_norm": 0.23792269825935364, + "learning_rate": 1.5705949570211757e-05, + "loss": 0.0801, + "step": 107830 + }, + { + "epoch": 3.918889454175449, + "grad_norm": 0.33399441838264465, + "learning_rate": 1.570113238046459e-05, + "loss": 0.068, + "step": 107840 + }, + { + "epoch": 3.919252852678247, + "grad_norm": 0.7931497097015381, + "learning_rate": 1.5696315591372544e-05, + "loss": 0.0883, + "step": 107850 + }, + { + "epoch": 3.9196162511810453, + "grad_norm": 0.4268588721752167, + "learning_rate": 1.5691499203143157e-05, + "loss": 0.0622, + "step": 107860 + }, + { + "epoch": 3.9199796496838433, + "grad_norm": 0.3438781797885895, + "learning_rate": 1.568668321598395e-05, + "loss": 0.0553, + "step": 107870 + }, + { + "epoch": 3.9203430481866413, + "grad_norm": 0.6346169114112854, + "learning_rate": 1.568186763010241e-05, + "loss": 0.0677, + "step": 107880 + }, + { + "epoch": 3.9207064466894397, + "grad_norm": 0.4053609371185303, + "learning_rate": 1.5677052445706058e-05, + "loss": 0.0733, + "step": 107890 + }, + { + "epoch": 3.9210698451922377, + "grad_norm": 0.5387663841247559, + "learning_rate": 1.5672237663002344e-05, + "loss": 0.0905, + "step": 107900 + }, + { + "epoch": 3.921433243695036, + "grad_norm": 0.35756614804267883, + "learning_rate": 1.5667423282198714e-05, + "loss": 0.0616, + "step": 107910 + }, + { + "epoch": 3.921796642197834, + "grad_norm": 0.5838266611099243, + "learning_rate": 1.5662609303502607e-05, + "loss": 0.068, + "step": 107920 + }, + { + "epoch": 3.922160040700632, + "grad_norm": 0.5504740476608276, + "learning_rate": 1.5657795727121448e-05, + "loss": 0.0596, + "step": 107930 + }, + { + "epoch": 3.9225234392034305, + "grad_norm": 0.3853808343410492, + "learning_rate": 1.565298255326263e-05, + "loss": 0.1693, + "step": 107940 + }, + { + "epoch": 3.922886837706229, + "grad_norm": 0.6952928900718689, + "learning_rate": 1.5648169782133543e-05, + "loss": 0.0862, + "step": 107950 + }, + { + "epoch": 3.923250236209027, + "grad_norm": 0.39551839232444763, + "learning_rate": 1.564335741394154e-05, + "loss": 0.0867, + "step": 107960 + }, + { + "epoch": 3.923613634711825, + "grad_norm": 0.5088280439376831, + "learning_rate": 1.563854544889397e-05, + "loss": 0.0541, + "step": 107970 + }, + { + "epoch": 3.9239770332146233, + "grad_norm": 0.33864468336105347, + "learning_rate": 1.5633733887198175e-05, + "loss": 0.0592, + "step": 107980 + }, + { + "epoch": 3.9243404317174213, + "grad_norm": 1.1237084865570068, + "learning_rate": 1.5628922729061463e-05, + "loss": 0.0847, + "step": 107990 + }, + { + "epoch": 3.9247038302202197, + "grad_norm": 0.4131975769996643, + "learning_rate": 1.562411197469113e-05, + "loss": 0.0857, + "step": 108000 + }, + { + "epoch": 3.9247038302202197, + "eval_loss": 0.31695127487182617, + "eval_runtime": 179.0981, + "eval_samples_per_second": 41.396, + "eval_steps_per_second": 5.176, + "eval_wer": 0.13087479804672608, + "step": 108000 + }, + { + "epoch": 3.9250672287230177, + "grad_norm": 0.4081736207008362, + "learning_rate": 1.561930162429445e-05, + "loss": 0.0775, + "step": 108010 + }, + { + "epoch": 3.9254306272258157, + "grad_norm": 0.9158249497413635, + "learning_rate": 1.5614491678078673e-05, + "loss": 0.0609, + "step": 108020 + }, + { + "epoch": 3.925794025728614, + "grad_norm": 0.4705490469932556, + "learning_rate": 1.5609682136251072e-05, + "loss": 0.0512, + "step": 108030 + }, + { + "epoch": 3.926157424231412, + "grad_norm": 0.3536396622657776, + "learning_rate": 1.5604872999018854e-05, + "loss": 0.0765, + "step": 108040 + }, + { + "epoch": 3.9265208227342105, + "grad_norm": 0.530402660369873, + "learning_rate": 1.5600064266589232e-05, + "loss": 0.0821, + "step": 108050 + }, + { + "epoch": 3.9268842212370085, + "grad_norm": 0.5621787309646606, + "learning_rate": 1.559525593916939e-05, + "loss": 0.0988, + "step": 108060 + }, + { + "epoch": 3.9272476197398065, + "grad_norm": 0.42473259568214417, + "learning_rate": 1.5590448016966507e-05, + "loss": 0.047, + "step": 108070 + }, + { + "epoch": 3.927611018242605, + "grad_norm": 0.5450408458709717, + "learning_rate": 1.5585640500187746e-05, + "loss": 0.0766, + "step": 108080 + }, + { + "epoch": 3.927974416745403, + "grad_norm": 0.3374296724796295, + "learning_rate": 1.558083338904024e-05, + "loss": 0.0931, + "step": 108090 + }, + { + "epoch": 3.9283378152482014, + "grad_norm": 0.3764584958553314, + "learning_rate": 1.5576026683731103e-05, + "loss": 0.0805, + "step": 108100 + }, + { + "epoch": 3.9287012137509993, + "grad_norm": 0.6628488898277283, + "learning_rate": 1.5571220384467444e-05, + "loss": 0.0795, + "step": 108110 + }, + { + "epoch": 3.9290646122537973, + "grad_norm": 0.5128569602966309, + "learning_rate": 1.556641449145636e-05, + "loss": 0.5942, + "step": 108120 + }, + { + "epoch": 3.9294280107565958, + "grad_norm": 0.4064798057079315, + "learning_rate": 1.5561609004904905e-05, + "loss": 0.0817, + "step": 108130 + }, + { + "epoch": 3.9297914092593937, + "grad_norm": 0.38870757818222046, + "learning_rate": 1.5556803925020143e-05, + "loss": 0.0707, + "step": 108140 + }, + { + "epoch": 3.930154807762192, + "grad_norm": 0.9279939532279968, + "learning_rate": 1.5551999252009093e-05, + "loss": 0.0927, + "step": 108150 + }, + { + "epoch": 3.93051820626499, + "grad_norm": 1.5513790845870972, + "learning_rate": 1.5547194986078772e-05, + "loss": 0.0582, + "step": 108160 + }, + { + "epoch": 3.930881604767788, + "grad_norm": 0.7676124572753906, + "learning_rate": 1.5542391127436203e-05, + "loss": 0.0502, + "step": 108170 + }, + { + "epoch": 3.9312450032705866, + "grad_norm": 0.5067169070243835, + "learning_rate": 1.553758767628834e-05, + "loss": 0.0547, + "step": 108180 + }, + { + "epoch": 3.9316084017733846, + "grad_norm": 1.1675875186920166, + "learning_rate": 1.553278463284216e-05, + "loss": 0.0684, + "step": 108190 + }, + { + "epoch": 3.931971800276183, + "grad_norm": 0.763733446598053, + "learning_rate": 1.5527981997304604e-05, + "loss": 0.0811, + "step": 108200 + }, + { + "epoch": 3.932335198778981, + "grad_norm": 1.6377514600753784, + "learning_rate": 1.55231797698826e-05, + "loss": 0.0599, + "step": 108210 + }, + { + "epoch": 3.932698597281779, + "grad_norm": 0.814492404460907, + "learning_rate": 1.5518377950783063e-05, + "loss": 0.0676, + "step": 108220 + }, + { + "epoch": 3.9330619957845774, + "grad_norm": 0.41483911871910095, + "learning_rate": 1.551357654021289e-05, + "loss": 0.0713, + "step": 108230 + }, + { + "epoch": 3.933425394287376, + "grad_norm": 0.4929620027542114, + "learning_rate": 1.550877553837895e-05, + "loss": 0.0705, + "step": 108240 + }, + { + "epoch": 3.933788792790174, + "grad_norm": 0.7828431725502014, + "learning_rate": 1.550397494548809e-05, + "loss": 0.14, + "step": 108250 + }, + { + "epoch": 3.934152191292972, + "grad_norm": 0.5891411304473877, + "learning_rate": 1.549917476174717e-05, + "loss": 0.0672, + "step": 108260 + }, + { + "epoch": 3.93451558979577, + "grad_norm": 0.6698787212371826, + "learning_rate": 1.549437498736301e-05, + "loss": 0.065, + "step": 108270 + }, + { + "epoch": 3.934878988298568, + "grad_norm": 0.500284731388092, + "learning_rate": 1.548957562254241e-05, + "loss": 0.0762, + "step": 108280 + }, + { + "epoch": 3.9352423868013666, + "grad_norm": 0.3653343617916107, + "learning_rate": 1.5484776667492153e-05, + "loss": 0.0865, + "step": 108290 + }, + { + "epoch": 3.9356057853041646, + "grad_norm": 0.552470862865448, + "learning_rate": 1.5479978122419013e-05, + "loss": 0.0883, + "step": 108300 + }, + { + "epoch": 3.9359691838069626, + "grad_norm": 3.6672260761260986, + "learning_rate": 1.5475659782554515e-05, + "loss": 0.4845, + "step": 108310 + }, + { + "epoch": 3.936332582309761, + "grad_norm": 0.20884235203266144, + "learning_rate": 1.5470862017007484e-05, + "loss": 0.0482, + "step": 108320 + }, + { + "epoch": 3.936695980812559, + "grad_norm": 0.35152187943458557, + "learning_rate": 1.5466064662037115e-05, + "loss": 0.0674, + "step": 108330 + }, + { + "epoch": 3.9370593793153574, + "grad_norm": 0.6102438569068909, + "learning_rate": 1.5461267717850096e-05, + "loss": 0.0861, + "step": 108340 + }, + { + "epoch": 3.9374227778181554, + "grad_norm": 0.943453311920166, + "learning_rate": 1.545647118465311e-05, + "loss": 0.0787, + "step": 108350 + }, + { + "epoch": 3.9377861763209534, + "grad_norm": 0.4639219641685486, + "learning_rate": 1.545167506265282e-05, + "loss": 0.0676, + "step": 108360 + }, + { + "epoch": 3.938149574823752, + "grad_norm": 0.5150544047355652, + "learning_rate": 1.544687935205588e-05, + "loss": 0.0651, + "step": 108370 + }, + { + "epoch": 3.93851297332655, + "grad_norm": 0.5360589027404785, + "learning_rate": 1.5442084053068927e-05, + "loss": 0.0552, + "step": 108380 + }, + { + "epoch": 3.9388763718293482, + "grad_norm": 0.6416401267051697, + "learning_rate": 1.543728916589856e-05, + "loss": 0.0997, + "step": 108390 + }, + { + "epoch": 3.9392397703321462, + "grad_norm": 0.6596519351005554, + "learning_rate": 1.5432494690751383e-05, + "loss": 0.0774, + "step": 108400 + }, + { + "epoch": 3.939603168834944, + "grad_norm": 0.40156471729278564, + "learning_rate": 1.5427700627833958e-05, + "loss": 0.0641, + "step": 108410 + }, + { + "epoch": 3.9399665673377426, + "grad_norm": 1.4535303115844727, + "learning_rate": 1.5422906977352857e-05, + "loss": 0.063, + "step": 108420 + }, + { + "epoch": 3.9403299658405406, + "grad_norm": 0.359152615070343, + "learning_rate": 1.5418113739514623e-05, + "loss": 0.0632, + "step": 108430 + }, + { + "epoch": 3.940693364343339, + "grad_norm": 0.9958677291870117, + "learning_rate": 1.5413320914525778e-05, + "loss": 0.084, + "step": 108440 + }, + { + "epoch": 3.941056762846137, + "grad_norm": 0.8245310187339783, + "learning_rate": 1.5408528502592823e-05, + "loss": 0.0915, + "step": 108450 + }, + { + "epoch": 3.941420161348935, + "grad_norm": 3.544517755508423, + "learning_rate": 1.540373650392224e-05, + "loss": 0.0664, + "step": 108460 + }, + { + "epoch": 3.9417835598517335, + "grad_norm": 1.7801954746246338, + "learning_rate": 1.5398944918720516e-05, + "loss": 0.0644, + "step": 108470 + }, + { + "epoch": 3.9421469583545314, + "grad_norm": 0.5512742400169373, + "learning_rate": 1.5394153747194096e-05, + "loss": 0.0683, + "step": 108480 + }, + { + "epoch": 3.94251035685733, + "grad_norm": 0.4868443012237549, + "learning_rate": 1.5389362989549413e-05, + "loss": 1.0657, + "step": 108490 + }, + { + "epoch": 3.942873755360128, + "grad_norm": 0.46926984190940857, + "learning_rate": 1.5384572645992877e-05, + "loss": 0.1335, + "step": 108500 + }, + { + "epoch": 3.943237153862926, + "grad_norm": 1.9577665328979492, + "learning_rate": 1.5379782716730896e-05, + "loss": 0.0659, + "step": 108510 + }, + { + "epoch": 3.9436005523657243, + "grad_norm": 2.820413827896118, + "learning_rate": 1.5374993201969855e-05, + "loss": 0.0566, + "step": 108520 + }, + { + "epoch": 3.9439639508685227, + "grad_norm": 0.4880913197994232, + "learning_rate": 1.5370204101916107e-05, + "loss": 0.0815, + "step": 108530 + }, + { + "epoch": 3.9443273493713207, + "grad_norm": 0.7637322545051575, + "learning_rate": 1.5365415416776007e-05, + "loss": 0.0615, + "step": 108540 + }, + { + "epoch": 3.9446907478741187, + "grad_norm": 0.6059696078300476, + "learning_rate": 1.536062714675587e-05, + "loss": 0.0647, + "step": 108550 + }, + { + "epoch": 3.945054146376917, + "grad_norm": 0.6734591126441956, + "learning_rate": 1.5355839292062008e-05, + "loss": 0.0598, + "step": 108560 + }, + { + "epoch": 3.945417544879715, + "grad_norm": 1.2149147987365723, + "learning_rate": 1.5351051852900726e-05, + "loss": 2.337, + "step": 108570 + }, + { + "epoch": 3.9457809433825135, + "grad_norm": 0.5000627636909485, + "learning_rate": 1.534626482947829e-05, + "loss": 0.0783, + "step": 108580 + }, + { + "epoch": 3.9461443418853115, + "grad_norm": 0.5199702978134155, + "learning_rate": 1.5341478222000944e-05, + "loss": 0.0833, + "step": 108590 + }, + { + "epoch": 3.9465077403881095, + "grad_norm": 0.526923656463623, + "learning_rate": 1.5336692030674942e-05, + "loss": 0.1112, + "step": 108600 + }, + { + "epoch": 3.9465077403881095, + "eval_loss": 0.30439403653144836, + "eval_runtime": 179.538, + "eval_samples_per_second": 41.295, + "eval_steps_per_second": 5.163, + "eval_wer": 0.1313104725252782, + "step": 108600 + }, + { + "epoch": 3.946871138890908, + "grad_norm": 0.7700692415237427, + "learning_rate": 1.5331906255706495e-05, + "loss": 0.0835, + "step": 108610 + }, + { + "epoch": 3.947234537393706, + "grad_norm": 1.3175575733184814, + "learning_rate": 1.5327120897301817e-05, + "loss": 0.0635, + "step": 108620 + }, + { + "epoch": 3.9475979358965043, + "grad_norm": 0.5062894225120544, + "learning_rate": 1.5322335955667077e-05, + "loss": 0.0665, + "step": 108630 + }, + { + "epoch": 3.9479613343993023, + "grad_norm": 2.1600241661071777, + "learning_rate": 1.531755143100845e-05, + "loss": 0.079, + "step": 108640 + }, + { + "epoch": 3.9483247329021003, + "grad_norm": 1.095664143562317, + "learning_rate": 1.5312767323532074e-05, + "loss": 0.0874, + "step": 108650 + }, + { + "epoch": 3.9486881314048987, + "grad_norm": 2.105536699295044, + "learning_rate": 1.5307983633444096e-05, + "loss": 0.0709, + "step": 108660 + }, + { + "epoch": 3.9490515299076967, + "grad_norm": 0.4600623548030853, + "learning_rate": 1.5303200360950618e-05, + "loss": 0.0607, + "step": 108670 + }, + { + "epoch": 3.949414928410495, + "grad_norm": 4.388635158538818, + "learning_rate": 1.5298417506257727e-05, + "loss": 0.0691, + "step": 108680 + }, + { + "epoch": 3.949778326913293, + "grad_norm": 0.7141379714012146, + "learning_rate": 1.5293635069571516e-05, + "loss": 0.0948, + "step": 108690 + }, + { + "epoch": 3.950141725416091, + "grad_norm": 0.6126868724822998, + "learning_rate": 1.5288853051098028e-05, + "loss": 0.0655, + "step": 108700 + }, + { + "epoch": 3.9505051239188895, + "grad_norm": 0.4774114787578583, + "learning_rate": 1.5284071451043315e-05, + "loss": 0.1103, + "step": 108710 + }, + { + "epoch": 3.9508685224216875, + "grad_norm": 0.7860293388366699, + "learning_rate": 1.5279290269613393e-05, + "loss": 0.0787, + "step": 108720 + }, + { + "epoch": 3.951231920924486, + "grad_norm": 0.3550005853176117, + "learning_rate": 1.5274509507014263e-05, + "loss": 0.09, + "step": 108730 + }, + { + "epoch": 3.951595319427284, + "grad_norm": 0.42200934886932373, + "learning_rate": 1.5269729163451924e-05, + "loss": 0.0617, + "step": 108740 + }, + { + "epoch": 3.951958717930082, + "grad_norm": 0.6047300696372986, + "learning_rate": 1.5264949239132327e-05, + "loss": 0.0645, + "step": 108750 + }, + { + "epoch": 3.9523221164328803, + "grad_norm": 0.3704775869846344, + "learning_rate": 1.5260169734261432e-05, + "loss": 0.0705, + "step": 108760 + }, + { + "epoch": 3.9526855149356783, + "grad_norm": 0.5324059724807739, + "learning_rate": 1.5255390649045165e-05, + "loss": 0.129, + "step": 108770 + }, + { + "epoch": 3.9530489134384768, + "grad_norm": 0.5403017401695251, + "learning_rate": 1.5250611983689448e-05, + "loss": 0.0975, + "step": 108780 + }, + { + "epoch": 3.9534123119412747, + "grad_norm": 0.30867788195610046, + "learning_rate": 1.5245833738400173e-05, + "loss": 0.0778, + "step": 108790 + }, + { + "epoch": 3.9537757104440727, + "grad_norm": 0.4358409643173218, + "learning_rate": 1.5241055913383212e-05, + "loss": 0.0818, + "step": 108800 + }, + { + "epoch": 3.954139108946871, + "grad_norm": 0.5556782484054565, + "learning_rate": 1.5236278508844431e-05, + "loss": 0.0898, + "step": 108810 + }, + { + "epoch": 3.9545025074496696, + "grad_norm": 0.4871123135089874, + "learning_rate": 1.523150152498967e-05, + "loss": 0.0775, + "step": 108820 + }, + { + "epoch": 3.9548659059524676, + "grad_norm": 0.6219027042388916, + "learning_rate": 1.5226724962024755e-05, + "loss": 0.216, + "step": 108830 + }, + { + "epoch": 3.9552293044552655, + "grad_norm": 0.5182541012763977, + "learning_rate": 1.5221948820155483e-05, + "loss": 0.068, + "step": 108840 + }, + { + "epoch": 3.955592702958064, + "grad_norm": 0.5644829869270325, + "learning_rate": 1.521717309958765e-05, + "loss": 0.0755, + "step": 108850 + }, + { + "epoch": 3.955956101460862, + "grad_norm": 0.5542482733726501, + "learning_rate": 1.521239780052702e-05, + "loss": 0.1001, + "step": 108860 + }, + { + "epoch": 3.9563194999636604, + "grad_norm": 0.5790029764175415, + "learning_rate": 1.520762292317934e-05, + "loss": 0.0521, + "step": 108870 + }, + { + "epoch": 3.9566828984664584, + "grad_norm": 0.33317145705223083, + "learning_rate": 1.5202848467750353e-05, + "loss": 0.056, + "step": 108880 + }, + { + "epoch": 3.9570462969692564, + "grad_norm": 0.40172079205513, + "learning_rate": 1.5198074434445762e-05, + "loss": 0.0723, + "step": 108890 + }, + { + "epoch": 3.957409695472055, + "grad_norm": 0.7119815349578857, + "learning_rate": 1.5193300823471273e-05, + "loss": 0.0804, + "step": 108900 + }, + { + "epoch": 3.9577730939748528, + "grad_norm": 0.5087375044822693, + "learning_rate": 1.5188527635032555e-05, + "loss": 0.0448, + "step": 108910 + }, + { + "epoch": 3.958136492477651, + "grad_norm": 0.548531174659729, + "learning_rate": 1.5183754869335277e-05, + "loss": 0.0689, + "step": 108920 + }, + { + "epoch": 3.958499890980449, + "grad_norm": 0.27257245779037476, + "learning_rate": 1.5178982526585073e-05, + "loss": 0.0649, + "step": 108930 + }, + { + "epoch": 3.958863289483247, + "grad_norm": 0.3826998770236969, + "learning_rate": 1.5174210606987563e-05, + "loss": 0.0729, + "step": 108940 + }, + { + "epoch": 3.9592266879860456, + "grad_norm": 0.6397566795349121, + "learning_rate": 1.5169439110748364e-05, + "loss": 0.0691, + "step": 108950 + }, + { + "epoch": 3.9595900864888436, + "grad_norm": 0.2969396114349365, + "learning_rate": 1.5164668038073055e-05, + "loss": 0.0767, + "step": 108960 + }, + { + "epoch": 3.959953484991642, + "grad_norm": 0.6821795105934143, + "learning_rate": 1.5159897389167204e-05, + "loss": 0.0525, + "step": 108970 + }, + { + "epoch": 3.96031688349444, + "grad_norm": 0.7459032535552979, + "learning_rate": 1.5155127164236369e-05, + "loss": 0.057, + "step": 108980 + }, + { + "epoch": 3.960680281997238, + "grad_norm": 0.3866112232208252, + "learning_rate": 1.5150357363486067e-05, + "loss": 0.0642, + "step": 108990 + }, + { + "epoch": 3.9610436805000364, + "grad_norm": 0.5428589582443237, + "learning_rate": 1.5145587987121826e-05, + "loss": 0.0698, + "step": 109000 + }, + { + "epoch": 3.9614070790028344, + "grad_norm": 0.5992247462272644, + "learning_rate": 1.514081903534914e-05, + "loss": 0.0881, + "step": 109010 + }, + { + "epoch": 3.961770477505633, + "grad_norm": 0.5933393836021423, + "learning_rate": 1.5136050508373482e-05, + "loss": 0.0744, + "step": 109020 + }, + { + "epoch": 3.962133876008431, + "grad_norm": 1.02914297580719, + "learning_rate": 1.5131282406400304e-05, + "loss": 0.0783, + "step": 109030 + }, + { + "epoch": 3.962497274511229, + "grad_norm": 0.27180367708206177, + "learning_rate": 1.5126514729635063e-05, + "loss": 0.078, + "step": 109040 + }, + { + "epoch": 3.9628606730140272, + "grad_norm": 1.8903266191482544, + "learning_rate": 1.5121747478283166e-05, + "loss": 0.0744, + "step": 109050 + }, + { + "epoch": 3.963224071516825, + "grad_norm": 0.5451824069023132, + "learning_rate": 1.5116980652550028e-05, + "loss": 0.0744, + "step": 109060 + }, + { + "epoch": 3.9635874700196236, + "grad_norm": 0.6950256824493408, + "learning_rate": 1.511221425264103e-05, + "loss": 0.4433, + "step": 109070 + }, + { + "epoch": 3.9639508685224216, + "grad_norm": 1.0612424612045288, + "learning_rate": 1.5107448278761533e-05, + "loss": 0.0543, + "step": 109080 + }, + { + "epoch": 3.9643142670252196, + "grad_norm": 0.3324570953845978, + "learning_rate": 1.5102682731116893e-05, + "loss": 0.0761, + "step": 109090 + }, + { + "epoch": 3.964677665528018, + "grad_norm": 0.48673510551452637, + "learning_rate": 1.5097917609912443e-05, + "loss": 0.0582, + "step": 109100 + }, + { + "epoch": 3.9650410640308165, + "grad_norm": 2.974033832550049, + "learning_rate": 1.5093152915353492e-05, + "loss": 0.1036, + "step": 109110 + }, + { + "epoch": 3.9654044625336144, + "grad_norm": 0.7069442868232727, + "learning_rate": 1.5088388647645335e-05, + "loss": 0.0624, + "step": 109120 + }, + { + "epoch": 3.9657678610364124, + "grad_norm": 0.3872590661048889, + "learning_rate": 1.508362480699324e-05, + "loss": 0.0643, + "step": 109130 + }, + { + "epoch": 3.966131259539211, + "grad_norm": 4.4336676597595215, + "learning_rate": 1.5078861393602467e-05, + "loss": 0.0811, + "step": 109140 + }, + { + "epoch": 3.966494658042009, + "grad_norm": 0.5968475341796875, + "learning_rate": 1.5074098407678267e-05, + "loss": 0.0612, + "step": 109150 + }, + { + "epoch": 3.9668580565448073, + "grad_norm": 0.8365516662597656, + "learning_rate": 1.5069335849425845e-05, + "loss": 0.0629, + "step": 109160 + }, + { + "epoch": 3.9672214550476053, + "grad_norm": 1.566721796989441, + "learning_rate": 1.506457371905041e-05, + "loss": 3.8251, + "step": 109170 + }, + { + "epoch": 3.9675848535504032, + "grad_norm": 0.6778194308280945, + "learning_rate": 1.5059812016757138e-05, + "loss": 0.062, + "step": 109180 + }, + { + "epoch": 3.9679482520532017, + "grad_norm": 3.5872347354888916, + "learning_rate": 1.5055050742751198e-05, + "loss": 0.0696, + "step": 109190 + }, + { + "epoch": 3.9683116505559997, + "grad_norm": 1.0359376668930054, + "learning_rate": 1.5050289897237742e-05, + "loss": 0.0769, + "step": 109200 + }, + { + "epoch": 3.9683116505559997, + "eval_loss": 0.302498459815979, + "eval_runtime": 180.0974, + "eval_samples_per_second": 41.167, + "eval_steps_per_second": 5.147, + "eval_wer": 0.1293045546136112, + "step": 109200 + }, + { + "epoch": 3.968675049058798, + "grad_norm": 0.8150886297225952, + "learning_rate": 1.5045529480421893e-05, + "loss": 0.0748, + "step": 109210 + }, + { + "epoch": 3.969038447561596, + "grad_norm": 0.4916574954986572, + "learning_rate": 1.5040769492508761e-05, + "loss": 0.0614, + "step": 109220 + }, + { + "epoch": 3.969401846064394, + "grad_norm": 0.4304117262363434, + "learning_rate": 1.5036009933703433e-05, + "loss": 0.0681, + "step": 109230 + }, + { + "epoch": 3.9697652445671925, + "grad_norm": 1.1975165605545044, + "learning_rate": 1.5031250804210986e-05, + "loss": 0.0735, + "step": 109240 + }, + { + "epoch": 3.9701286430699905, + "grad_norm": 0.8331923484802246, + "learning_rate": 1.5026492104236478e-05, + "loss": 0.0701, + "step": 109250 + }, + { + "epoch": 3.970492041572789, + "grad_norm": 0.38081446290016174, + "learning_rate": 1.5021733833984936e-05, + "loss": 0.0736, + "step": 109260 + }, + { + "epoch": 3.970855440075587, + "grad_norm": 1.5401438474655151, + "learning_rate": 1.5016975993661374e-05, + "loss": 0.0549, + "step": 109270 + }, + { + "epoch": 3.971218838578385, + "grad_norm": 0.4230031371116638, + "learning_rate": 1.5012218583470803e-05, + "loss": 0.0806, + "step": 109280 + }, + { + "epoch": 3.9715822370811833, + "grad_norm": 0.6398650407791138, + "learning_rate": 1.5007461603618197e-05, + "loss": 0.0674, + "step": 109290 + }, + { + "epoch": 3.9719456355839813, + "grad_norm": 0.9541281461715698, + "learning_rate": 1.5002705054308518e-05, + "loss": 0.0754, + "step": 109300 + }, + { + "epoch": 3.9723090340867797, + "grad_norm": 0.939696192741394, + "learning_rate": 1.4997948935746708e-05, + "loss": 0.0699, + "step": 109310 + }, + { + "epoch": 3.9726724325895777, + "grad_norm": 0.3460374176502228, + "learning_rate": 1.4993193248137682e-05, + "loss": 1.1332, + "step": 109320 + }, + { + "epoch": 3.9730358310923757, + "grad_norm": 0.606484055519104, + "learning_rate": 1.498843799168636e-05, + "loss": 0.0736, + "step": 109330 + }, + { + "epoch": 3.973399229595174, + "grad_norm": 0.8378924131393433, + "learning_rate": 1.4983683166597629e-05, + "loss": 0.7118, + "step": 109340 + }, + { + "epoch": 3.973762628097972, + "grad_norm": 1.4549202919006348, + "learning_rate": 1.497892877307635e-05, + "loss": 0.0853, + "step": 109350 + }, + { + "epoch": 3.9741260266007705, + "grad_norm": 0.3216412365436554, + "learning_rate": 1.4974174811327373e-05, + "loss": 0.0841, + "step": 109360 + }, + { + "epoch": 3.9744894251035685, + "grad_norm": 2.007521867752075, + "learning_rate": 1.4969421281555525e-05, + "loss": 0.0604, + "step": 109370 + }, + { + "epoch": 3.9748528236063665, + "grad_norm": 0.438717782497406, + "learning_rate": 1.4964668183965636e-05, + "loss": 0.0626, + "step": 109380 + }, + { + "epoch": 3.975216222109165, + "grad_norm": 0.6095426082611084, + "learning_rate": 1.4959915518762486e-05, + "loss": 0.0828, + "step": 109390 + }, + { + "epoch": 3.9755796206119633, + "grad_norm": 0.5519289374351501, + "learning_rate": 1.4955163286150853e-05, + "loss": 0.0595, + "step": 109400 + }, + { + "epoch": 3.9759430191147613, + "grad_norm": 0.5407907366752625, + "learning_rate": 1.4950411486335497e-05, + "loss": 0.294, + "step": 109410 + }, + { + "epoch": 3.9763064176175593, + "grad_norm": 0.3119775354862213, + "learning_rate": 1.4945660119521144e-05, + "loss": 0.0772, + "step": 109420 + }, + { + "epoch": 3.9766698161203577, + "grad_norm": 1.3484299182891846, + "learning_rate": 1.4940909185912527e-05, + "loss": 0.1017, + "step": 109430 + }, + { + "epoch": 3.9770332146231557, + "grad_norm": 0.6218773126602173, + "learning_rate": 1.493615868571435e-05, + "loss": 0.0633, + "step": 109440 + }, + { + "epoch": 3.977396613125954, + "grad_norm": 0.6401359438896179, + "learning_rate": 1.4931408619131285e-05, + "loss": 0.0718, + "step": 109450 + }, + { + "epoch": 3.977760011628752, + "grad_norm": 32.70407485961914, + "learning_rate": 1.4926658986367986e-05, + "loss": 0.439, + "step": 109460 + }, + { + "epoch": 3.97812341013155, + "grad_norm": 0.718180775642395, + "learning_rate": 1.4921909787629124e-05, + "loss": 0.0747, + "step": 109470 + }, + { + "epoch": 3.9784868086343486, + "grad_norm": 0.38643378019332886, + "learning_rate": 1.491716102311931e-05, + "loss": 0.064, + "step": 109480 + }, + { + "epoch": 3.9788502071371465, + "grad_norm": 0.7724172472953796, + "learning_rate": 1.4912412693043155e-05, + "loss": 0.074, + "step": 109490 + }, + { + "epoch": 3.979213605639945, + "grad_norm": 0.7316296100616455, + "learning_rate": 1.4907664797605242e-05, + "loss": 0.0763, + "step": 109500 + }, + { + "epoch": 3.979577004142743, + "grad_norm": 0.456412136554718, + "learning_rate": 1.4902917337010133e-05, + "loss": 0.064, + "step": 109510 + }, + { + "epoch": 3.979940402645541, + "grad_norm": 0.3653579354286194, + "learning_rate": 1.4898170311462404e-05, + "loss": 0.1424, + "step": 109520 + }, + { + "epoch": 3.9803038011483394, + "grad_norm": 0.3944752514362335, + "learning_rate": 1.4893423721166572e-05, + "loss": 0.0611, + "step": 109530 + }, + { + "epoch": 3.9806671996511374, + "grad_norm": 2.8756213188171387, + "learning_rate": 1.4888677566327153e-05, + "loss": 0.0884, + "step": 109540 + }, + { + "epoch": 3.981030598153936, + "grad_norm": 1.020673394203186, + "learning_rate": 1.4883931847148642e-05, + "loss": 0.0708, + "step": 109550 + }, + { + "epoch": 3.9813939966567338, + "grad_norm": 0.3214241862297058, + "learning_rate": 1.4879186563835504e-05, + "loss": 0.0694, + "step": 109560 + }, + { + "epoch": 3.9817573951595318, + "grad_norm": 0.41782814264297485, + "learning_rate": 1.4874441716592216e-05, + "loss": 0.0709, + "step": 109570 + }, + { + "epoch": 3.98212079366233, + "grad_norm": 0.6422412395477295, + "learning_rate": 1.4869697305623209e-05, + "loss": 0.0669, + "step": 109580 + }, + { + "epoch": 3.982484192165128, + "grad_norm": 0.4334978461265564, + "learning_rate": 1.4864953331132903e-05, + "loss": 0.0708, + "step": 109590 + }, + { + "epoch": 3.9828475906679266, + "grad_norm": 0.5081255435943604, + "learning_rate": 1.4860209793325693e-05, + "loss": 0.0863, + "step": 109600 + }, + { + "epoch": 3.9832109891707246, + "grad_norm": 0.4459257125854492, + "learning_rate": 1.4855466692405959e-05, + "loss": 0.0794, + "step": 109610 + }, + { + "epoch": 3.9835743876735226, + "grad_norm": 15.502050399780273, + "learning_rate": 1.4850724028578077e-05, + "loss": 0.1751, + "step": 109620 + }, + { + "epoch": 3.983937786176321, + "grad_norm": 0.6289138793945312, + "learning_rate": 1.4845981802046388e-05, + "loss": 0.0545, + "step": 109630 + }, + { + "epoch": 3.984301184679119, + "grad_norm": 0.4336656630039215, + "learning_rate": 1.4841240013015217e-05, + "loss": 0.0908, + "step": 109640 + }, + { + "epoch": 3.9846645831819174, + "grad_norm": 0.7223489880561829, + "learning_rate": 1.4836498661688857e-05, + "loss": 0.0717, + "step": 109650 + }, + { + "epoch": 3.9850279816847154, + "grad_norm": 0.6017643809318542, + "learning_rate": 1.483175774827162e-05, + "loss": 0.0628, + "step": 109660 + }, + { + "epoch": 3.9853913801875134, + "grad_norm": 0.5750892758369446, + "learning_rate": 1.4827017272967758e-05, + "loss": 0.0671, + "step": 109670 + }, + { + "epoch": 3.985754778690312, + "grad_norm": 0.5592082738876343, + "learning_rate": 1.482227723598153e-05, + "loss": 0.0633, + "step": 109680 + }, + { + "epoch": 3.9861181771931102, + "grad_norm": 0.4206995964050293, + "learning_rate": 1.4817537637517162e-05, + "loss": 0.0846, + "step": 109690 + }, + { + "epoch": 3.986481575695908, + "grad_norm": 1.356323003768921, + "learning_rate": 1.4812798477778859e-05, + "loss": 0.083, + "step": 109700 + }, + { + "epoch": 3.986844974198706, + "grad_norm": 0.34790241718292236, + "learning_rate": 1.4808059756970832e-05, + "loss": 0.0781, + "step": 109710 + }, + { + "epoch": 3.9872083727015046, + "grad_norm": 0.349802702665329, + "learning_rate": 1.4803321475297246e-05, + "loss": 0.087, + "step": 109720 + }, + { + "epoch": 3.9875717712043026, + "grad_norm": 0.7321351766586304, + "learning_rate": 1.4798583632962259e-05, + "loss": 0.0776, + "step": 109730 + }, + { + "epoch": 3.987935169707101, + "grad_norm": 0.3417205512523651, + "learning_rate": 1.4793846230170009e-05, + "loss": 0.0593, + "step": 109740 + }, + { + "epoch": 3.988298568209899, + "grad_norm": 0.5319487452507019, + "learning_rate": 1.4789109267124598e-05, + "loss": 0.0981, + "step": 109750 + }, + { + "epoch": 3.988661966712697, + "grad_norm": 0.4192574918270111, + "learning_rate": 1.478437274403015e-05, + "loss": 0.0628, + "step": 109760 + }, + { + "epoch": 3.9890253652154954, + "grad_norm": 0.5519381165504456, + "learning_rate": 1.4779636661090731e-05, + "loss": 0.059, + "step": 109770 + }, + { + "epoch": 3.9893887637182934, + "grad_norm": 2.7939960956573486, + "learning_rate": 1.4774901018510407e-05, + "loss": 0.063, + "step": 109780 + }, + { + "epoch": 3.989752162221092, + "grad_norm": 0.9663445353507996, + "learning_rate": 1.4770165816493214e-05, + "loss": 2.4534, + "step": 109790 + }, + { + "epoch": 3.99011556072389, + "grad_norm": 2.655893325805664, + "learning_rate": 1.4765431055243173e-05, + "loss": 0.0592, + "step": 109800 + }, + { + "epoch": 3.99011556072389, + "eval_loss": 0.3074624836444855, + "eval_runtime": 179.5996, + "eval_samples_per_second": 41.281, + "eval_steps_per_second": 5.161, + "eval_wer": 0.13042097046490098, + "step": 109800 + }, + { + "epoch": 3.990478959226688, + "grad_norm": 146.27210998535156, + "learning_rate": 1.4760696734964296e-05, + "loss": 2.3423, + "step": 109810 + }, + { + "epoch": 3.9908423577294863, + "grad_norm": 1.0552388429641724, + "learning_rate": 1.4755962855860572e-05, + "loss": 0.0616, + "step": 109820 + }, + { + "epoch": 3.9912057562322842, + "grad_norm": 0.38608866930007935, + "learning_rate": 1.4751229418135956e-05, + "loss": 0.0719, + "step": 109830 + }, + { + "epoch": 3.9915691547350827, + "grad_norm": 0.4337970018386841, + "learning_rate": 1.474649642199439e-05, + "loss": 0.0761, + "step": 109840 + }, + { + "epoch": 3.9919325532378807, + "grad_norm": 0.6015897989273071, + "learning_rate": 1.4741763867639821e-05, + "loss": 0.0948, + "step": 109850 + }, + { + "epoch": 3.9922959517406786, + "grad_norm": 0.8983295559883118, + "learning_rate": 1.4737031755276148e-05, + "loss": 0.0706, + "step": 109860 + }, + { + "epoch": 3.992659350243477, + "grad_norm": 2.648515224456787, + "learning_rate": 1.4732300085107265e-05, + "loss": 0.065, + "step": 109870 + }, + { + "epoch": 3.993022748746275, + "grad_norm": 0.7430446147918701, + "learning_rate": 1.4727568857337032e-05, + "loss": 0.0688, + "step": 109880 + }, + { + "epoch": 3.9933861472490735, + "grad_norm": 0.7264726161956787, + "learning_rate": 1.47228380721693e-05, + "loss": 0.0792, + "step": 109890 + }, + { + "epoch": 3.9937495457518715, + "grad_norm": 0.5347972512245178, + "learning_rate": 1.4718107729807922e-05, + "loss": 0.0627, + "step": 109900 + }, + { + "epoch": 3.9941129442546695, + "grad_norm": 0.5735613107681274, + "learning_rate": 1.4713377830456696e-05, + "loss": 0.1417, + "step": 109910 + }, + { + "epoch": 3.994476342757468, + "grad_norm": 1.1950944662094116, + "learning_rate": 1.4708648374319419e-05, + "loss": 0.0629, + "step": 109920 + }, + { + "epoch": 3.994839741260266, + "grad_norm": 0.30401110649108887, + "learning_rate": 1.4703919361599868e-05, + "loss": 0.0715, + "step": 109930 + }, + { + "epoch": 3.9952031397630643, + "grad_norm": 1.0371030569076538, + "learning_rate": 1.4699190792501789e-05, + "loss": 0.0693, + "step": 109940 + }, + { + "epoch": 3.9955665382658623, + "grad_norm": 1.0880746841430664, + "learning_rate": 1.4694462667228936e-05, + "loss": 0.1114, + "step": 109950 + }, + { + "epoch": 3.9959299367686603, + "grad_norm": 0.6585062146186829, + "learning_rate": 1.468973498598502e-05, + "loss": 0.0882, + "step": 109960 + }, + { + "epoch": 3.9962933352714587, + "grad_norm": 0.39981094002723694, + "learning_rate": 1.4685007748973742e-05, + "loss": 0.0591, + "step": 109970 + }, + { + "epoch": 3.996656733774257, + "grad_norm": 1.2546730041503906, + "learning_rate": 1.4680280956398778e-05, + "loss": 0.0657, + "step": 109980 + }, + { + "epoch": 3.997020132277055, + "grad_norm": 0.3919306695461273, + "learning_rate": 1.4675554608463776e-05, + "loss": 0.2914, + "step": 109990 + }, + { + "epoch": 3.997383530779853, + "grad_norm": 3.1263980865478516, + "learning_rate": 1.4670828705372408e-05, + "loss": 0.0869, + "step": 110000 + }, + { + "epoch": 3.9977469292826515, + "grad_norm": 0.8971359729766846, + "learning_rate": 1.4666103247328276e-05, + "loss": 0.1294, + "step": 110010 + }, + { + "epoch": 3.9981103277854495, + "grad_norm": 0.5809153914451599, + "learning_rate": 1.4661378234534986e-05, + "loss": 0.0759, + "step": 110020 + }, + { + "epoch": 3.998473726288248, + "grad_norm": 1.7673100233078003, + "learning_rate": 1.4656653667196112e-05, + "loss": 0.0733, + "step": 110030 + }, + { + "epoch": 3.998837124791046, + "grad_norm": 0.23588208854198456, + "learning_rate": 1.4651929545515248e-05, + "loss": 0.0677, + "step": 110040 + }, + { + "epoch": 3.999200523293844, + "grad_norm": 0.581369161605835, + "learning_rate": 1.4647205869695913e-05, + "loss": 0.0658, + "step": 110050 + }, + { + "epoch": 3.9995639217966423, + "grad_norm": 0.7178440093994141, + "learning_rate": 1.4642482639941643e-05, + "loss": 0.0552, + "step": 110060 + }, + { + "epoch": 3.9999273202994403, + "grad_norm": 0.5883386135101318, + "learning_rate": 1.4637759856455947e-05, + "loss": 0.0751, + "step": 110070 + }, + { + "epoch": 4.000290718802239, + "grad_norm": 0.4838172197341919, + "learning_rate": 1.4633037519442297e-05, + "loss": 0.0578, + "step": 110080 + }, + { + "epoch": 4.000654117305037, + "grad_norm": 0.20663967728614807, + "learning_rate": 1.4628315629104183e-05, + "loss": 0.0619, + "step": 110090 + }, + { + "epoch": 4.001017515807835, + "grad_norm": 0.6073209047317505, + "learning_rate": 1.4623594185645052e-05, + "loss": 0.0701, + "step": 110100 + }, + { + "epoch": 4.001380914310633, + "grad_norm": 0.21287468075752258, + "learning_rate": 1.4618873189268322e-05, + "loss": 0.0609, + "step": 110110 + }, + { + "epoch": 4.001744312813432, + "grad_norm": 0.5142436623573303, + "learning_rate": 1.4614152640177414e-05, + "loss": 0.0583, + "step": 110120 + }, + { + "epoch": 4.0021077113162296, + "grad_norm": 0.4657193124294281, + "learning_rate": 1.4609432538575705e-05, + "loss": 0.4652, + "step": 110130 + }, + { + "epoch": 4.0024711098190275, + "grad_norm": 0.3312014043331146, + "learning_rate": 1.4604712884666588e-05, + "loss": 0.0592, + "step": 110140 + }, + { + "epoch": 4.0028345083218255, + "grad_norm": 0.24331605434417725, + "learning_rate": 1.4599993678653404e-05, + "loss": 0.5292, + "step": 110150 + }, + { + "epoch": 4.0031979068246235, + "grad_norm": 0.34533190727233887, + "learning_rate": 1.4595274920739487e-05, + "loss": 0.0605, + "step": 110160 + }, + { + "epoch": 4.003561305327422, + "grad_norm": 0.6242371201515198, + "learning_rate": 1.4590556611128161e-05, + "loss": 0.0617, + "step": 110170 + }, + { + "epoch": 4.00392470383022, + "grad_norm": 0.3273410201072693, + "learning_rate": 1.4585838750022707e-05, + "loss": 0.0463, + "step": 110180 + }, + { + "epoch": 4.004288102333018, + "grad_norm": 0.7081814408302307, + "learning_rate": 1.4581121337626402e-05, + "loss": 0.0493, + "step": 110190 + }, + { + "epoch": 4.004651500835816, + "grad_norm": 59.34846496582031, + "learning_rate": 1.4576404374142514e-05, + "loss": 0.6547, + "step": 110200 + }, + { + "epoch": 4.005014899338614, + "grad_norm": 0.32631370425224304, + "learning_rate": 1.457168785977428e-05, + "loss": 0.0666, + "step": 110210 + }, + { + "epoch": 4.005378297841413, + "grad_norm": 1.6881418228149414, + "learning_rate": 1.4566971794724904e-05, + "loss": 0.0651, + "step": 110220 + }, + { + "epoch": 4.005741696344211, + "grad_norm": 3.0024871826171875, + "learning_rate": 1.4562256179197595e-05, + "loss": 0.0681, + "step": 110230 + }, + { + "epoch": 4.006105094847009, + "grad_norm": 0.28225383162498474, + "learning_rate": 1.4557541013395526e-05, + "loss": 0.0707, + "step": 110240 + }, + { + "epoch": 4.006468493349807, + "grad_norm": 0.6246106624603271, + "learning_rate": 1.4552826297521871e-05, + "loss": 0.0594, + "step": 110250 + }, + { + "epoch": 4.006831891852605, + "grad_norm": 1.095879077911377, + "learning_rate": 1.4548112031779751e-05, + "loss": 0.0722, + "step": 110260 + }, + { + "epoch": 4.007195290355404, + "grad_norm": 0.6250698566436768, + "learning_rate": 1.4543398216372295e-05, + "loss": 0.0604, + "step": 110270 + }, + { + "epoch": 4.007558688858202, + "grad_norm": 0.6139402389526367, + "learning_rate": 1.4538684851502615e-05, + "loss": 0.1047, + "step": 110280 + }, + { + "epoch": 4.007922087361, + "grad_norm": 0.4828980267047882, + "learning_rate": 1.4533971937373776e-05, + "loss": 0.0574, + "step": 110290 + }, + { + "epoch": 4.008285485863798, + "grad_norm": 0.7050805687904358, + "learning_rate": 1.4529259474188844e-05, + "loss": 0.0706, + "step": 110300 + }, + { + "epoch": 4.008648884366597, + "grad_norm": 0.49131813645362854, + "learning_rate": 1.4524547462150876e-05, + "loss": 0.0823, + "step": 110310 + }, + { + "epoch": 4.009012282869395, + "grad_norm": 2.5979621410369873, + "learning_rate": 1.4519835901462878e-05, + "loss": 0.06, + "step": 110320 + }, + { + "epoch": 4.009375681372193, + "grad_norm": 1.8901911973953247, + "learning_rate": 1.4515124792327861e-05, + "loss": 0.0635, + "step": 110330 + }, + { + "epoch": 4.009739079874991, + "grad_norm": 2.455570697784424, + "learning_rate": 1.4510414134948814e-05, + "loss": 0.0487, + "step": 110340 + }, + { + "epoch": 4.010102478377789, + "grad_norm": 0.5497618913650513, + "learning_rate": 1.4505703929528707e-05, + "loss": 0.0578, + "step": 110350 + }, + { + "epoch": 4.010465876880588, + "grad_norm": 0.5679813027381897, + "learning_rate": 1.4500994176270471e-05, + "loss": 0.0698, + "step": 110360 + }, + { + "epoch": 4.010829275383386, + "grad_norm": 0.7943199276924133, + "learning_rate": 1.4496284875377036e-05, + "loss": 0.0686, + "step": 110370 + }, + { + "epoch": 4.011192673886184, + "grad_norm": 0.9593531489372253, + "learning_rate": 1.4492046891512567e-05, + "loss": 3.0162, + "step": 110380 + }, + { + "epoch": 4.011556072388982, + "grad_norm": 2.073781967163086, + "learning_rate": 1.4487338450671259e-05, + "loss": 0.0471, + "step": 110390 + }, + { + "epoch": 4.01191947089178, + "grad_norm": 0.6789143681526184, + "learning_rate": 1.4482630462783132e-05, + "loss": 0.0593, + "step": 110400 + }, + { + "epoch": 4.01191947089178, + "eval_loss": 0.3222469091415405, + "eval_runtime": 179.3672, + "eval_samples_per_second": 41.334, + "eval_steps_per_second": 5.168, + "eval_wer": 0.13037558770671848, + "step": 110400 + }, + { + "epoch": 4.0122828693945785, + "grad_norm": 0.3693692088127136, + "learning_rate": 1.4477922928051047e-05, + "loss": 0.0701, + "step": 110410 + }, + { + "epoch": 4.012646267897376, + "grad_norm": 0.8600411415100098, + "learning_rate": 1.4473215846677818e-05, + "loss": 0.0631, + "step": 110420 + }, + { + "epoch": 4.013009666400174, + "grad_norm": 1.3148378133773804, + "learning_rate": 1.4468509218866261e-05, + "loss": 0.0821, + "step": 110430 + }, + { + "epoch": 4.013373064902972, + "grad_norm": 0.508591890335083, + "learning_rate": 1.446380304481918e-05, + "loss": 0.069, + "step": 110440 + }, + { + "epoch": 4.01373646340577, + "grad_norm": 2.257439136505127, + "learning_rate": 1.4459097324739329e-05, + "loss": 0.0722, + "step": 110450 + }, + { + "epoch": 4.014099861908569, + "grad_norm": 0.6121902465820312, + "learning_rate": 1.4454392058829472e-05, + "loss": 0.1137, + "step": 110460 + }, + { + "epoch": 4.014463260411367, + "grad_norm": 0.5297804474830627, + "learning_rate": 1.4449687247292349e-05, + "loss": 0.0496, + "step": 110470 + }, + { + "epoch": 4.014826658914165, + "grad_norm": 1.0392407178878784, + "learning_rate": 1.4444982890330653e-05, + "loss": 0.05, + "step": 110480 + }, + { + "epoch": 4.015190057416963, + "grad_norm": 0.29816481471061707, + "learning_rate": 1.4440278988147087e-05, + "loss": 0.0524, + "step": 110490 + }, + { + "epoch": 4.015553455919761, + "grad_norm": 0.9273152947425842, + "learning_rate": 1.4435575540944332e-05, + "loss": 0.2128, + "step": 110500 + }, + { + "epoch": 4.01591685442256, + "grad_norm": 0.6994959712028503, + "learning_rate": 1.4430872548925046e-05, + "loss": 0.0778, + "step": 110510 + }, + { + "epoch": 4.016280252925358, + "grad_norm": 0.4037676155567169, + "learning_rate": 1.4426170012291848e-05, + "loss": 0.0602, + "step": 110520 + }, + { + "epoch": 4.016643651428156, + "grad_norm": 0.2352452278137207, + "learning_rate": 1.4421467931247362e-05, + "loss": 0.0559, + "step": 110530 + }, + { + "epoch": 4.017007049930954, + "grad_norm": 0.3989976942539215, + "learning_rate": 1.4416766305994184e-05, + "loss": 0.0505, + "step": 110540 + }, + { + "epoch": 4.017370448433752, + "grad_norm": 0.42041394114494324, + "learning_rate": 1.4412065136734904e-05, + "loss": 0.0964, + "step": 110550 + }, + { + "epoch": 4.017733846936551, + "grad_norm": 0.9922043085098267, + "learning_rate": 1.4407364423672048e-05, + "loss": 0.0782, + "step": 110560 + }, + { + "epoch": 4.018097245439349, + "grad_norm": 0.4274202585220337, + "learning_rate": 1.4402664167008178e-05, + "loss": 0.0664, + "step": 110570 + }, + { + "epoch": 4.018460643942147, + "grad_norm": 0.520118236541748, + "learning_rate": 1.439796436694581e-05, + "loss": 0.0601, + "step": 110580 + }, + { + "epoch": 4.018824042444945, + "grad_norm": 0.9387579560279846, + "learning_rate": 1.4393265023687425e-05, + "loss": 0.0511, + "step": 110590 + }, + { + "epoch": 4.019187440947744, + "grad_norm": 0.9909424781799316, + "learning_rate": 1.438856613743551e-05, + "loss": 0.057, + "step": 110600 + }, + { + "epoch": 4.019550839450542, + "grad_norm": 0.53632652759552, + "learning_rate": 1.4383867708392537e-05, + "loss": 0.0729, + "step": 110610 + }, + { + "epoch": 4.01991423795334, + "grad_norm": 0.6779784560203552, + "learning_rate": 1.4379169736760923e-05, + "loss": 0.0808, + "step": 110620 + }, + { + "epoch": 4.020277636456138, + "grad_norm": 0.3937224745750427, + "learning_rate": 1.4374472222743093e-05, + "loss": 0.0551, + "step": 110630 + }, + { + "epoch": 4.020641034958936, + "grad_norm": 4.022054672241211, + "learning_rate": 1.4369775166541449e-05, + "loss": 3.4486, + "step": 110640 + }, + { + "epoch": 4.0210044334617345, + "grad_norm": 0.4625096619129181, + "learning_rate": 1.4365078568358383e-05, + "loss": 0.0543, + "step": 110650 + }, + { + "epoch": 4.0213678319645325, + "grad_norm": 0.6315404772758484, + "learning_rate": 1.4360382428396232e-05, + "loss": 0.0658, + "step": 110660 + }, + { + "epoch": 4.0217312304673305, + "grad_norm": 5.309476375579834, + "learning_rate": 1.4355686746857344e-05, + "loss": 0.0509, + "step": 110670 + }, + { + "epoch": 4.0220946289701285, + "grad_norm": 1.7463594675064087, + "learning_rate": 1.4350991523944046e-05, + "loss": 0.056, + "step": 110680 + }, + { + "epoch": 4.0224580274729265, + "grad_norm": 0.3528885245323181, + "learning_rate": 1.434629675985864e-05, + "loss": 0.1695, + "step": 110690 + }, + { + "epoch": 4.022821425975725, + "grad_norm": 0.8287866115570068, + "learning_rate": 1.4341602454803393e-05, + "loss": 0.3779, + "step": 110700 + }, + { + "epoch": 4.023184824478523, + "grad_norm": 0.5021520256996155, + "learning_rate": 1.4336908608980582e-05, + "loss": 0.0684, + "step": 110710 + }, + { + "epoch": 4.023548222981321, + "grad_norm": 0.5340952277183533, + "learning_rate": 1.4332215222592418e-05, + "loss": 0.0702, + "step": 110720 + }, + { + "epoch": 4.023911621484119, + "grad_norm": 0.44070643186569214, + "learning_rate": 1.4327522295841168e-05, + "loss": 0.0556, + "step": 110730 + }, + { + "epoch": 4.024275019986917, + "grad_norm": 0.3349458873271942, + "learning_rate": 1.4322829828928996e-05, + "loss": 0.0545, + "step": 110740 + }, + { + "epoch": 4.024638418489716, + "grad_norm": 0.5574124455451965, + "learning_rate": 1.4318137822058109e-05, + "loss": 0.0501, + "step": 110750 + }, + { + "epoch": 4.025001816992514, + "grad_norm": 1.3972676992416382, + "learning_rate": 1.4313446275430647e-05, + "loss": 0.0691, + "step": 110760 + }, + { + "epoch": 4.025365215495312, + "grad_norm": 0.916902482509613, + "learning_rate": 1.4308755189248763e-05, + "loss": 0.1109, + "step": 110770 + }, + { + "epoch": 4.02572861399811, + "grad_norm": 0.410158634185791, + "learning_rate": 1.4304064563714576e-05, + "loss": 0.0549, + "step": 110780 + }, + { + "epoch": 4.026092012500908, + "grad_norm": 1.5064035654067993, + "learning_rate": 1.4299374399030202e-05, + "loss": 1.0608, + "step": 110790 + }, + { + "epoch": 4.026455411003707, + "grad_norm": 0.44640934467315674, + "learning_rate": 1.42946846953977e-05, + "loss": 0.0566, + "step": 110800 + }, + { + "epoch": 4.026818809506505, + "grad_norm": 0.6432878971099854, + "learning_rate": 1.4289995453019145e-05, + "loss": 0.0722, + "step": 110810 + }, + { + "epoch": 4.027182208009303, + "grad_norm": 0.7535707950592041, + "learning_rate": 1.4285306672096583e-05, + "loss": 0.0758, + "step": 110820 + }, + { + "epoch": 4.027545606512101, + "grad_norm": 0.8665387630462646, + "learning_rate": 1.4280618352832043e-05, + "loss": 0.0837, + "step": 110830 + }, + { + "epoch": 4.027909005014899, + "grad_norm": 0.7218203544616699, + "learning_rate": 1.4275930495427506e-05, + "loss": 0.0612, + "step": 110840 + }, + { + "epoch": 4.028272403517698, + "grad_norm": 1.0231331586837769, + "learning_rate": 1.427124310008498e-05, + "loss": 0.0635, + "step": 110850 + }, + { + "epoch": 4.028635802020496, + "grad_norm": 0.5189678072929382, + "learning_rate": 1.4266556167006396e-05, + "loss": 0.074, + "step": 110860 + }, + { + "epoch": 4.028999200523294, + "grad_norm": 0.621478796005249, + "learning_rate": 1.4261869696393735e-05, + "loss": 0.061, + "step": 110870 + }, + { + "epoch": 4.029362599026092, + "grad_norm": 2.097764730453491, + "learning_rate": 1.425718368844889e-05, + "loss": 0.0539, + "step": 110880 + }, + { + "epoch": 4.029725997528891, + "grad_norm": 0.3547973036766052, + "learning_rate": 1.4252498143373793e-05, + "loss": 0.0468, + "step": 110890 + }, + { + "epoch": 4.030089396031689, + "grad_norm": 0.2843954265117645, + "learning_rate": 1.4247813061370297e-05, + "loss": 0.0521, + "step": 110900 + }, + { + "epoch": 4.030452794534487, + "grad_norm": 0.36639404296875, + "learning_rate": 1.424312844264028e-05, + "loss": 0.0638, + "step": 110910 + }, + { + "epoch": 4.030816193037285, + "grad_norm": 0.4634372889995575, + "learning_rate": 1.4238444287385588e-05, + "loss": 0.079, + "step": 110920 + }, + { + "epoch": 4.0311795915400825, + "grad_norm": 0.5150337815284729, + "learning_rate": 1.4233760595808049e-05, + "loss": 0.1644, + "step": 110930 + }, + { + "epoch": 4.031542990042881, + "grad_norm": 1.6643534898757935, + "learning_rate": 1.4229077368109451e-05, + "loss": 0.0646, + "step": 110940 + }, + { + "epoch": 4.031906388545679, + "grad_norm": 0.9327892065048218, + "learning_rate": 1.4224394604491586e-05, + "loss": 0.0581, + "step": 110950 + }, + { + "epoch": 4.032269787048477, + "grad_norm": 1.278937578201294, + "learning_rate": 1.4219712305156218e-05, + "loss": 0.0637, + "step": 110960 + }, + { + "epoch": 4.032633185551275, + "grad_norm": 0.9296409487724304, + "learning_rate": 1.4215030470305102e-05, + "loss": 0.0624, + "step": 110970 + }, + { + "epoch": 4.032996584054073, + "grad_norm": 0.5513620972633362, + "learning_rate": 1.4210349100139936e-05, + "loss": 1.9567, + "step": 110980 + }, + { + "epoch": 4.033359982556872, + "grad_norm": 0.42453351616859436, + "learning_rate": 1.4205668194862448e-05, + "loss": 0.0577, + "step": 110990 + }, + { + "epoch": 4.03372338105967, + "grad_norm": 1.1154534816741943, + "learning_rate": 1.4200987754674294e-05, + "loss": 0.0633, + "step": 111000 + }, + { + "epoch": 4.03372338105967, + "eval_loss": 0.2871040403842926, + "eval_runtime": 179.1702, + "eval_samples_per_second": 41.38, + "eval_steps_per_second": 5.174, + "eval_wer": 0.13044820011981048, + "step": 111000 + }, + { + "epoch": 4.034086779562468, + "grad_norm": 0.23643670976161957, + "learning_rate": 1.4196307779777173e-05, + "loss": 0.0728, + "step": 111010 + }, + { + "epoch": 4.034450178065266, + "grad_norm": 0.7400628924369812, + "learning_rate": 1.4191628270372703e-05, + "loss": 0.0715, + "step": 111020 + }, + { + "epoch": 4.034813576568064, + "grad_norm": 0.6206227540969849, + "learning_rate": 1.4186949226662522e-05, + "loss": 0.0586, + "step": 111030 + }, + { + "epoch": 4.035176975070863, + "grad_norm": 6.141451358795166, + "learning_rate": 1.4182270648848215e-05, + "loss": 0.1303, + "step": 111040 + }, + { + "epoch": 4.035540373573661, + "grad_norm": 0.33030861616134644, + "learning_rate": 1.4177592537131376e-05, + "loss": 1.078, + "step": 111050 + }, + { + "epoch": 4.035903772076459, + "grad_norm": 0.5480292439460754, + "learning_rate": 1.4172914891713569e-05, + "loss": 0.0622, + "step": 111060 + }, + { + "epoch": 4.036267170579257, + "grad_norm": 1.9571572542190552, + "learning_rate": 1.4168237712796347e-05, + "loss": 0.0547, + "step": 111070 + }, + { + "epoch": 4.036630569082055, + "grad_norm": 0.8759858012199402, + "learning_rate": 1.4163561000581213e-05, + "loss": 0.0631, + "step": 111080 + }, + { + "epoch": 4.036993967584854, + "grad_norm": 0.46415144205093384, + "learning_rate": 1.415888475526969e-05, + "loss": 0.054, + "step": 111090 + }, + { + "epoch": 4.037357366087652, + "grad_norm": 1.5888949632644653, + "learning_rate": 1.4154208977063227e-05, + "loss": 0.0835, + "step": 111100 + }, + { + "epoch": 4.03772076459045, + "grad_norm": 0.47662070393562317, + "learning_rate": 1.4149533666163331e-05, + "loss": 0.0614, + "step": 111110 + }, + { + "epoch": 4.038084163093248, + "grad_norm": 0.4895434081554413, + "learning_rate": 1.4144858822771412e-05, + "loss": 0.0696, + "step": 111120 + }, + { + "epoch": 4.038447561596046, + "grad_norm": 0.5362039804458618, + "learning_rate": 1.4140184447088916e-05, + "loss": 0.2058, + "step": 111130 + }, + { + "epoch": 4.038810960098845, + "grad_norm": 0.28153735399246216, + "learning_rate": 1.4135510539317212e-05, + "loss": 0.0608, + "step": 111140 + }, + { + "epoch": 4.039174358601643, + "grad_norm": 0.323169469833374, + "learning_rate": 1.4130837099657724e-05, + "loss": 0.0578, + "step": 111150 + }, + { + "epoch": 4.039537757104441, + "grad_norm": 0.43453449010849, + "learning_rate": 1.412616412831178e-05, + "loss": 0.0689, + "step": 111160 + }, + { + "epoch": 4.039901155607239, + "grad_norm": 0.42590922117233276, + "learning_rate": 1.4121491625480749e-05, + "loss": 0.0676, + "step": 111170 + }, + { + "epoch": 4.0402645541100375, + "grad_norm": 0.39531514048576355, + "learning_rate": 1.4116819591365924e-05, + "loss": 0.0565, + "step": 111180 + }, + { + "epoch": 4.0406279526128355, + "grad_norm": 0.2730831801891327, + "learning_rate": 1.411214802616862e-05, + "loss": 0.0615, + "step": 111190 + }, + { + "epoch": 4.0409913511156335, + "grad_norm": 0.47754859924316406, + "learning_rate": 1.410747693009012e-05, + "loss": 0.0462, + "step": 111200 + }, + { + "epoch": 4.041354749618431, + "grad_norm": 4.3222270011901855, + "learning_rate": 1.4102806303331695e-05, + "loss": 0.0711, + "step": 111210 + }, + { + "epoch": 4.041718148121229, + "grad_norm": 0.7736272811889648, + "learning_rate": 1.4098136146094559e-05, + "loss": 0.0492, + "step": 111220 + }, + { + "epoch": 4.042081546624028, + "grad_norm": 1.073490023612976, + "learning_rate": 1.4093466458579962e-05, + "loss": 0.0753, + "step": 111230 + }, + { + "epoch": 4.042444945126826, + "grad_norm": 0.35597535967826843, + "learning_rate": 1.4088797240989071e-05, + "loss": 0.055, + "step": 111240 + }, + { + "epoch": 4.042808343629624, + "grad_norm": 0.5514324307441711, + "learning_rate": 1.4084128493523102e-05, + "loss": 0.0664, + "step": 111250 + }, + { + "epoch": 4.043171742132422, + "grad_norm": 0.2898502051830292, + "learning_rate": 1.4079460216383186e-05, + "loss": 0.0663, + "step": 111260 + }, + { + "epoch": 4.04353514063522, + "grad_norm": 16.011980056762695, + "learning_rate": 1.4074792409770487e-05, + "loss": 0.0632, + "step": 111270 + }, + { + "epoch": 4.043898539138019, + "grad_norm": 0.34230297803878784, + "learning_rate": 1.4070125073886097e-05, + "loss": 0.0545, + "step": 111280 + }, + { + "epoch": 4.044261937640817, + "grad_norm": 0.6182803511619568, + "learning_rate": 1.4065458208931132e-05, + "loss": 0.0614, + "step": 111290 + }, + { + "epoch": 4.044625336143615, + "grad_norm": 3.1813621520996094, + "learning_rate": 1.4060791815106666e-05, + "loss": 0.0556, + "step": 111300 + }, + { + "epoch": 4.044988734646413, + "grad_norm": 3.2446606159210205, + "learning_rate": 1.4056125892613773e-05, + "loss": 0.0733, + "step": 111310 + }, + { + "epoch": 4.045352133149211, + "grad_norm": 0.8564647436141968, + "learning_rate": 1.4051460441653463e-05, + "loss": 0.0706, + "step": 111320 + }, + { + "epoch": 4.04571553165201, + "grad_norm": 59.863319396972656, + "learning_rate": 1.4046795462426767e-05, + "loss": 0.8816, + "step": 111330 + }, + { + "epoch": 4.046078930154808, + "grad_norm": 0.32583507895469666, + "learning_rate": 1.4042130955134686e-05, + "loss": 0.0482, + "step": 111340 + }, + { + "epoch": 4.046442328657606, + "grad_norm": 0.5416057705879211, + "learning_rate": 1.4037466919978201e-05, + "loss": 0.0531, + "step": 111350 + }, + { + "epoch": 4.046805727160404, + "grad_norm": 0.40496620535850525, + "learning_rate": 1.4032803357158253e-05, + "loss": 0.0605, + "step": 111360 + }, + { + "epoch": 4.047169125663202, + "grad_norm": 1.0938149690628052, + "learning_rate": 1.4028140266875797e-05, + "loss": 0.0873, + "step": 111370 + }, + { + "epoch": 4.047532524166001, + "grad_norm": 0.465610533952713, + "learning_rate": 1.4023477649331718e-05, + "loss": 0.0675, + "step": 111380 + }, + { + "epoch": 4.047895922668799, + "grad_norm": 0.5324172973632812, + "learning_rate": 1.4018815504726953e-05, + "loss": 0.1398, + "step": 111390 + }, + { + "epoch": 4.048259321171597, + "grad_norm": 0.6042605638504028, + "learning_rate": 1.4014153833262347e-05, + "loss": 0.0731, + "step": 111400 + }, + { + "epoch": 4.048622719674395, + "grad_norm": 1.338255524635315, + "learning_rate": 1.4009492635138777e-05, + "loss": 0.0899, + "step": 111410 + }, + { + "epoch": 4.048986118177193, + "grad_norm": 0.43646422028541565, + "learning_rate": 1.400483191055705e-05, + "loss": 0.0484, + "step": 111420 + }, + { + "epoch": 4.0493495166799915, + "grad_norm": 28.60755729675293, + "learning_rate": 1.4000171659717999e-05, + "loss": 0.5856, + "step": 111430 + }, + { + "epoch": 4.0497129151827895, + "grad_norm": 0.8646038174629211, + "learning_rate": 1.399551188282241e-05, + "loss": 0.0675, + "step": 111440 + }, + { + "epoch": 4.0500763136855875, + "grad_norm": 0.30594268441200256, + "learning_rate": 1.3990852580071073e-05, + "loss": 0.1136, + "step": 111450 + }, + { + "epoch": 4.0504397121883855, + "grad_norm": 0.5570999979972839, + "learning_rate": 1.3986193751664717e-05, + "loss": 0.0656, + "step": 111460 + }, + { + "epoch": 4.050803110691184, + "grad_norm": 2.459162950515747, + "learning_rate": 1.3981535397804093e-05, + "loss": 0.0414, + "step": 111470 + }, + { + "epoch": 4.051166509193982, + "grad_norm": 0.7406504154205322, + "learning_rate": 1.3976877518689887e-05, + "loss": 0.055, + "step": 111480 + }, + { + "epoch": 4.05152990769678, + "grad_norm": 0.36871564388275146, + "learning_rate": 1.3972220114522827e-05, + "loss": 0.0609, + "step": 111490 + }, + { + "epoch": 4.051893306199578, + "grad_norm": 0.8053199648857117, + "learning_rate": 1.3967563185503557e-05, + "loss": 0.0804, + "step": 111500 + }, + { + "epoch": 4.052256704702376, + "grad_norm": 0.557873547077179, + "learning_rate": 1.3962906731832746e-05, + "loss": 0.0676, + "step": 111510 + }, + { + "epoch": 4.052620103205175, + "grad_norm": 1.9856547117233276, + "learning_rate": 1.3958250753711002e-05, + "loss": 0.0771, + "step": 111520 + }, + { + "epoch": 4.052983501707973, + "grad_norm": 0.48433226346969604, + "learning_rate": 1.3953595251338947e-05, + "loss": 0.0558, + "step": 111530 + }, + { + "epoch": 4.053346900210771, + "grad_norm": 0.42576339840888977, + "learning_rate": 1.3948940224917167e-05, + "loss": 0.0643, + "step": 111540 + }, + { + "epoch": 4.053710298713569, + "grad_norm": 0.7690130472183228, + "learning_rate": 1.3944285674646245e-05, + "loss": 0.0637, + "step": 111550 + }, + { + "epoch": 4.054073697216367, + "grad_norm": 0.28431037068367004, + "learning_rate": 1.393963160072671e-05, + "loss": 0.0623, + "step": 111560 + }, + { + "epoch": 4.054437095719166, + "grad_norm": 1.6892107725143433, + "learning_rate": 1.3934978003359095e-05, + "loss": 0.0553, + "step": 111570 + }, + { + "epoch": 4.054800494221964, + "grad_norm": 0.3961574137210846, + "learning_rate": 1.3930324882743906e-05, + "loss": 0.0575, + "step": 111580 + }, + { + "epoch": 4.055163892724762, + "grad_norm": 0.4090615212917328, + "learning_rate": 1.3925672239081644e-05, + "loss": 0.0495, + "step": 111590 + }, + { + "epoch": 4.05552729122756, + "grad_norm": 0.4748428165912628, + "learning_rate": 1.3921020072572749e-05, + "loss": 0.0797, + "step": 111600 + }, + { + "epoch": 4.05552729122756, + "eval_loss": 0.3094218969345093, + "eval_runtime": 178.8555, + "eval_samples_per_second": 41.452, + "eval_steps_per_second": 5.183, + "eval_wer": 0.1288416504801496, + "step": 111600 + }, + { + "epoch": 4.055890689730358, + "grad_norm": 0.6011778712272644, + "learning_rate": 1.3916368383417694e-05, + "loss": 0.0691, + "step": 111610 + }, + { + "epoch": 4.056254088233157, + "grad_norm": 0.7090577483177185, + "learning_rate": 1.3911717171816868e-05, + "loss": 0.0546, + "step": 111620 + }, + { + "epoch": 4.056617486735955, + "grad_norm": 1.0578325986862183, + "learning_rate": 1.3907066437970718e-05, + "loss": 0.0682, + "step": 111630 + }, + { + "epoch": 4.056980885238753, + "grad_norm": 0.3923257887363434, + "learning_rate": 1.3902416182079591e-05, + "loss": 0.0846, + "step": 111640 + }, + { + "epoch": 4.057344283741551, + "grad_norm": 2.852869749069214, + "learning_rate": 1.389776640434388e-05, + "loss": 0.0787, + "step": 111650 + }, + { + "epoch": 4.057707682244349, + "grad_norm": 0.35996344685554504, + "learning_rate": 1.3893117104963903e-05, + "loss": 0.0758, + "step": 111660 + }, + { + "epoch": 4.058071080747148, + "grad_norm": 0.7732596397399902, + "learning_rate": 1.3888468284139994e-05, + "loss": 0.0551, + "step": 111670 + }, + { + "epoch": 4.058434479249946, + "grad_norm": 1.0408018827438354, + "learning_rate": 1.3883819942072446e-05, + "loss": 0.0633, + "step": 111680 + }, + { + "epoch": 4.058797877752744, + "grad_norm": 0.3703053891658783, + "learning_rate": 1.3879172078961561e-05, + "loss": 0.0597, + "step": 111690 + }, + { + "epoch": 4.059161276255542, + "grad_norm": 0.7436791658401489, + "learning_rate": 1.3874524695007568e-05, + "loss": 0.0552, + "step": 111700 + }, + { + "epoch": 4.05952467475834, + "grad_norm": 0.3398180305957794, + "learning_rate": 1.3869877790410734e-05, + "loss": 0.0528, + "step": 111710 + }, + { + "epoch": 4.059888073261138, + "grad_norm": 1.0248258113861084, + "learning_rate": 1.3865231365371245e-05, + "loss": 0.0779, + "step": 111720 + }, + { + "epoch": 4.060251471763936, + "grad_norm": 0.3211299180984497, + "learning_rate": 1.3860585420089336e-05, + "loss": 0.0874, + "step": 111730 + }, + { + "epoch": 4.060614870266734, + "grad_norm": 0.44448596239089966, + "learning_rate": 1.385593995476516e-05, + "loss": 0.0578, + "step": 111740 + }, + { + "epoch": 4.060978268769532, + "grad_norm": 0.36641961336135864, + "learning_rate": 1.3851294969598888e-05, + "loss": 0.077, + "step": 111750 + }, + { + "epoch": 4.061341667272331, + "grad_norm": 0.8280020952224731, + "learning_rate": 1.3846650464790633e-05, + "loss": 0.0618, + "step": 111760 + }, + { + "epoch": 4.061705065775129, + "grad_norm": 1.596620798110962, + "learning_rate": 1.3842006440540542e-05, + "loss": 0.0688, + "step": 111770 + }, + { + "epoch": 4.062068464277927, + "grad_norm": 0.3118537366390228, + "learning_rate": 1.3837362897048684e-05, + "loss": 0.0477, + "step": 111780 + }, + { + "epoch": 4.062431862780725, + "grad_norm": 0.9146553874015808, + "learning_rate": 1.3832719834515151e-05, + "loss": 0.0631, + "step": 111790 + }, + { + "epoch": 4.062795261283523, + "grad_norm": 1.1774924993515015, + "learning_rate": 1.3828077253139978e-05, + "loss": 0.054, + "step": 111800 + }, + { + "epoch": 4.063158659786322, + "grad_norm": 0.6028741598129272, + "learning_rate": 1.3823435153123209e-05, + "loss": 0.0627, + "step": 111810 + }, + { + "epoch": 4.06352205828912, + "grad_norm": 5.221044540405273, + "learning_rate": 1.3818793534664848e-05, + "loss": 0.0515, + "step": 111820 + }, + { + "epoch": 4.063885456791918, + "grad_norm": 0.31061217188835144, + "learning_rate": 1.3814152397964906e-05, + "loss": 0.0912, + "step": 111830 + }, + { + "epoch": 4.064248855294716, + "grad_norm": 0.45412084460258484, + "learning_rate": 1.3809511743223324e-05, + "loss": 0.0507, + "step": 111840 + }, + { + "epoch": 4.064612253797514, + "grad_norm": 0.2962772846221924, + "learning_rate": 1.3804871570640077e-05, + "loss": 0.0543, + "step": 111850 + }, + { + "epoch": 4.064975652300313, + "grad_norm": 0.30459967255592346, + "learning_rate": 1.380023188041506e-05, + "loss": 0.0648, + "step": 111860 + }, + { + "epoch": 4.065339050803111, + "grad_norm": 0.43784353137016296, + "learning_rate": 1.3795592672748223e-05, + "loss": 0.0585, + "step": 111870 + }, + { + "epoch": 4.065702449305909, + "grad_norm": 0.5347334146499634, + "learning_rate": 1.3790953947839421e-05, + "loss": 0.0576, + "step": 111880 + }, + { + "epoch": 4.066065847808707, + "grad_norm": 0.750178337097168, + "learning_rate": 1.3786315705888542e-05, + "loss": 0.0503, + "step": 111890 + }, + { + "epoch": 4.066429246311505, + "grad_norm": 0.21846427023410797, + "learning_rate": 1.3781677947095412e-05, + "loss": 0.069, + "step": 111900 + }, + { + "epoch": 4.066792644814304, + "grad_norm": 0.3489988148212433, + "learning_rate": 1.3777040671659866e-05, + "loss": 0.0562, + "step": 111910 + }, + { + "epoch": 4.067156043317102, + "grad_norm": 0.5933734774589539, + "learning_rate": 1.3772403879781703e-05, + "loss": 0.0663, + "step": 111920 + }, + { + "epoch": 4.0675194418199, + "grad_norm": 0.31282684206962585, + "learning_rate": 1.3767767571660722e-05, + "loss": 0.0552, + "step": 111930 + }, + { + "epoch": 4.067882840322698, + "grad_norm": 0.5128657817840576, + "learning_rate": 1.3763131747496657e-05, + "loss": 0.0528, + "step": 111940 + }, + { + "epoch": 4.068246238825496, + "grad_norm": 0.5472941994667053, + "learning_rate": 1.3758496407489268e-05, + "loss": 0.0844, + "step": 111950 + }, + { + "epoch": 4.0686096373282945, + "grad_norm": 0.41867172718048096, + "learning_rate": 1.3753861551838271e-05, + "loss": 0.1142, + "step": 111960 + }, + { + "epoch": 4.0689730358310925, + "grad_norm": 2.5127737522125244, + "learning_rate": 1.3749227180743374e-05, + "loss": 0.0838, + "step": 111970 + }, + { + "epoch": 4.0693364343338905, + "grad_norm": 1.4139436483383179, + "learning_rate": 1.374459329440424e-05, + "loss": 0.0921, + "step": 111980 + }, + { + "epoch": 4.0696998328366885, + "grad_norm": 0.5689426064491272, + "learning_rate": 1.3739959893020543e-05, + "loss": 0.5989, + "step": 111990 + }, + { + "epoch": 4.070063231339487, + "grad_norm": 0.745959997177124, + "learning_rate": 1.373532697679189e-05, + "loss": 0.0635, + "step": 112000 + }, + { + "epoch": 4.070426629842285, + "grad_norm": 0.531570315361023, + "learning_rate": 1.3730694545917938e-05, + "loss": 0.0692, + "step": 112010 + }, + { + "epoch": 4.070790028345083, + "grad_norm": 1.154670000076294, + "learning_rate": 1.3726062600598252e-05, + "loss": 0.0781, + "step": 112020 + }, + { + "epoch": 4.071153426847881, + "grad_norm": 0.7137158513069153, + "learning_rate": 1.3721431141032426e-05, + "loss": 0.0566, + "step": 112030 + }, + { + "epoch": 4.071516825350679, + "grad_norm": 0.5212516188621521, + "learning_rate": 1.3716800167419991e-05, + "loss": 0.0522, + "step": 112040 + }, + { + "epoch": 4.071880223853478, + "grad_norm": 0.4456008970737457, + "learning_rate": 1.3712169679960495e-05, + "loss": 0.0596, + "step": 112050 + }, + { + "epoch": 4.072243622356276, + "grad_norm": 1.1105504035949707, + "learning_rate": 1.3707539678853443e-05, + "loss": 0.0598, + "step": 112060 + }, + { + "epoch": 4.072607020859074, + "grad_norm": 0.3087688684463501, + "learning_rate": 1.3702910164298338e-05, + "loss": 0.0592, + "step": 112070 + }, + { + "epoch": 4.072970419361872, + "grad_norm": 0.8320184946060181, + "learning_rate": 1.3698281136494628e-05, + "loss": 0.0709, + "step": 112080 + }, + { + "epoch": 4.07333381786467, + "grad_norm": 3.412813901901245, + "learning_rate": 1.3693652595641782e-05, + "loss": 0.0526, + "step": 112090 + }, + { + "epoch": 4.073697216367469, + "grad_norm": 0.5016017556190491, + "learning_rate": 1.3689024541939196e-05, + "loss": 0.0607, + "step": 112100 + }, + { + "epoch": 4.074060614870267, + "grad_norm": 0.3929903507232666, + "learning_rate": 1.3684396975586322e-05, + "loss": 0.0621, + "step": 112110 + }, + { + "epoch": 4.074424013373065, + "grad_norm": 0.8663429617881775, + "learning_rate": 1.3679769896782507e-05, + "loss": 0.0638, + "step": 112120 + }, + { + "epoch": 4.074787411875863, + "grad_norm": 0.8499599099159241, + "learning_rate": 1.3675143305727145e-05, + "loss": 0.064, + "step": 112130 + }, + { + "epoch": 4.075150810378661, + "grad_norm": 0.5362977981567383, + "learning_rate": 1.3670517202619538e-05, + "loss": 0.0531, + "step": 112140 + }, + { + "epoch": 4.07551420888146, + "grad_norm": 0.3657929599285126, + "learning_rate": 1.3665891587659058e-05, + "loss": 0.0506, + "step": 112150 + }, + { + "epoch": 4.075877607384258, + "grad_norm": 0.4857289493083954, + "learning_rate": 1.3661266461044973e-05, + "loss": 0.073, + "step": 112160 + }, + { + "epoch": 4.076241005887056, + "grad_norm": 0.36839261651039124, + "learning_rate": 1.3656641822976579e-05, + "loss": 0.0501, + "step": 112170 + }, + { + "epoch": 4.076604404389854, + "grad_norm": 0.6398412585258484, + "learning_rate": 1.3652017673653122e-05, + "loss": 0.0451, + "step": 112180 + }, + { + "epoch": 4.076967802892652, + "grad_norm": 0.5313104391098022, + "learning_rate": 1.3647394013273848e-05, + "loss": 0.0478, + "step": 112190 + }, + { + "epoch": 4.077331201395451, + "grad_norm": 0.8435815572738647, + "learning_rate": 1.3642770842037972e-05, + "loss": 0.0698, + "step": 112200 + }, + { + "epoch": 4.077331201395451, + "eval_loss": 0.3243897259235382, + "eval_runtime": 179.2488, + "eval_samples_per_second": 41.361, + "eval_steps_per_second": 5.172, + "eval_wer": 0.12874180841214805, + "step": 112200 + }, + { + "epoch": 4.077694599898249, + "grad_norm": 0.5462674498558044, + "learning_rate": 1.3638148160144701e-05, + "loss": 0.0626, + "step": 112210 + }, + { + "epoch": 4.0780579984010465, + "grad_norm": 0.5389562249183655, + "learning_rate": 1.3633525967793192e-05, + "loss": 0.0675, + "step": 112220 + }, + { + "epoch": 4.0784213969038445, + "grad_norm": 0.4394398629665375, + "learning_rate": 1.3628904265182612e-05, + "loss": 0.0567, + "step": 112230 + }, + { + "epoch": 4.0787847954066425, + "grad_norm": 0.36386388540267944, + "learning_rate": 1.3624283052512075e-05, + "loss": 0.0518, + "step": 112240 + }, + { + "epoch": 4.079148193909441, + "grad_norm": 0.3440745174884796, + "learning_rate": 1.3619662329980723e-05, + "loss": 0.0608, + "step": 112250 + }, + { + "epoch": 4.079511592412239, + "grad_norm": 0.540234386920929, + "learning_rate": 1.361504209778762e-05, + "loss": 0.0715, + "step": 112260 + }, + { + "epoch": 4.079874990915037, + "grad_norm": 0.803322434425354, + "learning_rate": 1.3610422356131858e-05, + "loss": 0.0674, + "step": 112270 + }, + { + "epoch": 4.080238389417835, + "grad_norm": 0.3784193992614746, + "learning_rate": 1.3605803105212459e-05, + "loss": 0.1557, + "step": 112280 + }, + { + "epoch": 4.080601787920633, + "grad_norm": 0.4170146584510803, + "learning_rate": 1.3601184345228463e-05, + "loss": 0.0572, + "step": 112290 + }, + { + "epoch": 4.080965186423432, + "grad_norm": 0.46115851402282715, + "learning_rate": 1.359656607637888e-05, + "loss": 0.0536, + "step": 112300 + }, + { + "epoch": 4.08132858492623, + "grad_norm": 0.39042162895202637, + "learning_rate": 1.3591948298862698e-05, + "loss": 0.061, + "step": 112310 + }, + { + "epoch": 4.081691983429028, + "grad_norm": 0.5178929567337036, + "learning_rate": 1.3587331012878864e-05, + "loss": 0.0664, + "step": 112320 + }, + { + "epoch": 4.082055381931826, + "grad_norm": 0.5064478516578674, + "learning_rate": 1.358271421862633e-05, + "loss": 0.0662, + "step": 112330 + }, + { + "epoch": 4.082418780434625, + "grad_norm": 0.2574649751186371, + "learning_rate": 1.3578097916304023e-05, + "loss": 0.0439, + "step": 112340 + }, + { + "epoch": 4.082782178937423, + "grad_norm": 0.3785637617111206, + "learning_rate": 1.357348210611084e-05, + "loss": 0.0535, + "step": 112350 + }, + { + "epoch": 4.083145577440221, + "grad_norm": 0.31486454606056213, + "learning_rate": 1.3568866788245652e-05, + "loss": 0.0949, + "step": 112360 + }, + { + "epoch": 4.083508975943019, + "grad_norm": 0.732257604598999, + "learning_rate": 1.3564251962907331e-05, + "loss": 0.0806, + "step": 112370 + }, + { + "epoch": 4.083872374445817, + "grad_norm": 0.43957632780075073, + "learning_rate": 1.3559637630294683e-05, + "loss": 0.0681, + "step": 112380 + }, + { + "epoch": 4.084235772948616, + "grad_norm": 0.997170627117157, + "learning_rate": 1.3555023790606566e-05, + "loss": 0.0501, + "step": 112390 + }, + { + "epoch": 4.084599171451414, + "grad_norm": 161.74917602539062, + "learning_rate": 1.3550410444041741e-05, + "loss": 2.0045, + "step": 112400 + }, + { + "epoch": 4.084962569954212, + "grad_norm": 0.7197566628456116, + "learning_rate": 1.3545797590799003e-05, + "loss": 0.0535, + "step": 112410 + }, + { + "epoch": 4.08532596845701, + "grad_norm": 0.7341930270195007, + "learning_rate": 1.3541185231077085e-05, + "loss": 0.0845, + "step": 112420 + }, + { + "epoch": 4.085689366959808, + "grad_norm": 0.393226683139801, + "learning_rate": 1.3536573365074724e-05, + "loss": 0.0971, + "step": 112430 + }, + { + "epoch": 4.086052765462607, + "grad_norm": 0.3241816461086273, + "learning_rate": 1.3531961992990627e-05, + "loss": 0.0446, + "step": 112440 + }, + { + "epoch": 4.086416163965405, + "grad_norm": 0.755688488483429, + "learning_rate": 1.3527351115023496e-05, + "loss": 0.0686, + "step": 112450 + }, + { + "epoch": 4.086779562468203, + "grad_norm": 0.28549787402153015, + "learning_rate": 1.3522740731371975e-05, + "loss": 0.1545, + "step": 112460 + }, + { + "epoch": 4.087142960971001, + "grad_norm": 2.1800248622894287, + "learning_rate": 1.3518130842234721e-05, + "loss": 0.0571, + "step": 112470 + }, + { + "epoch": 4.087506359473799, + "grad_norm": 0.7874051332473755, + "learning_rate": 1.3513521447810354e-05, + "loss": 0.0672, + "step": 112480 + }, + { + "epoch": 4.0878697579765975, + "grad_norm": 1.9011385440826416, + "learning_rate": 1.3508912548297491e-05, + "loss": 0.0732, + "step": 112490 + }, + { + "epoch": 4.0882331564793954, + "grad_norm": 0.41868069767951965, + "learning_rate": 1.3504304143894692e-05, + "loss": 0.0611, + "step": 112500 + }, + { + "epoch": 4.088596554982193, + "grad_norm": 0.8152614831924438, + "learning_rate": 1.349969623480053e-05, + "loss": 0.0696, + "step": 112510 + }, + { + "epoch": 4.088959953484991, + "grad_norm": 0.7754275798797607, + "learning_rate": 1.3495088821213526e-05, + "loss": 0.0734, + "step": 112520 + }, + { + "epoch": 4.089323351987789, + "grad_norm": 0.3078191876411438, + "learning_rate": 1.3490481903332226e-05, + "loss": 0.0669, + "step": 112530 + }, + { + "epoch": 4.089686750490588, + "grad_norm": 1.8453993797302246, + "learning_rate": 1.3485875481355098e-05, + "loss": 0.053, + "step": 112540 + }, + { + "epoch": 4.090050148993386, + "grad_norm": 0.5882243514060974, + "learning_rate": 1.3481269555480642e-05, + "loss": 0.0562, + "step": 112550 + }, + { + "epoch": 4.090413547496184, + "grad_norm": 0.2845616042613983, + "learning_rate": 1.3476664125907284e-05, + "loss": 0.0624, + "step": 112560 + }, + { + "epoch": 4.090776945998982, + "grad_norm": 1.1381127834320068, + "learning_rate": 1.3472059192833475e-05, + "loss": 0.0648, + "step": 112570 + }, + { + "epoch": 4.091140344501781, + "grad_norm": 0.5941457748413086, + "learning_rate": 1.3467454756457612e-05, + "loss": 0.0596, + "step": 112580 + }, + { + "epoch": 4.091503743004579, + "grad_norm": 0.6055946350097656, + "learning_rate": 1.3462850816978103e-05, + "loss": 0.5211, + "step": 112590 + }, + { + "epoch": 4.091867141507377, + "grad_norm": 0.3982195258140564, + "learning_rate": 1.3458247374593292e-05, + "loss": 0.0668, + "step": 112600 + }, + { + "epoch": 4.092230540010175, + "grad_norm": 0.5881565809249878, + "learning_rate": 1.3453644429501539e-05, + "loss": 0.0974, + "step": 112610 + }, + { + "epoch": 4.092593938512973, + "grad_norm": 0.5674206614494324, + "learning_rate": 1.3449041981901162e-05, + "loss": 0.0665, + "step": 112620 + }, + { + "epoch": 4.092957337015772, + "grad_norm": 0.8177425265312195, + "learning_rate": 1.344444003199048e-05, + "loss": 0.065, + "step": 112630 + }, + { + "epoch": 4.09332073551857, + "grad_norm": 0.5913267135620117, + "learning_rate": 1.343983857996775e-05, + "loss": 0.0532, + "step": 112640 + }, + { + "epoch": 4.093684134021368, + "grad_norm": 0.33033841848373413, + "learning_rate": 1.3435237626031256e-05, + "loss": 0.0533, + "step": 112650 + }, + { + "epoch": 4.094047532524166, + "grad_norm": 0.5540090203285217, + "learning_rate": 1.3430637170379215e-05, + "loss": 0.0609, + "step": 112660 + }, + { + "epoch": 4.094410931026964, + "grad_norm": 0.4232136011123657, + "learning_rate": 1.3426037213209852e-05, + "loss": 0.0564, + "step": 112670 + }, + { + "epoch": 4.094774329529763, + "grad_norm": 0.8112702965736389, + "learning_rate": 1.342143775472137e-05, + "loss": 0.0512, + "step": 112680 + }, + { + "epoch": 4.095137728032561, + "grad_norm": 0.43123000860214233, + "learning_rate": 1.3416838795111944e-05, + "loss": 0.0432, + "step": 112690 + }, + { + "epoch": 4.095501126535359, + "grad_norm": 0.1756378710269928, + "learning_rate": 1.3412240334579713e-05, + "loss": 0.2854, + "step": 112700 + }, + { + "epoch": 4.095864525038157, + "grad_norm": 0.3354843258857727, + "learning_rate": 1.3407642373322816e-05, + "loss": 0.0786, + "step": 112710 + }, + { + "epoch": 4.096227923540955, + "grad_norm": 0.34515076875686646, + "learning_rate": 1.3403044911539364e-05, + "loss": 0.0691, + "step": 112720 + }, + { + "epoch": 4.0965913220437535, + "grad_norm": 0.33889952301979065, + "learning_rate": 1.3398447949427456e-05, + "loss": 0.0643, + "step": 112730 + }, + { + "epoch": 4.0969547205465515, + "grad_norm": 0.36444467306137085, + "learning_rate": 1.3393851487185135e-05, + "loss": 0.0531, + "step": 112740 + }, + { + "epoch": 4.0973181190493495, + "grad_norm": 0.37586820125579834, + "learning_rate": 1.3389255525010461e-05, + "loss": 0.0596, + "step": 112750 + }, + { + "epoch": 4.0976815175521475, + "grad_norm": 0.2949109971523285, + "learning_rate": 1.3384660063101454e-05, + "loss": 0.0646, + "step": 112760 + }, + { + "epoch": 4.0980449160549455, + "grad_norm": 1.0989277362823486, + "learning_rate": 1.3380065101656126e-05, + "loss": 0.0734, + "step": 112770 + }, + { + "epoch": 4.098408314557744, + "grad_norm": 1.996840238571167, + "learning_rate": 1.337547064087244e-05, + "loss": 0.0589, + "step": 112780 + }, + { + "epoch": 4.098771713060542, + "grad_norm": 0.34807854890823364, + "learning_rate": 1.3370876680948365e-05, + "loss": 0.057, + "step": 112790 + }, + { + "epoch": 4.09913511156334, + "grad_norm": 1.1680188179016113, + "learning_rate": 1.3366283222081847e-05, + "loss": 0.0604, + "step": 112800 + }, + { + "epoch": 4.09913511156334, + "eval_loss": 0.3146750032901764, + "eval_runtime": 179.6068, + "eval_samples_per_second": 41.279, + "eval_steps_per_second": 5.161, + "eval_wer": 0.1273621725633997, + "step": 112800 + }, + { + "epoch": 4.099498510066138, + "grad_norm": 0.31680727005004883, + "learning_rate": 1.3361690264470783e-05, + "loss": 0.0601, + "step": 112810 + }, + { + "epoch": 4.099861908568936, + "grad_norm": 0.5075859427452087, + "learning_rate": 1.3357097808313074e-05, + "loss": 0.05, + "step": 112820 + }, + { + "epoch": 4.100225307071735, + "grad_norm": 0.3714093267917633, + "learning_rate": 1.3352505853806604e-05, + "loss": 0.0483, + "step": 112830 + }, + { + "epoch": 4.100588705574533, + "grad_norm": 0.3648132383823395, + "learning_rate": 1.3347914401149208e-05, + "loss": 0.046, + "step": 112840 + }, + { + "epoch": 4.100952104077331, + "grad_norm": 0.5334128737449646, + "learning_rate": 1.334332345053872e-05, + "loss": 0.0688, + "step": 112850 + }, + { + "epoch": 4.101315502580129, + "grad_norm": 0.4606197476387024, + "learning_rate": 1.3338733002172948e-05, + "loss": 0.071, + "step": 112860 + }, + { + "epoch": 4.101678901082927, + "grad_norm": 0.47062140703201294, + "learning_rate": 1.3334143056249692e-05, + "loss": 0.0665, + "step": 112870 + }, + { + "epoch": 4.102042299585726, + "grad_norm": 1.2191188335418701, + "learning_rate": 1.3329553612966697e-05, + "loss": 0.0644, + "step": 112880 + }, + { + "epoch": 4.102405698088524, + "grad_norm": 0.48544201254844666, + "learning_rate": 1.3324964672521712e-05, + "loss": 0.0657, + "step": 112890 + }, + { + "epoch": 4.102769096591322, + "grad_norm": 0.748146653175354, + "learning_rate": 1.332037623511247e-05, + "loss": 0.0774, + "step": 112900 + }, + { + "epoch": 4.10313249509412, + "grad_norm": 0.43806397914886475, + "learning_rate": 1.3315788300936646e-05, + "loss": 0.066, + "step": 112910 + }, + { + "epoch": 4.103495893596919, + "grad_norm": 0.6744257211685181, + "learning_rate": 1.3311200870191937e-05, + "loss": 0.0598, + "step": 112920 + }, + { + "epoch": 4.103859292099717, + "grad_norm": 0.44944775104522705, + "learning_rate": 1.3306613943075988e-05, + "loss": 0.062, + "step": 112930 + }, + { + "epoch": 4.104222690602515, + "grad_norm": 0.46824315190315247, + "learning_rate": 1.3302027519786453e-05, + "loss": 0.0571, + "step": 112940 + }, + { + "epoch": 4.104586089105313, + "grad_norm": 1.0917015075683594, + "learning_rate": 1.3297441600520918e-05, + "loss": 0.0711, + "step": 112950 + }, + { + "epoch": 4.104949487608111, + "grad_norm": 0.3450702428817749, + "learning_rate": 1.3292856185476987e-05, + "loss": 0.0726, + "step": 112960 + }, + { + "epoch": 4.10531288611091, + "grad_norm": 0.49435433745384216, + "learning_rate": 1.3288271274852232e-05, + "loss": 0.073, + "step": 112970 + }, + { + "epoch": 4.105676284613708, + "grad_norm": 0.7066315412521362, + "learning_rate": 1.3283686868844203e-05, + "loss": 0.0654, + "step": 112980 + }, + { + "epoch": 4.106039683116506, + "grad_norm": 0.4903556704521179, + "learning_rate": 1.3279102967650414e-05, + "loss": 0.0519, + "step": 112990 + }, + { + "epoch": 4.106403081619304, + "grad_norm": 3.5668628215789795, + "learning_rate": 1.3274519571468372e-05, + "loss": 0.0664, + "step": 113000 + }, + { + "epoch": 4.1067664801221015, + "grad_norm": 0.6797897219657898, + "learning_rate": 1.3269936680495573e-05, + "loss": 0.0626, + "step": 113010 + }, + { + "epoch": 4.1071298786249, + "grad_norm": 0.6676300168037415, + "learning_rate": 1.3265354294929455e-05, + "loss": 0.0471, + "step": 113020 + }, + { + "epoch": 4.107493277127698, + "grad_norm": 0.3579924404621124, + "learning_rate": 1.326077241496747e-05, + "loss": 0.0484, + "step": 113030 + }, + { + "epoch": 4.107856675630496, + "grad_norm": 1.363911509513855, + "learning_rate": 1.3256191040807048e-05, + "loss": 0.0527, + "step": 113040 + }, + { + "epoch": 4.108220074133294, + "grad_norm": 0.37151971459388733, + "learning_rate": 1.3251610172645553e-05, + "loss": 0.0579, + "step": 113050 + }, + { + "epoch": 4.108583472636092, + "grad_norm": 0.368559330701828, + "learning_rate": 1.3247029810680378e-05, + "loss": 0.0671, + "step": 113060 + }, + { + "epoch": 4.108946871138891, + "grad_norm": 3.4040791988372803, + "learning_rate": 1.324244995510887e-05, + "loss": 0.0607, + "step": 113070 + }, + { + "epoch": 4.109310269641689, + "grad_norm": 0.24719972908496857, + "learning_rate": 1.323787060612837e-05, + "loss": 0.0534, + "step": 113080 + }, + { + "epoch": 4.109673668144487, + "grad_norm": 0.29014429450035095, + "learning_rate": 1.3233291763936167e-05, + "loss": 0.0538, + "step": 113090 + }, + { + "epoch": 4.110037066647285, + "grad_norm": 0.49623697996139526, + "learning_rate": 1.3228713428729553e-05, + "loss": 0.0659, + "step": 113100 + }, + { + "epoch": 4.110400465150083, + "grad_norm": 0.7023900747299194, + "learning_rate": 1.3224135600705798e-05, + "loss": 0.0714, + "step": 113110 + }, + { + "epoch": 4.110763863652882, + "grad_norm": 1.4295860528945923, + "learning_rate": 1.321955828006215e-05, + "loss": 0.0685, + "step": 113120 + }, + { + "epoch": 4.11112726215568, + "grad_norm": 0.9260228276252747, + "learning_rate": 1.321498146699581e-05, + "loss": 0.0601, + "step": 113130 + }, + { + "epoch": 4.111490660658478, + "grad_norm": 0.5110155344009399, + "learning_rate": 1.3210405161703987e-05, + "loss": 0.0489, + "step": 113140 + }, + { + "epoch": 4.111854059161276, + "grad_norm": 0.49911022186279297, + "learning_rate": 1.3205829364383871e-05, + "loss": 0.0497, + "step": 113150 + }, + { + "epoch": 4.112217457664075, + "grad_norm": 0.44169220328330994, + "learning_rate": 1.3201254075232592e-05, + "loss": 0.0716, + "step": 113160 + }, + { + "epoch": 4.112580856166873, + "grad_norm": 0.34353122115135193, + "learning_rate": 1.3196679294447295e-05, + "loss": 0.0888, + "step": 113170 + }, + { + "epoch": 4.112944254669671, + "grad_norm": 0.28636273741722107, + "learning_rate": 1.3192105022225098e-05, + "loss": 0.0494, + "step": 113180 + }, + { + "epoch": 4.113307653172469, + "grad_norm": 1.3827937841415405, + "learning_rate": 1.3187531258763078e-05, + "loss": 0.054, + "step": 113190 + }, + { + "epoch": 4.113671051675267, + "grad_norm": 0.4005539119243622, + "learning_rate": 1.3182958004258306e-05, + "loss": 0.0566, + "step": 113200 + }, + { + "epoch": 4.114034450178066, + "grad_norm": 0.529242217540741, + "learning_rate": 1.3178385258907827e-05, + "loss": 0.0661, + "step": 113210 + }, + { + "epoch": 4.114397848680864, + "grad_norm": 0.6762093901634216, + "learning_rate": 1.3173813022908677e-05, + "loss": 0.083, + "step": 113220 + }, + { + "epoch": 4.114761247183662, + "grad_norm": 0.6283437013626099, + "learning_rate": 1.3169241296457835e-05, + "loss": 0.1065, + "step": 113230 + }, + { + "epoch": 4.11512464568646, + "grad_norm": 1.0765012502670288, + "learning_rate": 1.316467007975229e-05, + "loss": 0.0463, + "step": 113240 + }, + { + "epoch": 4.115488044189258, + "grad_norm": 0.43171441555023193, + "learning_rate": 1.3160099372989004e-05, + "loss": 0.0582, + "step": 113250 + }, + { + "epoch": 4.1158514426920565, + "grad_norm": 0.26878660917282104, + "learning_rate": 1.3155529176364917e-05, + "loss": 0.0657, + "step": 113260 + }, + { + "epoch": 4.1162148411948545, + "grad_norm": 0.6298261880874634, + "learning_rate": 1.3150959490076929e-05, + "loss": 0.0584, + "step": 113270 + }, + { + "epoch": 4.1165782396976525, + "grad_norm": 0.36110997200012207, + "learning_rate": 1.3146390314321944e-05, + "loss": 0.05, + "step": 113280 + }, + { + "epoch": 4.1169416382004504, + "grad_norm": 0.2957223653793335, + "learning_rate": 1.3141821649296803e-05, + "loss": 0.0494, + "step": 113290 + }, + { + "epoch": 4.117305036703248, + "grad_norm": 0.17959628999233246, + "learning_rate": 1.31372534951984e-05, + "loss": 0.0549, + "step": 113300 + }, + { + "epoch": 4.117668435206047, + "grad_norm": 0.5282506942749023, + "learning_rate": 1.3132685852223526e-05, + "loss": 0.9037, + "step": 113310 + }, + { + "epoch": 4.118031833708845, + "grad_norm": 0.44463062286376953, + "learning_rate": 1.3128118720569002e-05, + "loss": 0.0727, + "step": 113320 + }, + { + "epoch": 4.118395232211643, + "grad_norm": 0.9258912801742554, + "learning_rate": 1.3123552100431593e-05, + "loss": 0.0559, + "step": 113330 + }, + { + "epoch": 4.118758630714441, + "grad_norm": 0.6866888999938965, + "learning_rate": 1.311898599200807e-05, + "loss": 0.047, + "step": 113340 + }, + { + "epoch": 4.119122029217239, + "grad_norm": 0.4890584647655487, + "learning_rate": 1.3114420395495164e-05, + "loss": 0.0588, + "step": 113350 + }, + { + "epoch": 4.119485427720038, + "grad_norm": 0.5454927682876587, + "learning_rate": 1.3109855311089606e-05, + "loss": 0.0704, + "step": 113360 + }, + { + "epoch": 4.119848826222836, + "grad_norm": 0.9774706959724426, + "learning_rate": 1.3105290738988068e-05, + "loss": 0.0824, + "step": 113370 + }, + { + "epoch": 4.120212224725634, + "grad_norm": 0.5349249243736267, + "learning_rate": 1.3100726679387228e-05, + "loss": 0.0626, + "step": 113380 + }, + { + "epoch": 4.120575623228432, + "grad_norm": 0.38488900661468506, + "learning_rate": 1.3096163132483741e-05, + "loss": 1.1301, + "step": 113390 + }, + { + "epoch": 4.12093902173123, + "grad_norm": 0.48048198223114014, + "learning_rate": 1.3091600098474238e-05, + "loss": 0.0758, + "step": 113400 + }, + { + "epoch": 4.12093902173123, + "eval_loss": 0.2987889349460602, + "eval_runtime": 178.5175, + "eval_samples_per_second": 41.531, + "eval_steps_per_second": 5.193, + "eval_wer": 0.12876903806705756, + "step": 113400 + }, + { + "epoch": 4.121302420234029, + "grad_norm": 0.39969778060913086, + "learning_rate": 1.3087037577555309e-05, + "loss": 0.0697, + "step": 113410 + }, + { + "epoch": 4.121665818736827, + "grad_norm": 1.1826426982879639, + "learning_rate": 1.3082475569923553e-05, + "loss": 0.0586, + "step": 113420 + }, + { + "epoch": 4.122029217239625, + "grad_norm": 0.36098846793174744, + "learning_rate": 1.3077914075775499e-05, + "loss": 0.0632, + "step": 113430 + }, + { + "epoch": 4.122392615742423, + "grad_norm": 0.37273460626602173, + "learning_rate": 1.3073353095307733e-05, + "loss": 0.0491, + "step": 113440 + }, + { + "epoch": 4.122756014245221, + "grad_norm": 0.4271377921104431, + "learning_rate": 1.3068792628716736e-05, + "loss": 0.067, + "step": 113450 + }, + { + "epoch": 4.12311941274802, + "grad_norm": 0.5918843746185303, + "learning_rate": 1.3064232676199023e-05, + "loss": 0.0751, + "step": 113460 + }, + { + "epoch": 4.123482811250818, + "grad_norm": 1.6499254703521729, + "learning_rate": 1.3059673237951044e-05, + "loss": 0.0618, + "step": 113470 + }, + { + "epoch": 4.123846209753616, + "grad_norm": 0.35528233647346497, + "learning_rate": 1.3055114314169265e-05, + "loss": 0.0704, + "step": 113480 + }, + { + "epoch": 4.124209608256414, + "grad_norm": 0.2478209286928177, + "learning_rate": 1.3050555905050107e-05, + "loss": 0.0519, + "step": 113490 + }, + { + "epoch": 4.124573006759213, + "grad_norm": 0.3479679524898529, + "learning_rate": 1.304599801078999e-05, + "loss": 0.0691, + "step": 113500 + }, + { + "epoch": 4.1249364052620106, + "grad_norm": 0.5923532843589783, + "learning_rate": 1.3041440631585278e-05, + "loss": 0.0537, + "step": 113510 + }, + { + "epoch": 4.1252998037648085, + "grad_norm": 0.7202960848808289, + "learning_rate": 1.3036883767632339e-05, + "loss": 0.0693, + "step": 113520 + }, + { + "epoch": 4.1256632022676065, + "grad_norm": 0.3048873841762543, + "learning_rate": 1.3032327419127513e-05, + "loss": 0.0655, + "step": 113530 + }, + { + "epoch": 4.1260266007704045, + "grad_norm": 0.3401569724082947, + "learning_rate": 1.3027771586267129e-05, + "loss": 0.0435, + "step": 113540 + }, + { + "epoch": 4.126389999273203, + "grad_norm": 1.8426971435546875, + "learning_rate": 1.3023216269247457e-05, + "loss": 0.0668, + "step": 113550 + }, + { + "epoch": 4.126753397776001, + "grad_norm": 0.6817682385444641, + "learning_rate": 1.3018661468264795e-05, + "loss": 0.0567, + "step": 113560 + }, + { + "epoch": 4.127116796278799, + "grad_norm": 0.8271031379699707, + "learning_rate": 1.3014107183515362e-05, + "loss": 0.1007, + "step": 113570 + }, + { + "epoch": 4.127480194781597, + "grad_norm": 0.24805913865566254, + "learning_rate": 1.300955341519542e-05, + "loss": 0.0572, + "step": 113580 + }, + { + "epoch": 4.127843593284395, + "grad_norm": 15.380496978759766, + "learning_rate": 1.3005000163501152e-05, + "loss": 0.0565, + "step": 113590 + }, + { + "epoch": 4.128206991787194, + "grad_norm": 2.278918981552124, + "learning_rate": 1.300044742862876e-05, + "loss": 0.0818, + "step": 113600 + }, + { + "epoch": 4.128570390289992, + "grad_norm": 0.5283384919166565, + "learning_rate": 1.2995895210774381e-05, + "loss": 0.049, + "step": 113610 + }, + { + "epoch": 4.12893378879279, + "grad_norm": 2.944115161895752, + "learning_rate": 1.299134351013417e-05, + "loss": 0.0599, + "step": 113620 + }, + { + "epoch": 4.129297187295588, + "grad_norm": 1.6555734872817993, + "learning_rate": 1.2986792326904235e-05, + "loss": 0.0717, + "step": 113630 + }, + { + "epoch": 4.129660585798386, + "grad_norm": 0.5407847762107849, + "learning_rate": 1.2982241661280688e-05, + "loss": 0.053, + "step": 113640 + }, + { + "epoch": 4.130023984301185, + "grad_norm": 0.7451368570327759, + "learning_rate": 1.2977691513459578e-05, + "loss": 0.0681, + "step": 113650 + }, + { + "epoch": 4.130387382803983, + "grad_norm": 5.28464937210083, + "learning_rate": 1.2973141883636978e-05, + "loss": 0.0695, + "step": 113660 + }, + { + "epoch": 4.130750781306781, + "grad_norm": 0.9070919156074524, + "learning_rate": 1.296859277200888e-05, + "loss": 0.0868, + "step": 113670 + }, + { + "epoch": 4.131114179809579, + "grad_norm": 0.5017779469490051, + "learning_rate": 1.2964044178771333e-05, + "loss": 0.0691, + "step": 113680 + }, + { + "epoch": 4.131477578312377, + "grad_norm": 0.41018345952033997, + "learning_rate": 1.295949610412029e-05, + "loss": 0.0495, + "step": 113690 + }, + { + "epoch": 4.131840976815176, + "grad_norm": 1.58717942237854, + "learning_rate": 1.2954948548251724e-05, + "loss": 0.0719, + "step": 113700 + }, + { + "epoch": 4.132204375317974, + "grad_norm": 0.3683645725250244, + "learning_rate": 1.2950401511361554e-05, + "loss": 0.0739, + "step": 113710 + }, + { + "epoch": 4.132567773820772, + "grad_norm": 0.510909914970398, + "learning_rate": 1.2945854993645726e-05, + "loss": 0.08, + "step": 113720 + }, + { + "epoch": 4.13293117232357, + "grad_norm": 1.1862256526947021, + "learning_rate": 1.2941308995300111e-05, + "loss": 0.057, + "step": 113730 + }, + { + "epoch": 4.133294570826369, + "grad_norm": 0.42124128341674805, + "learning_rate": 1.2936763516520595e-05, + "loss": 0.0452, + "step": 113740 + }, + { + "epoch": 4.133657969329167, + "grad_norm": 0.6174753308296204, + "learning_rate": 1.2932218557503007e-05, + "loss": 0.0952, + "step": 113750 + }, + { + "epoch": 4.134021367831965, + "grad_norm": 0.9886456727981567, + "learning_rate": 1.2927674118443184e-05, + "loss": 0.0912, + "step": 113760 + }, + { + "epoch": 4.134384766334763, + "grad_norm": 0.4314543306827545, + "learning_rate": 1.292313019953693e-05, + "loss": 0.0655, + "step": 113770 + }, + { + "epoch": 4.134748164837561, + "grad_norm": 0.4748517870903015, + "learning_rate": 1.2918586800980037e-05, + "loss": 0.0575, + "step": 113780 + }, + { + "epoch": 4.1351115633403595, + "grad_norm": 0.5830983519554138, + "learning_rate": 1.2914043922968244e-05, + "loss": 0.0471, + "step": 113790 + }, + { + "epoch": 4.135474961843157, + "grad_norm": 0.43536341190338135, + "learning_rate": 1.2909501565697305e-05, + "loss": 0.0533, + "step": 113800 + }, + { + "epoch": 4.135838360345955, + "grad_norm": 0.5548887252807617, + "learning_rate": 1.2904959729362904e-05, + "loss": 0.0665, + "step": 113810 + }, + { + "epoch": 4.136201758848753, + "grad_norm": 0.49552205204963684, + "learning_rate": 1.2900418414160775e-05, + "loss": 0.0772, + "step": 113820 + }, + { + "epoch": 4.136565157351551, + "grad_norm": 0.6507740616798401, + "learning_rate": 1.2895877620286556e-05, + "loss": 0.0564, + "step": 113830 + }, + { + "epoch": 4.13692855585435, + "grad_norm": 2.410308361053467, + "learning_rate": 1.2891337347935916e-05, + "loss": 0.8506, + "step": 113840 + }, + { + "epoch": 4.137291954357148, + "grad_norm": 1.289736270904541, + "learning_rate": 1.2886797597304456e-05, + "loss": 0.0775, + "step": 113850 + }, + { + "epoch": 4.137655352859946, + "grad_norm": 0.49139320850372314, + "learning_rate": 1.2882258368587785e-05, + "loss": 0.1908, + "step": 113860 + }, + { + "epoch": 4.138018751362744, + "grad_norm": 13.986310958862305, + "learning_rate": 1.287771966198149e-05, + "loss": 0.0872, + "step": 113870 + }, + { + "epoch": 4.138382149865542, + "grad_norm": 0.320834755897522, + "learning_rate": 1.2873181477681134e-05, + "loss": 0.0608, + "step": 113880 + }, + { + "epoch": 4.138745548368341, + "grad_norm": 0.6267417073249817, + "learning_rate": 1.2868643815882228e-05, + "loss": 0.0474, + "step": 113890 + }, + { + "epoch": 4.139108946871139, + "grad_norm": 0.6176286935806274, + "learning_rate": 1.2864106676780308e-05, + "loss": 0.06, + "step": 113900 + }, + { + "epoch": 4.139472345373937, + "grad_norm": 1.188921570777893, + "learning_rate": 1.285957006057083e-05, + "loss": 0.0597, + "step": 113910 + }, + { + "epoch": 4.139835743876735, + "grad_norm": 0.4680976867675781, + "learning_rate": 1.2855033967449304e-05, + "loss": 0.0616, + "step": 113920 + }, + { + "epoch": 4.140199142379533, + "grad_norm": 1.202904462814331, + "learning_rate": 1.2850498397611144e-05, + "loss": 0.0528, + "step": 113930 + }, + { + "epoch": 4.140562540882332, + "grad_norm": 0.6726404428482056, + "learning_rate": 1.2845963351251786e-05, + "loss": 0.0571, + "step": 113940 + }, + { + "epoch": 4.14092593938513, + "grad_norm": 0.5581681132316589, + "learning_rate": 1.2841428828566604e-05, + "loss": 0.069, + "step": 113950 + }, + { + "epoch": 4.141289337887928, + "grad_norm": 1.7517188787460327, + "learning_rate": 1.2836894829751015e-05, + "loss": 0.0647, + "step": 113960 + }, + { + "epoch": 4.141652736390726, + "grad_norm": 0.6693496108055115, + "learning_rate": 1.2832361355000339e-05, + "loss": 0.0517, + "step": 113970 + }, + { + "epoch": 4.142016134893524, + "grad_norm": 0.4584248661994934, + "learning_rate": 1.2827828404509935e-05, + "loss": 0.0677, + "step": 113980 + }, + { + "epoch": 4.142379533396323, + "grad_norm": 0.39411190152168274, + "learning_rate": 1.282329597847508e-05, + "loss": 0.05, + "step": 113990 + }, + { + "epoch": 4.142742931899121, + "grad_norm": 0.3312693238258362, + "learning_rate": 1.2818764077091077e-05, + "loss": 0.0519, + "step": 114000 + }, + { + "epoch": 4.142742931899121, + "eval_loss": 0.3173038363456726, + "eval_runtime": 178.9243, + "eval_samples_per_second": 41.437, + "eval_steps_per_second": 5.181, + "eval_wer": 0.1280792201426834, + "step": 114000 + }, + { + "epoch": 4.143106330401919, + "grad_norm": 0.6391065716743469, + "learning_rate": 1.2814232700553191e-05, + "loss": 0.0669, + "step": 114010 + }, + { + "epoch": 4.143469728904717, + "grad_norm": 0.6498408317565918, + "learning_rate": 1.2809701849056671e-05, + "loss": 0.0594, + "step": 114020 + }, + { + "epoch": 4.143833127407515, + "grad_norm": 0.3123835623264313, + "learning_rate": 1.2805171522796715e-05, + "loss": 0.0644, + "step": 114030 + }, + { + "epoch": 4.1441965259103135, + "grad_norm": 1.046025276184082, + "learning_rate": 1.2800641721968537e-05, + "loss": 0.0546, + "step": 114040 + }, + { + "epoch": 4.1445599244131115, + "grad_norm": 0.5524206757545471, + "learning_rate": 1.2796112446767286e-05, + "loss": 0.0877, + "step": 114050 + }, + { + "epoch": 4.1449233229159095, + "grad_norm": 0.873522162437439, + "learning_rate": 1.2791583697388143e-05, + "loss": 0.0731, + "step": 114060 + }, + { + "epoch": 4.1452867214187075, + "grad_norm": 0.4891306161880493, + "learning_rate": 1.2787055474026216e-05, + "loss": 0.0705, + "step": 114070 + }, + { + "epoch": 4.145650119921506, + "grad_norm": 0.36137646436691284, + "learning_rate": 1.278252777687662e-05, + "loss": 0.0818, + "step": 114080 + }, + { + "epoch": 4.146013518424304, + "grad_norm": 0.3782752454280853, + "learning_rate": 1.2778000606134428e-05, + "loss": 0.0723, + "step": 114090 + }, + { + "epoch": 4.146376916927102, + "grad_norm": 0.6145089268684387, + "learning_rate": 1.2773473961994697e-05, + "loss": 0.0587, + "step": 114100 + }, + { + "epoch": 4.1467403154299, + "grad_norm": 4.576334476470947, + "learning_rate": 1.2768947844652474e-05, + "loss": 0.0809, + "step": 114110 + }, + { + "epoch": 4.147103713932698, + "grad_norm": 0.6847585439682007, + "learning_rate": 1.276442225430278e-05, + "loss": 0.0586, + "step": 114120 + }, + { + "epoch": 4.147467112435497, + "grad_norm": 0.6855227947235107, + "learning_rate": 1.2759897191140586e-05, + "loss": 0.0534, + "step": 114130 + }, + { + "epoch": 4.147830510938295, + "grad_norm": 0.34615084528923035, + "learning_rate": 1.2755372655360875e-05, + "loss": 0.0522, + "step": 114140 + }, + { + "epoch": 4.148193909441093, + "grad_norm": 1.5634496212005615, + "learning_rate": 1.2750848647158586e-05, + "loss": 0.0662, + "step": 114150 + }, + { + "epoch": 4.148557307943891, + "grad_norm": 0.3985532224178314, + "learning_rate": 1.2746325166728656e-05, + "loss": 0.0716, + "step": 114160 + }, + { + "epoch": 4.148920706446689, + "grad_norm": 1.0279290676116943, + "learning_rate": 1.2741802214265969e-05, + "loss": 0.0597, + "step": 114170 + }, + { + "epoch": 4.149284104949488, + "grad_norm": 0.3082030713558197, + "learning_rate": 1.2737279789965417e-05, + "loss": 0.0802, + "step": 114180 + }, + { + "epoch": 4.149647503452286, + "grad_norm": 0.3072323203086853, + "learning_rate": 1.2732757894021829e-05, + "loss": 0.0595, + "step": 114190 + }, + { + "epoch": 4.150010901955084, + "grad_norm": 0.3936696946620941, + "learning_rate": 1.2728236526630077e-05, + "loss": 0.0528, + "step": 114200 + }, + { + "epoch": 4.150374300457882, + "grad_norm": 0.5724795460700989, + "learning_rate": 1.2723715687984938e-05, + "loss": 0.1364, + "step": 114210 + }, + { + "epoch": 4.15073769896068, + "grad_norm": 0.5028474926948547, + "learning_rate": 1.2719195378281223e-05, + "loss": 0.0613, + "step": 114220 + }, + { + "epoch": 4.151101097463479, + "grad_norm": 0.5082797408103943, + "learning_rate": 1.2714675597713672e-05, + "loss": 0.0633, + "step": 114230 + }, + { + "epoch": 4.151464495966277, + "grad_norm": 0.3836214244365692, + "learning_rate": 1.271015634647704e-05, + "loss": 0.0624, + "step": 114240 + }, + { + "epoch": 4.151827894469075, + "grad_norm": 0.6407490968704224, + "learning_rate": 1.2705637624766042e-05, + "loss": 0.0571, + "step": 114250 + }, + { + "epoch": 4.152191292971873, + "grad_norm": 0.4085807204246521, + "learning_rate": 1.2701119432775389e-05, + "loss": 0.0735, + "step": 114260 + }, + { + "epoch": 4.152554691474671, + "grad_norm": 3.630969762802124, + "learning_rate": 1.2696601770699723e-05, + "loss": 0.081, + "step": 114270 + }, + { + "epoch": 4.15291808997747, + "grad_norm": 2.0950167179107666, + "learning_rate": 1.2692084638733725e-05, + "loss": 0.0455, + "step": 114280 + }, + { + "epoch": 4.153281488480268, + "grad_norm": 0.29622146487236023, + "learning_rate": 1.2687568037071989e-05, + "loss": 0.0506, + "step": 114290 + }, + { + "epoch": 4.1536448869830656, + "grad_norm": 0.46987539529800415, + "learning_rate": 1.268305196590916e-05, + "loss": 0.0421, + "step": 114300 + }, + { + "epoch": 4.1540082854858635, + "grad_norm": 0.39149653911590576, + "learning_rate": 1.2678536425439785e-05, + "loss": 0.0598, + "step": 114310 + }, + { + "epoch": 4.154371683988662, + "grad_norm": 1.2286535501480103, + "learning_rate": 1.2674021415858445e-05, + "loss": 0.0584, + "step": 114320 + }, + { + "epoch": 4.15473508249146, + "grad_norm": 0.3485181927680969, + "learning_rate": 1.2669506937359649e-05, + "loss": 0.0596, + "step": 114330 + }, + { + "epoch": 4.155098480994258, + "grad_norm": 0.9163201451301575, + "learning_rate": 1.2664992990137947e-05, + "loss": 0.0466, + "step": 114340 + }, + { + "epoch": 4.155461879497056, + "grad_norm": 0.2799241244792938, + "learning_rate": 1.2660479574387796e-05, + "loss": 0.0516, + "step": 114350 + }, + { + "epoch": 4.155825277999854, + "grad_norm": 0.4756034314632416, + "learning_rate": 1.2655966690303689e-05, + "loss": 0.0797, + "step": 114360 + }, + { + "epoch": 4.156188676502653, + "grad_norm": 0.5071646571159363, + "learning_rate": 1.2651454338080043e-05, + "loss": 0.0625, + "step": 114370 + }, + { + "epoch": 4.156552075005451, + "grad_norm": 0.2303503006696701, + "learning_rate": 1.2646942517911298e-05, + "loss": 0.1184, + "step": 114380 + }, + { + "epoch": 4.156915473508249, + "grad_norm": 0.3640551269054413, + "learning_rate": 1.2642431229991847e-05, + "loss": 0.0476, + "step": 114390 + }, + { + "epoch": 4.157278872011047, + "grad_norm": 0.9771270155906677, + "learning_rate": 1.2637920474516074e-05, + "loss": 0.0596, + "step": 114400 + }, + { + "epoch": 4.157642270513845, + "grad_norm": 0.4869442880153656, + "learning_rate": 1.2633410251678313e-05, + "loss": 0.0765, + "step": 114410 + }, + { + "epoch": 4.158005669016644, + "grad_norm": 5.728058338165283, + "learning_rate": 1.2628900561672913e-05, + "loss": 0.0707, + "step": 114420 + }, + { + "epoch": 4.158369067519442, + "grad_norm": 0.33092445135116577, + "learning_rate": 1.2624391404694156e-05, + "loss": 0.0514, + "step": 114430 + }, + { + "epoch": 4.15873246602224, + "grad_norm": 0.5583271384239197, + "learning_rate": 1.2619882780936358e-05, + "loss": 0.0618, + "step": 114440 + }, + { + "epoch": 4.159095864525038, + "grad_norm": 0.8753048777580261, + "learning_rate": 1.2615374690593751e-05, + "loss": 0.0672, + "step": 114450 + }, + { + "epoch": 4.159459263027836, + "grad_norm": 0.7249387502670288, + "learning_rate": 1.2610867133860594e-05, + "loss": 0.0619, + "step": 114460 + }, + { + "epoch": 4.159822661530635, + "grad_norm": 1.9549199342727661, + "learning_rate": 1.2606360110931081e-05, + "loss": 0.0645, + "step": 114470 + }, + { + "epoch": 4.160186060033433, + "grad_norm": 1.0763561725616455, + "learning_rate": 1.2601853621999419e-05, + "loss": 0.0519, + "step": 114480 + }, + { + "epoch": 4.160549458536231, + "grad_norm": 0.9392730593681335, + "learning_rate": 1.2597347667259768e-05, + "loss": 0.047, + "step": 114490 + }, + { + "epoch": 4.160912857039029, + "grad_norm": 7.717432022094727, + "learning_rate": 1.2592842246906286e-05, + "loss": 0.0801, + "step": 114500 + }, + { + "epoch": 4.161276255541827, + "grad_norm": 0.4307349920272827, + "learning_rate": 1.2588337361133079e-05, + "loss": 0.0749, + "step": 114510 + }, + { + "epoch": 4.161639654044626, + "grad_norm": 0.6074416041374207, + "learning_rate": 1.2583833010134255e-05, + "loss": 0.0548, + "step": 114520 + }, + { + "epoch": 4.162003052547424, + "grad_norm": 0.41101697087287903, + "learning_rate": 1.257932919410389e-05, + "loss": 0.0763, + "step": 114530 + }, + { + "epoch": 4.162366451050222, + "grad_norm": 0.47247016429901123, + "learning_rate": 1.2574825913236043e-05, + "loss": 0.0527, + "step": 114540 + }, + { + "epoch": 4.16272984955302, + "grad_norm": 0.8120209574699402, + "learning_rate": 1.257032316772473e-05, + "loss": 0.053, + "step": 114550 + }, + { + "epoch": 4.163093248055818, + "grad_norm": 0.4224017560482025, + "learning_rate": 1.256582095776398e-05, + "loss": 0.0652, + "step": 114560 + }, + { + "epoch": 4.1634566465586165, + "grad_norm": 0.7514461278915405, + "learning_rate": 1.256131928354774e-05, + "loss": 0.0596, + "step": 114570 + }, + { + "epoch": 4.1638200450614145, + "grad_norm": 0.5608872175216675, + "learning_rate": 1.2556818145270017e-05, + "loss": 0.0599, + "step": 114580 + }, + { + "epoch": 4.164183443564212, + "grad_norm": 0.22857458889484406, + "learning_rate": 1.2552317543124717e-05, + "loss": 0.0619, + "step": 114590 + }, + { + "epoch": 4.16454684206701, + "grad_norm": 0.5940126180648804, + "learning_rate": 1.2547817477305773e-05, + "loss": 0.0539, + "step": 114600 + }, + { + "epoch": 4.16454684206701, + "eval_loss": 0.32606130838394165, + "eval_runtime": 179.2442, + "eval_samples_per_second": 41.363, + "eval_steps_per_second": 5.172, + "eval_wer": 0.12764354566413127, + "step": 114600 + }, + { + "epoch": 4.164910240569808, + "grad_norm": 0.45421409606933594, + "learning_rate": 1.2543317948007063e-05, + "loss": 0.0765, + "step": 114610 + }, + { + "epoch": 4.165273639072607, + "grad_norm": 1.46690833568573, + "learning_rate": 1.253881895542246e-05, + "loss": 0.0599, + "step": 114620 + }, + { + "epoch": 4.165637037575405, + "grad_norm": 0.26001593470573425, + "learning_rate": 1.2534320499745811e-05, + "loss": 0.0466, + "step": 114630 + }, + { + "epoch": 4.166000436078203, + "grad_norm": 0.26628535985946655, + "learning_rate": 1.2529822581170947e-05, + "loss": 0.0494, + "step": 114640 + }, + { + "epoch": 4.166363834581001, + "grad_norm": 0.5085469484329224, + "learning_rate": 1.2525325199891653e-05, + "loss": 0.0526, + "step": 114650 + }, + { + "epoch": 4.1667272330838, + "grad_norm": 31.338340759277344, + "learning_rate": 1.2520828356101716e-05, + "loss": 0.0675, + "step": 114660 + }, + { + "epoch": 4.167090631586598, + "grad_norm": 1.0442249774932861, + "learning_rate": 1.2516332049994866e-05, + "loss": 0.0646, + "step": 114670 + }, + { + "epoch": 4.167454030089396, + "grad_norm": 0.6965683102607727, + "learning_rate": 1.251183628176487e-05, + "loss": 0.0552, + "step": 114680 + }, + { + "epoch": 4.167817428592194, + "grad_norm": 0.42971551418304443, + "learning_rate": 1.250734105160541e-05, + "loss": 0.0583, + "step": 114690 + }, + { + "epoch": 4.168180827094992, + "grad_norm": 0.34043270349502563, + "learning_rate": 1.250284635971018e-05, + "loss": 0.0619, + "step": 114700 + }, + { + "epoch": 4.168544225597791, + "grad_norm": 0.6225563287734985, + "learning_rate": 1.249835220627282e-05, + "loss": 0.0668, + "step": 114710 + }, + { + "epoch": 4.168907624100589, + "grad_norm": 0.7544811367988586, + "learning_rate": 1.2493858591486998e-05, + "loss": 0.0522, + "step": 114720 + }, + { + "epoch": 4.169271022603387, + "grad_norm": 0.51103675365448, + "learning_rate": 1.2489365515546306e-05, + "loss": 0.051, + "step": 114730 + }, + { + "epoch": 4.169634421106185, + "grad_norm": 0.8677123188972473, + "learning_rate": 1.2484872978644349e-05, + "loss": 0.0456, + "step": 114740 + }, + { + "epoch": 4.169997819608983, + "grad_norm": 0.3584132790565491, + "learning_rate": 1.2480380980974676e-05, + "loss": 0.0679, + "step": 114750 + }, + { + "epoch": 4.170361218111782, + "grad_norm": 0.36030539870262146, + "learning_rate": 1.247588952273084e-05, + "loss": 0.0744, + "step": 114760 + }, + { + "epoch": 4.17072461661458, + "grad_norm": 3.283201217651367, + "learning_rate": 1.2471398604106368e-05, + "loss": 0.0603, + "step": 114770 + }, + { + "epoch": 4.171088015117378, + "grad_norm": 0.437751829624176, + "learning_rate": 1.246690822529476e-05, + "loss": 0.0508, + "step": 114780 + }, + { + "epoch": 4.171451413620176, + "grad_norm": 0.37101438641548157, + "learning_rate": 1.2462418386489474e-05, + "loss": 0.0639, + "step": 114790 + }, + { + "epoch": 4.171814812122974, + "grad_norm": 0.5961673259735107, + "learning_rate": 1.2457929087883982e-05, + "loss": 0.0548, + "step": 114800 + }, + { + "epoch": 4.1721782106257725, + "grad_norm": 0.9467266798019409, + "learning_rate": 1.2453440329671682e-05, + "loss": 0.0457, + "step": 114810 + }, + { + "epoch": 4.1725416091285705, + "grad_norm": 1.0820558071136475, + "learning_rate": 1.2448952112046014e-05, + "loss": 0.0864, + "step": 114820 + }, + { + "epoch": 4.1729050076313685, + "grad_norm": 1.219152569770813, + "learning_rate": 1.2444464435200335e-05, + "loss": 0.0458, + "step": 114830 + }, + { + "epoch": 4.1732684061341665, + "grad_norm": 0.5412958264350891, + "learning_rate": 1.2439977299328021e-05, + "loss": 0.0587, + "step": 114840 + }, + { + "epoch": 4.1736318046369645, + "grad_norm": 1.5153650045394897, + "learning_rate": 1.2435490704622384e-05, + "loss": 0.0722, + "step": 114850 + }, + { + "epoch": 4.173995203139763, + "grad_norm": 0.6175846457481384, + "learning_rate": 1.2431004651276751e-05, + "loss": 0.0602, + "step": 114860 + }, + { + "epoch": 4.174358601642561, + "grad_norm": 0.7764977812767029, + "learning_rate": 1.2426519139484404e-05, + "loss": 0.0556, + "step": 114870 + }, + { + "epoch": 4.174722000145359, + "grad_norm": 1.1157594919204712, + "learning_rate": 1.2422034169438623e-05, + "loss": 0.059, + "step": 114880 + }, + { + "epoch": 4.175085398648157, + "grad_norm": 0.2795602083206177, + "learning_rate": 1.2417549741332626e-05, + "loss": 0.0476, + "step": 114890 + }, + { + "epoch": 4.175448797150956, + "grad_norm": 1.7806609869003296, + "learning_rate": 1.2413065855359643e-05, + "loss": 0.0533, + "step": 114900 + }, + { + "epoch": 4.175812195653754, + "grad_norm": 0.41895151138305664, + "learning_rate": 1.2408582511712865e-05, + "loss": 0.0586, + "step": 114910 + }, + { + "epoch": 4.176175594156552, + "grad_norm": 4.639927387237549, + "learning_rate": 1.240409971058548e-05, + "loss": 0.0753, + "step": 114920 + }, + { + "epoch": 4.17653899265935, + "grad_norm": 0.5736679434776306, + "learning_rate": 1.239961745217061e-05, + "loss": 0.0558, + "step": 114930 + }, + { + "epoch": 4.176902391162148, + "grad_norm": 0.38681358098983765, + "learning_rate": 1.23951357366614e-05, + "loss": 0.0807, + "step": 114940 + }, + { + "epoch": 4.177265789664947, + "grad_norm": 3.046159505844116, + "learning_rate": 1.2390654564250926e-05, + "loss": 0.0538, + "step": 114950 + }, + { + "epoch": 4.177629188167745, + "grad_norm": 0.7652380466461182, + "learning_rate": 1.2386173935132303e-05, + "loss": 0.0783, + "step": 114960 + }, + { + "epoch": 4.177992586670543, + "grad_norm": 0.5098959803581238, + "learning_rate": 1.2381693849498551e-05, + "loss": 0.0581, + "step": 114970 + }, + { + "epoch": 4.178355985173341, + "grad_norm": 0.18062551319599152, + "learning_rate": 1.2377214307542729e-05, + "loss": 0.0618, + "step": 114980 + }, + { + "epoch": 4.178719383676139, + "grad_norm": 0.37934646010398865, + "learning_rate": 1.2372735309457819e-05, + "loss": 0.0653, + "step": 114990 + }, + { + "epoch": 4.179082782178938, + "grad_norm": 0.48118042945861816, + "learning_rate": 1.2368256855436816e-05, + "loss": 0.0598, + "step": 115000 + }, + { + "epoch": 4.179446180681736, + "grad_norm": 1.4239192008972168, + "learning_rate": 1.2363778945672683e-05, + "loss": 0.1146, + "step": 115010 + }, + { + "epoch": 4.179809579184534, + "grad_norm": 0.6761791706085205, + "learning_rate": 1.2359301580358362e-05, + "loss": 0.0623, + "step": 115020 + }, + { + "epoch": 4.180172977687332, + "grad_norm": 0.5229442119598389, + "learning_rate": 1.2354824759686754e-05, + "loss": 0.0637, + "step": 115030 + }, + { + "epoch": 4.18053637619013, + "grad_norm": 0.4049164354801178, + "learning_rate": 1.2350348483850755e-05, + "loss": 0.0489, + "step": 115040 + }, + { + "epoch": 4.180899774692929, + "grad_norm": 0.41811639070510864, + "learning_rate": 1.234587275304323e-05, + "loss": 0.0697, + "step": 115050 + }, + { + "epoch": 4.181263173195727, + "grad_norm": 0.35178762674331665, + "learning_rate": 1.2341397567457036e-05, + "loss": 0.0782, + "step": 115060 + }, + { + "epoch": 4.181626571698525, + "grad_norm": 0.7727785110473633, + "learning_rate": 1.233692292728497e-05, + "loss": 0.0621, + "step": 115070 + }, + { + "epoch": 4.181989970201323, + "grad_norm": 0.7113915681838989, + "learning_rate": 1.2332448832719851e-05, + "loss": 0.0681, + "step": 115080 + }, + { + "epoch": 4.1823533687041206, + "grad_norm": 0.6821020841598511, + "learning_rate": 1.2327975283954429e-05, + "loss": 0.0485, + "step": 115090 + }, + { + "epoch": 4.182716767206919, + "grad_norm": 0.8478249311447144, + "learning_rate": 1.2323502281181464e-05, + "loss": 0.0532, + "step": 115100 + }, + { + "epoch": 4.183080165709717, + "grad_norm": 0.8352124691009521, + "learning_rate": 1.2319029824593687e-05, + "loss": 0.0708, + "step": 115110 + }, + { + "epoch": 4.183443564212515, + "grad_norm": 0.6750608086585999, + "learning_rate": 1.2314557914383804e-05, + "loss": 0.0765, + "step": 115120 + }, + { + "epoch": 4.183806962715313, + "grad_norm": 0.33730462193489075, + "learning_rate": 1.2310086550744474e-05, + "loss": 0.0808, + "step": 115130 + }, + { + "epoch": 4.184170361218111, + "grad_norm": 0.637880265712738, + "learning_rate": 1.2305615733868364e-05, + "loss": 0.057, + "step": 115140 + }, + { + "epoch": 4.18453375972091, + "grad_norm": 1.4142619371414185, + "learning_rate": 1.2301145463948105e-05, + "loss": 0.0628, + "step": 115150 + }, + { + "epoch": 4.184897158223708, + "grad_norm": 0.47432175278663635, + "learning_rate": 1.2296675741176316e-05, + "loss": 0.0775, + "step": 115160 + }, + { + "epoch": 4.185260556726506, + "grad_norm": 0.3818480670452118, + "learning_rate": 1.2292206565745562e-05, + "loss": 0.0738, + "step": 115170 + }, + { + "epoch": 4.185623955229304, + "grad_norm": 0.458845853805542, + "learning_rate": 1.2287737937848412e-05, + "loss": 0.0476, + "step": 115180 + }, + { + "epoch": 4.185987353732102, + "grad_norm": 0.4036356806755066, + "learning_rate": 1.2283269857677402e-05, + "loss": 0.0485, + "step": 115190 + }, + { + "epoch": 4.186350752234901, + "grad_norm": 0.7177650332450867, + "learning_rate": 1.227880232542506e-05, + "loss": 0.0614, + "step": 115200 + }, + { + "epoch": 4.186350752234901, + "eval_loss": 0.33033162355422974, + "eval_runtime": 179.5501, + "eval_samples_per_second": 41.292, + "eval_steps_per_second": 5.163, + "eval_wer": 0.12759816290594878, + "step": 115200 + }, + { + "epoch": 4.186714150737699, + "grad_norm": 0.61422199010849, + "learning_rate": 1.2274335341283851e-05, + "loss": 0.0672, + "step": 115210 + }, + { + "epoch": 4.187077549240497, + "grad_norm": 4.265668869018555, + "learning_rate": 1.2269868905446265e-05, + "loss": 0.0565, + "step": 115220 + }, + { + "epoch": 4.187440947743295, + "grad_norm": 1.605099081993103, + "learning_rate": 1.2265403018104726e-05, + "loss": 0.058, + "step": 115230 + }, + { + "epoch": 4.187804346246094, + "grad_norm": 0.9331768751144409, + "learning_rate": 1.2260937679451659e-05, + "loss": 0.6245, + "step": 115240 + }, + { + "epoch": 4.188167744748892, + "grad_norm": 0.6140132546424866, + "learning_rate": 1.2256472889679462e-05, + "loss": 0.0672, + "step": 115250 + }, + { + "epoch": 4.18853114325169, + "grad_norm": 0.43287473917007446, + "learning_rate": 1.2252008648980518e-05, + "loss": 0.0775, + "step": 115260 + }, + { + "epoch": 4.188894541754488, + "grad_norm": 18.50632667541504, + "learning_rate": 1.2247544957547153e-05, + "loss": 0.0584, + "step": 115270 + }, + { + "epoch": 4.189257940257286, + "grad_norm": 0.5341188907623291, + "learning_rate": 1.22430818155717e-05, + "loss": 0.061, + "step": 115280 + }, + { + "epoch": 4.189621338760085, + "grad_norm": 2.5828707218170166, + "learning_rate": 1.2238619223246464e-05, + "loss": 0.06, + "step": 115290 + }, + { + "epoch": 4.189984737262883, + "grad_norm": 0.912663996219635, + "learning_rate": 1.223415718076373e-05, + "loss": 0.0714, + "step": 115300 + }, + { + "epoch": 4.190348135765681, + "grad_norm": 0.7936631441116333, + "learning_rate": 1.2229695688315735e-05, + "loss": 0.0603, + "step": 115310 + }, + { + "epoch": 4.190711534268479, + "grad_norm": 1.1647089719772339, + "learning_rate": 1.2225234746094713e-05, + "loss": 0.0536, + "step": 115320 + }, + { + "epoch": 4.191074932771277, + "grad_norm": 0.5609497427940369, + "learning_rate": 1.2220774354292874e-05, + "loss": 0.0611, + "step": 115330 + }, + { + "epoch": 4.1914383312740755, + "grad_norm": 0.671658456325531, + "learning_rate": 1.2216314513102409e-05, + "loss": 0.0584, + "step": 115340 + }, + { + "epoch": 4.1918017297768735, + "grad_norm": 0.7967808246612549, + "learning_rate": 1.2211855222715458e-05, + "loss": 0.0605, + "step": 115350 + }, + { + "epoch": 4.1921651282796715, + "grad_norm": 0.4767843782901764, + "learning_rate": 1.2207396483324166e-05, + "loss": 0.0573, + "step": 115360 + }, + { + "epoch": 4.1925285267824695, + "grad_norm": 0.601372480392456, + "learning_rate": 1.220293829512065e-05, + "loss": 0.0879, + "step": 115370 + }, + { + "epoch": 4.192891925285267, + "grad_norm": 0.32629552483558655, + "learning_rate": 1.219892639716179e-05, + "loss": 4.6326, + "step": 115380 + }, + { + "epoch": 4.193255323788066, + "grad_norm": 0.8194393515586853, + "learning_rate": 1.2194469256744206e-05, + "loss": 0.0553, + "step": 115390 + }, + { + "epoch": 4.193618722290864, + "grad_norm": 0.4936831593513489, + "learning_rate": 1.2190012668071382e-05, + "loss": 1.3225, + "step": 115400 + }, + { + "epoch": 4.193982120793662, + "grad_norm": 0.5675899386405945, + "learning_rate": 1.2185556631335335e-05, + "loss": 0.0516, + "step": 115410 + }, + { + "epoch": 4.19434551929646, + "grad_norm": 0.462312787771225, + "learning_rate": 1.2181101146728069e-05, + "loss": 0.0628, + "step": 115420 + }, + { + "epoch": 4.194708917799258, + "grad_norm": 1.7180153131484985, + "learning_rate": 1.2176646214441534e-05, + "loss": 0.0525, + "step": 115430 + }, + { + "epoch": 4.195072316302057, + "grad_norm": 0.39138999581336975, + "learning_rate": 1.2172191834667688e-05, + "loss": 0.058, + "step": 115440 + }, + { + "epoch": 4.195435714804855, + "grad_norm": 6.312756538391113, + "learning_rate": 1.2167738007598452e-05, + "loss": 0.6926, + "step": 115450 + }, + { + "epoch": 4.195799113307653, + "grad_norm": 0.26518774032592773, + "learning_rate": 1.2163284733425743e-05, + "loss": 0.0688, + "step": 115460 + }, + { + "epoch": 4.196162511810451, + "grad_norm": 1.2338483333587646, + "learning_rate": 1.215883201234141e-05, + "loss": 0.0958, + "step": 115470 + }, + { + "epoch": 4.19652591031325, + "grad_norm": 0.4234517514705658, + "learning_rate": 1.2154379844537315e-05, + "loss": 0.0533, + "step": 115480 + }, + { + "epoch": 4.196889308816048, + "grad_norm": 0.7015941143035889, + "learning_rate": 1.2149928230205288e-05, + "loss": 0.0646, + "step": 115490 + }, + { + "epoch": 4.197252707318846, + "grad_norm": 0.7061643004417419, + "learning_rate": 1.2145477169537142e-05, + "loss": 0.0719, + "step": 115500 + }, + { + "epoch": 4.197616105821644, + "grad_norm": 0.7136194109916687, + "learning_rate": 1.2141026662724638e-05, + "loss": 0.1861, + "step": 115510 + }, + { + "epoch": 4.197979504324442, + "grad_norm": 1.6546835899353027, + "learning_rate": 1.2136576709959546e-05, + "loss": 0.0674, + "step": 115520 + }, + { + "epoch": 4.198342902827241, + "grad_norm": 1.6519831418991089, + "learning_rate": 1.2132127311433602e-05, + "loss": 0.0637, + "step": 115530 + }, + { + "epoch": 4.198706301330039, + "grad_norm": 0.9831448197364807, + "learning_rate": 1.21276784673385e-05, + "loss": 0.0655, + "step": 115540 + }, + { + "epoch": 4.199069699832837, + "grad_norm": 0.7948331832885742, + "learning_rate": 1.2123230177865933e-05, + "loss": 0.0488, + "step": 115550 + }, + { + "epoch": 4.199433098335635, + "grad_norm": 0.45860570669174194, + "learning_rate": 1.2118782443207568e-05, + "loss": 0.0785, + "step": 115560 + }, + { + "epoch": 4.199796496838433, + "grad_norm": 1.3545856475830078, + "learning_rate": 1.2114335263555033e-05, + "loss": 0.086, + "step": 115570 + }, + { + "epoch": 4.200159895341232, + "grad_norm": 0.6654021739959717, + "learning_rate": 1.210988863909994e-05, + "loss": 0.0565, + "step": 115580 + }, + { + "epoch": 4.20052329384403, + "grad_norm": 0.3669990301132202, + "learning_rate": 1.210544257003388e-05, + "loss": 0.0695, + "step": 115590 + }, + { + "epoch": 4.2008866923468275, + "grad_norm": 0.6029540300369263, + "learning_rate": 1.2100997056548436e-05, + "loss": 0.0567, + "step": 115600 + }, + { + "epoch": 4.2012500908496255, + "grad_norm": 0.5935440063476562, + "learning_rate": 1.209655209883512e-05, + "loss": 0.0573, + "step": 115610 + }, + { + "epoch": 4.2016134893524235, + "grad_norm": 1.650621771812439, + "learning_rate": 1.2092107697085467e-05, + "loss": 0.09, + "step": 115620 + }, + { + "epoch": 4.201976887855222, + "grad_norm": 0.2956644296646118, + "learning_rate": 1.2087663851490963e-05, + "loss": 0.0537, + "step": 115630 + }, + { + "epoch": 4.20234028635802, + "grad_norm": 0.602063775062561, + "learning_rate": 1.2083220562243094e-05, + "loss": 0.0626, + "step": 115640 + }, + { + "epoch": 4.202703684860818, + "grad_norm": 0.6536275744438171, + "learning_rate": 1.2078777829533283e-05, + "loss": 0.0556, + "step": 115650 + }, + { + "epoch": 4.203067083363616, + "grad_norm": 0.5045903325080872, + "learning_rate": 1.207433565355296e-05, + "loss": 0.4054, + "step": 115660 + }, + { + "epoch": 4.203430481866414, + "grad_norm": 0.513131856918335, + "learning_rate": 1.2069894034493534e-05, + "loss": 0.0532, + "step": 115670 + }, + { + "epoch": 4.203793880369213, + "grad_norm": 0.37641045451164246, + "learning_rate": 1.2065452972546359e-05, + "loss": 1.5798, + "step": 115680 + }, + { + "epoch": 4.204157278872011, + "grad_norm": 0.26772618293762207, + "learning_rate": 1.2061012467902797e-05, + "loss": 0.0484, + "step": 115690 + }, + { + "epoch": 4.204520677374809, + "grad_norm": 2.2048678398132324, + "learning_rate": 1.2056572520754175e-05, + "loss": 0.0441, + "step": 115700 + }, + { + "epoch": 4.204884075877607, + "grad_norm": 0.37708210945129395, + "learning_rate": 1.2052133131291785e-05, + "loss": 0.0616, + "step": 115710 + }, + { + "epoch": 4.205247474380405, + "grad_norm": 0.9134958982467651, + "learning_rate": 1.2047694299706908e-05, + "loss": 0.0575, + "step": 115720 + }, + { + "epoch": 4.205610872883204, + "grad_norm": 2.1388869285583496, + "learning_rate": 1.2043256026190799e-05, + "loss": 0.72, + "step": 115730 + }, + { + "epoch": 4.205974271386002, + "grad_norm": 0.324491947889328, + "learning_rate": 1.2038818310934697e-05, + "loss": 0.0527, + "step": 115740 + }, + { + "epoch": 4.2063376698888, + "grad_norm": 0.5549638271331787, + "learning_rate": 1.203438115412979e-05, + "loss": 0.0604, + "step": 115750 + }, + { + "epoch": 4.206701068391598, + "grad_norm": 0.425426721572876, + "learning_rate": 1.2029944555967265e-05, + "loss": 0.0594, + "step": 115760 + }, + { + "epoch": 4.207064466894396, + "grad_norm": 0.5203282833099365, + "learning_rate": 1.2025508516638292e-05, + "loss": 0.0615, + "step": 115770 + }, + { + "epoch": 4.207427865397195, + "grad_norm": 0.37468624114990234, + "learning_rate": 1.2021073036333985e-05, + "loss": 0.0637, + "step": 115780 + }, + { + "epoch": 4.207791263899993, + "grad_norm": 0.9887217879295349, + "learning_rate": 1.201663811524546e-05, + "loss": 0.0473, + "step": 115790 + }, + { + "epoch": 4.208154662402791, + "grad_norm": 2.432356357574463, + "learning_rate": 1.2012203753563805e-05, + "loss": 0.0661, + "step": 115800 + }, + { + "epoch": 4.208154662402791, + "eval_loss": 0.3070759177207947, + "eval_runtime": 179.1817, + "eval_samples_per_second": 41.377, + "eval_steps_per_second": 5.174, + "eval_wer": 0.12718064153066966, + "step": 115800 + }, + { + "epoch": 4.208518060905589, + "grad_norm": 0.36186277866363525, + "learning_rate": 1.2007769951480088e-05, + "loss": 0.0914, + "step": 115810 + }, + { + "epoch": 4.208881459408388, + "grad_norm": 0.5292090773582458, + "learning_rate": 1.2003336709185329e-05, + "loss": 0.0758, + "step": 115820 + }, + { + "epoch": 4.209244857911186, + "grad_norm": 0.5596882104873657, + "learning_rate": 1.199890402687055e-05, + "loss": 0.0745, + "step": 115830 + }, + { + "epoch": 4.209608256413984, + "grad_norm": 0.5243220329284668, + "learning_rate": 1.1994471904726737e-05, + "loss": 0.0557, + "step": 115840 + }, + { + "epoch": 4.209971654916782, + "grad_norm": 0.4961388111114502, + "learning_rate": 1.1990040342944863e-05, + "loss": 0.0517, + "step": 115850 + }, + { + "epoch": 4.21033505341958, + "grad_norm": 0.29883086681365967, + "learning_rate": 1.1985609341715853e-05, + "loss": 0.0703, + "step": 115860 + }, + { + "epoch": 4.2106984519223785, + "grad_norm": 0.7670660614967346, + "learning_rate": 1.1981178901230633e-05, + "loss": 0.0695, + "step": 115870 + }, + { + "epoch": 4.2110618504251764, + "grad_norm": 0.2789497375488281, + "learning_rate": 1.19767490216801e-05, + "loss": 0.0428, + "step": 115880 + }, + { + "epoch": 4.211425248927974, + "grad_norm": 0.6327366828918457, + "learning_rate": 1.1972319703255107e-05, + "loss": 0.0506, + "step": 115890 + }, + { + "epoch": 4.211788647430772, + "grad_norm": 0.4748649597167969, + "learning_rate": 1.1967890946146507e-05, + "loss": 0.065, + "step": 115900 + }, + { + "epoch": 4.21215204593357, + "grad_norm": 0.7210052013397217, + "learning_rate": 1.1963462750545123e-05, + "loss": 0.0714, + "step": 115910 + }, + { + "epoch": 4.212515444436369, + "grad_norm": 0.351001113653183, + "learning_rate": 1.195903511664174e-05, + "loss": 0.0614, + "step": 115920 + }, + { + "epoch": 4.212878842939167, + "grad_norm": 0.3562362790107727, + "learning_rate": 1.195460804462713e-05, + "loss": 0.0528, + "step": 115930 + }, + { + "epoch": 4.213242241441965, + "grad_norm": 0.5658897161483765, + "learning_rate": 1.1950181534692046e-05, + "loss": 0.0463, + "step": 115940 + }, + { + "epoch": 4.213605639944763, + "grad_norm": 0.5244725942611694, + "learning_rate": 1.1945755587027216e-05, + "loss": 0.051, + "step": 115950 + }, + { + "epoch": 4.213969038447561, + "grad_norm": 0.6189519166946411, + "learning_rate": 1.1941330201823322e-05, + "loss": 0.0716, + "step": 115960 + }, + { + "epoch": 4.21433243695036, + "grad_norm": 0.7561732530593872, + "learning_rate": 1.1936905379271046e-05, + "loss": 0.0704, + "step": 115970 + }, + { + "epoch": 4.214695835453158, + "grad_norm": 0.5961002111434937, + "learning_rate": 1.193248111956104e-05, + "loss": 0.0562, + "step": 115980 + }, + { + "epoch": 4.215059233955956, + "grad_norm": 0.6460363864898682, + "learning_rate": 1.1928057422883937e-05, + "loss": 0.0485, + "step": 115990 + }, + { + "epoch": 4.215422632458754, + "grad_norm": 0.9474358558654785, + "learning_rate": 1.1923634289430321e-05, + "loss": 0.1026, + "step": 116000 + }, + { + "epoch": 4.215786030961552, + "grad_norm": 0.29789021611213684, + "learning_rate": 1.1919211719390785e-05, + "loss": 0.0677, + "step": 116010 + }, + { + "epoch": 4.216149429464351, + "grad_norm": 0.38266611099243164, + "learning_rate": 1.1914789712955868e-05, + "loss": 0.0585, + "step": 116020 + }, + { + "epoch": 4.216512827967149, + "grad_norm": 0.5584983825683594, + "learning_rate": 1.1910368270316102e-05, + "loss": 0.052, + "step": 116030 + }, + { + "epoch": 4.216876226469947, + "grad_norm": 0.2954959571361542, + "learning_rate": 1.1905947391661995e-05, + "loss": 0.0757, + "step": 116040 + }, + { + "epoch": 4.217239624972745, + "grad_norm": 0.7285795211791992, + "learning_rate": 1.1901527077184036e-05, + "loss": 0.0704, + "step": 116050 + }, + { + "epoch": 4.217603023475544, + "grad_norm": 0.4272112548351288, + "learning_rate": 1.189710732707266e-05, + "loss": 0.0622, + "step": 116060 + }, + { + "epoch": 4.217966421978342, + "grad_norm": 0.4311266839504242, + "learning_rate": 1.189268814151831e-05, + "loss": 0.0578, + "step": 116070 + }, + { + "epoch": 4.21832982048114, + "grad_norm": 0.818369448184967, + "learning_rate": 1.1888269520711393e-05, + "loss": 0.0706, + "step": 116080 + }, + { + "epoch": 4.218693218983938, + "grad_norm": 2.2008750438690186, + "learning_rate": 1.1883851464842299e-05, + "loss": 0.0473, + "step": 116090 + }, + { + "epoch": 4.219056617486736, + "grad_norm": 0.7152899503707886, + "learning_rate": 1.187943397410137e-05, + "loss": 0.0477, + "step": 116100 + }, + { + "epoch": 4.2194200159895345, + "grad_norm": 1.1275982856750488, + "learning_rate": 1.1875017048678947e-05, + "loss": 0.0679, + "step": 116110 + }, + { + "epoch": 4.2197834144923325, + "grad_norm": 0.5166335701942444, + "learning_rate": 1.1870600688765337e-05, + "loss": 0.057, + "step": 116120 + }, + { + "epoch": 4.2201468129951305, + "grad_norm": 0.6710319519042969, + "learning_rate": 1.1866184894550845e-05, + "loss": 0.0873, + "step": 116130 + }, + { + "epoch": 4.2205102114979285, + "grad_norm": 0.6926589012145996, + "learning_rate": 1.18617696662257e-05, + "loss": 0.0508, + "step": 116140 + }, + { + "epoch": 4.2208736100007265, + "grad_norm": 0.46043068170547485, + "learning_rate": 1.1857355003980167e-05, + "loss": 0.0598, + "step": 116150 + }, + { + "epoch": 4.221237008503525, + "grad_norm": 0.6792109608650208, + "learning_rate": 1.1852940908004426e-05, + "loss": 0.0506, + "step": 116160 + }, + { + "epoch": 4.221600407006323, + "grad_norm": 0.7868732213973999, + "learning_rate": 1.1848527378488703e-05, + "loss": 0.0671, + "step": 116170 + }, + { + "epoch": 4.221963805509121, + "grad_norm": 0.4833846688270569, + "learning_rate": 1.1844114415623132e-05, + "loss": 0.0574, + "step": 116180 + }, + { + "epoch": 4.222327204011919, + "grad_norm": 0.7338122129440308, + "learning_rate": 1.183970201959787e-05, + "loss": 0.0572, + "step": 116190 + }, + { + "epoch": 4.222690602514717, + "grad_norm": 0.42131030559539795, + "learning_rate": 1.1835290190603016e-05, + "loss": 0.0536, + "step": 116200 + }, + { + "epoch": 4.223054001017516, + "grad_norm": 0.3166612982749939, + "learning_rate": 1.1830878928828668e-05, + "loss": 0.0471, + "step": 116210 + }, + { + "epoch": 4.223417399520314, + "grad_norm": 0.880670964717865, + "learning_rate": 1.1826468234464888e-05, + "loss": 0.0783, + "step": 116220 + }, + { + "epoch": 4.223780798023112, + "grad_norm": 0.5158451795578003, + "learning_rate": 1.1822058107701733e-05, + "loss": 0.0568, + "step": 116230 + }, + { + "epoch": 4.22414419652591, + "grad_norm": 0.3280569612979889, + "learning_rate": 1.1817648548729197e-05, + "loss": 0.0477, + "step": 116240 + }, + { + "epoch": 4.224507595028708, + "grad_norm": 0.4095805585384369, + "learning_rate": 1.181323955773728e-05, + "loss": 0.0758, + "step": 116250 + }, + { + "epoch": 4.224870993531507, + "grad_norm": 0.3900350332260132, + "learning_rate": 1.1808831134915951e-05, + "loss": 0.0553, + "step": 116260 + }, + { + "epoch": 4.225234392034305, + "grad_norm": 0.6116195321083069, + "learning_rate": 1.1804423280455168e-05, + "loss": 0.0631, + "step": 116270 + }, + { + "epoch": 4.225597790537103, + "grad_norm": 0.317804217338562, + "learning_rate": 1.1800015994544822e-05, + "loss": 0.0583, + "step": 116280 + }, + { + "epoch": 4.225961189039901, + "grad_norm": 0.6528775095939636, + "learning_rate": 1.1795609277374834e-05, + "loss": 0.0456, + "step": 116290 + }, + { + "epoch": 4.226324587542699, + "grad_norm": 1.3475067615509033, + "learning_rate": 1.179120312913504e-05, + "loss": 0.0786, + "step": 116300 + }, + { + "epoch": 4.226687986045498, + "grad_norm": 0.27601638436317444, + "learning_rate": 1.1786797550015324e-05, + "loss": 0.0654, + "step": 116310 + }, + { + "epoch": 4.227051384548296, + "grad_norm": 6.547038555145264, + "learning_rate": 1.1782392540205481e-05, + "loss": 0.0543, + "step": 116320 + }, + { + "epoch": 4.227414783051094, + "grad_norm": 0.2848477363586426, + "learning_rate": 1.1777988099895326e-05, + "loss": 0.061, + "step": 116330 + }, + { + "epoch": 4.227778181553892, + "grad_norm": 2.2146904468536377, + "learning_rate": 1.1773584229274609e-05, + "loss": 0.0793, + "step": 116340 + }, + { + "epoch": 4.22814158005669, + "grad_norm": 0.5318537354469299, + "learning_rate": 1.1769180928533086e-05, + "loss": 0.0623, + "step": 116350 + }, + { + "epoch": 4.228504978559489, + "grad_norm": 0.993850588798523, + "learning_rate": 1.1764778197860482e-05, + "loss": 0.0606, + "step": 116360 + }, + { + "epoch": 4.228868377062287, + "grad_norm": 0.7397371530532837, + "learning_rate": 1.1760376037446504e-05, + "loss": 0.0746, + "step": 116370 + }, + { + "epoch": 4.229231775565085, + "grad_norm": 0.6844049096107483, + "learning_rate": 1.1755974447480809e-05, + "loss": 0.068, + "step": 116380 + }, + { + "epoch": 4.2295951740678825, + "grad_norm": 0.6430009007453918, + "learning_rate": 1.175157342815306e-05, + "loss": 0.0447, + "step": 116390 + }, + { + "epoch": 4.229958572570681, + "grad_norm": 2.6813087463378906, + "learning_rate": 1.1747172979652853e-05, + "loss": 0.0525, + "step": 116400 + }, + { + "epoch": 4.229958572570681, + "eval_loss": 0.3230968713760376, + "eval_runtime": 180.4532, + "eval_samples_per_second": 41.085, + "eval_steps_per_second": 5.137, + "eval_wer": 0.12699003394630312, + "step": 116400 + }, + { + "epoch": 4.230321971073479, + "grad_norm": 8.114737510681152, + "learning_rate": 1.1742773102169832e-05, + "loss": 0.1612, + "step": 116410 + }, + { + "epoch": 4.230685369576277, + "grad_norm": 0.7535303235054016, + "learning_rate": 1.1738373795893537e-05, + "loss": 0.077, + "step": 116420 + }, + { + "epoch": 4.231048768079075, + "grad_norm": 0.2678695321083069, + "learning_rate": 1.1733975061013538e-05, + "loss": 0.0473, + "step": 116430 + }, + { + "epoch": 4.231412166581873, + "grad_norm": 0.7494844794273376, + "learning_rate": 1.1729576897719336e-05, + "loss": 0.0452, + "step": 116440 + }, + { + "epoch": 4.231775565084672, + "grad_norm": 1.5150567293167114, + "learning_rate": 1.1725179306200467e-05, + "loss": 0.0575, + "step": 116450 + }, + { + "epoch": 4.23213896358747, + "grad_norm": 0.5527431964874268, + "learning_rate": 1.1720782286646382e-05, + "loss": 0.0643, + "step": 116460 + }, + { + "epoch": 4.232502362090268, + "grad_norm": 0.6264133453369141, + "learning_rate": 1.1716385839246549e-05, + "loss": 0.0611, + "step": 116470 + }, + { + "epoch": 4.232865760593066, + "grad_norm": 0.8068933486938477, + "learning_rate": 1.1711989964190376e-05, + "loss": 0.0589, + "step": 116480 + }, + { + "epoch": 4.233229159095864, + "grad_norm": 0.30341091752052307, + "learning_rate": 1.170759466166728e-05, + "loss": 0.06, + "step": 116490 + }, + { + "epoch": 4.233592557598663, + "grad_norm": 0.5522620677947998, + "learning_rate": 1.1703199931866631e-05, + "loss": 0.0596, + "step": 116500 + }, + { + "epoch": 4.233955956101461, + "grad_norm": 0.29794126749038696, + "learning_rate": 1.16988057749778e-05, + "loss": 0.0546, + "step": 116510 + }, + { + "epoch": 4.234319354604259, + "grad_norm": 0.9130751490592957, + "learning_rate": 1.169441219119009e-05, + "loss": 0.0749, + "step": 116520 + }, + { + "epoch": 4.234682753107057, + "grad_norm": 0.8570445775985718, + "learning_rate": 1.169001918069283e-05, + "loss": 0.0569, + "step": 116530 + }, + { + "epoch": 4.235046151609855, + "grad_norm": 0.38336512446403503, + "learning_rate": 1.1685626743675265e-05, + "loss": 0.0468, + "step": 116540 + }, + { + "epoch": 4.235409550112654, + "grad_norm": 0.8326718807220459, + "learning_rate": 1.168123488032669e-05, + "loss": 0.0576, + "step": 116550 + }, + { + "epoch": 4.235772948615452, + "grad_norm": 0.4676150977611542, + "learning_rate": 1.1676843590836308e-05, + "loss": 0.079, + "step": 116560 + }, + { + "epoch": 4.23613634711825, + "grad_norm": 0.4062000811100006, + "learning_rate": 1.1672452875393339e-05, + "loss": 0.0629, + "step": 116570 + }, + { + "epoch": 4.236499745621048, + "grad_norm": 0.4659651815891266, + "learning_rate": 1.166806273418695e-05, + "loss": 0.0597, + "step": 116580 + }, + { + "epoch": 4.236863144123846, + "grad_norm": 0.49495962262153625, + "learning_rate": 1.16636731674063e-05, + "loss": 0.0542, + "step": 116590 + }, + { + "epoch": 4.237226542626645, + "grad_norm": 2.226994514465332, + "learning_rate": 1.165928417524052e-05, + "loss": 0.0659, + "step": 116600 + }, + { + "epoch": 4.237589941129443, + "grad_norm": 0.429401695728302, + "learning_rate": 1.165489575787873e-05, + "loss": 0.0694, + "step": 116610 + }, + { + "epoch": 4.237953339632241, + "grad_norm": 0.5462735295295715, + "learning_rate": 1.1650507915509991e-05, + "loss": 0.0714, + "step": 116620 + }, + { + "epoch": 4.238316738135039, + "grad_norm": 0.8976079821586609, + "learning_rate": 1.164612064832337e-05, + "loss": 0.0481, + "step": 116630 + }, + { + "epoch": 4.2386801366378375, + "grad_norm": 0.4468517005443573, + "learning_rate": 1.1641733956507894e-05, + "loss": 0.0467, + "step": 116640 + }, + { + "epoch": 4.2390435351406355, + "grad_norm": 0.6745365858078003, + "learning_rate": 1.1637347840252585e-05, + "loss": 0.0673, + "step": 116650 + }, + { + "epoch": 4.2394069336434335, + "grad_norm": 0.3953607380390167, + "learning_rate": 1.1632962299746403e-05, + "loss": 0.0659, + "step": 116660 + }, + { + "epoch": 4.2397703321462314, + "grad_norm": 0.834370493888855, + "learning_rate": 1.1628577335178323e-05, + "loss": 0.0639, + "step": 116670 + }, + { + "epoch": 4.240133730649029, + "grad_norm": 3.582240104675293, + "learning_rate": 1.1624192946737252e-05, + "loss": 0.1856, + "step": 116680 + }, + { + "epoch": 4.240497129151828, + "grad_norm": 0.36230847239494324, + "learning_rate": 1.1619809134612134e-05, + "loss": 0.0512, + "step": 116690 + }, + { + "epoch": 4.240860527654626, + "grad_norm": 0.6795392632484436, + "learning_rate": 1.1615425898991824e-05, + "loss": 0.0562, + "step": 116700 + }, + { + "epoch": 4.241223926157424, + "grad_norm": 0.7256725430488586, + "learning_rate": 1.16110432400652e-05, + "loss": 0.0618, + "step": 116710 + }, + { + "epoch": 4.241587324660222, + "grad_norm": 0.428634911775589, + "learning_rate": 1.1606661158021076e-05, + "loss": 0.0771, + "step": 116720 + }, + { + "epoch": 4.24195072316302, + "grad_norm": 0.9015412926673889, + "learning_rate": 1.1602279653048267e-05, + "loss": 0.0741, + "step": 116730 + }, + { + "epoch": 4.242314121665819, + "grad_norm": 0.2342122346162796, + "learning_rate": 1.159789872533556e-05, + "loss": 0.043, + "step": 116740 + }, + { + "epoch": 4.242677520168617, + "grad_norm": 1.0161068439483643, + "learning_rate": 1.1593518375071721e-05, + "loss": 0.0574, + "step": 116750 + }, + { + "epoch": 4.243040918671415, + "grad_norm": 0.4920096695423126, + "learning_rate": 1.1589138602445465e-05, + "loss": 0.0665, + "step": 116760 + }, + { + "epoch": 4.243404317174213, + "grad_norm": 1.862987995147705, + "learning_rate": 1.1584759407645522e-05, + "loss": 0.0661, + "step": 116770 + }, + { + "epoch": 4.243767715677011, + "grad_norm": 0.40719863772392273, + "learning_rate": 1.1580380790860545e-05, + "loss": 0.0548, + "step": 116780 + }, + { + "epoch": 4.24413111417981, + "grad_norm": 0.3578474223613739, + "learning_rate": 1.157600275227923e-05, + "loss": 0.0539, + "step": 116790 + }, + { + "epoch": 4.244494512682608, + "grad_norm": 0.5226281881332397, + "learning_rate": 1.1571625292090185e-05, + "loss": 0.0828, + "step": 116800 + }, + { + "epoch": 4.244857911185406, + "grad_norm": 0.45012742280960083, + "learning_rate": 1.1567248410482037e-05, + "loss": 0.0675, + "step": 116810 + }, + { + "epoch": 4.245221309688204, + "grad_norm": 0.42035412788391113, + "learning_rate": 1.1562872107643341e-05, + "loss": 0.0718, + "step": 116820 + }, + { + "epoch": 4.245584708191002, + "grad_norm": 0.39669880270957947, + "learning_rate": 1.1558496383762699e-05, + "loss": 0.0595, + "step": 116830 + }, + { + "epoch": 4.245948106693801, + "grad_norm": 0.44172903895378113, + "learning_rate": 1.1554121239028609e-05, + "loss": 0.0624, + "step": 116840 + }, + { + "epoch": 4.246311505196599, + "grad_norm": 0.5983627438545227, + "learning_rate": 1.1549746673629607e-05, + "loss": 0.0551, + "step": 116850 + }, + { + "epoch": 4.246674903699397, + "grad_norm": 0.221538707613945, + "learning_rate": 1.1545372687754153e-05, + "loss": 0.069, + "step": 116860 + }, + { + "epoch": 4.247038302202195, + "grad_norm": 0.4085201025009155, + "learning_rate": 1.154099928159072e-05, + "loss": 0.0656, + "step": 116870 + }, + { + "epoch": 4.247401700704993, + "grad_norm": 0.39535167813301086, + "learning_rate": 1.153662645532774e-05, + "loss": 0.0591, + "step": 116880 + }, + { + "epoch": 4.2477650992077916, + "grad_norm": 0.38008686900138855, + "learning_rate": 1.1532254209153631e-05, + "loss": 0.0496, + "step": 116890 + }, + { + "epoch": 4.2481284977105895, + "grad_norm": 0.6974036693572998, + "learning_rate": 1.1527882543256762e-05, + "loss": 0.0665, + "step": 116900 + }, + { + "epoch": 4.2484918962133875, + "grad_norm": 0.5487545132637024, + "learning_rate": 1.1523511457825508e-05, + "loss": 0.058, + "step": 116910 + }, + { + "epoch": 4.2488552947161855, + "grad_norm": 1.3574179410934448, + "learning_rate": 1.151914095304818e-05, + "loss": 0.0948, + "step": 116920 + }, + { + "epoch": 4.2492186932189835, + "grad_norm": 0.7095970511436462, + "learning_rate": 1.1514771029113123e-05, + "loss": 0.0727, + "step": 116930 + }, + { + "epoch": 4.249582091721782, + "grad_norm": 0.5452703237533569, + "learning_rate": 1.1510401686208589e-05, + "loss": 0.9569, + "step": 116940 + }, + { + "epoch": 4.24994549022458, + "grad_norm": 0.5960811376571655, + "learning_rate": 1.1506032924522857e-05, + "loss": 0.0622, + "step": 116950 + }, + { + "epoch": 4.250308888727378, + "grad_norm": 1.1241512298583984, + "learning_rate": 1.150166474424415e-05, + "loss": 0.1427, + "step": 116960 + }, + { + "epoch": 4.250672287230176, + "grad_norm": 0.6698485612869263, + "learning_rate": 1.1497297145560682e-05, + "loss": 0.0537, + "step": 116970 + }, + { + "epoch": 4.251035685732975, + "grad_norm": 0.8490001559257507, + "learning_rate": 1.1492930128660634e-05, + "loss": 0.0599, + "step": 116980 + }, + { + "epoch": 4.251399084235773, + "grad_norm": 0.25859925150871277, + "learning_rate": 1.148856369373218e-05, + "loss": 1.3398, + "step": 116990 + }, + { + "epoch": 4.251762482738571, + "grad_norm": 3.991356134414673, + "learning_rate": 1.1484197840963434e-05, + "loss": 0.075, + "step": 117000 + }, + { + "epoch": 4.251762482738571, + "eval_loss": 0.29347988963127136, + "eval_runtime": 178.752, + "eval_samples_per_second": 41.476, + "eval_steps_per_second": 5.186, + "eval_wer": 0.1268266560168461, + "step": 117000 + } + ], + "logging_steps": 10, + "max_steps": 165108, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 1800, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.829871528924149e+20, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}