{ "best_metric": 36.4960604331718, "best_model_checkpoint": "./whisper-medium-ka/checkpoint-10000", "epoch": 2.5131942699170646, "eval_steps": 1000, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02535496957403651, "grad_norm": 10.789830207824707, "learning_rate": 4.6000000000000004e-07, "loss": 1.2488, "step": 25 }, { "epoch": 0.05070993914807302, "grad_norm": 6.402094841003418, "learning_rate": 9.600000000000001e-07, "loss": 0.9647, "step": 50 }, { "epoch": 0.07606490872210954, "grad_norm": 6.477363586425781, "learning_rate": 1.46e-06, "loss": 0.7572, "step": 75 }, { "epoch": 0.10141987829614604, "grad_norm": 5.079893589019775, "learning_rate": 1.9600000000000003e-06, "loss": 0.636, "step": 100 }, { "epoch": 0.12677484787018256, "grad_norm": 4.092740535736084, "learning_rate": 2.46e-06, "loss": 0.5668, "step": 125 }, { "epoch": 0.15212981744421908, "grad_norm": 5.041500091552734, "learning_rate": 2.96e-06, "loss": 0.5045, "step": 150 }, { "epoch": 0.17748478701825557, "grad_norm": 5.795814514160156, "learning_rate": 3.46e-06, "loss": 0.4991, "step": 175 }, { "epoch": 0.2028397565922921, "grad_norm": 5.117966651916504, "learning_rate": 3.96e-06, "loss": 0.4602, "step": 200 }, { "epoch": 0.2281947261663286, "grad_norm": 5.343120574951172, "learning_rate": 4.4600000000000005e-06, "loss": 0.4458, "step": 225 }, { "epoch": 0.2535496957403651, "grad_norm": 3.3774187564849854, "learning_rate": 4.960000000000001e-06, "loss": 0.4174, "step": 250 }, { "epoch": 0.2789046653144016, "grad_norm": 3.576996326446533, "learning_rate": 5.460000000000001e-06, "loss": 0.4296, "step": 275 }, { "epoch": 0.30425963488843816, "grad_norm": 3.852977991104126, "learning_rate": 5.9600000000000005e-06, "loss": 0.3889, "step": 300 }, { "epoch": 0.32961460446247465, "grad_norm": 4.133119583129883, "learning_rate": 6.460000000000001e-06, "loss": 0.3864, "step": 325 }, { "epoch": 0.35496957403651114, "grad_norm": 4.041072368621826, "learning_rate": 6.96e-06, "loss": 0.3787, "step": 350 }, { "epoch": 0.3803245436105477, "grad_norm": 4.2009806632995605, "learning_rate": 7.4600000000000006e-06, "loss": 0.3722, "step": 375 }, { "epoch": 0.4056795131845842, "grad_norm": 3.9817683696746826, "learning_rate": 7.960000000000002e-06, "loss": 0.366, "step": 400 }, { "epoch": 0.43103448275862066, "grad_norm": 3.8101816177368164, "learning_rate": 8.46e-06, "loss": 0.3712, "step": 425 }, { "epoch": 0.4563894523326572, "grad_norm": 3.7913081645965576, "learning_rate": 8.96e-06, "loss": 0.3743, "step": 450 }, { "epoch": 0.4817444219066937, "grad_norm": 4.321682929992676, "learning_rate": 9.460000000000001e-06, "loss": 0.3467, "step": 475 }, { "epoch": 0.5070993914807302, "grad_norm": 3.346144199371338, "learning_rate": 9.960000000000001e-06, "loss": 0.3429, "step": 500 }, { "epoch": 0.5324543610547667, "grad_norm": 4.033421516418457, "learning_rate": 9.975789473684211e-06, "loss": 0.3521, "step": 525 }, { "epoch": 0.5578093306288032, "grad_norm": 4.106046676635742, "learning_rate": 9.949473684210526e-06, "loss": 0.3219, "step": 550 }, { "epoch": 0.5831643002028397, "grad_norm": 3.8651669025421143, "learning_rate": 9.923157894736844e-06, "loss": 0.3323, "step": 575 }, { "epoch": 0.6085192697768763, "grad_norm": 4.054211616516113, "learning_rate": 9.89684210526316e-06, "loss": 0.3443, "step": 600 }, { "epoch": 0.6338742393509128, "grad_norm": 3.2193450927734375, "learning_rate": 9.870526315789474e-06, "loss": 0.3159, "step": 625 }, { "epoch": 0.6592292089249493, "grad_norm": 2.698641300201416, "learning_rate": 9.84421052631579e-06, "loss": 0.3197, "step": 650 }, { "epoch": 0.6845841784989858, "grad_norm": 2.68990159034729, "learning_rate": 9.817894736842106e-06, "loss": 0.3151, "step": 675 }, { "epoch": 0.7099391480730223, "grad_norm": 3.144531726837158, "learning_rate": 9.791578947368422e-06, "loss": 0.3094, "step": 700 }, { "epoch": 0.7352941176470589, "grad_norm": 3.1064047813415527, "learning_rate": 9.765263157894737e-06, "loss": 0.3026, "step": 725 }, { "epoch": 0.7606490872210954, "grad_norm": 2.2833447456359863, "learning_rate": 9.738947368421054e-06, "loss": 0.3152, "step": 750 }, { "epoch": 0.7860040567951319, "grad_norm": 2.9134676456451416, "learning_rate": 9.712631578947369e-06, "loss": 0.3018, "step": 775 }, { "epoch": 0.8113590263691683, "grad_norm": 3.249375820159912, "learning_rate": 9.686315789473684e-06, "loss": 0.306, "step": 800 }, { "epoch": 0.8367139959432048, "grad_norm": 3.1664018630981445, "learning_rate": 9.66e-06, "loss": 0.3053, "step": 825 }, { "epoch": 0.8620689655172413, "grad_norm": 2.8798232078552246, "learning_rate": 9.633684210526316e-06, "loss": 0.2919, "step": 850 }, { "epoch": 0.8874239350912779, "grad_norm": 3.306312322616577, "learning_rate": 9.607368421052632e-06, "loss": 0.3008, "step": 875 }, { "epoch": 0.9127789046653144, "grad_norm": 3.241096258163452, "learning_rate": 9.581052631578947e-06, "loss": 0.3066, "step": 900 }, { "epoch": 0.9381338742393509, "grad_norm": 3.2305490970611572, "learning_rate": 9.554736842105264e-06, "loss": 0.293, "step": 925 }, { "epoch": 0.9634888438133874, "grad_norm": 4.4702229499816895, "learning_rate": 9.52842105263158e-06, "loss": 0.2901, "step": 950 }, { "epoch": 0.9888438133874239, "grad_norm": 3.1923646926879883, "learning_rate": 9.502105263157896e-06, "loss": 0.29, "step": 975 }, { "epoch": 1.0141987829614605, "grad_norm": 2.5901284217834473, "learning_rate": 9.475789473684212e-06, "loss": 0.2611, "step": 1000 }, { "epoch": 1.0141987829614605, "eval_loss": 0.2935636639595032, "eval_runtime": 7046.6021, "eval_samples_per_second": 0.69, "eval_steps_per_second": 0.086, "eval_wer": 62.766441678696374, "step": 1000 }, { "epoch": 1.039553752535497, "grad_norm": 2.9228506088256836, "learning_rate": 9.449473684210527e-06, "loss": 0.2488, "step": 1025 }, { "epoch": 1.0649087221095335, "grad_norm": 2.1438565254211426, "learning_rate": 9.423157894736842e-06, "loss": 0.2441, "step": 1050 }, { "epoch": 1.09026369168357, "grad_norm": 3.6437671184539795, "learning_rate": 9.396842105263159e-06, "loss": 0.2512, "step": 1075 }, { "epoch": 1.1156186612576064, "grad_norm": 2.413125991821289, "learning_rate": 9.370526315789474e-06, "loss": 0.2447, "step": 1100 }, { "epoch": 1.140973630831643, "grad_norm": 2.880699872970581, "learning_rate": 9.34421052631579e-06, "loss": 0.2463, "step": 1125 }, { "epoch": 1.1663286004056794, "grad_norm": 2.283259630203247, "learning_rate": 9.317894736842105e-06, "loss": 0.2488, "step": 1150 }, { "epoch": 1.1916835699797161, "grad_norm": 3.4118285179138184, "learning_rate": 9.291578947368422e-06, "loss": 0.2482, "step": 1175 }, { "epoch": 1.2170385395537526, "grad_norm": 2.1249563694000244, "learning_rate": 9.265263157894737e-06, "loss": 0.243, "step": 1200 }, { "epoch": 1.2423935091277891, "grad_norm": 2.6854898929595947, "learning_rate": 9.238947368421052e-06, "loss": 0.2563, "step": 1225 }, { "epoch": 1.2677484787018256, "grad_norm": 2.975567102432251, "learning_rate": 9.21263157894737e-06, "loss": 0.2529, "step": 1250 }, { "epoch": 1.293103448275862, "grad_norm": 2.6156153678894043, "learning_rate": 9.186315789473685e-06, "loss": 0.2378, "step": 1275 }, { "epoch": 1.3184584178498986, "grad_norm": 2.3254947662353516, "learning_rate": 9.16e-06, "loss": 0.2457, "step": 1300 }, { "epoch": 1.343813387423935, "grad_norm": 3.162607431411743, "learning_rate": 9.133684210526317e-06, "loss": 0.2452, "step": 1325 }, { "epoch": 1.3691683569979716, "grad_norm": 2.94197678565979, "learning_rate": 9.107368421052632e-06, "loss": 0.2411, "step": 1350 }, { "epoch": 1.394523326572008, "grad_norm": 2.53021502494812, "learning_rate": 9.081052631578949e-06, "loss": 0.2497, "step": 1375 }, { "epoch": 1.4198782961460445, "grad_norm": 2.740830421447754, "learning_rate": 9.054736842105264e-06, "loss": 0.228, "step": 1400 }, { "epoch": 1.445233265720081, "grad_norm": 3.1709723472595215, "learning_rate": 9.02842105263158e-06, "loss": 0.2598, "step": 1425 }, { "epoch": 1.4705882352941178, "grad_norm": 2.508091688156128, "learning_rate": 9.002105263157895e-06, "loss": 0.2508, "step": 1450 }, { "epoch": 1.495943204868154, "grad_norm": 2.7562551498413086, "learning_rate": 8.97578947368421e-06, "loss": 0.2462, "step": 1475 }, { "epoch": 1.5212981744421907, "grad_norm": 3.4288785457611084, "learning_rate": 8.949473684210527e-06, "loss": 0.2351, "step": 1500 }, { "epoch": 1.5466531440162272, "grad_norm": 2.4875781536102295, "learning_rate": 8.923157894736842e-06, "loss": 0.249, "step": 1525 }, { "epoch": 1.5720081135902637, "grad_norm": 2.5765998363494873, "learning_rate": 8.896842105263159e-06, "loss": 0.2354, "step": 1550 }, { "epoch": 1.5973630831643002, "grad_norm": 2.0511434078216553, "learning_rate": 8.870526315789474e-06, "loss": 0.248, "step": 1575 }, { "epoch": 1.6227180527383367, "grad_norm": 3.0492985248565674, "learning_rate": 8.84421052631579e-06, "loss": 0.2443, "step": 1600 }, { "epoch": 1.6480730223123732, "grad_norm": 3.98416805267334, "learning_rate": 8.817894736842107e-06, "loss": 0.2348, "step": 1625 }, { "epoch": 1.6734279918864097, "grad_norm": 2.4991466999053955, "learning_rate": 8.791578947368422e-06, "loss": 0.2514, "step": 1650 }, { "epoch": 1.6987829614604464, "grad_norm": 2.768167734146118, "learning_rate": 8.765263157894739e-06, "loss": 0.2517, "step": 1675 }, { "epoch": 1.7241379310344827, "grad_norm": 3.1816017627716064, "learning_rate": 8.738947368421053e-06, "loss": 0.2421, "step": 1700 }, { "epoch": 1.7494929006085194, "grad_norm": 3.7011590003967285, "learning_rate": 8.712631578947368e-06, "loss": 0.2376, "step": 1725 }, { "epoch": 1.7748478701825556, "grad_norm": 2.3581457138061523, "learning_rate": 8.686315789473685e-06, "loss": 0.2281, "step": 1750 }, { "epoch": 1.8002028397565923, "grad_norm": 2.812277317047119, "learning_rate": 8.66e-06, "loss": 0.2357, "step": 1775 }, { "epoch": 1.8255578093306288, "grad_norm": 3.1732356548309326, "learning_rate": 8.633684210526317e-06, "loss": 0.237, "step": 1800 }, { "epoch": 1.8509127789046653, "grad_norm": 1.9399900436401367, "learning_rate": 8.607368421052632e-06, "loss": 0.2395, "step": 1825 }, { "epoch": 1.8762677484787018, "grad_norm": 1.8843597173690796, "learning_rate": 8.581052631578948e-06, "loss": 0.2282, "step": 1850 }, { "epoch": 1.9016227180527383, "grad_norm": 2.314880132675171, "learning_rate": 8.554736842105263e-06, "loss": 0.2454, "step": 1875 }, { "epoch": 1.9269776876267748, "grad_norm": 4.113000392913818, "learning_rate": 8.528421052631578e-06, "loss": 0.2342, "step": 1900 }, { "epoch": 1.9523326572008113, "grad_norm": 2.076021432876587, "learning_rate": 8.502105263157897e-06, "loss": 0.2313, "step": 1925 }, { "epoch": 1.977687626774848, "grad_norm": 3.887740135192871, "learning_rate": 8.475789473684212e-06, "loss": 0.245, "step": 1950 }, { "epoch": 2.0030425963488843, "grad_norm": 2.359464406967163, "learning_rate": 8.449473684210527e-06, "loss": 0.2539, "step": 1975 }, { "epoch": 2.028397565922921, "grad_norm": 2.299802303314209, "learning_rate": 8.423157894736843e-06, "loss": 0.18, "step": 2000 }, { "epoch": 2.028397565922921, "eval_loss": 0.27181410789489746, "eval_runtime": 7074.1416, "eval_samples_per_second": 0.688, "eval_steps_per_second": 0.086, "eval_wer": 59.46705161718232, "step": 2000 }, { "epoch": 2.0537525354969572, "grad_norm": 4.299188613891602, "learning_rate": 8.396842105263158e-06, "loss": 0.1803, "step": 2025 }, { "epoch": 2.079107505070994, "grad_norm": 2.5189449787139893, "learning_rate": 8.370526315789475e-06, "loss": 0.1927, "step": 2050 }, { "epoch": 2.1044624746450302, "grad_norm": 2.5737998485565186, "learning_rate": 8.34421052631579e-06, "loss": 0.1818, "step": 2075 }, { "epoch": 2.129817444219067, "grad_norm": 3.098142623901367, "learning_rate": 8.317894736842107e-06, "loss": 0.1907, "step": 2100 }, { "epoch": 2.1551724137931036, "grad_norm": 1.9791940450668335, "learning_rate": 8.291578947368422e-06, "loss": 0.1809, "step": 2125 }, { "epoch": 2.18052738336714, "grad_norm": 1.9798074960708618, "learning_rate": 8.265263157894737e-06, "loss": 0.1842, "step": 2150 }, { "epoch": 2.2058823529411766, "grad_norm": 2.5827345848083496, "learning_rate": 8.238947368421053e-06, "loss": 0.1769, "step": 2175 }, { "epoch": 2.231237322515213, "grad_norm": 2.1781139373779297, "learning_rate": 8.212631578947368e-06, "loss": 0.1904, "step": 2200 }, { "epoch": 2.2565922920892496, "grad_norm": 2.2573533058166504, "learning_rate": 8.186315789473685e-06, "loss": 0.1892, "step": 2225 }, { "epoch": 2.281947261663286, "grad_norm": 2.1169838905334473, "learning_rate": 8.16e-06, "loss": 0.1868, "step": 2250 }, { "epoch": 2.3073022312373226, "grad_norm": 3.3498687744140625, "learning_rate": 8.133684210526316e-06, "loss": 0.197, "step": 2275 }, { "epoch": 2.332657200811359, "grad_norm": 2.25311541557312, "learning_rate": 8.107368421052633e-06, "loss": 0.1869, "step": 2300 }, { "epoch": 2.3580121703853956, "grad_norm": 2.396975040435791, "learning_rate": 8.081052631578948e-06, "loss": 0.189, "step": 2325 }, { "epoch": 2.3833671399594323, "grad_norm": 2.147547960281372, "learning_rate": 8.054736842105265e-06, "loss": 0.1893, "step": 2350 }, { "epoch": 2.4087221095334685, "grad_norm": 1.9963374137878418, "learning_rate": 8.02842105263158e-06, "loss": 0.2003, "step": 2375 }, { "epoch": 2.4340770791075053, "grad_norm": 3.571808099746704, "learning_rate": 8.002105263157895e-06, "loss": 0.1904, "step": 2400 }, { "epoch": 2.4594320486815415, "grad_norm": 2.6122543811798096, "learning_rate": 7.975789473684211e-06, "loss": 0.1855, "step": 2425 }, { "epoch": 2.4847870182555782, "grad_norm": 2.9478933811187744, "learning_rate": 7.949473684210526e-06, "loss": 0.1822, "step": 2450 }, { "epoch": 2.5101419878296145, "grad_norm": 2.368997097015381, "learning_rate": 7.923157894736843e-06, "loss": 0.186, "step": 2475 }, { "epoch": 2.535496957403651, "grad_norm": 2.529630661010742, "learning_rate": 7.896842105263158e-06, "loss": 0.191, "step": 2500 }, { "epoch": 2.5608519269776875, "grad_norm": 2.9214324951171875, "learning_rate": 7.870526315789475e-06, "loss": 0.1885, "step": 2525 }, { "epoch": 2.586206896551724, "grad_norm": 2.141688823699951, "learning_rate": 7.84421052631579e-06, "loss": 0.1847, "step": 2550 }, { "epoch": 2.6115618661257605, "grad_norm": 2.1608083248138428, "learning_rate": 7.817894736842105e-06, "loss": 0.1873, "step": 2575 }, { "epoch": 2.636916835699797, "grad_norm": 2.489492416381836, "learning_rate": 7.791578947368423e-06, "loss": 0.1959, "step": 2600 }, { "epoch": 2.662271805273834, "grad_norm": 2.31791353225708, "learning_rate": 7.765263157894738e-06, "loss": 0.1915, "step": 2625 }, { "epoch": 2.68762677484787, "grad_norm": 2.779777765274048, "learning_rate": 7.738947368421053e-06, "loss": 0.1878, "step": 2650 }, { "epoch": 2.7129817444219064, "grad_norm": 2.5656425952911377, "learning_rate": 7.71263157894737e-06, "loss": 0.1925, "step": 2675 }, { "epoch": 2.738336713995943, "grad_norm": 3.0531349182128906, "learning_rate": 7.686315789473685e-06, "loss": 0.189, "step": 2700 }, { "epoch": 2.76369168356998, "grad_norm": 2.912122964859009, "learning_rate": 7.660000000000001e-06, "loss": 0.1906, "step": 2725 }, { "epoch": 2.789046653144016, "grad_norm": 3.0555505752563477, "learning_rate": 7.633684210526316e-06, "loss": 0.1834, "step": 2750 }, { "epoch": 2.814401622718053, "grad_norm": 2.2603671550750732, "learning_rate": 7.607368421052632e-06, "loss": 0.1872, "step": 2775 }, { "epoch": 2.839756592292089, "grad_norm": 2.1887290477752686, "learning_rate": 7.581052631578948e-06, "loss": 0.1787, "step": 2800 }, { "epoch": 2.865111561866126, "grad_norm": 2.393935203552246, "learning_rate": 7.554736842105264e-06, "loss": 0.1883, "step": 2825 }, { "epoch": 2.890466531440162, "grad_norm": 2.3543920516967773, "learning_rate": 7.5284210526315794e-06, "loss": 0.1883, "step": 2850 }, { "epoch": 2.915821501014199, "grad_norm": 2.139833927154541, "learning_rate": 7.502105263157895e-06, "loss": 0.1997, "step": 2875 }, { "epoch": 2.9411764705882355, "grad_norm": 2.270552158355713, "learning_rate": 7.475789473684211e-06, "loss": 0.1932, "step": 2900 }, { "epoch": 2.9665314401622718, "grad_norm": 2.4720232486724854, "learning_rate": 7.449473684210526e-06, "loss": 0.1926, "step": 2925 }, { "epoch": 2.991886409736308, "grad_norm": 2.318767786026001, "learning_rate": 7.4231578947368436e-06, "loss": 0.1837, "step": 2950 }, { "epoch": 3.0172413793103448, "grad_norm": 1.8572547435760498, "learning_rate": 7.3968421052631585e-06, "loss": 0.1584, "step": 2975 }, { "epoch": 3.0425963488843815, "grad_norm": 2.6684017181396484, "learning_rate": 7.370526315789474e-06, "loss": 0.15, "step": 3000 }, { "epoch": 3.0425963488843815, "eval_loss": 0.2813817858695984, "eval_runtime": 7104.648, "eval_samples_per_second": 0.685, "eval_steps_per_second": 0.086, "eval_wer": 59.24206364688593, "step": 3000 }, { "epoch": 0.760241266649912, "grad_norm": 2.2316994667053223, "learning_rate": 8.26e-06, "loss": 0.1649, "step": 3025 }, { "epoch": 0.7665242523247047, "grad_norm": 1.9445712566375732, "learning_rate": 8.242758620689655e-06, "loss": 0.1373, "step": 3050 }, { "epoch": 0.7728072379994974, "grad_norm": 1.9695370197296143, "learning_rate": 8.22551724137931e-06, "loss": 0.1303, "step": 3075 }, { "epoch": 0.77909022367429, "grad_norm": 2.19242262840271, "learning_rate": 8.208275862068967e-06, "loss": 0.1204, "step": 3100 }, { "epoch": 0.7853732093490827, "grad_norm": 1.6107076406478882, "learning_rate": 8.191034482758622e-06, "loss": 0.1206, "step": 3125 }, { "epoch": 0.7916561950238753, "grad_norm": 1.8804924488067627, "learning_rate": 8.173793103448277e-06, "loss": 0.116, "step": 3150 }, { "epoch": 0.797939180698668, "grad_norm": 1.8132656812667847, "learning_rate": 8.156551724137931e-06, "loss": 0.1193, "step": 3175 }, { "epoch": 0.8042221663734607, "grad_norm": 1.9557982683181763, "learning_rate": 8.139310344827586e-06, "loss": 0.1139, "step": 3200 }, { "epoch": 0.8105051520482534, "grad_norm": 1.4428989887237549, "learning_rate": 8.122068965517243e-06, "loss": 0.1033, "step": 3225 }, { "epoch": 0.816788137723046, "grad_norm": 1.4548839330673218, "learning_rate": 8.104827586206898e-06, "loss": 0.0975, "step": 3250 }, { "epoch": 0.8230711233978386, "grad_norm": 1.8930418491363525, "learning_rate": 8.087586206896553e-06, "loss": 0.0997, "step": 3275 }, { "epoch": 0.8293541090726313, "grad_norm": 1.905731439590454, "learning_rate": 8.070344827586207e-06, "loss": 0.1069, "step": 3300 }, { "epoch": 0.835637094747424, "grad_norm": 1.7036992311477661, "learning_rate": 8.053103448275862e-06, "loss": 0.0921, "step": 3325 }, { "epoch": 0.8419200804222167, "grad_norm": 1.7952214479446411, "learning_rate": 8.035862068965517e-06, "loss": 0.1027, "step": 3350 }, { "epoch": 0.8482030660970094, "grad_norm": 2.0625522136688232, "learning_rate": 8.018620689655174e-06, "loss": 0.1051, "step": 3375 }, { "epoch": 0.8544860517718019, "grad_norm": 1.8595600128173828, "learning_rate": 8.001379310344829e-06, "loss": 0.0968, "step": 3400 }, { "epoch": 0.8607690374465946, "grad_norm": 1.5569204092025757, "learning_rate": 7.984137931034484e-06, "loss": 0.099, "step": 3425 }, { "epoch": 0.8670520231213873, "grad_norm": 1.6187876462936401, "learning_rate": 7.966896551724138e-06, "loss": 0.0942, "step": 3450 }, { "epoch": 0.87333500879618, "grad_norm": 1.494591474533081, "learning_rate": 7.949655172413793e-06, "loss": 0.096, "step": 3475 }, { "epoch": 0.8796179944709726, "grad_norm": 1.5578557252883911, "learning_rate": 7.932413793103448e-06, "loss": 0.0885, "step": 3500 }, { "epoch": 0.8859009801457652, "grad_norm": 2.047339677810669, "learning_rate": 7.915172413793105e-06, "loss": 0.098, "step": 3525 }, { "epoch": 0.8921839658205579, "grad_norm": 1.7731865644454956, "learning_rate": 7.89793103448276e-06, "loss": 0.0889, "step": 3550 }, { "epoch": 0.8984669514953506, "grad_norm": 2.1644279956817627, "learning_rate": 7.880689655172414e-06, "loss": 0.0864, "step": 3575 }, { "epoch": 0.9047499371701433, "grad_norm": 1.8717072010040283, "learning_rate": 7.86344827586207e-06, "loss": 0.0847, "step": 3600 }, { "epoch": 0.9110329228449359, "grad_norm": 1.481933832168579, "learning_rate": 7.846206896551724e-06, "loss": 0.0892, "step": 3625 }, { "epoch": 0.9173159085197286, "grad_norm": 2.3373663425445557, "learning_rate": 7.82896551724138e-06, "loss": 0.0904, "step": 3650 }, { "epoch": 0.9235988941945212, "grad_norm": 1.509282112121582, "learning_rate": 7.811724137931036e-06, "loss": 0.0973, "step": 3675 }, { "epoch": 0.9298818798693139, "grad_norm": 1.7900352478027344, "learning_rate": 7.79448275862069e-06, "loss": 0.0815, "step": 3700 }, { "epoch": 0.9361648655441066, "grad_norm": 1.6436471939086914, "learning_rate": 7.777241379310345e-06, "loss": 0.0853, "step": 3725 }, { "epoch": 0.9424478512188992, "grad_norm": 1.4675796031951904, "learning_rate": 7.76e-06, "loss": 0.0876, "step": 3750 }, { "epoch": 0.9487308368936919, "grad_norm": 1.8452798128128052, "learning_rate": 7.742758620689655e-06, "loss": 0.0808, "step": 3775 }, { "epoch": 0.9550138225684845, "grad_norm": 1.3618487119674683, "learning_rate": 7.725517241379312e-06, "loss": 0.086, "step": 3800 }, { "epoch": 0.9612968082432772, "grad_norm": 1.2715941667556763, "learning_rate": 7.708275862068967e-06, "loss": 0.088, "step": 3825 }, { "epoch": 0.9675797939180699, "grad_norm": 1.8105791807174683, "learning_rate": 7.691034482758621e-06, "loss": 0.0899, "step": 3850 }, { "epoch": 0.9738627795928625, "grad_norm": 2.1864423751831055, "learning_rate": 7.673793103448276e-06, "loss": 0.0857, "step": 3875 }, { "epoch": 0.9801457652676552, "grad_norm": 1.5500792264938354, "learning_rate": 7.656551724137931e-06, "loss": 0.0859, "step": 3900 }, { "epoch": 0.9864287509424479, "grad_norm": 1.8707691431045532, "learning_rate": 7.639310344827588e-06, "loss": 0.0882, "step": 3925 }, { "epoch": 0.9927117366172405, "grad_norm": 1.5884675979614258, "learning_rate": 7.622068965517242e-06, "loss": 0.0832, "step": 3950 }, { "epoch": 0.9989947222920331, "grad_norm": 1.444044828414917, "learning_rate": 7.6048275862068975e-06, "loss": 0.0786, "step": 3975 }, { "epoch": 1.0052777079668258, "grad_norm": 1.6613413095474243, "learning_rate": 7.588275862068966e-06, "loss": 0.0694, "step": 4000 }, { "epoch": 1.0052777079668258, "eval_loss": 0.13177034258842468, "eval_runtime": 28232.4806, "eval_samples_per_second": 0.692, "eval_steps_per_second": 0.086, "eval_wer": 40.84150014082778, "step": 4000 }, { "epoch": 1.0115606936416186, "grad_norm": 1.5078247785568237, "learning_rate": 7.571034482758622e-06, "loss": 0.0786, "step": 4025 }, { "epoch": 1.0178436793164112, "grad_norm": 2.1607069969177246, "learning_rate": 7.553793103448277e-06, "loss": 0.0748, "step": 4050 }, { "epoch": 1.0241266649912038, "grad_norm": 1.3016276359558105, "learning_rate": 7.5365517241379315e-06, "loss": 0.0782, "step": 4075 }, { "epoch": 1.0304096506659965, "grad_norm": 1.1447079181671143, "learning_rate": 7.519310344827587e-06, "loss": 0.0708, "step": 4100 }, { "epoch": 1.0366926363407891, "grad_norm": 1.6340376138687134, "learning_rate": 7.502068965517242e-06, "loss": 0.0705, "step": 4125 }, { "epoch": 1.042975622015582, "grad_norm": 1.5793544054031372, "learning_rate": 7.484827586206898e-06, "loss": 0.0723, "step": 4150 }, { "epoch": 1.0492586076903745, "grad_norm": 1.2882245779037476, "learning_rate": 7.467586206896552e-06, "loss": 0.0692, "step": 4175 }, { "epoch": 1.055541593365167, "grad_norm": 1.822688102722168, "learning_rate": 7.4503448275862075e-06, "loss": 0.071, "step": 4200 }, { "epoch": 1.0618245790399599, "grad_norm": 1.410294771194458, "learning_rate": 7.433103448275862e-06, "loss": 0.0666, "step": 4225 }, { "epoch": 1.0681075647147524, "grad_norm": 1.4143624305725098, "learning_rate": 7.415862068965518e-06, "loss": 0.0662, "step": 4250 }, { "epoch": 1.0743905503895452, "grad_norm": 1.5564229488372803, "learning_rate": 7.398620689655173e-06, "loss": 0.0752, "step": 4275 }, { "epoch": 1.0806735360643378, "grad_norm": 2.0174007415771484, "learning_rate": 7.381379310344829e-06, "loss": 0.0647, "step": 4300 }, { "epoch": 1.0869565217391304, "grad_norm": 1.2202295064926147, "learning_rate": 7.364137931034483e-06, "loss": 0.0728, "step": 4325 }, { "epoch": 1.0932395074139232, "grad_norm": 1.7252171039581299, "learning_rate": 7.346896551724138e-06, "loss": 0.0613, "step": 4350 }, { "epoch": 1.0995224930887157, "grad_norm": 1.6477744579315186, "learning_rate": 7.329655172413793e-06, "loss": 0.0762, "step": 4375 }, { "epoch": 1.1058054787635083, "grad_norm": 2.01273512840271, "learning_rate": 7.312413793103449e-06, "loss": 0.0723, "step": 4400 }, { "epoch": 1.1120884644383011, "grad_norm": 2.0177805423736572, "learning_rate": 7.295172413793105e-06, "loss": 0.0713, "step": 4425 }, { "epoch": 1.1183714501130937, "grad_norm": 1.0823686122894287, "learning_rate": 7.27793103448276e-06, "loss": 0.0699, "step": 4450 }, { "epoch": 1.1246544357878865, "grad_norm": 1.8306968212127686, "learning_rate": 7.2606896551724145e-06, "loss": 0.0692, "step": 4475 }, { "epoch": 1.130937421462679, "grad_norm": 1.9577009677886963, "learning_rate": 7.243448275862069e-06, "loss": 0.0707, "step": 4500 }, { "epoch": 1.1372204071374716, "grad_norm": 1.8456298112869263, "learning_rate": 7.226206896551725e-06, "loss": 0.0702, "step": 4525 }, { "epoch": 1.1435033928122644, "grad_norm": 1.369918942451477, "learning_rate": 7.20896551724138e-06, "loss": 0.0716, "step": 4550 }, { "epoch": 1.149786378487057, "grad_norm": 1.7100856304168701, "learning_rate": 7.191724137931036e-06, "loss": 0.0773, "step": 4575 }, { "epoch": 1.1560693641618498, "grad_norm": 1.2440359592437744, "learning_rate": 7.17448275862069e-06, "loss": 0.0736, "step": 4600 }, { "epoch": 1.1623523498366424, "grad_norm": 1.26316237449646, "learning_rate": 7.157241379310345e-06, "loss": 0.0683, "step": 4625 }, { "epoch": 1.168635335511435, "grad_norm": 1.6966075897216797, "learning_rate": 7.14e-06, "loss": 0.064, "step": 4650 }, { "epoch": 1.1749183211862277, "grad_norm": 1.7377158403396606, "learning_rate": 7.122758620689656e-06, "loss": 0.0685, "step": 4675 }, { "epoch": 1.1812013068610203, "grad_norm": 1.851913571357727, "learning_rate": 7.105517241379311e-06, "loss": 0.0586, "step": 4700 }, { "epoch": 1.1874842925358131, "grad_norm": 1.6973158121109009, "learning_rate": 7.0882758620689666e-06, "loss": 0.0696, "step": 4725 }, { "epoch": 1.1937672782106057, "grad_norm": 1.451185703277588, "learning_rate": 7.0710344827586206e-06, "loss": 0.0671, "step": 4750 }, { "epoch": 1.2000502638853983, "grad_norm": 1.7117061614990234, "learning_rate": 7.053793103448276e-06, "loss": 0.0563, "step": 4775 }, { "epoch": 1.206333249560191, "grad_norm": 1.4349240064620972, "learning_rate": 7.036551724137931e-06, "loss": 0.0696, "step": 4800 }, { "epoch": 1.2126162352349836, "grad_norm": 1.5705769062042236, "learning_rate": 7.019310344827587e-06, "loss": 0.0634, "step": 4825 }, { "epoch": 1.2188992209097762, "grad_norm": 1.5555791854858398, "learning_rate": 7.002068965517243e-06, "loss": 0.0665, "step": 4850 }, { "epoch": 1.225182206584569, "grad_norm": 1.4251762628555298, "learning_rate": 6.9848275862068975e-06, "loss": 0.0636, "step": 4875 }, { "epoch": 1.2314651922593616, "grad_norm": 1.7479225397109985, "learning_rate": 6.967586206896552e-06, "loss": 0.0736, "step": 4900 }, { "epoch": 1.2377481779341544, "grad_norm": 1.4017658233642578, "learning_rate": 6.950344827586207e-06, "loss": 0.0642, "step": 4925 }, { "epoch": 1.244031163608947, "grad_norm": 1.34666907787323, "learning_rate": 6.933103448275863e-06, "loss": 0.0647, "step": 4950 }, { "epoch": 1.2503141492837395, "grad_norm": 1.6432591676712036, "learning_rate": 6.915862068965518e-06, "loss": 0.069, "step": 4975 }, { "epoch": 1.2565971349585323, "grad_norm": 1.2850168943405151, "learning_rate": 6.8986206896551735e-06, "loss": 0.0638, "step": 5000 }, { "epoch": 1.2565971349585323, "eval_loss": 0.12251746654510498, "eval_runtime": 28262.6172, "eval_samples_per_second": 0.691, "eval_steps_per_second": 0.086, "eval_wer": 38.8171910996844, "step": 5000 }, { "epoch": 1.2628801206333249, "grad_norm": 1.6795750856399536, "learning_rate": 6.8813793103448275e-06, "loss": 0.069, "step": 5025 }, { "epoch": 1.2691631063081177, "grad_norm": 1.418845772743225, "learning_rate": 6.864137931034483e-06, "loss": 0.0609, "step": 5050 }, { "epoch": 1.2754460919829103, "grad_norm": 1.2165697813034058, "learning_rate": 6.846896551724138e-06, "loss": 0.0638, "step": 5075 }, { "epoch": 1.2817290776577028, "grad_norm": 2.068269729614258, "learning_rate": 6.829655172413794e-06, "loss": 0.0673, "step": 5100 }, { "epoch": 1.2880120633324956, "grad_norm": 1.2388827800750732, "learning_rate": 6.812413793103449e-06, "loss": 0.0701, "step": 5125 }, { "epoch": 1.2942950490072882, "grad_norm": 1.2347630262374878, "learning_rate": 6.7951724137931044e-06, "loss": 0.065, "step": 5150 }, { "epoch": 1.300578034682081, "grad_norm": 1.15070378780365, "learning_rate": 6.7779310344827585e-06, "loss": 0.0637, "step": 5175 }, { "epoch": 1.3068610203568736, "grad_norm": 1.2261390686035156, "learning_rate": 6.760689655172414e-06, "loss": 0.0676, "step": 5200 }, { "epoch": 1.3131440060316661, "grad_norm": 1.9254522323608398, "learning_rate": 6.74344827586207e-06, "loss": 0.0594, "step": 5225 }, { "epoch": 1.319426991706459, "grad_norm": 1.3794054985046387, "learning_rate": 6.726206896551725e-06, "loss": 0.0649, "step": 5250 }, { "epoch": 1.3257099773812515, "grad_norm": 1.6740128993988037, "learning_rate": 6.7089655172413805e-06, "loss": 0.0619, "step": 5275 }, { "epoch": 1.3319929630560443, "grad_norm": 1.2363388538360596, "learning_rate": 6.691724137931035e-06, "loss": 0.0646, "step": 5300 }, { "epoch": 1.3382759487308369, "grad_norm": 1.433228850364685, "learning_rate": 6.67448275862069e-06, "loss": 0.0663, "step": 5325 }, { "epoch": 1.3445589344056295, "grad_norm": 1.898812174797058, "learning_rate": 6.657241379310345e-06, "loss": 0.0574, "step": 5350 }, { "epoch": 1.3508419200804223, "grad_norm": 1.3165233135223389, "learning_rate": 6.640000000000001e-06, "loss": 0.0597, "step": 5375 }, { "epoch": 1.3571249057552148, "grad_norm": 1.8794306516647339, "learning_rate": 6.622758620689656e-06, "loss": 0.0697, "step": 5400 }, { "epoch": 1.3634078914300076, "grad_norm": 1.1872018575668335, "learning_rate": 6.605517241379311e-06, "loss": 0.0587, "step": 5425 }, { "epoch": 1.3696908771048002, "grad_norm": 1.639711856842041, "learning_rate": 6.588275862068965e-06, "loss": 0.0645, "step": 5450 }, { "epoch": 1.3759738627795928, "grad_norm": 1.2173725366592407, "learning_rate": 6.571034482758621e-06, "loss": 0.055, "step": 5475 }, { "epoch": 1.3822568484543856, "grad_norm": 1.2602193355560303, "learning_rate": 6.553793103448276e-06, "loss": 0.0543, "step": 5500 }, { "epoch": 1.3885398341291781, "grad_norm": 1.350376009941101, "learning_rate": 6.536551724137932e-06, "loss": 0.0675, "step": 5525 }, { "epoch": 1.394822819803971, "grad_norm": 1.314760446548462, "learning_rate": 6.519310344827587e-06, "loss": 0.0597, "step": 5550 }, { "epoch": 1.4011058054787635, "grad_norm": 1.0545654296875, "learning_rate": 6.502068965517242e-06, "loss": 0.0553, "step": 5575 }, { "epoch": 1.407388791153556, "grad_norm": 1.2332383394241333, "learning_rate": 6.484827586206896e-06, "loss": 0.0607, "step": 5600 }, { "epoch": 1.4136717768283489, "grad_norm": 1.3052699565887451, "learning_rate": 6.467586206896552e-06, "loss": 0.0689, "step": 5625 }, { "epoch": 1.4199547625031415, "grad_norm": 1.505159854888916, "learning_rate": 6.450344827586208e-06, "loss": 0.0585, "step": 5650 }, { "epoch": 1.4262377481779343, "grad_norm": 1.2076035737991333, "learning_rate": 6.433103448275863e-06, "loss": 0.0624, "step": 5675 }, { "epoch": 1.4325207338527268, "grad_norm": 0.716097354888916, "learning_rate": 6.415862068965518e-06, "loss": 0.056, "step": 5700 }, { "epoch": 1.4388037195275194, "grad_norm": 0.8870618939399719, "learning_rate": 6.398620689655173e-06, "loss": 0.0592, "step": 5725 }, { "epoch": 1.4450867052023122, "grad_norm": 2.086239814758301, "learning_rate": 6.381379310344828e-06, "loss": 0.0647, "step": 5750 }, { "epoch": 1.4513696908771048, "grad_norm": 1.2493882179260254, "learning_rate": 6.364137931034483e-06, "loss": 0.0664, "step": 5775 }, { "epoch": 1.4576526765518976, "grad_norm": 0.78863924741745, "learning_rate": 6.346896551724139e-06, "loss": 0.0545, "step": 5800 }, { "epoch": 1.4639356622266901, "grad_norm": 1.2974257469177246, "learning_rate": 6.3296551724137935e-06, "loss": 0.0643, "step": 5825 }, { "epoch": 1.4702186479014827, "grad_norm": 1.220800518989563, "learning_rate": 6.312413793103449e-06, "loss": 0.0664, "step": 5850 }, { "epoch": 1.4765016335762755, "grad_norm": 1.9717583656311035, "learning_rate": 6.295172413793103e-06, "loss": 0.0585, "step": 5875 }, { "epoch": 1.482784619251068, "grad_norm": 1.9224556684494019, "learning_rate": 6.277931034482759e-06, "loss": 0.0607, "step": 5900 }, { "epoch": 1.4890676049258609, "grad_norm": 1.3678847551345825, "learning_rate": 6.260689655172414e-06, "loss": 0.0619, "step": 5925 }, { "epoch": 1.4953505906006535, "grad_norm": 1.2710736989974976, "learning_rate": 6.24344827586207e-06, "loss": 0.0615, "step": 5950 }, { "epoch": 1.501633576275446, "grad_norm": 1.1889654397964478, "learning_rate": 6.2262068965517245e-06, "loss": 0.0595, "step": 5975 }, { "epoch": 1.5079165619502386, "grad_norm": 1.165711760520935, "learning_rate": 6.20896551724138e-06, "loss": 0.0529, "step": 6000 }, { "epoch": 1.5079165619502386, "eval_loss": 0.12242772430181503, "eval_runtime": 28299.0323, "eval_samples_per_second": 0.69, "eval_steps_per_second": 0.086, "eval_wer": 37.89097763366001, "step": 6000 }, { "epoch": 1.5141995476250314, "grad_norm": 1.0195108652114868, "learning_rate": 6.191724137931034e-06, "loss": 0.0586, "step": 6025 }, { "epoch": 1.5204825332998242, "grad_norm": 1.1056115627288818, "learning_rate": 6.17448275862069e-06, "loss": 0.0576, "step": 6050 }, { "epoch": 1.5267655189746168, "grad_norm": 1.2018024921417236, "learning_rate": 6.157241379310346e-06, "loss": 0.0641, "step": 6075 }, { "epoch": 1.5330485046494093, "grad_norm": 1.426589846611023, "learning_rate": 6.1400000000000005e-06, "loss": 0.0575, "step": 6100 }, { "epoch": 1.539331490324202, "grad_norm": 1.0339651107788086, "learning_rate": 6.122758620689656e-06, "loss": 0.0459, "step": 6125 }, { "epoch": 1.5456144759989947, "grad_norm": 1.3441721200942993, "learning_rate": 6.105517241379311e-06, "loss": 0.0588, "step": 6150 }, { "epoch": 1.5518974616737875, "grad_norm": 1.2048940658569336, "learning_rate": 6.088275862068966e-06, "loss": 0.0573, "step": 6175 }, { "epoch": 1.55818044734858, "grad_norm": 1.5876215696334839, "learning_rate": 6.071034482758621e-06, "loss": 0.0616, "step": 6200 }, { "epoch": 1.5644634330233727, "grad_norm": 1.1515843868255615, "learning_rate": 6.0537931034482766e-06, "loss": 0.0566, "step": 6225 }, { "epoch": 1.5707464186981652, "grad_norm": 1.2697322368621826, "learning_rate": 6.036551724137931e-06, "loss": 0.0654, "step": 6250 }, { "epoch": 1.577029404372958, "grad_norm": 0.8662827014923096, "learning_rate": 6.019310344827587e-06, "loss": 0.0568, "step": 6275 }, { "epoch": 1.5833123900477508, "grad_norm": 1.5702407360076904, "learning_rate": 6.002068965517241e-06, "loss": 0.0563, "step": 6300 }, { "epoch": 1.5895953757225434, "grad_norm": 1.2121763229370117, "learning_rate": 5.984827586206897e-06, "loss": 0.0638, "step": 6325 }, { "epoch": 1.595878361397336, "grad_norm": 1.257488489151001, "learning_rate": 5.967586206896552e-06, "loss": 0.0577, "step": 6350 }, { "epoch": 1.6021613470721285, "grad_norm": 1.1616463661193848, "learning_rate": 5.9503448275862075e-06, "loss": 0.0604, "step": 6375 }, { "epoch": 1.6084443327469213, "grad_norm": 1.3494690656661987, "learning_rate": 5.933103448275862e-06, "loss": 0.0628, "step": 6400 }, { "epoch": 1.6147273184217141, "grad_norm": 1.5194650888442993, "learning_rate": 5.915862068965518e-06, "loss": 0.0549, "step": 6425 }, { "epoch": 1.6210103040965067, "grad_norm": 1.3437527418136597, "learning_rate": 5.898620689655174e-06, "loss": 0.0579, "step": 6450 }, { "epoch": 1.6272932897712993, "grad_norm": 1.445145845413208, "learning_rate": 5.881379310344828e-06, "loss": 0.0619, "step": 6475 }, { "epoch": 1.6335762754460919, "grad_norm": 1.3654954433441162, "learning_rate": 5.8641379310344835e-06, "loss": 0.0572, "step": 6500 }, { "epoch": 1.6398592611208846, "grad_norm": 1.4988460540771484, "learning_rate": 5.846896551724138e-06, "loss": 0.0599, "step": 6525 }, { "epoch": 1.6461422467956774, "grad_norm": 1.3341230154037476, "learning_rate": 5.829655172413794e-06, "loss": 0.0555, "step": 6550 }, { "epoch": 1.65242523247047, "grad_norm": 1.2729185819625854, "learning_rate": 5.812413793103449e-06, "loss": 0.0542, "step": 6575 }, { "epoch": 1.6587082181452626, "grad_norm": 1.7550246715545654, "learning_rate": 5.795172413793104e-06, "loss": 0.0582, "step": 6600 }, { "epoch": 1.6649912038200552, "grad_norm": 1.5241115093231201, "learning_rate": 5.777931034482759e-06, "loss": 0.063, "step": 6625 }, { "epoch": 1.671274189494848, "grad_norm": 1.8420275449752808, "learning_rate": 5.7606896551724144e-06, "loss": 0.0656, "step": 6650 }, { "epoch": 1.6775571751696408, "grad_norm": 1.0507006645202637, "learning_rate": 5.743448275862069e-06, "loss": 0.0562, "step": 6675 }, { "epoch": 1.6838401608444333, "grad_norm": 2.0767569541931152, "learning_rate": 5.726206896551725e-06, "loss": 0.0582, "step": 6700 }, { "epoch": 1.690123146519226, "grad_norm": 0.8954183459281921, "learning_rate": 5.708965517241379e-06, "loss": 0.0602, "step": 6725 }, { "epoch": 1.6964061321940185, "grad_norm": 0.9078446626663208, "learning_rate": 5.691724137931035e-06, "loss": 0.0529, "step": 6750 }, { "epoch": 1.7026891178688113, "grad_norm": 1.581921935081482, "learning_rate": 5.67448275862069e-06, "loss": 0.0548, "step": 6775 }, { "epoch": 1.708972103543604, "grad_norm": 1.4554569721221924, "learning_rate": 5.657241379310345e-06, "loss": 0.0563, "step": 6800 }, { "epoch": 1.7152550892183966, "grad_norm": 0.9179530739784241, "learning_rate": 5.64e-06, "loss": 0.0544, "step": 6825 }, { "epoch": 1.7215380748931892, "grad_norm": 1.1374155282974243, "learning_rate": 5.622758620689656e-06, "loss": 0.057, "step": 6850 }, { "epoch": 1.7278210605679818, "grad_norm": 1.0349596738815308, "learning_rate": 5.605517241379312e-06, "loss": 0.0573, "step": 6875 }, { "epoch": 1.7341040462427746, "grad_norm": 1.4229092597961426, "learning_rate": 5.588275862068966e-06, "loss": 0.0487, "step": 6900 }, { "epoch": 1.7403870319175674, "grad_norm": 1.2893837690353394, "learning_rate": 5.571034482758621e-06, "loss": 0.0605, "step": 6925 }, { "epoch": 1.74667001759236, "grad_norm": 1.1475664377212524, "learning_rate": 5.553793103448276e-06, "loss": 0.0577, "step": 6950 }, { "epoch": 1.7529530032671525, "grad_norm": 1.1052597761154175, "learning_rate": 5.536551724137932e-06, "loss": 0.0531, "step": 6975 }, { "epoch": 1.759235988941945, "grad_norm": 1.0279254913330078, "learning_rate": 5.519310344827587e-06, "loss": 0.0624, "step": 7000 }, { "epoch": 1.759235988941945, "eval_loss": 0.12031704932451248, "eval_runtime": 28303.9606, "eval_samples_per_second": 0.69, "eval_steps_per_second": 0.086, "eval_wer": 37.24605862768746, "step": 7000 }, { "epoch": 1.765518974616738, "grad_norm": 1.0434989929199219, "learning_rate": 5.502068965517242e-06, "loss": 0.0488, "step": 7025 }, { "epoch": 1.7718019602915307, "grad_norm": 1.1990073919296265, "learning_rate": 5.484827586206897e-06, "loss": 0.0525, "step": 7050 }, { "epoch": 1.7780849459663233, "grad_norm": 1.026079535484314, "learning_rate": 5.467586206896552e-06, "loss": 0.0531, "step": 7075 }, { "epoch": 1.7843679316411158, "grad_norm": 0.9900615215301514, "learning_rate": 5.450344827586207e-06, "loss": 0.0545, "step": 7100 }, { "epoch": 1.7906509173159084, "grad_norm": 1.5279738903045654, "learning_rate": 5.433103448275863e-06, "loss": 0.0566, "step": 7125 }, { "epoch": 1.7969339029907012, "grad_norm": 0.8226191401481628, "learning_rate": 5.415862068965517e-06, "loss": 0.056, "step": 7150 }, { "epoch": 1.8032168886654938, "grad_norm": 1.3544007539749146, "learning_rate": 5.398620689655173e-06, "loss": 0.061, "step": 7175 }, { "epoch": 1.8094998743402866, "grad_norm": 1.2771939039230347, "learning_rate": 5.3813793103448275e-06, "loss": 0.0542, "step": 7200 }, { "epoch": 1.8157828600150792, "grad_norm": 1.200951099395752, "learning_rate": 5.364137931034483e-06, "loss": 0.0544, "step": 7225 }, { "epoch": 1.8220658456898717, "grad_norm": 0.9072504639625549, "learning_rate": 5.346896551724139e-06, "loss": 0.0538, "step": 7250 }, { "epoch": 1.8283488313646645, "grad_norm": 1.088958978652954, "learning_rate": 5.329655172413794e-06, "loss": 0.054, "step": 7275 }, { "epoch": 1.834631817039457, "grad_norm": 1.359937071800232, "learning_rate": 5.3124137931034495e-06, "loss": 0.0623, "step": 7300 }, { "epoch": 1.84091480271425, "grad_norm": 1.467264175415039, "learning_rate": 5.2951724137931035e-06, "loss": 0.0536, "step": 7325 }, { "epoch": 1.8471977883890425, "grad_norm": 1.4082632064819336, "learning_rate": 5.277931034482759e-06, "loss": 0.051, "step": 7350 }, { "epoch": 1.853480774063835, "grad_norm": 1.472396969795227, "learning_rate": 5.260689655172414e-06, "loss": 0.0662, "step": 7375 }, { "epoch": 1.8597637597386278, "grad_norm": 1.587661862373352, "learning_rate": 5.24344827586207e-06, "loss": 0.055, "step": 7400 }, { "epoch": 1.8660467454134204, "grad_norm": 1.0919044017791748, "learning_rate": 5.226206896551725e-06, "loss": 0.0509, "step": 7425 }, { "epoch": 1.8723297310882132, "grad_norm": 0.9456779956817627, "learning_rate": 5.20896551724138e-06, "loss": 0.0557, "step": 7450 }, { "epoch": 1.8786127167630058, "grad_norm": 1.6846345663070679, "learning_rate": 5.1917241379310345e-06, "loss": 0.0531, "step": 7475 }, { "epoch": 1.8848957024377984, "grad_norm": 0.5773513317108154, "learning_rate": 5.17448275862069e-06, "loss": 0.0554, "step": 7500 }, { "epoch": 1.8911786881125912, "grad_norm": 0.977917492389679, "learning_rate": 5.157241379310345e-06, "loss": 0.0561, "step": 7525 }, { "epoch": 1.8974616737873837, "grad_norm": 1.2408355474472046, "learning_rate": 5.140000000000001e-06, "loss": 0.0615, "step": 7550 }, { "epoch": 1.9037446594621765, "grad_norm": 1.273364543914795, "learning_rate": 5.122758620689655e-06, "loss": 0.0572, "step": 7575 }, { "epoch": 1.910027645136969, "grad_norm": 1.2105774879455566, "learning_rate": 5.1055172413793105e-06, "loss": 0.0469, "step": 7600 }, { "epoch": 1.9163106308117617, "grad_norm": 0.6686076521873474, "learning_rate": 5.088275862068965e-06, "loss": 0.0596, "step": 7625 }, { "epoch": 1.9225936164865542, "grad_norm": 1.473767876625061, "learning_rate": 5.071034482758621e-06, "loss": 0.0558, "step": 7650 }, { "epoch": 1.928876602161347, "grad_norm": 1.183693289756775, "learning_rate": 5.053793103448277e-06, "loss": 0.0578, "step": 7675 }, { "epoch": 1.9351595878361398, "grad_norm": 1.661081075668335, "learning_rate": 5.036551724137932e-06, "loss": 0.0577, "step": 7700 }, { "epoch": 1.9414425735109324, "grad_norm": 1.035583734512329, "learning_rate": 5.019310344827587e-06, "loss": 0.0504, "step": 7725 }, { "epoch": 1.947725559185725, "grad_norm": 1.2706879377365112, "learning_rate": 5.002068965517241e-06, "loss": 0.0523, "step": 7750 }, { "epoch": 1.9540085448605176, "grad_norm": 1.558969497680664, "learning_rate": 4.984827586206897e-06, "loss": 0.0527, "step": 7775 }, { "epoch": 1.9602915305353104, "grad_norm": 2.107837677001953, "learning_rate": 4.967586206896552e-06, "loss": 0.0496, "step": 7800 }, { "epoch": 1.9665745162101032, "grad_norm": 1.1281065940856934, "learning_rate": 4.950344827586207e-06, "loss": 0.0495, "step": 7825 }, { "epoch": 1.9728575018848957, "grad_norm": 0.92071133852005, "learning_rate": 4.933103448275863e-06, "loss": 0.0543, "step": 7850 }, { "epoch": 1.9791404875596883, "grad_norm": 1.5125892162322998, "learning_rate": 4.9158620689655175e-06, "loss": 0.0567, "step": 7875 }, { "epoch": 1.9854234732344809, "grad_norm": 1.4018179178237915, "learning_rate": 4.898620689655173e-06, "loss": 0.0577, "step": 7900 }, { "epoch": 1.9917064589092737, "grad_norm": 1.599665880203247, "learning_rate": 4.881379310344828e-06, "loss": 0.0511, "step": 7925 }, { "epoch": 1.9979894445840665, "grad_norm": 1.3747309446334839, "learning_rate": 4.864137931034483e-06, "loss": 0.0545, "step": 7950 }, { "epoch": 2.004272430258859, "grad_norm": 1.2372041940689087, "learning_rate": 4.846896551724139e-06, "loss": 0.0434, "step": 7975 }, { "epoch": 2.0105554159336516, "grad_norm": 1.0974595546722412, "learning_rate": 4.8296551724137935e-06, "loss": 0.0426, "step": 8000 }, { "epoch": 2.0105554159336516, "eval_loss": 0.1260567456483841, "eval_runtime": 28488.8565, "eval_samples_per_second": 0.686, "eval_steps_per_second": 0.086, "eval_wer": 36.9741526861996, "step": 8000 }, { "epoch": 2.016838401608444, "grad_norm": 0.5434245467185974, "learning_rate": 4.812413793103448e-06, "loss": 0.0372, "step": 8025 }, { "epoch": 2.023121387283237, "grad_norm": 1.4904873371124268, "learning_rate": 4.795172413793104e-06, "loss": 0.0322, "step": 8050 }, { "epoch": 2.02940437295803, "grad_norm": 0.8786129355430603, "learning_rate": 4.777931034482759e-06, "loss": 0.0378, "step": 8075 }, { "epoch": 2.0356873586328224, "grad_norm": 1.062193512916565, "learning_rate": 4.760689655172414e-06, "loss": 0.0335, "step": 8100 }, { "epoch": 2.041970344307615, "grad_norm": 1.1229702234268188, "learning_rate": 4.7434482758620696e-06, "loss": 0.0341, "step": 8125 }, { "epoch": 2.0482533299824075, "grad_norm": 1.5959960222244263, "learning_rate": 4.726206896551724e-06, "loss": 0.0326, "step": 8150 }, { "epoch": 2.0545363156572005, "grad_norm": 1.3407766819000244, "learning_rate": 4.708965517241379e-06, "loss": 0.0282, "step": 8175 }, { "epoch": 2.060819301331993, "grad_norm": 0.8177748322486877, "learning_rate": 4.691724137931035e-06, "loss": 0.0334, "step": 8200 }, { "epoch": 2.0671022870067857, "grad_norm": 0.5434231162071228, "learning_rate": 4.67448275862069e-06, "loss": 0.0332, "step": 8225 }, { "epoch": 2.0733852726815782, "grad_norm": 0.6810811758041382, "learning_rate": 4.657241379310346e-06, "loss": 0.0343, "step": 8250 }, { "epoch": 2.079668258356371, "grad_norm": 1.3621633052825928, "learning_rate": 4.6400000000000005e-06, "loss": 0.0358, "step": 8275 }, { "epoch": 2.085951244031164, "grad_norm": 0.8485309481620789, "learning_rate": 4.622758620689655e-06, "loss": 0.0283, "step": 8300 }, { "epoch": 2.0922342297059564, "grad_norm": 1.2133398056030273, "learning_rate": 4.605517241379311e-06, "loss": 0.0342, "step": 8325 }, { "epoch": 2.098517215380749, "grad_norm": 1.9074926376342773, "learning_rate": 4.588275862068966e-06, "loss": 0.0372, "step": 8350 }, { "epoch": 2.1048002010555416, "grad_norm": 1.3371448516845703, "learning_rate": 4.571034482758621e-06, "loss": 0.0356, "step": 8375 }, { "epoch": 2.111083186730334, "grad_norm": 1.3409150838851929, "learning_rate": 4.5537931034482765e-06, "loss": 0.0354, "step": 8400 }, { "epoch": 2.117366172405127, "grad_norm": 1.1407537460327148, "learning_rate": 4.536551724137931e-06, "loss": 0.0338, "step": 8425 }, { "epoch": 2.1236491580799197, "grad_norm": 1.0432764291763306, "learning_rate": 4.519310344827586e-06, "loss": 0.0325, "step": 8450 }, { "epoch": 2.1299321437547123, "grad_norm": 1.2592930793762207, "learning_rate": 4.502068965517242e-06, "loss": 0.0374, "step": 8475 }, { "epoch": 2.136215129429505, "grad_norm": 0.9935320019721985, "learning_rate": 4.484827586206897e-06, "loss": 0.0349, "step": 8500 }, { "epoch": 2.1424981151042974, "grad_norm": 1.1453524827957153, "learning_rate": 4.467586206896552e-06, "loss": 0.0357, "step": 8525 }, { "epoch": 2.1487811007790905, "grad_norm": 0.8577796816825867, "learning_rate": 4.4503448275862074e-06, "loss": 0.0349, "step": 8550 }, { "epoch": 2.155064086453883, "grad_norm": 1.0337741374969482, "learning_rate": 4.433103448275862e-06, "loss": 0.0321, "step": 8575 }, { "epoch": 2.1613470721286756, "grad_norm": 1.4780592918395996, "learning_rate": 4.415862068965517e-06, "loss": 0.0359, "step": 8600 }, { "epoch": 2.167630057803468, "grad_norm": 1.6528609991073608, "learning_rate": 4.398620689655173e-06, "loss": 0.0384, "step": 8625 }, { "epoch": 2.1739130434782608, "grad_norm": 0.7156565189361572, "learning_rate": 4.381379310344829e-06, "loss": 0.0353, "step": 8650 }, { "epoch": 2.1801960291530533, "grad_norm": 1.1753544807434082, "learning_rate": 4.3641379310344835e-06, "loss": 0.031, "step": 8675 }, { "epoch": 2.1864790148278463, "grad_norm": 0.7453944087028503, "learning_rate": 4.346896551724138e-06, "loss": 0.0348, "step": 8700 }, { "epoch": 2.192762000502639, "grad_norm": 1.872745156288147, "learning_rate": 4.329655172413793e-06, "loss": 0.0351, "step": 8725 }, { "epoch": 2.1990449861774315, "grad_norm": 0.6683670282363892, "learning_rate": 4.312413793103449e-06, "loss": 0.0351, "step": 8750 }, { "epoch": 2.205327971852224, "grad_norm": 1.3862112760543823, "learning_rate": 4.295172413793104e-06, "loss": 0.0333, "step": 8775 }, { "epoch": 2.2116109575270166, "grad_norm": 1.027766466140747, "learning_rate": 4.277931034482759e-06, "loss": 0.0287, "step": 8800 }, { "epoch": 2.2178939432018097, "grad_norm": 0.8764299154281616, "learning_rate": 4.260689655172414e-06, "loss": 0.0324, "step": 8825 }, { "epoch": 2.2241769288766022, "grad_norm": 0.8723062872886658, "learning_rate": 4.243448275862069e-06, "loss": 0.0283, "step": 8850 }, { "epoch": 2.230459914551395, "grad_norm": 1.1235435009002686, "learning_rate": 4.226206896551724e-06, "loss": 0.0376, "step": 8875 }, { "epoch": 2.2367429002261874, "grad_norm": 1.0615513324737549, "learning_rate": 4.20896551724138e-06, "loss": 0.0354, "step": 8900 }, { "epoch": 2.24302588590098, "grad_norm": 0.8142825365066528, "learning_rate": 4.191724137931035e-06, "loss": 0.0274, "step": 8925 }, { "epoch": 2.249308871575773, "grad_norm": 1.7816015481948853, "learning_rate": 4.17448275862069e-06, "loss": 0.0349, "step": 8950 }, { "epoch": 2.2555918572505655, "grad_norm": 1.0881839990615845, "learning_rate": 4.157241379310345e-06, "loss": 0.0344, "step": 8975 }, { "epoch": 2.261874842925358, "grad_norm": 0.554862916469574, "learning_rate": 4.14e-06, "loss": 0.0305, "step": 9000 }, { "epoch": 2.261874842925358, "eval_loss": 0.1346791386604309, "eval_runtime": 28846.9785, "eval_samples_per_second": 0.677, "eval_steps_per_second": 0.085, "eval_wer": 36.786382314919805, "step": 9000 }, { "epoch": 2.2681578286001507, "grad_norm": 1.2704734802246094, "learning_rate": 4.122758620689655e-06, "loss": 0.0334, "step": 9025 }, { "epoch": 2.2744408142749433, "grad_norm": 1.3331466913223267, "learning_rate": 4.105517241379311e-06, "loss": 0.0343, "step": 9050 }, { "epoch": 2.2807237999497363, "grad_norm": 0.3723588287830353, "learning_rate": 4.0882758620689665e-06, "loss": 0.0343, "step": 9075 }, { "epoch": 2.287006785624529, "grad_norm": 2.03139591217041, "learning_rate": 4.071034482758621e-06, "loss": 0.034, "step": 9100 }, { "epoch": 2.2932897712993214, "grad_norm": 1.230726718902588, "learning_rate": 4.053793103448276e-06, "loss": 0.0319, "step": 9125 }, { "epoch": 2.299572756974114, "grad_norm": 1.408710241317749, "learning_rate": 4.036551724137931e-06, "loss": 0.0267, "step": 9150 }, { "epoch": 2.3058557426489066, "grad_norm": 1.5532382726669312, "learning_rate": 4.019310344827587e-06, "loss": 0.0335, "step": 9175 }, { "epoch": 2.3121387283236996, "grad_norm": 1.1856962442398071, "learning_rate": 4.002068965517242e-06, "loss": 0.0346, "step": 9200 }, { "epoch": 2.318421713998492, "grad_norm": 1.0790759325027466, "learning_rate": 3.9848275862068965e-06, "loss": 0.0339, "step": 9225 }, { "epoch": 2.3247046996732847, "grad_norm": 0.7154790759086609, "learning_rate": 3.967586206896552e-06, "loss": 0.0353, "step": 9250 }, { "epoch": 2.3309876853480773, "grad_norm": 0.8203781843185425, "learning_rate": 3.950344827586207e-06, "loss": 0.0392, "step": 9275 }, { "epoch": 2.33727067102287, "grad_norm": 1.3644154071807861, "learning_rate": 3.933103448275862e-06, "loss": 0.0333, "step": 9300 }, { "epoch": 2.343553656697663, "grad_norm": 0.7084758877754211, "learning_rate": 3.915862068965518e-06, "loss": 0.0309, "step": 9325 }, { "epoch": 2.3498366423724555, "grad_norm": 0.7974054217338562, "learning_rate": 3.898620689655173e-06, "loss": 0.0337, "step": 9350 }, { "epoch": 2.356119628047248, "grad_norm": 0.9612919092178345, "learning_rate": 3.8813793103448275e-06, "loss": 0.0358, "step": 9375 }, { "epoch": 2.3624026137220406, "grad_norm": 1.3854459524154663, "learning_rate": 3.864137931034483e-06, "loss": 0.0299, "step": 9400 }, { "epoch": 2.368685599396833, "grad_norm": 1.1167787313461304, "learning_rate": 3.846896551724138e-06, "loss": 0.0337, "step": 9425 }, { "epoch": 2.3749685850716262, "grad_norm": 0.9742668271064758, "learning_rate": 3.829655172413793e-06, "loss": 0.0286, "step": 9450 }, { "epoch": 2.381251570746419, "grad_norm": 1.435309886932373, "learning_rate": 3.8124137931034486e-06, "loss": 0.0305, "step": 9475 }, { "epoch": 2.3875345564212114, "grad_norm": 1.1362321376800537, "learning_rate": 3.795172413793104e-06, "loss": 0.0313, "step": 9500 }, { "epoch": 2.393817542096004, "grad_norm": 1.0465309619903564, "learning_rate": 3.7779310344827592e-06, "loss": 0.0364, "step": 9525 }, { "epoch": 2.4001005277707965, "grad_norm": 1.4722024202346802, "learning_rate": 3.760689655172414e-06, "loss": 0.0451, "step": 9550 }, { "epoch": 2.406383513445589, "grad_norm": 1.1084930896759033, "learning_rate": 3.7434482758620694e-06, "loss": 0.0349, "step": 9575 }, { "epoch": 2.412666499120382, "grad_norm": 1.4382020235061646, "learning_rate": 3.7262068965517247e-06, "loss": 0.0338, "step": 9600 }, { "epoch": 2.4189494847951747, "grad_norm": 1.6590332984924316, "learning_rate": 3.7089655172413795e-06, "loss": 0.0335, "step": 9625 }, { "epoch": 2.4252324704699673, "grad_norm": 0.7975425720214844, "learning_rate": 3.691724137931035e-06, "loss": 0.0371, "step": 9650 }, { "epoch": 2.43151545614476, "grad_norm": 0.9135144352912903, "learning_rate": 3.67448275862069e-06, "loss": 0.0384, "step": 9675 }, { "epoch": 2.4377984418195524, "grad_norm": 1.74324369430542, "learning_rate": 3.657241379310345e-06, "loss": 0.029, "step": 9700 }, { "epoch": 2.4440814274943454, "grad_norm": 0.9986597299575806, "learning_rate": 3.6400000000000003e-06, "loss": 0.0363, "step": 9725 }, { "epoch": 2.450364413169138, "grad_norm": 0.8304340839385986, "learning_rate": 3.622758620689655e-06, "loss": 0.0337, "step": 9750 }, { "epoch": 2.4566473988439306, "grad_norm": 0.6401971578598022, "learning_rate": 3.6055172413793105e-06, "loss": 0.0309, "step": 9775 }, { "epoch": 2.462930384518723, "grad_norm": 1.2303663492202759, "learning_rate": 3.5882758620689658e-06, "loss": 0.0333, "step": 9800 }, { "epoch": 2.4692133701935157, "grad_norm": 1.2973604202270508, "learning_rate": 3.5710344827586206e-06, "loss": 0.0346, "step": 9825 }, { "epoch": 2.4754963558683087, "grad_norm": 1.0538204908370972, "learning_rate": 3.553793103448276e-06, "loss": 0.0335, "step": 9850 }, { "epoch": 2.4817793415431013, "grad_norm": 1.006469964981079, "learning_rate": 3.5365517241379316e-06, "loss": 0.0319, "step": 9875 }, { "epoch": 2.488062327217894, "grad_norm": 1.2951979637145996, "learning_rate": 3.5193103448275865e-06, "loss": 0.0309, "step": 9900 }, { "epoch": 2.4943453128926865, "grad_norm": 1.205333948135376, "learning_rate": 3.502068965517242e-06, "loss": 0.0322, "step": 9925 }, { "epoch": 2.500628298567479, "grad_norm": 1.051314115524292, "learning_rate": 3.484827586206897e-06, "loss": 0.0347, "step": 9950 }, { "epoch": 2.506911284242272, "grad_norm": 1.1379516124725342, "learning_rate": 3.467586206896552e-06, "loss": 0.0297, "step": 9975 }, { "epoch": 2.5131942699170646, "grad_norm": 1.4470369815826416, "learning_rate": 3.4503448275862073e-06, "loss": 0.0344, "step": 10000 }, { "epoch": 2.5131942699170646, "eval_loss": 0.13530105352401733, "eval_runtime": 28721.0802, "eval_samples_per_second": 0.68, "eval_steps_per_second": 0.085, "eval_wer": 36.4960604331718, "step": 10000 } ], "logging_steps": 25, "max_steps": 15000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.6324881677254656e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }