{ "best_metric": 0.08493312855954438, "best_model_checkpoint": "d:\\\\whisper-large-v3-pt-cv19-fleurs\\checkpoint-25000", "epoch": 22.88329519450801, "eval_steps": 5000, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011441647597254004, "grad_norm": 6.62877082824707, "learning_rate": 1.5625000000000003e-08, "loss": 0.7265, "step": 25 }, { "epoch": 0.02288329519450801, "grad_norm": Infinity, "learning_rate": 3e-08, "loss": 1.2394, "step": 50 }, { "epoch": 0.034324942791762014, "grad_norm": 5.83213996887207, "learning_rate": 4.5625e-08, "loss": 0.721, "step": 75 }, { "epoch": 0.04576659038901602, "grad_norm": 19.50699806213379, "learning_rate": 6.125e-08, "loss": 1.2406, "step": 100 }, { "epoch": 0.057208237986270026, "grad_norm": 6.239120960235596, "learning_rate": 7.6875e-08, "loss": 0.7169, "step": 125 }, { "epoch": 0.06864988558352403, "grad_norm": 18.885549545288086, "learning_rate": 9.250000000000001e-08, "loss": 1.2154, "step": 150 }, { "epoch": 0.08009153318077804, "grad_norm": 7.490365982055664, "learning_rate": 1.08125e-07, "loss": 0.6488, "step": 175 }, { "epoch": 0.09153318077803203, "grad_norm": 15.990517616271973, "learning_rate": 1.2375e-07, "loss": 1.0847, "step": 200 }, { "epoch": 0.10297482837528604, "grad_norm": 7.532449722290039, "learning_rate": 1.3937500000000002e-07, "loss": 0.5148, "step": 225 }, { "epoch": 0.11441647597254005, "grad_norm": 15.39826774597168, "learning_rate": 1.55e-07, "loss": 0.6084, "step": 250 }, { "epoch": 0.12585812356979406, "grad_norm": 4.829259872436523, "learning_rate": 1.70625e-07, "loss": 0.2526, "step": 275 }, { "epoch": 0.13729977116704806, "grad_norm": 14.0956392288208, "learning_rate": 1.8625000000000002e-07, "loss": 0.3234, "step": 300 }, { "epoch": 0.14874141876430205, "grad_norm": 3.5299148559570312, "learning_rate": 2.0187500000000003e-07, "loss": 0.1732, "step": 325 }, { "epoch": 0.16018306636155608, "grad_norm": 14.003329277038574, "learning_rate": 2.1749999999999998e-07, "loss": 0.228, "step": 350 }, { "epoch": 0.17162471395881007, "grad_norm": 3.2087361812591553, "learning_rate": 2.3312500000000002e-07, "loss": 0.1796, "step": 375 }, { "epoch": 0.18306636155606407, "grad_norm": 13.039973258972168, "learning_rate": 2.4875e-07, "loss": 0.2275, "step": 400 }, { "epoch": 0.1945080091533181, "grad_norm": 3.967797040939331, "learning_rate": 2.64375e-07, "loss": 0.1404, "step": 425 }, { "epoch": 0.20594965675057209, "grad_norm": 13.055093765258789, "learning_rate": 2.8e-07, "loss": 0.1859, "step": 450 }, { "epoch": 0.21739130434782608, "grad_norm": 2.9818637371063232, "learning_rate": 2.9562500000000003e-07, "loss": 0.1438, "step": 475 }, { "epoch": 0.2288329519450801, "grad_norm": 8.552623748779297, "learning_rate": 3.1125e-07, "loss": 0.1653, "step": 500 }, { "epoch": 0.2402745995423341, "grad_norm": 5.015272617340088, "learning_rate": 3.26875e-07, "loss": 0.1247, "step": 525 }, { "epoch": 0.2517162471395881, "grad_norm": 8.722563743591309, "learning_rate": 3.425e-07, "loss": 0.1648, "step": 550 }, { "epoch": 0.2631578947368421, "grad_norm": 3.586549997329712, "learning_rate": 3.58125e-07, "loss": 0.1415, "step": 575 }, { "epoch": 0.2745995423340961, "grad_norm": 14.463945388793945, "learning_rate": 3.7375e-07, "loss": 0.1626, "step": 600 }, { "epoch": 0.28604118993135014, "grad_norm": 4.430425643920898, "learning_rate": 3.8937500000000003e-07, "loss": 0.1349, "step": 625 }, { "epoch": 0.2974828375286041, "grad_norm": 12.54709243774414, 
"learning_rate": 4.05e-07, "loss": 0.1453, "step": 650 }, { "epoch": 0.30892448512585813, "grad_norm": 4.3557939529418945, "learning_rate": 4.20625e-07, "loss": 0.1385, "step": 675 }, { "epoch": 0.32036613272311215, "grad_norm": 7.513607501983643, "learning_rate": 4.3625e-07, "loss": 0.1605, "step": 700 }, { "epoch": 0.3318077803203661, "grad_norm": 4.362117767333984, "learning_rate": 4.5187500000000007e-07, "loss": 0.1219, "step": 725 }, { "epoch": 0.34324942791762014, "grad_norm": 9.83281421661377, "learning_rate": 4.675000000000001e-07, "loss": 0.1569, "step": 750 }, { "epoch": 0.35469107551487417, "grad_norm": 3.5240659713745117, "learning_rate": 4.83125e-07, "loss": 0.1357, "step": 775 }, { "epoch": 0.36613272311212813, "grad_norm": 8.545495986938477, "learning_rate": 4.9875e-07, "loss": 0.1462, "step": 800 }, { "epoch": 0.37757437070938216, "grad_norm": 4.000309467315674, "learning_rate": 5.14375e-07, "loss": 0.1222, "step": 825 }, { "epoch": 0.3890160183066362, "grad_norm": 10.31431770324707, "learning_rate": 5.3e-07, "loss": 0.1344, "step": 850 }, { "epoch": 0.40045766590389015, "grad_norm": 2.6014370918273926, "learning_rate": 5.456250000000001e-07, "loss": 0.1284, "step": 875 }, { "epoch": 0.41189931350114417, "grad_norm": 7.926617622375488, "learning_rate": 5.6125e-07, "loss": 0.1414, "step": 900 }, { "epoch": 0.4233409610983982, "grad_norm": 4.362987518310547, "learning_rate": 5.76875e-07, "loss": 0.1258, "step": 925 }, { "epoch": 0.43478260869565216, "grad_norm": 8.642595291137695, "learning_rate": 5.925e-07, "loss": 0.1303, "step": 950 }, { "epoch": 0.4462242562929062, "grad_norm": 2.2307755947113037, "learning_rate": 6.08125e-07, "loss": 0.1266, "step": 975 }, { "epoch": 0.4576659038901602, "grad_norm": 10.151159286499023, "learning_rate": 6.237500000000001e-07, "loss": 0.1332, "step": 1000 }, { "epoch": 0.4691075514874142, "grad_norm": 3.0874152183532715, "learning_rate": 6.39375e-07, "loss": 0.1192, "step": 1025 }, { "epoch": 0.4805491990846682, "grad_norm": 7.633583068847656, "learning_rate": 6.550000000000001e-07, "loss": 0.1204, "step": 1050 }, { "epoch": 0.4919908466819222, "grad_norm": 2.0958948135375977, "learning_rate": 6.70625e-07, "loss": 0.1142, "step": 1075 }, { "epoch": 0.5034324942791762, "grad_norm": 7.018194198608398, "learning_rate": 6.8625e-07, "loss": 0.1014, "step": 1100 }, { "epoch": 0.5148741418764302, "grad_norm": 5.929811000823975, "learning_rate": 7.018750000000001e-07, "loss": 0.1251, "step": 1125 }, { "epoch": 0.5263157894736842, "grad_norm": 11.825408935546875, "learning_rate": 7.175e-07, "loss": 0.1426, "step": 1150 }, { "epoch": 0.5377574370709383, "grad_norm": 1.4950259923934937, "learning_rate": 7.331250000000001e-07, "loss": 0.1195, "step": 1175 }, { "epoch": 0.5491990846681922, "grad_norm": 3.756826639175415, "learning_rate": 7.4875e-07, "loss": 0.1139, "step": 1200 }, { "epoch": 0.5606407322654462, "grad_norm": 1.7584425210952759, "learning_rate": 7.643750000000001e-07, "loss": 0.1241, "step": 1225 }, { "epoch": 0.5720823798627003, "grad_norm": 9.659570693969727, "learning_rate": 7.8e-07, "loss": 0.1175, "step": 1250 }, { "epoch": 0.5835240274599542, "grad_norm": 2.339547634124756, "learning_rate": 7.95625e-07, "loss": 0.1153, "step": 1275 }, { "epoch": 0.5949656750572082, "grad_norm": 8.946951866149902, "learning_rate": 8.112500000000001e-07, "loss": 0.1278, "step": 1300 }, { "epoch": 0.6064073226544623, "grad_norm": 1.9447693824768066, "learning_rate": 8.26875e-07, "loss": 0.1213, "step": 1325 }, { "epoch": 0.6178489702517163, 
"grad_norm": 16.7085018157959, "learning_rate": 8.425000000000001e-07, "loss": 0.14, "step": 1350 }, { "epoch": 0.6292906178489702, "grad_norm": 2.692288875579834, "learning_rate": 8.58125e-07, "loss": 0.1279, "step": 1375 }, { "epoch": 0.6407322654462243, "grad_norm": 8.288985252380371, "learning_rate": 8.737500000000001e-07, "loss": 0.1041, "step": 1400 }, { "epoch": 0.6521739130434783, "grad_norm": 2.5615968704223633, "learning_rate": 8.893750000000001e-07, "loss": 0.1207, "step": 1425 }, { "epoch": 0.6636155606407322, "grad_norm": 5.402917861938477, "learning_rate": 9.050000000000001e-07, "loss": 0.1209, "step": 1450 }, { "epoch": 0.6750572082379863, "grad_norm": 3.1846108436584473, "learning_rate": 9.20625e-07, "loss": 0.1131, "step": 1475 }, { "epoch": 0.6864988558352403, "grad_norm": 7.106233596801758, "learning_rate": 9.362499999999999e-07, "loss": 0.1206, "step": 1500 }, { "epoch": 0.6979405034324943, "grad_norm": 1.574010968208313, "learning_rate": 9.51875e-07, "loss": 0.1169, "step": 1525 }, { "epoch": 0.7093821510297483, "grad_norm": 12.645787239074707, "learning_rate": 9.675e-07, "loss": 0.153, "step": 1550 }, { "epoch": 0.7208237986270023, "grad_norm": 2.3170979022979736, "learning_rate": 9.83125e-07, "loss": 0.1196, "step": 1575 }, { "epoch": 0.7322654462242563, "grad_norm": 6.579622745513916, "learning_rate": 9.9875e-07, "loss": 0.1112, "step": 1600 }, { "epoch": 0.7437070938215103, "grad_norm": 3.6276774406433105, "learning_rate": 1.0143750000000001e-06, "loss": 0.1174, "step": 1625 }, { "epoch": 0.7551487414187643, "grad_norm": 10.94548511505127, "learning_rate": 1.03e-06, "loss": 0.1115, "step": 1650 }, { "epoch": 0.7665903890160183, "grad_norm": 4.662591934204102, "learning_rate": 1.045625e-06, "loss": 0.103, "step": 1675 }, { "epoch": 0.7780320366132724, "grad_norm": 11.371915817260742, "learning_rate": 1.06125e-06, "loss": 0.1163, "step": 1700 }, { "epoch": 0.7894736842105263, "grad_norm": 2.4760260581970215, "learning_rate": 1.0768750000000002e-06, "loss": 0.1047, "step": 1725 }, { "epoch": 0.8009153318077803, "grad_norm": 7.711414337158203, "learning_rate": 1.0925000000000001e-06, "loss": 0.1167, "step": 1750 }, { "epoch": 0.8123569794050344, "grad_norm": 3.2793569564819336, "learning_rate": 1.108125e-06, "loss": 0.119, "step": 1775 }, { "epoch": 0.8237986270022883, "grad_norm": 7.447999000549316, "learning_rate": 1.12375e-06, "loss": 0.1219, "step": 1800 }, { "epoch": 0.8352402745995423, "grad_norm": 1.265462040901184, "learning_rate": 1.139375e-06, "loss": 0.1011, "step": 1825 }, { "epoch": 0.8466819221967964, "grad_norm": 26.126134872436523, "learning_rate": 1.155e-06, "loss": 0.132, "step": 1850 }, { "epoch": 0.8581235697940504, "grad_norm": 2.147458553314209, "learning_rate": 1.1706250000000001e-06, "loss": 0.1171, "step": 1875 }, { "epoch": 0.8695652173913043, "grad_norm": 11.847962379455566, "learning_rate": 1.18625e-06, "loss": 0.1223, "step": 1900 }, { "epoch": 0.8810068649885584, "grad_norm": 1.5704588890075684, "learning_rate": 1.201875e-06, "loss": 0.1207, "step": 1925 }, { "epoch": 0.8924485125858124, "grad_norm": 12.10556411743164, "learning_rate": 1.2175e-06, "loss": 0.0989, "step": 1950 }, { "epoch": 0.9038901601830663, "grad_norm": 3.2033963203430176, "learning_rate": 1.2331250000000002e-06, "loss": 0.1027, "step": 1975 }, { "epoch": 0.9153318077803204, "grad_norm": 6.506619930267334, "learning_rate": 1.2487500000000001e-06, "loss": 0.1426, "step": 2000 }, { "epoch": 0.9267734553775744, "grad_norm": 2.171173095703125, "learning_rate": 1.264375e-06, 
"loss": 0.1036, "step": 2025 }, { "epoch": 0.9382151029748284, "grad_norm": 8.255300521850586, "learning_rate": 1.28e-06, "loss": 0.1328, "step": 2050 }, { "epoch": 0.9496567505720824, "grad_norm": 4.062456130981445, "learning_rate": 1.2956250000000002e-06, "loss": 0.1175, "step": 2075 }, { "epoch": 0.9610983981693364, "grad_norm": 7.572447776794434, "learning_rate": 1.31125e-06, "loss": 0.1377, "step": 2100 }, { "epoch": 0.9725400457665904, "grad_norm": 1.6148779392242432, "learning_rate": 1.326875e-06, "loss": 0.1134, "step": 2125 }, { "epoch": 0.9839816933638444, "grad_norm": 5.8796186447143555, "learning_rate": 1.3425e-06, "loss": 0.1233, "step": 2150 }, { "epoch": 0.9954233409610984, "grad_norm": 2.6012580394744873, "learning_rate": 1.358125e-06, "loss": 0.1141, "step": 2175 }, { "epoch": 1.0068649885583525, "grad_norm": 2.369357109069824, "learning_rate": 1.37375e-06, "loss": 0.0964, "step": 2200 }, { "epoch": 1.0183066361556063, "grad_norm": 3.426637887954712, "learning_rate": 1.3893750000000001e-06, "loss": 0.0844, "step": 2225 }, { "epoch": 1.0297482837528604, "grad_norm": 2.0123000144958496, "learning_rate": 1.405e-06, "loss": 0.1125, "step": 2250 }, { "epoch": 1.0411899313501145, "grad_norm": 2.8704395294189453, "learning_rate": 1.420625e-06, "loss": 0.0827, "step": 2275 }, { "epoch": 1.0526315789473684, "grad_norm": 2.3413915634155273, "learning_rate": 1.43625e-06, "loss": 0.108, "step": 2300 }, { "epoch": 1.0640732265446224, "grad_norm": 1.216963291168213, "learning_rate": 1.4518750000000002e-06, "loss": 0.0766, "step": 2325 }, { "epoch": 1.0755148741418765, "grad_norm": 2.2277841567993164, "learning_rate": 1.4675000000000001e-06, "loss": 0.1035, "step": 2350 }, { "epoch": 1.0869565217391304, "grad_norm": 3.6448659896850586, "learning_rate": 1.483125e-06, "loss": 0.0749, "step": 2375 }, { "epoch": 1.0983981693363845, "grad_norm": 1.5013147592544556, "learning_rate": 1.49875e-06, "loss": 0.111, "step": 2400 }, { "epoch": 1.1098398169336385, "grad_norm": 3.0823745727539062, "learning_rate": 1.514375e-06, "loss": 0.0842, "step": 2425 }, { "epoch": 1.1212814645308924, "grad_norm": 1.238543152809143, "learning_rate": 1.53e-06, "loss": 0.0978, "step": 2450 }, { "epoch": 1.1327231121281465, "grad_norm": 4.413508415222168, "learning_rate": 1.545625e-06, "loss": 0.0759, "step": 2475 }, { "epoch": 1.1441647597254005, "grad_norm": 2.655590057373047, "learning_rate": 1.56125e-06, "loss": 0.1061, "step": 2500 }, { "epoch": 1.1556064073226544, "grad_norm": 3.8559327125549316, "learning_rate": 1.5768750000000003e-06, "loss": 0.0876, "step": 2525 }, { "epoch": 1.1670480549199085, "grad_norm": 2.452881336212158, "learning_rate": 1.5925000000000002e-06, "loss": 0.1068, "step": 2550 }, { "epoch": 1.1784897025171626, "grad_norm": 3.86460280418396, "learning_rate": 1.608125e-06, "loss": 0.079, "step": 2575 }, { "epoch": 1.1899313501144164, "grad_norm": 1.9224119186401367, "learning_rate": 1.62375e-06, "loss": 0.1139, "step": 2600 }, { "epoch": 1.2013729977116705, "grad_norm": 7.143287181854248, "learning_rate": 1.6393749999999999e-06, "loss": 0.0802, "step": 2625 }, { "epoch": 1.2128146453089246, "grad_norm": 1.8962572813034058, "learning_rate": 1.655e-06, "loss": 0.104, "step": 2650 }, { "epoch": 1.2242562929061784, "grad_norm": 3.0214734077453613, "learning_rate": 1.670625e-06, "loss": 0.0711, "step": 2675 }, { "epoch": 1.2356979405034325, "grad_norm": 2.490997314453125, "learning_rate": 1.68625e-06, "loss": 0.0987, "step": 2700 }, { "epoch": 1.2471395881006866, "grad_norm": 3.6045401096343994, 
"learning_rate": 1.701875e-06, "loss": 0.0968, "step": 2725 }, { "epoch": 1.2585812356979404, "grad_norm": 2.8490657806396484, "learning_rate": 1.7175e-06, "loss": 0.1124, "step": 2750 }, { "epoch": 1.2700228832951945, "grad_norm": 6.608100414276123, "learning_rate": 1.733125e-06, "loss": 0.0855, "step": 2775 }, { "epoch": 1.2814645308924484, "grad_norm": 2.4767751693725586, "learning_rate": 1.74875e-06, "loss": 0.1073, "step": 2800 }, { "epoch": 1.2929061784897025, "grad_norm": 5.4979047775268555, "learning_rate": 1.7643750000000002e-06, "loss": 0.0772, "step": 2825 }, { "epoch": 1.3043478260869565, "grad_norm": 2.7516839504241943, "learning_rate": 1.7800000000000001e-06, "loss": 0.111, "step": 2850 }, { "epoch": 1.3157894736842106, "grad_norm": 3.251453161239624, "learning_rate": 1.795625e-06, "loss": 0.0759, "step": 2875 }, { "epoch": 1.3272311212814645, "grad_norm": 3.175234079360962, "learning_rate": 1.81125e-06, "loss": 0.123, "step": 2900 }, { "epoch": 1.3386727688787186, "grad_norm": 3.5862858295440674, "learning_rate": 1.8268750000000002e-06, "loss": 0.0735, "step": 2925 }, { "epoch": 1.3501144164759724, "grad_norm": 2.170401096343994, "learning_rate": 1.8425000000000001e-06, "loss": 0.1189, "step": 2950 }, { "epoch": 1.3615560640732265, "grad_norm": 3.134989023208618, "learning_rate": 1.858125e-06, "loss": 0.081, "step": 2975 }, { "epoch": 1.3729977116704806, "grad_norm": 1.6760380268096924, "learning_rate": 1.87375e-06, "loss": 0.1074, "step": 3000 }, { "epoch": 1.3844393592677346, "grad_norm": 3.4386298656463623, "learning_rate": 1.8893750000000002e-06, "loss": 0.0704, "step": 3025 }, { "epoch": 1.3958810068649885, "grad_norm": 2.0835421085357666, "learning_rate": 1.9050000000000002e-06, "loss": 0.1112, "step": 3050 }, { "epoch": 1.4073226544622426, "grad_norm": 4.792001247406006, "learning_rate": 1.9206250000000004e-06, "loss": 0.0914, "step": 3075 }, { "epoch": 1.4187643020594964, "grad_norm": 2.2697513103485107, "learning_rate": 1.9362500000000003e-06, "loss": 0.1044, "step": 3100 }, { "epoch": 1.4302059496567505, "grad_norm": 1.7390592098236084, "learning_rate": 1.9518750000000003e-06, "loss": 0.0789, "step": 3125 }, { "epoch": 1.4416475972540046, "grad_norm": 2.5395240783691406, "learning_rate": 1.9675000000000002e-06, "loss": 0.1103, "step": 3150 }, { "epoch": 1.4530892448512587, "grad_norm": 9.415529251098633, "learning_rate": 1.983125e-06, "loss": 0.0815, "step": 3175 }, { "epoch": 1.4645308924485125, "grad_norm": 3.2418212890625, "learning_rate": 1.99875e-06, "loss": 0.0877, "step": 3200 }, { "epoch": 1.4759725400457666, "grad_norm": 2.5140135288238525, "learning_rate": 2.014375e-06, "loss": 0.072, "step": 3225 }, { "epoch": 1.4874141876430205, "grad_norm": 1.8860526084899902, "learning_rate": 2.03e-06, "loss": 0.1082, "step": 3250 }, { "epoch": 1.4988558352402745, "grad_norm": 3.322282075881958, "learning_rate": 2.045625e-06, "loss": 0.0703, "step": 3275 }, { "epoch": 1.5102974828375286, "grad_norm": 1.9170950651168823, "learning_rate": 2.06125e-06, "loss": 0.1038, "step": 3300 }, { "epoch": 1.5217391304347827, "grad_norm": 3.269129753112793, "learning_rate": 2.076875e-06, "loss": 0.0862, "step": 3325 }, { "epoch": 1.5331807780320366, "grad_norm": 2.319660186767578, "learning_rate": 2.0925e-06, "loss": 0.1168, "step": 3350 }, { "epoch": 1.5446224256292906, "grad_norm": 2.9521560668945312, "learning_rate": 2.108125e-06, "loss": 0.083, "step": 3375 }, { "epoch": 1.5560640732265445, "grad_norm": 2.316248655319214, "learning_rate": 2.12375e-06, "loss": 0.1144, "step": 
3400 }, { "epoch": 1.5675057208237986, "grad_norm": 3.6519219875335693, "learning_rate": 2.139375e-06, "loss": 0.0833, "step": 3425 }, { "epoch": 1.5789473684210527, "grad_norm": 1.8947266340255737, "learning_rate": 2.155e-06, "loss": 0.1178, "step": 3450 }, { "epoch": 1.5903890160183067, "grad_norm": 5.7655158042907715, "learning_rate": 2.170625e-06, "loss": 0.0706, "step": 3475 }, { "epoch": 1.6018306636155606, "grad_norm": 2.7136037349700928, "learning_rate": 2.18625e-06, "loss": 0.1206, "step": 3500 }, { "epoch": 1.6132723112128147, "grad_norm": 2.735067844390869, "learning_rate": 2.201875e-06, "loss": 0.0799, "step": 3525 }, { "epoch": 1.6247139588100685, "grad_norm": 2.059816837310791, "learning_rate": 2.2175e-06, "loss": 0.094, "step": 3550 }, { "epoch": 1.6361556064073226, "grad_norm": 2.8907694816589355, "learning_rate": 2.2331250000000003e-06, "loss": 0.0894, "step": 3575 }, { "epoch": 1.6475972540045767, "grad_norm": 1.6282504796981812, "learning_rate": 2.2487500000000003e-06, "loss": 0.1034, "step": 3600 }, { "epoch": 1.6590389016018308, "grad_norm": 2.726628303527832, "learning_rate": 2.2643750000000002e-06, "loss": 0.0789, "step": 3625 }, { "epoch": 1.6704805491990846, "grad_norm": 2.466552257537842, "learning_rate": 2.28e-06, "loss": 0.0927, "step": 3650 }, { "epoch": 1.6819221967963387, "grad_norm": 2.815573215484619, "learning_rate": 2.295625e-06, "loss": 0.086, "step": 3675 }, { "epoch": 1.6933638443935926, "grad_norm": 2.731748104095459, "learning_rate": 2.31125e-06, "loss": 0.1175, "step": 3700 }, { "epoch": 1.7048054919908466, "grad_norm": 3.349738836288452, "learning_rate": 2.326875e-06, "loss": 0.0763, "step": 3725 }, { "epoch": 1.7162471395881007, "grad_norm": 2.117974042892456, "learning_rate": 2.3425000000000004e-06, "loss": 0.1058, "step": 3750 }, { "epoch": 1.7276887871853548, "grad_norm": 3.7518179416656494, "learning_rate": 2.3581250000000004e-06, "loss": 0.0689, "step": 3775 }, { "epoch": 1.7391304347826086, "grad_norm": 2.805143356323242, "learning_rate": 2.3737500000000004e-06, "loss": 0.1188, "step": 3800 }, { "epoch": 1.7505720823798627, "grad_norm": 5.231779098510742, "learning_rate": 2.389375e-06, "loss": 0.0884, "step": 3825 }, { "epoch": 1.7620137299771166, "grad_norm": 2.7431952953338623, "learning_rate": 2.405e-06, "loss": 0.1073, "step": 3850 }, { "epoch": 1.7734553775743707, "grad_norm": 3.7958285808563232, "learning_rate": 2.420625e-06, "loss": 0.0754, "step": 3875 }, { "epoch": 1.7848970251716247, "grad_norm": 3.2921016216278076, "learning_rate": 2.43625e-06, "loss": 0.1042, "step": 3900 }, { "epoch": 1.7963386727688788, "grad_norm": 4.16481876373291, "learning_rate": 2.451875e-06, "loss": 0.0687, "step": 3925 }, { "epoch": 1.8077803203661327, "grad_norm": 1.6314984560012817, "learning_rate": 2.4675e-06, "loss": 0.1265, "step": 3950 }, { "epoch": 1.8192219679633868, "grad_norm": 3.358804702758789, "learning_rate": 2.483125e-06, "loss": 0.0683, "step": 3975 }, { "epoch": 1.8306636155606406, "grad_norm": 2.8832662105560303, "learning_rate": 2.49875e-06, "loss": 0.0982, "step": 4000 }, { "epoch": 1.8421052631578947, "grad_norm": 2.2557880878448486, "learning_rate": 2.514375e-06, "loss": 0.0716, "step": 4025 }, { "epoch": 1.8535469107551488, "grad_norm": 2.0373599529266357, "learning_rate": 2.53e-06, "loss": 0.1072, "step": 4050 }, { "epoch": 1.8649885583524028, "grad_norm": 2.316436767578125, "learning_rate": 2.5456250000000003e-06, "loss": 0.0784, "step": 4075 }, { "epoch": 1.8764302059496567, "grad_norm": 1.2671200037002563, "learning_rate": 
2.5612500000000003e-06, "loss": 0.0996, "step": 4100 }, { "epoch": 1.8878718535469108, "grad_norm": 3.7395412921905518, "learning_rate": 2.576875e-06, "loss": 0.0651, "step": 4125 }, { "epoch": 1.8993135011441646, "grad_norm": 1.91667640209198, "learning_rate": 2.5925e-06, "loss": 0.1002, "step": 4150 }, { "epoch": 1.9107551487414187, "grad_norm": 4.17740535736084, "learning_rate": 2.608125e-06, "loss": 0.0813, "step": 4175 }, { "epoch": 1.9221967963386728, "grad_norm": 2.539367198944092, "learning_rate": 2.62375e-06, "loss": 0.1104, "step": 4200 }, { "epoch": 1.9336384439359269, "grad_norm": 2.484417200088501, "learning_rate": 2.639375e-06, "loss": 0.0836, "step": 4225 }, { "epoch": 1.9450800915331807, "grad_norm": 2.7852723598480225, "learning_rate": 2.655e-06, "loss": 0.1162, "step": 4250 }, { "epoch": 1.9565217391304348, "grad_norm": 3.9952969551086426, "learning_rate": 2.6706250000000004e-06, "loss": 0.0832, "step": 4275 }, { "epoch": 1.9679633867276887, "grad_norm": 2.994462490081787, "learning_rate": 2.6862500000000003e-06, "loss": 0.1299, "step": 4300 }, { "epoch": 1.9794050343249427, "grad_norm": 2.588834047317505, "learning_rate": 2.7018750000000003e-06, "loss": 0.0821, "step": 4325 }, { "epoch": 1.9908466819221968, "grad_norm": 1.583375334739685, "learning_rate": 2.7175000000000002e-06, "loss": 0.091, "step": 4350 }, { "epoch": 2.002288329519451, "grad_norm": 6.827860355377197, "learning_rate": 2.733125e-06, "loss": 0.0953, "step": 4375 }, { "epoch": 2.013729977116705, "grad_norm": 3.3871593475341797, "learning_rate": 2.74875e-06, "loss": 0.0436, "step": 4400 }, { "epoch": 2.0251716247139586, "grad_norm": 3.1622588634490967, "learning_rate": 2.764375e-06, "loss": 0.0819, "step": 4425 }, { "epoch": 2.0366132723112127, "grad_norm": 2.5629935264587402, "learning_rate": 2.78e-06, "loss": 0.0467, "step": 4450 }, { "epoch": 2.0480549199084668, "grad_norm": 2.397113561630249, "learning_rate": 2.795625e-06, "loss": 0.0788, "step": 4475 }, { "epoch": 2.059496567505721, "grad_norm": 3.9952454566955566, "learning_rate": 2.81125e-06, "loss": 0.0544, "step": 4500 }, { "epoch": 2.070938215102975, "grad_norm": 2.8579368591308594, "learning_rate": 2.826875e-06, "loss": 0.0675, "step": 4525 }, { "epoch": 2.082379862700229, "grad_norm": 2.419980525970459, "learning_rate": 2.8425e-06, "loss": 0.0479, "step": 4550 }, { "epoch": 2.0938215102974826, "grad_norm": 2.3462846279144287, "learning_rate": 2.8575e-06, "loss": 0.0692, "step": 4575 }, { "epoch": 2.1052631578947367, "grad_norm": 2.523151397705078, "learning_rate": 2.873125e-06, "loss": 0.0466, "step": 4600 }, { "epoch": 2.116704805491991, "grad_norm": 1.8373658657073975, "learning_rate": 2.88875e-06, "loss": 0.0596, "step": 4625 }, { "epoch": 2.128146453089245, "grad_norm": 2.7236149311065674, "learning_rate": 2.904375e-06, "loss": 0.0465, "step": 4650 }, { "epoch": 2.139588100686499, "grad_norm": 3.013388156890869, "learning_rate": 2.92e-06, "loss": 0.0829, "step": 4675 }, { "epoch": 2.151029748283753, "grad_norm": 1.6054365634918213, "learning_rate": 2.9356250000000004e-06, "loss": 0.0447, "step": 4700 }, { "epoch": 2.1624713958810067, "grad_norm": 1.8882802724838257, "learning_rate": 2.9512500000000003e-06, "loss": 0.0634, "step": 4725 }, { "epoch": 2.1739130434782608, "grad_norm": 4.886575698852539, "learning_rate": 2.9668750000000003e-06, "loss": 0.0543, "step": 4750 }, { "epoch": 2.185354691075515, "grad_norm": 1.4552139043807983, "learning_rate": 2.9825000000000002e-06, "loss": 0.0697, "step": 4775 }, { "epoch": 2.196796338672769, 
"grad_norm": 2.0052905082702637, "learning_rate": 2.998125e-06, "loss": 0.0562, "step": 4800 }, { "epoch": 2.208237986270023, "grad_norm": 2.3724350929260254, "learning_rate": 3.01375e-06, "loss": 0.0718, "step": 4825 }, { "epoch": 2.219679633867277, "grad_norm": 1.8292073011398315, "learning_rate": 3.029375e-06, "loss": 0.055, "step": 4850 }, { "epoch": 2.2311212814645307, "grad_norm": 1.8585909605026245, "learning_rate": 3.0450000000000005e-06, "loss": 0.071, "step": 4875 }, { "epoch": 2.242562929061785, "grad_norm": 3.8988568782806396, "learning_rate": 3.0606250000000004e-06, "loss": 0.0464, "step": 4900 }, { "epoch": 2.254004576659039, "grad_norm": 2.759815216064453, "learning_rate": 3.0762500000000004e-06, "loss": 0.0684, "step": 4925 }, { "epoch": 2.265446224256293, "grad_norm": 1.6180821657180786, "learning_rate": 3.091875e-06, "loss": 0.0684, "step": 4950 }, { "epoch": 2.276887871853547, "grad_norm": 1.9958300590515137, "learning_rate": 3.1075e-06, "loss": 0.0676, "step": 4975 }, { "epoch": 2.288329519450801, "grad_norm": 4.299381732940674, "learning_rate": 3.123125e-06, "loss": 0.0559, "step": 5000 }, { "epoch": 2.288329519450801, "eval_loss": 0.10962941497564316, "eval_runtime": 8536.2165, "eval_samples_per_second": 1.115, "eval_steps_per_second": 0.14, "eval_wer": 0.07299865617200998, "step": 5000 }, { "epoch": 2.2997711670480547, "grad_norm": 1.8799129724502563, "learning_rate": 3.1387500000000002e-06, "loss": 0.0648, "step": 5025 }, { "epoch": 2.311212814645309, "grad_norm": 2.080326557159424, "learning_rate": 3.154375e-06, "loss": 0.0553, "step": 5050 }, { "epoch": 2.322654462242563, "grad_norm": 2.730989694595337, "learning_rate": 3.17e-06, "loss": 0.0716, "step": 5075 }, { "epoch": 2.334096109839817, "grad_norm": 2.361237049102783, "learning_rate": 3.1856250000000005e-06, "loss": 0.0454, "step": 5100 }, { "epoch": 2.345537757437071, "grad_norm": 1.9998853206634521, "learning_rate": 3.20125e-06, "loss": 0.0716, "step": 5125 }, { "epoch": 2.356979405034325, "grad_norm": 3.8843512535095215, "learning_rate": 3.2168750000000004e-06, "loss": 0.0657, "step": 5150 }, { "epoch": 2.3684210526315788, "grad_norm": 1.3620953559875488, "learning_rate": 3.2325e-06, "loss": 0.0768, "step": 5175 }, { "epoch": 2.379862700228833, "grad_norm": 1.574424386024475, "learning_rate": 3.2481250000000003e-06, "loss": 0.0546, "step": 5200 }, { "epoch": 2.391304347826087, "grad_norm": 1.5355968475341797, "learning_rate": 3.2637500000000003e-06, "loss": 0.0671, "step": 5225 }, { "epoch": 2.402745995423341, "grad_norm": 2.243077039718628, "learning_rate": 3.2793750000000007e-06, "loss": 0.0576, "step": 5250 }, { "epoch": 2.414187643020595, "grad_norm": 2.2297890186309814, "learning_rate": 3.2950000000000002e-06, "loss": 0.0829, "step": 5275 }, { "epoch": 2.425629290617849, "grad_norm": 2.9907617568969727, "learning_rate": 3.3106249999999997e-06, "loss": 0.052, "step": 5300 }, { "epoch": 2.437070938215103, "grad_norm": 1.9852451086044312, "learning_rate": 3.32625e-06, "loss": 0.0739, "step": 5325 }, { "epoch": 2.448512585812357, "grad_norm": 2.876899003982544, "learning_rate": 3.3418749999999997e-06, "loss": 0.0588, "step": 5350 }, { "epoch": 2.459954233409611, "grad_norm": 2.7665271759033203, "learning_rate": 3.3575e-06, "loss": 0.0701, "step": 5375 }, { "epoch": 2.471395881006865, "grad_norm": 2.6745996475219727, "learning_rate": 3.373125e-06, "loss": 0.0562, "step": 5400 }, { "epoch": 2.482837528604119, "grad_norm": 3.0907094478607178, "learning_rate": 3.3887500000000004e-06, "loss": 0.0757, "step": 
5425 }, { "epoch": 2.494279176201373, "grad_norm": 2.170896291732788, "learning_rate": 3.404375e-06, "loss": 0.0596, "step": 5450 }, { "epoch": 2.505720823798627, "grad_norm": 2.5431177616119385, "learning_rate": 3.4200000000000003e-06, "loss": 0.0775, "step": 5475 }, { "epoch": 2.517162471395881, "grad_norm": 1.530771255493164, "learning_rate": 3.435625e-06, "loss": 0.052, "step": 5500 }, { "epoch": 2.528604118993135, "grad_norm": 2.03749418258667, "learning_rate": 3.45125e-06, "loss": 0.0664, "step": 5525 }, { "epoch": 2.540045766590389, "grad_norm": 4.283966541290283, "learning_rate": 3.466875e-06, "loss": 0.0612, "step": 5550 }, { "epoch": 2.551487414187643, "grad_norm": 1.7893714904785156, "learning_rate": 3.4825000000000005e-06, "loss": 0.0813, "step": 5575 }, { "epoch": 2.5629290617848968, "grad_norm": 1.8550102710723877, "learning_rate": 3.498125e-06, "loss": 0.0551, "step": 5600 }, { "epoch": 2.5743707093821513, "grad_norm": 2.459023952484131, "learning_rate": 3.5137500000000005e-06, "loss": 0.0748, "step": 5625 }, { "epoch": 2.585812356979405, "grad_norm": 1.0705052614212036, "learning_rate": 3.529375e-06, "loss": 0.0583, "step": 5650 }, { "epoch": 2.597254004576659, "grad_norm": 2.647242546081543, "learning_rate": 3.5450000000000004e-06, "loss": 0.0655, "step": 5675 }, { "epoch": 2.608695652173913, "grad_norm": 5.434226989746094, "learning_rate": 3.560625e-06, "loss": 0.0602, "step": 5700 }, { "epoch": 2.620137299771167, "grad_norm": 3.0654892921447754, "learning_rate": 3.5762500000000003e-06, "loss": 0.0795, "step": 5725 }, { "epoch": 2.6315789473684212, "grad_norm": 1.9863752126693726, "learning_rate": 3.5918750000000002e-06, "loss": 0.0753, "step": 5750 }, { "epoch": 2.643020594965675, "grad_norm": 2.407395839691162, "learning_rate": 3.6075000000000006e-06, "loss": 0.0778, "step": 5775 }, { "epoch": 2.654462242562929, "grad_norm": 2.044715642929077, "learning_rate": 3.623125e-06, "loss": 0.0619, "step": 5800 }, { "epoch": 2.665903890160183, "grad_norm": 1.6034976243972778, "learning_rate": 3.6387500000000005e-06, "loss": 0.063, "step": 5825 }, { "epoch": 2.677345537757437, "grad_norm": 4.321903705596924, "learning_rate": 3.654375e-06, "loss": 0.0603, "step": 5850 }, { "epoch": 2.688787185354691, "grad_norm": 2.0146384239196777, "learning_rate": 3.6700000000000004e-06, "loss": 0.0912, "step": 5875 }, { "epoch": 2.700228832951945, "grad_norm": 2.918707847595215, "learning_rate": 3.6856250000000004e-06, "loss": 0.0626, "step": 5900 }, { "epoch": 2.7116704805491993, "grad_norm": 2.255991220474243, "learning_rate": 3.70125e-06, "loss": 0.0634, "step": 5925 }, { "epoch": 2.723112128146453, "grad_norm": 2.645730972290039, "learning_rate": 3.7168750000000003e-06, "loss": 0.052, "step": 5950 }, { "epoch": 2.734553775743707, "grad_norm": 2.3450279235839844, "learning_rate": 3.7325e-06, "loss": 0.0818, "step": 5975 }, { "epoch": 2.745995423340961, "grad_norm": 2.75349497795105, "learning_rate": 3.7481250000000002e-06, "loss": 0.0588, "step": 6000 }, { "epoch": 2.757437070938215, "grad_norm": 2.3382132053375244, "learning_rate": 3.7637499999999998e-06, "loss": 0.0825, "step": 6025 }, { "epoch": 2.7688787185354693, "grad_norm": 4.024121284484863, "learning_rate": 3.779375e-06, "loss": 0.0605, "step": 6050 }, { "epoch": 2.780320366132723, "grad_norm": 1.8773914575576782, "learning_rate": 3.795e-06, "loss": 0.0725, "step": 6075 }, { "epoch": 2.791762013729977, "grad_norm": 3.376814603805542, "learning_rate": 3.8106250000000005e-06, "loss": 0.0582, "step": 6100 }, { "epoch": 
2.803203661327231, "grad_norm": 2.18963885307312, "learning_rate": 3.82625e-06, "loss": 0.0975, "step": 6125 }, { "epoch": 2.814645308924485, "grad_norm": 2.445758104324341, "learning_rate": 3.841875e-06, "loss": 0.0733, "step": 6150 }, { "epoch": 2.8260869565217392, "grad_norm": 2.6733598709106445, "learning_rate": 3.8575e-06, "loss": 0.0762, "step": 6175 }, { "epoch": 2.837528604118993, "grad_norm": 3.2311208248138428, "learning_rate": 3.873125e-06, "loss": 0.0589, "step": 6200 }, { "epoch": 2.8489702517162474, "grad_norm": 2.0734827518463135, "learning_rate": 3.88875e-06, "loss": 0.0866, "step": 6225 }, { "epoch": 2.860411899313501, "grad_norm": 4.616520404815674, "learning_rate": 3.904375e-06, "loss": 0.0547, "step": 6250 }, { "epoch": 2.871853546910755, "grad_norm": 3.1445586681365967, "learning_rate": 3.92e-06, "loss": 0.082, "step": 6275 }, { "epoch": 2.883295194508009, "grad_norm": 3.01181697845459, "learning_rate": 3.935625e-06, "loss": 0.0632, "step": 6300 }, { "epoch": 2.8947368421052633, "grad_norm": 1.6130107641220093, "learning_rate": 3.9512500000000005e-06, "loss": 0.0812, "step": 6325 }, { "epoch": 2.9061784897025174, "grad_norm": 5.2825846672058105, "learning_rate": 3.966875e-06, "loss": 0.0609, "step": 6350 }, { "epoch": 2.917620137299771, "grad_norm": 2.065019369125366, "learning_rate": 3.9825e-06, "loss": 0.0879, "step": 6375 }, { "epoch": 2.929061784897025, "grad_norm": 1.2566419839859009, "learning_rate": 3.998125000000001e-06, "loss": 0.0542, "step": 6400 }, { "epoch": 2.940503432494279, "grad_norm": 2.141080379486084, "learning_rate": 4.01375e-06, "loss": 0.0946, "step": 6425 }, { "epoch": 2.9519450800915332, "grad_norm": 2.570467233657837, "learning_rate": 4.029375000000001e-06, "loss": 0.0613, "step": 6450 }, { "epoch": 2.9633867276887873, "grad_norm": 1.8930100202560425, "learning_rate": 4.045e-06, "loss": 0.0735, "step": 6475 }, { "epoch": 2.974828375286041, "grad_norm": 3.8491806983947754, "learning_rate": 4.060625000000001e-06, "loss": 0.0544, "step": 6500 }, { "epoch": 2.9862700228832955, "grad_norm": 2.8520760536193848, "learning_rate": 4.07625e-06, "loss": 0.081, "step": 6525 }, { "epoch": 2.997711670480549, "grad_norm": 3.3645670413970947, "learning_rate": 4.091875e-06, "loss": 0.0587, "step": 6550 }, { "epoch": 3.009153318077803, "grad_norm": 1.577447772026062, "learning_rate": 4.1075e-06, "loss": 0.0494, "step": 6575 }, { "epoch": 3.0205949656750573, "grad_norm": 5.078967094421387, "learning_rate": 4.123125e-06, "loss": 0.0382, "step": 6600 }, { "epoch": 3.0320366132723113, "grad_norm": 1.4379734992980957, "learning_rate": 4.13875e-06, "loss": 0.0524, "step": 6625 }, { "epoch": 3.0434782608695654, "grad_norm": 4.736499309539795, "learning_rate": 4.154375e-06, "loss": 0.038, "step": 6650 }, { "epoch": 3.054919908466819, "grad_norm": 1.2501332759857178, "learning_rate": 4.17e-06, "loss": 0.058, "step": 6675 }, { "epoch": 3.066361556064073, "grad_norm": 6.145361423492432, "learning_rate": 4.185625e-06, "loss": 0.0359, "step": 6700 }, { "epoch": 3.077803203661327, "grad_norm": 0.909717857837677, "learning_rate": 4.201250000000001e-06, "loss": 0.0427, "step": 6725 }, { "epoch": 3.0892448512585813, "grad_norm": 1.6473100185394287, "learning_rate": 4.216875e-06, "loss": 0.0369, "step": 6750 }, { "epoch": 3.1006864988558354, "grad_norm": 1.7811235189437866, "learning_rate": 4.2325000000000006e-06, "loss": 0.0511, "step": 6775 }, { "epoch": 3.1121281464530894, "grad_norm": 4.205276012420654, "learning_rate": 4.248125e-06, "loss": 0.0379, "step": 6800 }, { 
"epoch": 3.123569794050343, "grad_norm": 1.9242891073226929, "learning_rate": 4.2637500000000005e-06, "loss": 0.0506, "step": 6825 }, { "epoch": 3.135011441647597, "grad_norm": 2.6685783863067627, "learning_rate": 4.279375e-06, "loss": 0.0342, "step": 6850 }, { "epoch": 3.1464530892448512, "grad_norm": 1.2958639860153198, "learning_rate": 4.295e-06, "loss": 0.0424, "step": 6875 }, { "epoch": 3.1578947368421053, "grad_norm": 2.8113203048706055, "learning_rate": 4.310625e-06, "loss": 0.0414, "step": 6900 }, { "epoch": 3.1693363844393594, "grad_norm": 2.195906162261963, "learning_rate": 4.32625e-06, "loss": 0.0521, "step": 6925 }, { "epoch": 3.1807780320366135, "grad_norm": 1.0623836517333984, "learning_rate": 4.341875e-06, "loss": 0.0326, "step": 6950 }, { "epoch": 3.192219679633867, "grad_norm": 1.6473109722137451, "learning_rate": 4.3575e-06, "loss": 0.0497, "step": 6975 }, { "epoch": 3.203661327231121, "grad_norm": 4.674131870269775, "learning_rate": 4.373125e-06, "loss": 0.0447, "step": 7000 }, { "epoch": 3.2151029748283753, "grad_norm": 1.9346390962600708, "learning_rate": 4.38875e-06, "loss": 0.0521, "step": 7025 }, { "epoch": 3.2265446224256293, "grad_norm": 2.4699621200561523, "learning_rate": 4.4043750000000005e-06, "loss": 0.0393, "step": 7050 }, { "epoch": 3.2379862700228834, "grad_norm": 0.8783555626869202, "learning_rate": 4.420000000000001e-06, "loss": 0.0434, "step": 7075 }, { "epoch": 3.2494279176201375, "grad_norm": 8.290339469909668, "learning_rate": 4.435625e-06, "loss": 0.0417, "step": 7100 }, { "epoch": 3.260869565217391, "grad_norm": 2.6827893257141113, "learning_rate": 4.451250000000001e-06, "loss": 0.053, "step": 7125 }, { "epoch": 3.272311212814645, "grad_norm": 4.84825325012207, "learning_rate": 4.466875e-06, "loss": 0.0371, "step": 7150 }, { "epoch": 3.2837528604118993, "grad_norm": 2.9503021240234375, "learning_rate": 4.4825e-06, "loss": 0.0447, "step": 7175 }, { "epoch": 3.2951945080091534, "grad_norm": 3.2825422286987305, "learning_rate": 4.498125e-06, "loss": 0.0397, "step": 7200 }, { "epoch": 3.3066361556064074, "grad_norm": 2.532628297805786, "learning_rate": 4.51375e-06, "loss": 0.0543, "step": 7225 }, { "epoch": 3.3180778032036615, "grad_norm": 1.3574790954589844, "learning_rate": 4.529375e-06, "loss": 0.0491, "step": 7250 }, { "epoch": 3.329519450800915, "grad_norm": 2.330169916152954, "learning_rate": 4.545e-06, "loss": 0.0557, "step": 7275 }, { "epoch": 3.3409610983981692, "grad_norm": 2.9766383171081543, "learning_rate": 4.560625e-06, "loss": 0.0401, "step": 7300 }, { "epoch": 3.3524027459954233, "grad_norm": 2.0109455585479736, "learning_rate": 4.57625e-06, "loss": 0.0438, "step": 7325 }, { "epoch": 3.3638443935926774, "grad_norm": 5.960399150848389, "learning_rate": 4.591875e-06, "loss": 0.0332, "step": 7350 }, { "epoch": 3.3752860411899315, "grad_norm": 1.0166248083114624, "learning_rate": 4.6075e-06, "loss": 0.0473, "step": 7375 }, { "epoch": 3.386727688787185, "grad_norm": 3.1853346824645996, "learning_rate": 4.623125000000001e-06, "loss": 0.0379, "step": 7400 }, { "epoch": 3.398169336384439, "grad_norm": 1.282551884651184, "learning_rate": 4.63875e-06, "loss": 0.0439, "step": 7425 }, { "epoch": 3.4096109839816933, "grad_norm": 4.976385593414307, "learning_rate": 4.654375000000001e-06, "loss": 0.0535, "step": 7450 }, { "epoch": 3.4210526315789473, "grad_norm": 1.1836857795715332, "learning_rate": 4.67e-06, "loss": 0.0435, "step": 7475 }, { "epoch": 3.4324942791762014, "grad_norm": 4.760205268859863, "learning_rate": 4.6856250000000006e-06, "loss": 
0.041, "step": 7500 }, { "epoch": 3.4439359267734555, "grad_norm": 2.7138822078704834, "learning_rate": 4.70125e-06, "loss": 0.0486, "step": 7525 }, { "epoch": 3.4553775743707096, "grad_norm": 1.5902996063232422, "learning_rate": 4.7168750000000005e-06, "loss": 0.0482, "step": 7550 }, { "epoch": 3.466819221967963, "grad_norm": 1.3546466827392578, "learning_rate": 4.7325e-06, "loss": 0.0414, "step": 7575 }, { "epoch": 3.4782608695652173, "grad_norm": 1.520518183708191, "learning_rate": 4.748125e-06, "loss": 0.0443, "step": 7600 }, { "epoch": 3.4897025171624714, "grad_norm": 2.7497692108154297, "learning_rate": 4.76375e-06, "loss": 0.06, "step": 7625 }, { "epoch": 3.5011441647597255, "grad_norm": 2.237837553024292, "learning_rate": 4.779375e-06, "loss": 0.0443, "step": 7650 }, { "epoch": 3.5125858123569795, "grad_norm": 2.258901596069336, "learning_rate": 4.795e-06, "loss": 0.0668, "step": 7675 }, { "epoch": 3.524027459954233, "grad_norm": 5.595725059509277, "learning_rate": 4.810625e-06, "loss": 0.0422, "step": 7700 }, { "epoch": 3.5354691075514877, "grad_norm": 3.10247802734375, "learning_rate": 4.826250000000001e-06, "loss": 0.0473, "step": 7725 }, { "epoch": 3.5469107551487413, "grad_norm": 2.568012237548828, "learning_rate": 4.841875000000001e-06, "loss": 0.0437, "step": 7750 }, { "epoch": 3.5583524027459954, "grad_norm": 1.1307361125946045, "learning_rate": 4.8575000000000005e-06, "loss": 0.0608, "step": 7775 }, { "epoch": 3.5697940503432495, "grad_norm": 1.6656261682510376, "learning_rate": 4.873125e-06, "loss": 0.042, "step": 7800 }, { "epoch": 3.5812356979405036, "grad_norm": 1.9320863485336304, "learning_rate": 4.88875e-06, "loss": 0.053, "step": 7825 }, { "epoch": 3.5926773455377576, "grad_norm": 4.413325786590576, "learning_rate": 4.904375e-06, "loss": 0.0421, "step": 7850 }, { "epoch": 3.6041189931350113, "grad_norm": 1.3597586154937744, "learning_rate": 4.92e-06, "loss": 0.0483, "step": 7875 }, { "epoch": 3.6155606407322654, "grad_norm": 3.5859973430633545, "learning_rate": 4.935625e-06, "loss": 0.0402, "step": 7900 }, { "epoch": 3.6270022883295194, "grad_norm": 2.306730031967163, "learning_rate": 4.95125e-06, "loss": 0.0526, "step": 7925 }, { "epoch": 3.6384439359267735, "grad_norm": 3.935138702392578, "learning_rate": 4.966875e-06, "loss": 0.0607, "step": 7950 }, { "epoch": 3.6498855835240276, "grad_norm": 2.3124289512634277, "learning_rate": 4.9825e-06, "loss": 0.0545, "step": 7975 }, { "epoch": 3.6613272311212812, "grad_norm": 4.540414810180664, "learning_rate": 4.9975e-06, "loss": 0.0408, "step": 8000 }, { "epoch": 3.6727688787185357, "grad_norm": 2.159935712814331, "learning_rate": 5.013125e-06, "loss": 0.052, "step": 8025 }, { "epoch": 3.6842105263157894, "grad_norm": 3.3574485778808594, "learning_rate": 5.02875e-06, "loss": 0.0545, "step": 8050 }, { "epoch": 3.6956521739130435, "grad_norm": 0.9943602681159973, "learning_rate": 5.044375e-06, "loss": 0.0473, "step": 8075 }, { "epoch": 3.7070938215102975, "grad_norm": 3.0983762741088867, "learning_rate": 5.06e-06, "loss": 0.0546, "step": 8100 }, { "epoch": 3.7185354691075516, "grad_norm": 4.827439785003662, "learning_rate": 5.075625e-06, "loss": 0.0563, "step": 8125 }, { "epoch": 3.7299771167048057, "grad_norm": 3.3578224182128906, "learning_rate": 5.091250000000001e-06, "loss": 0.0562, "step": 8150 }, { "epoch": 3.7414187643020593, "grad_norm": 1.9058266878128052, "learning_rate": 5.106875e-06, "loss": 0.0579, "step": 8175 }, { "epoch": 3.7528604118993134, "grad_norm": 2.7806217670440674, "learning_rate": 
5.1225000000000005e-06, "loss": 0.054, "step": 8200 }, { "epoch": 3.7643020594965675, "grad_norm": 1.6406562328338623, "learning_rate": 5.138125000000001e-06, "loss": 0.0567, "step": 8225 }, { "epoch": 3.7757437070938216, "grad_norm": 5.01106595993042, "learning_rate": 5.15375e-06, "loss": 0.042, "step": 8250 }, { "epoch": 3.7871853546910756, "grad_norm": 4.058021068572998, "learning_rate": 5.169375e-06, "loss": 0.0572, "step": 8275 }, { "epoch": 3.7986270022883293, "grad_norm": 4.998597621917725, "learning_rate": 5.185e-06, "loss": 0.0522, "step": 8300 }, { "epoch": 3.8100686498855834, "grad_norm": 4.826078414916992, "learning_rate": 5.200625e-06, "loss": 0.0552, "step": 8325 }, { "epoch": 3.8215102974828374, "grad_norm": 5.765878200531006, "learning_rate": 5.21625e-06, "loss": 0.0477, "step": 8350 }, { "epoch": 3.8329519450800915, "grad_norm": 5.913174152374268, "learning_rate": 5.231875e-06, "loss": 0.0638, "step": 8375 }, { "epoch": 3.8443935926773456, "grad_norm": 2.6266438961029053, "learning_rate": 5.2475e-06, "loss": 0.046, "step": 8400 }, { "epoch": 3.8558352402745997, "grad_norm": 2.2001307010650635, "learning_rate": 5.263125e-06, "loss": 0.0654, "step": 8425 }, { "epoch": 3.8672768878718538, "grad_norm": 2.704002618789673, "learning_rate": 5.27875e-06, "loss": 0.0521, "step": 8450 }, { "epoch": 3.8787185354691074, "grad_norm": 2.7904629707336426, "learning_rate": 5.2943750000000004e-06, "loss": 0.0619, "step": 8475 }, { "epoch": 3.8901601830663615, "grad_norm": 3.6070396900177, "learning_rate": 5.31e-06, "loss": 0.0503, "step": 8500 }, { "epoch": 3.9016018306636155, "grad_norm": 2.3498170375823975, "learning_rate": 5.325625e-06, "loss": 0.0533, "step": 8525 }, { "epoch": 3.9130434782608696, "grad_norm": 4.795491695404053, "learning_rate": 5.341250000000001e-06, "loss": 0.0484, "step": 8550 }, { "epoch": 3.9244851258581237, "grad_norm": 1.5031219720840454, "learning_rate": 5.356875e-06, "loss": 0.0647, "step": 8575 }, { "epoch": 3.9359267734553773, "grad_norm": 4.074753761291504, "learning_rate": 5.372500000000001e-06, "loss": 0.0439, "step": 8600 }, { "epoch": 3.9473684210526314, "grad_norm": 3.534262180328369, "learning_rate": 5.388125e-06, "loss": 0.0603, "step": 8625 }, { "epoch": 3.9588100686498855, "grad_norm": 1.4854680299758911, "learning_rate": 5.4037500000000006e-06, "loss": 0.0472, "step": 8650 }, { "epoch": 3.9702517162471396, "grad_norm": 1.3425439596176147, "learning_rate": 5.419375e-06, "loss": 0.0598, "step": 8675 }, { "epoch": 3.9816933638443937, "grad_norm": 2.2017860412597656, "learning_rate": 5.4350000000000005e-06, "loss": 0.0481, "step": 8700 }, { "epoch": 3.9931350114416477, "grad_norm": 3.3017570972442627, "learning_rate": 5.450625e-06, "loss": 0.0596, "step": 8725 }, { "epoch": 4.004576659038902, "grad_norm": 1.602834939956665, "learning_rate": 5.46625e-06, "loss": 0.0651, "step": 8750 }, { "epoch": 4.016018306636155, "grad_norm": 2.772857427597046, "learning_rate": 5.481875e-06, "loss": 0.0211, "step": 8775 }, { "epoch": 4.02745995423341, "grad_norm": 1.561313271522522, "learning_rate": 5.4975e-06, "loss": 0.0392, "step": 8800 }, { "epoch": 4.038901601830664, "grad_norm": 1.9547089338302612, "learning_rate": 5.513125e-06, "loss": 0.0276, "step": 8825 }, { "epoch": 4.050343249427917, "grad_norm": 6.52534818649292, "learning_rate": 5.52875e-06, "loss": 0.0492, "step": 8850 }, { "epoch": 4.061784897025172, "grad_norm": 2.2165064811706543, "learning_rate": 5.544375000000001e-06, "loss": 0.0258, "step": 8875 }, { "epoch": 4.073226544622425, "grad_norm": 
1.8075672388076782, "learning_rate": 5.56e-06, "loss": 0.0461, "step": 8900 }, { "epoch": 4.08466819221968, "grad_norm": 4.631660461425781, "learning_rate": 5.5756250000000005e-06, "loss": 0.0277, "step": 8925 }, { "epoch": 4.0961098398169336, "grad_norm": 1.0000470876693726, "learning_rate": 5.59125e-06, "loss": 0.0425, "step": 8950 }, { "epoch": 4.107551487414188, "grad_norm": 1.2732213735580444, "learning_rate": 5.606875e-06, "loss": 0.0343, "step": 8975 }, { "epoch": 4.118993135011442, "grad_norm": 0.4584027826786041, "learning_rate": 5.6225e-06, "loss": 0.0428, "step": 9000 }, { "epoch": 4.130434782608695, "grad_norm": 1.5011073350906372, "learning_rate": 5.638125e-06, "loss": 0.0328, "step": 9025 }, { "epoch": 4.14187643020595, "grad_norm": 4.310413360595703, "learning_rate": 5.65375e-06, "loss": 0.0485, "step": 9050 }, { "epoch": 4.1533180778032035, "grad_norm": 3.5656051635742188, "learning_rate": 5.669375e-06, "loss": 0.0266, "step": 9075 }, { "epoch": 4.164759725400458, "grad_norm": 1.8385664224624634, "learning_rate": 5.685e-06, "loss": 0.0481, "step": 9100 }, { "epoch": 4.176201372997712, "grad_norm": 1.5393552780151367, "learning_rate": 5.700625e-06, "loss": 0.0373, "step": 9125 }, { "epoch": 4.187643020594965, "grad_norm": 0.9085458517074585, "learning_rate": 5.71625e-06, "loss": 0.0452, "step": 9150 }, { "epoch": 4.19908466819222, "grad_norm": 1.3088352680206299, "learning_rate": 5.731875e-06, "loss": 0.0305, "step": 9175 }, { "epoch": 4.2105263157894735, "grad_norm": 2.538881778717041, "learning_rate": 5.7475000000000005e-06, "loss": 0.0545, "step": 9200 }, { "epoch": 4.221967963386728, "grad_norm": 2.4237465858459473, "learning_rate": 5.763125000000001e-06, "loss": 0.0298, "step": 9225 }, { "epoch": 4.233409610983982, "grad_norm": 2.9607691764831543, "learning_rate": 5.77875e-06, "loss": 0.0375, "step": 9250 }, { "epoch": 4.244851258581235, "grad_norm": 1.4011865854263306, "learning_rate": 5.794375000000001e-06, "loss": 0.0318, "step": 9275 }, { "epoch": 4.25629290617849, "grad_norm": 1.349041223526001, "learning_rate": 5.81e-06, "loss": 0.0371, "step": 9300 }, { "epoch": 4.267734553775743, "grad_norm": 2.49147629737854, "learning_rate": 5.825625000000001e-06, "loss": 0.0317, "step": 9325 }, { "epoch": 4.279176201372998, "grad_norm": 2.146099090576172, "learning_rate": 5.84125e-06, "loss": 0.0463, "step": 9350 }, { "epoch": 4.290617848970252, "grad_norm": 4.7241291999816895, "learning_rate": 5.8568750000000006e-06, "loss": 0.03, "step": 9375 }, { "epoch": 4.302059496567506, "grad_norm": 1.7988123893737793, "learning_rate": 5.8725e-06, "loss": 0.0511, "step": 9400 }, { "epoch": 4.31350114416476, "grad_norm": 3.9925315380096436, "learning_rate": 5.8881250000000005e-06, "loss": 0.0321, "step": 9425 }, { "epoch": 4.324942791762013, "grad_norm": 2.249257802963257, "learning_rate": 5.90375e-06, "loss": 0.0461, "step": 9450 }, { "epoch": 4.336384439359268, "grad_norm": 2.2734787464141846, "learning_rate": 5.919375e-06, "loss": 0.0276, "step": 9475 }, { "epoch": 4.3478260869565215, "grad_norm": 2.8019375801086426, "learning_rate": 5.935e-06, "loss": 0.0442, "step": 9500 }, { "epoch": 4.359267734553776, "grad_norm": 1.658045768737793, "learning_rate": 5.950625e-06, "loss": 0.0289, "step": 9525 }, { "epoch": 4.37070938215103, "grad_norm": 1.2820688486099243, "learning_rate": 5.966250000000001e-06, "loss": 0.0501, "step": 9550 }, { "epoch": 4.382151029748284, "grad_norm": 3.4379382133483887, "learning_rate": 5.981875e-06, "loss": 0.0266, "step": 9575 }, { "epoch": 4.393592677345538, 
"grad_norm": 2.9417431354522705, "learning_rate": 5.997500000000001e-06, "loss": 0.0419, "step": 9600 }, { "epoch": 4.4050343249427915, "grad_norm": 3.831340789794922, "learning_rate": 6.013125e-06, "loss": 0.0309, "step": 9625 }, { "epoch": 4.416475972540046, "grad_norm": 2.573761224746704, "learning_rate": 6.0287500000000005e-06, "loss": 0.0474, "step": 9650 }, { "epoch": 4.4279176201373, "grad_norm": 0.3918006420135498, "learning_rate": 6.044375e-06, "loss": 0.0341, "step": 9675 }, { "epoch": 4.439359267734554, "grad_norm": 1.6582770347595215, "learning_rate": 6.0600000000000004e-06, "loss": 0.0317, "step": 9700 }, { "epoch": 4.450800915331808, "grad_norm": 2.8036224842071533, "learning_rate": 6.075625e-06, "loss": 0.0389, "step": 9725 }, { "epoch": 4.462242562929061, "grad_norm": 1.1665189266204834, "learning_rate": 6.09125e-06, "loss": 0.0416, "step": 9750 }, { "epoch": 4.473684210526316, "grad_norm": 3.064020872116089, "learning_rate": 6.106875e-06, "loss": 0.0288, "step": 9775 }, { "epoch": 4.48512585812357, "grad_norm": 1.773339033126831, "learning_rate": 6.1225e-06, "loss": 0.0453, "step": 9800 }, { "epoch": 4.496567505720824, "grad_norm": 2.284943103790283, "learning_rate": 6.138125e-06, "loss": 0.0329, "step": 9825 }, { "epoch": 4.508009153318078, "grad_norm": 3.180586814880371, "learning_rate": 6.15375e-06, "loss": 0.0615, "step": 9850 }, { "epoch": 4.519450800915331, "grad_norm": 5.7272772789001465, "learning_rate": 6.1693750000000005e-06, "loss": 0.0328, "step": 9875 }, { "epoch": 4.530892448512586, "grad_norm": 0.5188058614730835, "learning_rate": 6.185000000000001e-06, "loss": 0.0523, "step": 9900 }, { "epoch": 4.5423340961098395, "grad_norm": 5.444327354431152, "learning_rate": 6.2006250000000005e-06, "loss": 0.0272, "step": 9925 }, { "epoch": 4.553775743707094, "grad_norm": 1.6532262563705444, "learning_rate": 6.216250000000001e-06, "loss": 0.0442, "step": 9950 }, { "epoch": 4.565217391304348, "grad_norm": 1.2820019721984863, "learning_rate": 6.231875e-06, "loss": 0.0354, "step": 9975 }, { "epoch": 4.576659038901602, "grad_norm": 1.568413496017456, "learning_rate": 6.247500000000001e-06, "loss": 0.0581, "step": 10000 }, { "epoch": 4.576659038901602, "eval_loss": 0.13263048231601715, "eval_runtime": 8476.0278, "eval_samples_per_second": 1.123, "eval_steps_per_second": 0.141, "eval_wer": 0.08291738657451846, "step": 10000 }, { "epoch": 4.588100686498856, "grad_norm": 3.1065287590026855, "learning_rate": 6.24671875e-06, "loss": 0.0381, "step": 10025 }, { "epoch": 4.5995423340961095, "grad_norm": 1.7548161745071411, "learning_rate": 6.242812500000001e-06, "loss": 0.0497, "step": 10050 }, { "epoch": 4.610983981693364, "grad_norm": 3.192704439163208, "learning_rate": 6.23890625e-06, "loss": 0.0397, "step": 10075 }, { "epoch": 4.622425629290618, "grad_norm": 1.3416036367416382, "learning_rate": 6.2350000000000004e-06, "loss": 0.042, "step": 10100 }, { "epoch": 4.633867276887872, "grad_norm": 1.8229219913482666, "learning_rate": 6.23109375e-06, "loss": 0.0304, "step": 10125 }, { "epoch": 4.645308924485126, "grad_norm": 1.3719747066497803, "learning_rate": 6.2271875e-06, "loss": 0.0471, "step": 10150 }, { "epoch": 4.65675057208238, "grad_norm": 2.458113670349121, "learning_rate": 6.22328125e-06, "loss": 0.029, "step": 10175 }, { "epoch": 4.668192219679634, "grad_norm": 1.583488941192627, "learning_rate": 6.219375e-06, "loss": 0.045, "step": 10200 }, { "epoch": 4.679633867276888, "grad_norm": 3.8001644611358643, "learning_rate": 6.21546875e-06, "loss": 0.0314, "step": 10225 }, { 
"epoch": 4.691075514874142, "grad_norm": 1.5467511415481567, "learning_rate": 6.211562500000001e-06, "loss": 0.0512, "step": 10250 }, { "epoch": 4.702517162471396, "grad_norm": 4.194436073303223, "learning_rate": 6.20765625e-06, "loss": 0.0367, "step": 10275 }, { "epoch": 4.71395881006865, "grad_norm": 3.282353639602661, "learning_rate": 6.2037500000000005e-06, "loss": 0.0548, "step": 10300 }, { "epoch": 4.725400457665904, "grad_norm": 3.7602429389953613, "learning_rate": 6.199843750000001e-06, "loss": 0.0318, "step": 10325 }, { "epoch": 4.7368421052631575, "grad_norm": 1.14561927318573, "learning_rate": 6.1959375e-06, "loss": 0.0581, "step": 10350 }, { "epoch": 4.748283752860412, "grad_norm": 2.6498944759368896, "learning_rate": 6.19203125e-06, "loss": 0.0322, "step": 10375 }, { "epoch": 4.759725400457666, "grad_norm": 1.1338865756988525, "learning_rate": 6.188125e-06, "loss": 0.056, "step": 10400 }, { "epoch": 4.77116704805492, "grad_norm": 4.4739251136779785, "learning_rate": 6.1842187500000005e-06, "loss": 0.0369, "step": 10425 }, { "epoch": 4.782608695652174, "grad_norm": 2.040625810623169, "learning_rate": 6.1803125e-06, "loss": 0.0596, "step": 10450 }, { "epoch": 4.7940503432494275, "grad_norm": 1.5571162700653076, "learning_rate": 6.17640625e-06, "loss": 0.0326, "step": 10475 }, { "epoch": 4.805491990846682, "grad_norm": 2.806779146194458, "learning_rate": 6.172500000000001e-06, "loss": 0.0556, "step": 10500 }, { "epoch": 4.816933638443936, "grad_norm": 2.709650993347168, "learning_rate": 6.168593750000001e-06, "loss": 0.0398, "step": 10525 }, { "epoch": 4.82837528604119, "grad_norm": 0.8603181838989258, "learning_rate": 6.1646875e-06, "loss": 0.0495, "step": 10550 }, { "epoch": 4.839816933638444, "grad_norm": 2.002333164215088, "learning_rate": 6.16078125e-06, "loss": 0.0312, "step": 10575 }, { "epoch": 4.851258581235698, "grad_norm": 1.9747883081436157, "learning_rate": 6.156875e-06, "loss": 0.0609, "step": 10600 }, { "epoch": 4.862700228832952, "grad_norm": 1.2020927667617798, "learning_rate": 6.1529687500000006e-06, "loss": 0.0427, "step": 10625 }, { "epoch": 4.874141876430206, "grad_norm": 1.4368155002593994, "learning_rate": 6.1490625e-06, "loss": 0.0478, "step": 10650 }, { "epoch": 4.88558352402746, "grad_norm": 2.706700563430786, "learning_rate": 6.14515625e-06, "loss": 0.041, "step": 10675 }, { "epoch": 4.897025171624714, "grad_norm": 1.3693126440048218, "learning_rate": 6.141250000000001e-06, "loss": 0.047, "step": 10700 }, { "epoch": 4.908466819221968, "grad_norm": 4.081038951873779, "learning_rate": 6.13734375e-06, "loss": 0.0337, "step": 10725 }, { "epoch": 4.919908466819222, "grad_norm": 2.389098882675171, "learning_rate": 6.1334375e-06, "loss": 0.0479, "step": 10750 }, { "epoch": 4.931350114416476, "grad_norm": 3.5421600341796875, "learning_rate": 6.12953125e-06, "loss": 0.0357, "step": 10775 }, { "epoch": 4.94279176201373, "grad_norm": 2.7297027111053467, "learning_rate": 6.125625e-06, "loss": 0.0441, "step": 10800 }, { "epoch": 4.954233409610984, "grad_norm": 1.4277007579803467, "learning_rate": 6.12171875e-06, "loss": 0.0431, "step": 10825 }, { "epoch": 4.965675057208238, "grad_norm": 1.9834622144699097, "learning_rate": 6.1178125e-06, "loss": 0.048, "step": 10850 }, { "epoch": 4.977116704805492, "grad_norm": 0.4609973728656769, "learning_rate": 6.1139062500000005e-06, "loss": 0.0325, "step": 10875 }, { "epoch": 4.988558352402746, "grad_norm": 1.8763446807861328, "learning_rate": 6.110000000000001e-06, "loss": 0.0496, "step": 10900 }, { "epoch": 5.0, "grad_norm": 
3.2754316329956055, "learning_rate": 6.10609375e-06, "loss": 0.0479, "step": 10925 }, { "epoch": 5.011441647597254, "grad_norm": 5.122183799743652, "learning_rate": 6.102187500000001e-06, "loss": 0.0219, "step": 10950 }, { "epoch": 5.022883295194508, "grad_norm": 4.793453216552734, "learning_rate": 6.09828125e-06, "loss": 0.0362, "step": 10975 }, { "epoch": 5.034324942791762, "grad_norm": 2.249206304550171, "learning_rate": 6.094375e-06, "loss": 0.0198, "step": 11000 }, { "epoch": 5.045766590389016, "grad_norm": 2.9413721561431885, "learning_rate": 6.09046875e-06, "loss": 0.0174, "step": 11025 }, { "epoch": 5.05720823798627, "grad_norm": 0.40500813722610474, "learning_rate": 6.0865625e-06, "loss": 0.016, "step": 11050 }, { "epoch": 5.068649885583524, "grad_norm": 4.315321922302246, "learning_rate": 6.0826562500000006e-06, "loss": 0.0363, "step": 11075 }, { "epoch": 5.080091533180778, "grad_norm": 0.7413917183876038, "learning_rate": 6.07875e-06, "loss": 0.0204, "step": 11100 }, { "epoch": 5.091533180778032, "grad_norm": 3.5221505165100098, "learning_rate": 6.07484375e-06, "loss": 0.0365, "step": 11125 }, { "epoch": 5.102974828375286, "grad_norm": 1.7396185398101807, "learning_rate": 6.070937500000001e-06, "loss": 0.0234, "step": 11150 }, { "epoch": 5.11441647597254, "grad_norm": 5.39373779296875, "learning_rate": 6.06703125e-06, "loss": 0.0358, "step": 11175 }, { "epoch": 5.125858123569794, "grad_norm": 2.7335047721862793, "learning_rate": 6.063125e-06, "loss": 0.0256, "step": 11200 }, { "epoch": 5.137299771167048, "grad_norm": 6.199601650238037, "learning_rate": 6.05921875e-06, "loss": 0.0307, "step": 11225 }, { "epoch": 5.148741418764302, "grad_norm": 2.20025372505188, "learning_rate": 6.0553125e-06, "loss": 0.0216, "step": 11250 }, { "epoch": 5.160183066361556, "grad_norm": 9.27344799041748, "learning_rate": 6.051406250000001e-06, "loss": 0.0279, "step": 11275 }, { "epoch": 5.17162471395881, "grad_norm": 3.2440028190612793, "learning_rate": 6.0475e-06, "loss": 0.0273, "step": 11300 }, { "epoch": 5.183066361556064, "grad_norm": 6.8499226570129395, "learning_rate": 6.0435937500000004e-06, "loss": 0.0434, "step": 11325 }, { "epoch": 5.194508009153318, "grad_norm": 1.6816035509109497, "learning_rate": 6.039687500000001e-06, "loss": 0.0221, "step": 11350 }, { "epoch": 5.2059496567505725, "grad_norm": 1.133388876914978, "learning_rate": 6.03578125e-06, "loss": 0.0317, "step": 11375 }, { "epoch": 5.217391304347826, "grad_norm": 1.6359041929244995, "learning_rate": 6.031875e-06, "loss": 0.025, "step": 11400 }, { "epoch": 5.22883295194508, "grad_norm": 4.984877109527588, "learning_rate": 6.02796875e-06, "loss": 0.0266, "step": 11425 }, { "epoch": 5.240274599542334, "grad_norm": 3.7085070610046387, "learning_rate": 6.0240625e-06, "loss": 0.0283, "step": 11450 }, { "epoch": 5.251716247139588, "grad_norm": 6.761989116668701, "learning_rate": 6.02015625e-06, "loss": 0.0428, "step": 11475 }, { "epoch": 5.2631578947368425, "grad_norm": 2.7936620712280273, "learning_rate": 6.01625e-06, "loss": 0.0251, "step": 11500 }, { "epoch": 5.274599542334096, "grad_norm": 5.547983646392822, "learning_rate": 6.0123437500000005e-06, "loss": 0.0318, "step": 11525 }, { "epoch": 5.28604118993135, "grad_norm": 1.003167748451233, "learning_rate": 6.008437500000001e-06, "loss": 0.0285, "step": 11550 }, { "epoch": 5.297482837528604, "grad_norm": 2.9314146041870117, "learning_rate": 6.00453125e-06, "loss": 0.0317, "step": 11575 }, { "epoch": 5.308924485125858, "grad_norm": 0.7954840660095215, "learning_rate": 6.000625e-06, 
"loss": 0.0261, "step": 11600 }, { "epoch": 5.320366132723112, "grad_norm": 8.020834922790527, "learning_rate": 5.99671875e-06, "loss": 0.036, "step": 11625 }, { "epoch": 5.331807780320366, "grad_norm": 0.46895453333854675, "learning_rate": 5.9928125000000005e-06, "loss": 0.0223, "step": 11650 }, { "epoch": 5.34324942791762, "grad_norm": 7.10143518447876, "learning_rate": 5.98890625e-06, "loss": 0.0391, "step": 11675 }, { "epoch": 5.354691075514874, "grad_norm": 1.6974797248840332, "learning_rate": 5.985e-06, "loss": 0.0235, "step": 11700 }, { "epoch": 5.366132723112128, "grad_norm": 4.873080730438232, "learning_rate": 5.981093750000001e-06, "loss": 0.0337, "step": 11725 }, { "epoch": 5.377574370709382, "grad_norm": 0.3695700466632843, "learning_rate": 5.9771875e-06, "loss": 0.026, "step": 11750 }, { "epoch": 5.389016018306636, "grad_norm": 3.1437160968780518, "learning_rate": 5.9734375e-06, "loss": 0.0314, "step": 11775 }, { "epoch": 5.4004576659038905, "grad_norm": 1.6795854568481445, "learning_rate": 5.969531250000001e-06, "loss": 0.0259, "step": 11800 }, { "epoch": 5.411899313501144, "grad_norm": 10.205307006835938, "learning_rate": 5.965625e-06, "loss": 0.0331, "step": 11825 }, { "epoch": 5.423340961098398, "grad_norm": 1.8584654331207275, "learning_rate": 5.9617187500000005e-06, "loss": 0.0268, "step": 11850 }, { "epoch": 5.434782608695652, "grad_norm": 3.7585856914520264, "learning_rate": 5.957812500000001e-06, "loss": 0.0354, "step": 11875 }, { "epoch": 5.446224256292906, "grad_norm": 0.3272765576839447, "learning_rate": 5.95390625e-06, "loss": 0.0235, "step": 11900 }, { "epoch": 5.4576659038901605, "grad_norm": 5.947609901428223, "learning_rate": 5.95e-06, "loss": 0.0309, "step": 11925 }, { "epoch": 5.469107551487414, "grad_norm": 1.5576297044754028, "learning_rate": 5.94609375e-06, "loss": 0.0289, "step": 11950 }, { "epoch": 5.480549199084669, "grad_norm": 6.958711624145508, "learning_rate": 5.9421875000000004e-06, "loss": 0.0353, "step": 11975 }, { "epoch": 5.491990846681922, "grad_norm": 3.0236716270446777, "learning_rate": 5.93828125e-06, "loss": 0.0223, "step": 12000 }, { "epoch": 5.503432494279176, "grad_norm": 4.048431873321533, "learning_rate": 5.934375e-06, "loss": 0.0324, "step": 12025 }, { "epoch": 5.51487414187643, "grad_norm": 1.2261245250701904, "learning_rate": 5.930468750000001e-06, "loss": 0.0223, "step": 12050 }, { "epoch": 5.526315789473684, "grad_norm": 1.4566423892974854, "learning_rate": 5.926562500000001e-06, "loss": 0.0342, "step": 12075 }, { "epoch": 5.537757437070939, "grad_norm": 3.938749074935913, "learning_rate": 5.92265625e-06, "loss": 0.025, "step": 12100 }, { "epoch": 5.549199084668192, "grad_norm": 3.392719268798828, "learning_rate": 5.91875e-06, "loss": 0.0367, "step": 12125 }, { "epoch": 5.560640732265446, "grad_norm": 2.564655065536499, "learning_rate": 5.91484375e-06, "loss": 0.0239, "step": 12150 }, { "epoch": 5.5720823798627, "grad_norm": 6.947197914123535, "learning_rate": 5.9109375000000005e-06, "loss": 0.0365, "step": 12175 }, { "epoch": 5.583524027459954, "grad_norm": 2.679152011871338, "learning_rate": 5.90703125e-06, "loss": 0.0228, "step": 12200 }, { "epoch": 5.5949656750572085, "grad_norm": 5.84982442855835, "learning_rate": 5.903125e-06, "loss": 0.0372, "step": 12225 }, { "epoch": 5.606407322654462, "grad_norm": 2.001720905303955, "learning_rate": 5.899218750000001e-06, "loss": 0.0298, "step": 12250 }, { "epoch": 5.617848970251716, "grad_norm": 4.845787048339844, "learning_rate": 5.8953125e-06, "loss": 0.0257, "step": 12275 }, { 
"epoch": 5.62929061784897, "grad_norm": 3.0925636291503906, "learning_rate": 5.8914062500000005e-06, "loss": 0.0264, "step": 12300 }, { "epoch": 5.640732265446224, "grad_norm": 4.710490703582764, "learning_rate": 5.8875e-06, "loss": 0.0264, "step": 12325 }, { "epoch": 5.6521739130434785, "grad_norm": 4.397391319274902, "learning_rate": 5.88359375e-06, "loss": 0.025, "step": 12350 }, { "epoch": 5.663615560640732, "grad_norm": 3.7566113471984863, "learning_rate": 5.8796875e-06, "loss": 0.0304, "step": 12375 }, { "epoch": 5.675057208237987, "grad_norm": 1.8263936042785645, "learning_rate": 5.87578125e-06, "loss": 0.0248, "step": 12400 }, { "epoch": 5.68649885583524, "grad_norm": 11.288564682006836, "learning_rate": 5.8718750000000004e-06, "loss": 0.0451, "step": 12425 }, { "epoch": 5.697940503432494, "grad_norm": 3.030503749847412, "learning_rate": 5.867968750000001e-06, "loss": 0.0286, "step": 12450 }, { "epoch": 5.709382151029748, "grad_norm": 3.937255382537842, "learning_rate": 5.8640625e-06, "loss": 0.0427, "step": 12475 }, { "epoch": 5.720823798627002, "grad_norm": 0.9319488406181335, "learning_rate": 5.8601562500000006e-06, "loss": 0.0211, "step": 12500 }, { "epoch": 5.732265446224257, "grad_norm": 3.707982301712036, "learning_rate": 5.856250000000001e-06, "loss": 0.0328, "step": 12525 }, { "epoch": 5.74370709382151, "grad_norm": 3.8587310314178467, "learning_rate": 5.85234375e-06, "loss": 0.0264, "step": 12550 }, { "epoch": 5.755148741418765, "grad_norm": 5.644896030426025, "learning_rate": 5.8484375e-06, "loss": 0.0436, "step": 12575 }, { "epoch": 5.766590389016018, "grad_norm": 2.0625832080841064, "learning_rate": 5.84453125e-06, "loss": 0.0261, "step": 12600 }, { "epoch": 5.778032036613272, "grad_norm": 3.209373950958252, "learning_rate": 5.8406250000000005e-06, "loss": 0.03, "step": 12625 }, { "epoch": 5.7894736842105265, "grad_norm": 5.094180583953857, "learning_rate": 5.83671875e-06, "loss": 0.0254, "step": 12650 }, { "epoch": 5.80091533180778, "grad_norm": 3.323632001876831, "learning_rate": 5.8328125e-06, "loss": 0.0377, "step": 12675 }, { "epoch": 5.812356979405035, "grad_norm": 2.335705518722534, "learning_rate": 5.828906250000001e-06, "loss": 0.0258, "step": 12700 }, { "epoch": 5.823798627002288, "grad_norm": 4.69788932800293, "learning_rate": 5.825000000000001e-06, "loss": 0.0362, "step": 12725 }, { "epoch": 5.835240274599542, "grad_norm": 3.9574642181396484, "learning_rate": 5.82109375e-06, "loss": 0.03, "step": 12750 }, { "epoch": 5.8466819221967965, "grad_norm": 3.0901191234588623, "learning_rate": 5.8171875e-06, "loss": 0.0429, "step": 12775 }, { "epoch": 5.85812356979405, "grad_norm": 1.4580295085906982, "learning_rate": 5.81328125e-06, "loss": 0.0251, "step": 12800 }, { "epoch": 5.869565217391305, "grad_norm": 3.3204898834228516, "learning_rate": 5.809375000000001e-06, "loss": 0.0333, "step": 12825 }, { "epoch": 5.881006864988558, "grad_norm": 1.2781578302383423, "learning_rate": 5.80546875e-06, "loss": 0.024, "step": 12850 }, { "epoch": 5.892448512585812, "grad_norm": 3.30926775932312, "learning_rate": 5.8015625e-06, "loss": 0.0269, "step": 12875 }, { "epoch": 5.9038901601830664, "grad_norm": 1.008018136024475, "learning_rate": 5.797656250000001e-06, "loss": 0.028, "step": 12900 }, { "epoch": 5.91533180778032, "grad_norm": 10.23064136505127, "learning_rate": 5.79375e-06, "loss": 0.0407, "step": 12925 }, { "epoch": 5.926773455377575, "grad_norm": 2.7669782638549805, "learning_rate": 5.78984375e-06, "loss": 0.025, "step": 12950 }, { "epoch": 5.938215102974828, 
"grad_norm": 6.264385223388672, "learning_rate": 5.7859375e-06, "loss": 0.0463, "step": 12975 }, { "epoch": 5.949656750572083, "grad_norm": 0.38453003764152527, "learning_rate": 5.78203125e-06, "loss": 0.0257, "step": 13000 }, { "epoch": 5.961098398169336, "grad_norm": 4.114786148071289, "learning_rate": 5.778125e-06, "loss": 0.0349, "step": 13025 }, { "epoch": 5.97254004576659, "grad_norm": 1.9861042499542236, "learning_rate": 5.77421875e-06, "loss": 0.0281, "step": 13050 }, { "epoch": 5.983981693363845, "grad_norm": 2.0022335052490234, "learning_rate": 5.7703125000000005e-06, "loss": 0.0324, "step": 13075 }, { "epoch": 5.995423340961098, "grad_norm": 1.8620116710662842, "learning_rate": 5.766406250000001e-06, "loss": 0.0276, "step": 13100 }, { "epoch": 6.006864988558353, "grad_norm": 0.8321772813796997, "learning_rate": 5.7625e-06, "loss": 0.0318, "step": 13125 }, { "epoch": 6.018306636155606, "grad_norm": 1.971578598022461, "learning_rate": 5.758593750000001e-06, "loss": 0.0092, "step": 13150 }, { "epoch": 6.02974828375286, "grad_norm": 2.404782295227051, "learning_rate": 5.7546875e-06, "loss": 0.0203, "step": 13175 }, { "epoch": 6.0411899313501145, "grad_norm": 5.279099941253662, "learning_rate": 5.7507812500000004e-06, "loss": 0.0156, "step": 13200 }, { "epoch": 6.052631578947368, "grad_norm": 1.047425627708435, "learning_rate": 5.746875e-06, "loss": 0.0201, "step": 13225 }, { "epoch": 6.064073226544623, "grad_norm": 2.5753073692321777, "learning_rate": 5.74296875e-06, "loss": 0.011, "step": 13250 }, { "epoch": 6.075514874141876, "grad_norm": 1.78007972240448, "learning_rate": 5.739062500000001e-06, "loss": 0.0233, "step": 13275 }, { "epoch": 6.086956521739131, "grad_norm": 0.6934411525726318, "learning_rate": 5.73515625e-06, "loss": 0.021, "step": 13300 }, { "epoch": 6.0983981693363845, "grad_norm": 3.8213915824890137, "learning_rate": 5.73125e-06, "loss": 0.0293, "step": 13325 }, { "epoch": 6.109839816933638, "grad_norm": 2.7871358394622803, "learning_rate": 5.727343750000001e-06, "loss": 0.0174, "step": 13350 }, { "epoch": 6.121281464530893, "grad_norm": 1.6364772319793701, "learning_rate": 5.7234375e-06, "loss": 0.0242, "step": 13375 }, { "epoch": 6.132723112128146, "grad_norm": 1.6298720836639404, "learning_rate": 5.71953125e-06, "loss": 0.0163, "step": 13400 }, { "epoch": 6.144164759725401, "grad_norm": 1.7847976684570312, "learning_rate": 5.715625e-06, "loss": 0.0277, "step": 13425 }, { "epoch": 6.155606407322654, "grad_norm": 0.7765257358551025, "learning_rate": 5.71171875e-06, "loss": 0.0104, "step": 13450 }, { "epoch": 6.167048054919908, "grad_norm": 0.38881629705429077, "learning_rate": 5.707812500000001e-06, "loss": 0.0192, "step": 13475 }, { "epoch": 6.178489702517163, "grad_norm": 5.265055179595947, "learning_rate": 5.70390625e-06, "loss": 0.024, "step": 13500 }, { "epoch": 6.189931350114416, "grad_norm": 3.118817090988159, "learning_rate": 5.7000000000000005e-06, "loss": 0.0195, "step": 13525 }, { "epoch": 6.201372997711671, "grad_norm": 2.820322275161743, "learning_rate": 5.696093750000001e-06, "loss": 0.016, "step": 13550 }, { "epoch": 6.212814645308924, "grad_norm": 1.8668017387390137, "learning_rate": 5.6921875e-06, "loss": 0.0268, "step": 13575 }, { "epoch": 6.224256292906179, "grad_norm": 1.3626856803894043, "learning_rate": 5.68828125e-06, "loss": 0.0155, "step": 13600 }, { "epoch": 6.2356979405034325, "grad_norm": 0.24685801565647125, "learning_rate": 5.684375e-06, "loss": 0.0257, "step": 13625 }, { "epoch": 6.247139588100686, "grad_norm": 2.274303913116455, 
"learning_rate": 5.6804687500000004e-06, "loss": 0.0202, "step": 13650 }, { "epoch": 6.258581235697941, "grad_norm": 0.1426711082458496, "learning_rate": 5.6765625e-06, "loss": 0.0223, "step": 13675 }, { "epoch": 6.270022883295194, "grad_norm": 1.919135332107544, "learning_rate": 5.67265625e-06, "loss": 0.0187, "step": 13700 }, { "epoch": 6.281464530892449, "grad_norm": 1.3074697256088257, "learning_rate": 5.6687500000000006e-06, "loss": 0.0259, "step": 13725 }, { "epoch": 6.2929061784897025, "grad_norm": 0.7924039959907532, "learning_rate": 5.664843750000001e-06, "loss": 0.0133, "step": 13750 }, { "epoch": 6.304347826086957, "grad_norm": 1.1090521812438965, "learning_rate": 5.6609375e-06, "loss": 0.0235, "step": 13775 }, { "epoch": 6.315789473684211, "grad_norm": 6.865564346313477, "learning_rate": 5.65703125e-06, "loss": 0.0259, "step": 13800 }, { "epoch": 6.327231121281464, "grad_norm": 1.3353395462036133, "learning_rate": 5.653125e-06, "loss": 0.0226, "step": 13825 }, { "epoch": 6.338672768878719, "grad_norm": 2.667495012283325, "learning_rate": 5.6492187500000005e-06, "loss": 0.0186, "step": 13850 }, { "epoch": 6.350114416475972, "grad_norm": 0.9550194144248962, "learning_rate": 5.6453125e-06, "loss": 0.0192, "step": 13875 }, { "epoch": 6.361556064073227, "grad_norm": 1.979049801826477, "learning_rate": 5.64140625e-06, "loss": 0.0191, "step": 13900 }, { "epoch": 6.372997711670481, "grad_norm": 0.524032711982727, "learning_rate": 5.637500000000001e-06, "loss": 0.0185, "step": 13925 }, { "epoch": 6.384439359267734, "grad_norm": 0.6382055282592773, "learning_rate": 5.63359375e-06, "loss": 0.0181, "step": 13950 }, { "epoch": 6.395881006864989, "grad_norm": 0.8079178929328918, "learning_rate": 5.6296875000000005e-06, "loss": 0.0275, "step": 13975 }, { "epoch": 6.407322654462242, "grad_norm": 2.475860118865967, "learning_rate": 5.62578125e-06, "loss": 0.0172, "step": 14000 }, { "epoch": 6.418764302059497, "grad_norm": 3.069545269012451, "learning_rate": 5.621875e-06, "loss": 0.0342, "step": 14025 }, { "epoch": 6.4302059496567505, "grad_norm": 1.3313754796981812, "learning_rate": 5.61796875e-06, "loss": 0.0144, "step": 14050 }, { "epoch": 6.441647597254004, "grad_norm": 4.446610927581787, "learning_rate": 5.6140625e-06, "loss": 0.0316, "step": 14075 }, { "epoch": 6.453089244851259, "grad_norm": 1.924608826637268, "learning_rate": 5.61015625e-06, "loss": 0.0144, "step": 14100 }, { "epoch": 6.464530892448512, "grad_norm": 4.026088714599609, "learning_rate": 5.606250000000001e-06, "loss": 0.0259, "step": 14125 }, { "epoch": 6.475972540045767, "grad_norm": 2.8281869888305664, "learning_rate": 5.60234375e-06, "loss": 0.0168, "step": 14150 }, { "epoch": 6.4874141876430205, "grad_norm": 3.8542351722717285, "learning_rate": 5.5984375000000006e-06, "loss": 0.0232, "step": 14175 }, { "epoch": 6.498855835240275, "grad_norm": 0.6594443321228027, "learning_rate": 5.59453125e-06, "loss": 0.0222, "step": 14200 }, { "epoch": 6.510297482837529, "grad_norm": 0.6128702759742737, "learning_rate": 5.590625e-06, "loss": 0.0218, "step": 14225 }, { "epoch": 6.521739130434782, "grad_norm": 0.4436359405517578, "learning_rate": 5.58671875e-06, "loss": 0.0179, "step": 14250 }, { "epoch": 6.533180778032037, "grad_norm": 1.2225440740585327, "learning_rate": 5.5828125e-06, "loss": 0.0267, "step": 14275 }, { "epoch": 6.54462242562929, "grad_norm": 3.732752799987793, "learning_rate": 5.5789062500000005e-06, "loss": 0.0157, "step": 14300 }, { "epoch": 6.556064073226545, "grad_norm": 0.4065556228160858, "learning_rate": 
5.575e-06, "loss": 0.03, "step": 14325 }, { "epoch": 6.567505720823799, "grad_norm": 2.227457046508789, "learning_rate": 5.57109375e-06, "loss": 0.0179, "step": 14350 }, { "epoch": 6.578947368421053, "grad_norm": 1.4811984300613403, "learning_rate": 5.567187500000001e-06, "loss": 0.0181, "step": 14375 }, { "epoch": 6.590389016018307, "grad_norm": 1.9638090133666992, "learning_rate": 5.563281250000001e-06, "loss": 0.0158, "step": 14400 }, { "epoch": 6.60183066361556, "grad_norm": 0.49590787291526794, "learning_rate": 5.559375e-06, "loss": 0.0272, "step": 14425 }, { "epoch": 6.613272311212815, "grad_norm": 3.1890580654144287, "learning_rate": 5.55546875e-06, "loss": 0.0138, "step": 14450 }, { "epoch": 6.6247139588100685, "grad_norm": 2.1384153366088867, "learning_rate": 5.5515625e-06, "loss": 0.0252, "step": 14475 }, { "epoch": 6.636155606407323, "grad_norm": 1.058912754058838, "learning_rate": 5.547656250000001e-06, "loss": 0.016, "step": 14500 }, { "epoch": 6.647597254004577, "grad_norm": 1.2951571941375732, "learning_rate": 5.54375e-06, "loss": 0.0199, "step": 14525 }, { "epoch": 6.65903890160183, "grad_norm": 3.9273900985717773, "learning_rate": 5.53984375e-06, "loss": 0.0164, "step": 14550 }, { "epoch": 6.670480549199085, "grad_norm": 1.9826246500015259, "learning_rate": 5.535937500000001e-06, "loss": 0.0269, "step": 14575 }, { "epoch": 6.6819221967963385, "grad_norm": 1.9506044387817383, "learning_rate": 5.53203125e-06, "loss": 0.014, "step": 14600 }, { "epoch": 6.693363844393593, "grad_norm": 4.968943119049072, "learning_rate": 5.528125e-06, "loss": 0.0266, "step": 14625 }, { "epoch": 6.704805491990847, "grad_norm": 2.959742546081543, "learning_rate": 5.52421875e-06, "loss": 0.0168, "step": 14650 }, { "epoch": 6.7162471395881, "grad_norm": 2.1982803344726562, "learning_rate": 5.5203125e-06, "loss": 0.0267, "step": 14675 }, { "epoch": 6.727688787185355, "grad_norm": 1.832104206085205, "learning_rate": 5.51640625e-06, "loss": 0.0178, "step": 14700 }, { "epoch": 6.739130434782608, "grad_norm": 2.252427339553833, "learning_rate": 5.5125e-06, "loss": 0.0206, "step": 14725 }, { "epoch": 6.750572082379863, "grad_norm": 0.21544213593006134, "learning_rate": 5.5085937500000005e-06, "loss": 0.0167, "step": 14750 }, { "epoch": 6.762013729977117, "grad_norm": 2.100684404373169, "learning_rate": 5.504687500000001e-06, "loss": 0.027, "step": 14775 }, { "epoch": 6.77345537757437, "grad_norm": 1.0401899814605713, "learning_rate": 5.50078125e-06, "loss": 0.023, "step": 14800 }, { "epoch": 6.784897025171625, "grad_norm": 1.2624324560165405, "learning_rate": 5.496875e-06, "loss": 0.0256, "step": 14825 }, { "epoch": 6.796338672768878, "grad_norm": 0.6681243181228638, "learning_rate": 5.49296875e-06, "loss": 0.0167, "step": 14850 }, { "epoch": 6.807780320366133, "grad_norm": 0.6695626378059387, "learning_rate": 5.4890625000000004e-06, "loss": 0.0238, "step": 14875 }, { "epoch": 6.8192219679633865, "grad_norm": 1.3886865377426147, "learning_rate": 5.48515625e-06, "loss": 0.0251, "step": 14900 }, { "epoch": 6.830663615560641, "grad_norm": 0.940159261226654, "learning_rate": 5.48125e-06, "loss": 0.0302, "step": 14925 }, { "epoch": 6.842105263157895, "grad_norm": 2.3970630168914795, "learning_rate": 5.477343750000001e-06, "loss": 0.0183, "step": 14950 }, { "epoch": 6.853546910755149, "grad_norm": 2.410069465637207, "learning_rate": 5.4734375e-06, "loss": 0.0214, "step": 14975 }, { "epoch": 6.864988558352403, "grad_norm": 2.3420534133911133, "learning_rate": 5.46953125e-06, "loss": 0.0225, "step": 15000 }, { 
"epoch": 6.864988558352403, "eval_loss": 0.15696020424365997, "eval_runtime": 8425.5271, "eval_samples_per_second": 1.13, "eval_steps_per_second": 0.141, "eval_wer": 0.08486913675049594, "step": 15000 }, { "epoch": 6.8764302059496565, "grad_norm": 3.4250895977020264, "learning_rate": 5.465625000000001e-06, "loss": 0.03, "step": 15025 }, { "epoch": 6.887871853546911, "grad_norm": 3.4854824542999268, "learning_rate": 5.46171875e-06, "loss": 0.0177, "step": 15050 }, { "epoch": 6.899313501144165, "grad_norm": 0.8536240458488464, "learning_rate": 5.4578125e-06, "loss": 0.0189, "step": 15075 }, { "epoch": 6.910755148741419, "grad_norm": 0.8668047785758972, "learning_rate": 5.45390625e-06, "loss": 0.0152, "step": 15100 }, { "epoch": 6.922196796338673, "grad_norm": 1.7880914211273193, "learning_rate": 5.45e-06, "loss": 0.027, "step": 15125 }, { "epoch": 6.933638443935926, "grad_norm": 3.566779375076294, "learning_rate": 5.446093750000001e-06, "loss": 0.0167, "step": 15150 }, { "epoch": 6.945080091533181, "grad_norm": 0.7250522971153259, "learning_rate": 5.4421875e-06, "loss": 0.0223, "step": 15175 }, { "epoch": 6.956521739130435, "grad_norm": 3.150644302368164, "learning_rate": 5.4382812500000005e-06, "loss": 0.0204, "step": 15200 }, { "epoch": 6.967963386727689, "grad_norm": 0.6711576581001282, "learning_rate": 5.434375000000001e-06, "loss": 0.022, "step": 15225 }, { "epoch": 6.979405034324943, "grad_norm": 3.4224953651428223, "learning_rate": 5.43046875e-06, "loss": 0.0186, "step": 15250 }, { "epoch": 6.990846681922196, "grad_norm": 1.3148167133331299, "learning_rate": 5.4265625e-06, "loss": 0.0242, "step": 15275 }, { "epoch": 7.002288329519451, "grad_norm": 0.3823435306549072, "learning_rate": 5.42265625e-06, "loss": 0.0226, "step": 15300 }, { "epoch": 7.0137299771167045, "grad_norm": 1.3943029642105103, "learning_rate": 5.41875e-06, "loss": 0.0109, "step": 15325 }, { "epoch": 7.025171624713959, "grad_norm": 0.7203263640403748, "learning_rate": 5.41484375e-06, "loss": 0.0125, "step": 15350 }, { "epoch": 7.036613272311213, "grad_norm": 0.41139668226242065, "learning_rate": 5.4109375e-06, "loss": 0.0126, "step": 15375 }, { "epoch": 7.048054919908467, "grad_norm": 1.5451672077178955, "learning_rate": 5.4070312500000006e-06, "loss": 0.0224, "step": 15400 }, { "epoch": 7.059496567505721, "grad_norm": 0.48175954818725586, "learning_rate": 5.403125000000001e-06, "loss": 0.0135, "step": 15425 }, { "epoch": 7.0709382151029745, "grad_norm": 1.59236478805542, "learning_rate": 5.3992187499999995e-06, "loss": 0.0165, "step": 15450 }, { "epoch": 7.082379862700229, "grad_norm": 1.2343538999557495, "learning_rate": 5.3953125e-06, "loss": 0.0117, "step": 15475 }, { "epoch": 7.093821510297483, "grad_norm": 0.973778247833252, "learning_rate": 5.39140625e-06, "loss": 0.0186, "step": 15500 }, { "epoch": 7.105263157894737, "grad_norm": 1.6163041591644287, "learning_rate": 5.3875000000000005e-06, "loss": 0.0125, "step": 15525 }, { "epoch": 7.116704805491991, "grad_norm": 1.0415899753570557, "learning_rate": 5.38359375e-06, "loss": 0.0195, "step": 15550 }, { "epoch": 7.128146453089244, "grad_norm": 2.307370901107788, "learning_rate": 5.3796875e-06, "loss": 0.0082, "step": 15575 }, { "epoch": 7.139588100686499, "grad_norm": 1.1321461200714111, "learning_rate": 5.375781250000001e-06, "loss": 0.0132, "step": 15600 }, { "epoch": 7.151029748283753, "grad_norm": 0.9697201251983643, "learning_rate": 5.371875e-06, "loss": 0.0105, "step": 15625 }, { "epoch": 7.162471395881007, "grad_norm": 0.737309455871582, "learning_rate": 
5.3679687500000005e-06, "loss": 0.0155, "step": 15650 }, { "epoch": 7.173913043478261, "grad_norm": 4.352673053741455, "learning_rate": 5.3640625e-06, "loss": 0.0114, "step": 15675 }, { "epoch": 7.185354691075515, "grad_norm": 1.5137193202972412, "learning_rate": 5.3603125e-06, "loss": 0.0213, "step": 15700 }, { "epoch": 7.196796338672769, "grad_norm": 1.0873304605484009, "learning_rate": 5.3564062500000005e-06, "loss": 0.0103, "step": 15725 }, { "epoch": 7.2082379862700225, "grad_norm": 2.30537748336792, "learning_rate": 5.352500000000001e-06, "loss": 0.0217, "step": 15750 }, { "epoch": 7.219679633867277, "grad_norm": 2.4181723594665527, "learning_rate": 5.34859375e-06, "loss": 0.0109, "step": 15775 }, { "epoch": 7.231121281464531, "grad_norm": 0.6082072854042053, "learning_rate": 5.3446875e-06, "loss": 0.0193, "step": 15800 }, { "epoch": 7.242562929061785, "grad_norm": 1.248982310295105, "learning_rate": 5.34078125e-06, "loss": 0.0136, "step": 15825 }, { "epoch": 7.254004576659039, "grad_norm": 0.9748781323432922, "learning_rate": 5.3368750000000005e-06, "loss": 0.0153, "step": 15850 }, { "epoch": 7.2654462242562925, "grad_norm": 1.8344913721084595, "learning_rate": 5.33296875e-06, "loss": 0.0145, "step": 15875 }, { "epoch": 7.276887871853547, "grad_norm": 0.5007604956626892, "learning_rate": 5.3290625e-06, "loss": 0.0166, "step": 15900 }, { "epoch": 7.288329519450801, "grad_norm": 1.7965891361236572, "learning_rate": 5.325156250000001e-06, "loss": 0.0084, "step": 15925 }, { "epoch": 7.299771167048055, "grad_norm": 1.5175199508666992, "learning_rate": 5.321250000000001e-06, "loss": 0.0234, "step": 15950 }, { "epoch": 7.311212814645309, "grad_norm": 0.5066431760787964, "learning_rate": 5.3173437499999996e-06, "loss": 0.0114, "step": 15975 }, { "epoch": 7.322654462242563, "grad_norm": 0.9130284786224365, "learning_rate": 5.3134375e-06, "loss": 0.0263, "step": 16000 }, { "epoch": 7.334096109839817, "grad_norm": 0.05765797942876816, "learning_rate": 5.30953125e-06, "loss": 0.0089, "step": 16025 }, { "epoch": 7.345537757437071, "grad_norm": 0.5619144439697266, "learning_rate": 5.3056250000000006e-06, "loss": 0.0227, "step": 16050 }, { "epoch": 7.356979405034325, "grad_norm": 0.3315473198890686, "learning_rate": 5.30171875e-06, "loss": 0.0122, "step": 16075 }, { "epoch": 7.368421052631579, "grad_norm": 1.07926607131958, "learning_rate": 5.2978125e-06, "loss": 0.0192, "step": 16100 }, { "epoch": 7.379862700228833, "grad_norm": 0.4695550501346588, "learning_rate": 5.293906250000001e-06, "loss": 0.0141, "step": 16125 }, { "epoch": 7.391304347826087, "grad_norm": 0.8653700947761536, "learning_rate": 5.29e-06, "loss": 0.0248, "step": 16150 }, { "epoch": 7.4027459954233406, "grad_norm": 1.6522399187088013, "learning_rate": 5.2860937500000005e-06, "loss": 0.0083, "step": 16175 }, { "epoch": 7.414187643020595, "grad_norm": 1.464819312095642, "learning_rate": 5.2821875e-06, "loss": 0.0169, "step": 16200 }, { "epoch": 7.425629290617849, "grad_norm": 0.45212358236312866, "learning_rate": 5.27828125e-06, "loss": 0.0119, "step": 16225 }, { "epoch": 7.437070938215103, "grad_norm": 1.0408241748809814, "learning_rate": 5.274375e-06, "loss": 0.0186, "step": 16250 }, { "epoch": 7.448512585812357, "grad_norm": 1.8748407363891602, "learning_rate": 5.27046875e-06, "loss": 0.0173, "step": 16275 }, { "epoch": 7.459954233409611, "grad_norm": 1.0063502788543701, "learning_rate": 5.2665625000000005e-06, "loss": 0.0211, "step": 16300 }, { "epoch": 7.471395881006865, "grad_norm": 0.12291441112756729, "learning_rate": 
5.262656250000001e-06, "loss": 0.0142, "step": 16325 }, { "epoch": 7.482837528604119, "grad_norm": 0.9158095121383667, "learning_rate": 5.25875e-06, "loss": 0.0171, "step": 16350 }, { "epoch": 7.494279176201373, "grad_norm": 0.3749239444732666, "learning_rate": 5.254843750000001e-06, "loss": 0.0127, "step": 16375 }, { "epoch": 7.505720823798627, "grad_norm": 0.7709665298461914, "learning_rate": 5.2509375e-06, "loss": 0.0191, "step": 16400 }, { "epoch": 7.517162471395881, "grad_norm": 3.2617990970611572, "learning_rate": 5.24703125e-06, "loss": 0.0134, "step": 16425 }, { "epoch": 7.528604118993135, "grad_norm": 1.3472856283187866, "learning_rate": 5.243125e-06, "loss": 0.0224, "step": 16450 }, { "epoch": 7.540045766590389, "grad_norm": 1.9107680320739746, "learning_rate": 5.23921875e-06, "loss": 0.0142, "step": 16475 }, { "epoch": 7.551487414187643, "grad_norm": 0.7160741686820984, "learning_rate": 5.2353125000000005e-06, "loss": 0.0249, "step": 16500 }, { "epoch": 7.562929061784897, "grad_norm": 1.0781526565551758, "learning_rate": 5.23140625e-06, "loss": 0.0161, "step": 16525 }, { "epoch": 7.574370709382151, "grad_norm": 1.084631085395813, "learning_rate": 5.2275e-06, "loss": 0.025, "step": 16550 }, { "epoch": 7.585812356979405, "grad_norm": 2.6102561950683594, "learning_rate": 5.223593750000001e-06, "loss": 0.0149, "step": 16575 }, { "epoch": 7.597254004576659, "grad_norm": 0.9257845282554626, "learning_rate": 5.2196875e-06, "loss": 0.0204, "step": 16600 }, { "epoch": 7.608695652173913, "grad_norm": 0.39411503076553345, "learning_rate": 5.21578125e-06, "loss": 0.0086, "step": 16625 }, { "epoch": 7.620137299771167, "grad_norm": 2.020707845687866, "learning_rate": 5.211875e-06, "loss": 0.0189, "step": 16650 }, { "epoch": 7.631578947368421, "grad_norm": 1.0345745086669922, "learning_rate": 5.20796875e-06, "loss": 0.0132, "step": 16675 }, { "epoch": 7.643020594965675, "grad_norm": 1.5063828229904175, "learning_rate": 5.204062500000001e-06, "loss": 0.0207, "step": 16700 }, { "epoch": 7.654462242562929, "grad_norm": 1.0685392618179321, "learning_rate": 5.20015625e-06, "loss": 0.0129, "step": 16725 }, { "epoch": 7.665903890160183, "grad_norm": 0.547886848449707, "learning_rate": 5.1962500000000004e-06, "loss": 0.0306, "step": 16750 }, { "epoch": 7.6773455377574376, "grad_norm": 0.5572363138198853, "learning_rate": 5.192343750000001e-06, "loss": 0.0083, "step": 16775 }, { "epoch": 7.688787185354691, "grad_norm": 1.064138412475586, "learning_rate": 5.1884375e-06, "loss": 0.017, "step": 16800 }, { "epoch": 7.700228832951945, "grad_norm": 1.0206539630889893, "learning_rate": 5.18453125e-06, "loss": 0.0138, "step": 16825 }, { "epoch": 7.711670480549199, "grad_norm": 1.2938916683197021, "learning_rate": 5.180625e-06, "loss": 0.014, "step": 16850 }, { "epoch": 7.723112128146453, "grad_norm": 1.0462238788604736, "learning_rate": 5.17671875e-06, "loss": 0.018, "step": 16875 }, { "epoch": 7.7345537757437075, "grad_norm": 1.7892041206359863, "learning_rate": 5.1728125e-06, "loss": 0.0256, "step": 16900 }, { "epoch": 7.745995423340961, "grad_norm": 2.0987486839294434, "learning_rate": 5.16890625e-06, "loss": 0.0196, "step": 16925 }, { "epoch": 7.757437070938215, "grad_norm": 1.1295993328094482, "learning_rate": 5.1650000000000005e-06, "loss": 0.0235, "step": 16950 }, { "epoch": 7.768878718535469, "grad_norm": 1.3253165483474731, "learning_rate": 5.161093750000001e-06, "loss": 0.0136, "step": 16975 }, { "epoch": 7.780320366132723, "grad_norm": 0.8303130269050598, "learning_rate": 5.1571875e-06, "loss": 
0.0265, "step": 17000 }, { "epoch": 7.7917620137299775, "grad_norm": 1.181491494178772, "learning_rate": 5.15328125e-06, "loss": 0.0163, "step": 17025 }, { "epoch": 7.803203661327231, "grad_norm": 0.9661981463432312, "learning_rate": 5.149375e-06, "loss": 0.0192, "step": 17050 }, { "epoch": 7.814645308924485, "grad_norm": 0.4149691164493561, "learning_rate": 5.1454687500000005e-06, "loss": 0.0136, "step": 17075 }, { "epoch": 7.826086956521739, "grad_norm": 0.5994296669960022, "learning_rate": 5.1415625e-06, "loss": 0.0282, "step": 17100 }, { "epoch": 7.837528604118993, "grad_norm": 0.6517021059989929, "learning_rate": 5.13765625e-06, "loss": 0.0088, "step": 17125 }, { "epoch": 7.848970251716247, "grad_norm": 1.8703917264938354, "learning_rate": 5.133750000000001e-06, "loss": 0.0281, "step": 17150 }, { "epoch": 7.860411899313501, "grad_norm": 0.14560416340827942, "learning_rate": 5.12984375e-06, "loss": 0.0115, "step": 17175 }, { "epoch": 7.871853546910755, "grad_norm": 1.1590520143508911, "learning_rate": 5.1259375e-06, "loss": 0.0209, "step": 17200 }, { "epoch": 7.883295194508009, "grad_norm": 1.2970707416534424, "learning_rate": 5.12203125e-06, "loss": 0.0096, "step": 17225 }, { "epoch": 7.894736842105263, "grad_norm": 0.6606979966163635, "learning_rate": 5.118125e-06, "loss": 0.0199, "step": 17250 }, { "epoch": 7.906178489702517, "grad_norm": 0.6137211918830872, "learning_rate": 5.11421875e-06, "loss": 0.0161, "step": 17275 }, { "epoch": 7.917620137299771, "grad_norm": 1.1610240936279297, "learning_rate": 5.1103125e-06, "loss": 0.021, "step": 17300 }, { "epoch": 7.9290617848970255, "grad_norm": 0.5328589081764221, "learning_rate": 5.10640625e-06, "loss": 0.0162, "step": 17325 }, { "epoch": 7.940503432494279, "grad_norm": 0.8363622426986694, "learning_rate": 5.102500000000001e-06, "loss": 0.0114, "step": 17350 }, { "epoch": 7.951945080091534, "grad_norm": 0.2498696744441986, "learning_rate": 5.09859375e-06, "loss": 0.0135, "step": 17375 }, { "epoch": 7.963386727688787, "grad_norm": 0.8312330842018127, "learning_rate": 5.0946875000000005e-06, "loss": 0.0157, "step": 17400 }, { "epoch": 7.974828375286041, "grad_norm": 2.403409719467163, "learning_rate": 5.090781250000001e-06, "loss": 0.0113, "step": 17425 }, { "epoch": 7.9862700228832955, "grad_norm": 1.0571558475494385, "learning_rate": 5.086875e-06, "loss": 0.0237, "step": 17450 }, { "epoch": 7.997711670480549, "grad_norm": 0.7870259284973145, "learning_rate": 5.08296875e-06, "loss": 0.0154, "step": 17475 }, { "epoch": 8.009153318077804, "grad_norm": 1.1883960962295532, "learning_rate": 5.0790625e-06, "loss": 0.0167, "step": 17500 }, { "epoch": 8.020594965675057, "grad_norm": 4.847294807434082, "learning_rate": 5.0751562500000005e-06, "loss": 0.013, "step": 17525 }, { "epoch": 8.03203661327231, "grad_norm": 0.9088895320892334, "learning_rate": 5.07125e-06, "loss": 0.0138, "step": 17550 }, { "epoch": 8.043478260869565, "grad_norm": 0.20299823582172394, "learning_rate": 5.06734375e-06, "loss": 0.0116, "step": 17575 }, { "epoch": 8.05491990846682, "grad_norm": 0.20789586007595062, "learning_rate": 5.063437500000001e-06, "loss": 0.0198, "step": 17600 }, { "epoch": 8.066361556064074, "grad_norm": 1.580451250076294, "learning_rate": 5.059531250000001e-06, "loss": 0.0069, "step": 17625 }, { "epoch": 8.077803203661327, "grad_norm": 0.6814912557601929, "learning_rate": 5.0556249999999996e-06, "loss": 0.0159, "step": 17650 }, { "epoch": 8.08924485125858, "grad_norm": 2.7753753662109375, "learning_rate": 5.05171875e-06, "loss": 0.0098, "step": 
17675 }, { "epoch": 8.100686498855834, "grad_norm": 2.184615135192871, "learning_rate": 5.0478125e-06, "loss": 0.0136, "step": 17700 }, { "epoch": 8.11212814645309, "grad_norm": 1.3797327280044556, "learning_rate": 5.0439062500000005e-06, "loss": 0.0112, "step": 17725 }, { "epoch": 8.123569794050344, "grad_norm": 0.19944579899311066, "learning_rate": 5.04e-06, "loss": 0.017, "step": 17750 }, { "epoch": 8.135011441647597, "grad_norm": 1.6061996221542358, "learning_rate": 5.03609375e-06, "loss": 0.0126, "step": 17775 }, { "epoch": 8.14645308924485, "grad_norm": 0.6356900334358215, "learning_rate": 5.032187500000001e-06, "loss": 0.0109, "step": 17800 }, { "epoch": 8.157894736842104, "grad_norm": 0.17535723745822906, "learning_rate": 5.02828125e-06, "loss": 0.0155, "step": 17825 }, { "epoch": 8.16933638443936, "grad_norm": 0.23621246218681335, "learning_rate": 5.024375e-06, "loss": 0.0139, "step": 17850 }, { "epoch": 8.180778032036613, "grad_norm": 3.409660577774048, "learning_rate": 5.02046875e-06, "loss": 0.0101, "step": 17875 }, { "epoch": 8.192219679633867, "grad_norm": 1.5496536493301392, "learning_rate": 5.0165625e-06, "loss": 0.0142, "step": 17900 }, { "epoch": 8.20366132723112, "grad_norm": 1.4734901189804077, "learning_rate": 5.01265625e-06, "loss": 0.0136, "step": 17925 }, { "epoch": 8.215102974828376, "grad_norm": 1.6027588844299316, "learning_rate": 5.00875e-06, "loss": 0.0184, "step": 17950 }, { "epoch": 8.22654462242563, "grad_norm": 1.7734415531158447, "learning_rate": 5.0048437500000004e-06, "loss": 0.0121, "step": 17975 }, { "epoch": 8.237986270022883, "grad_norm": 0.8128214478492737, "learning_rate": 5.000937500000001e-06, "loss": 0.0176, "step": 18000 }, { "epoch": 8.249427917620137, "grad_norm": 2.47589111328125, "learning_rate": 4.99703125e-06, "loss": 0.0168, "step": 18025 }, { "epoch": 8.26086956521739, "grad_norm": 0.07057098299264908, "learning_rate": 4.993125000000001e-06, "loss": 0.0114, "step": 18050 }, { "epoch": 8.272311212814646, "grad_norm": 0.9646495580673218, "learning_rate": 4.98921875e-06, "loss": 0.0078, "step": 18075 }, { "epoch": 8.2837528604119, "grad_norm": 0.16202184557914734, "learning_rate": 4.9853125e-06, "loss": 0.0113, "step": 18100 }, { "epoch": 8.295194508009153, "grad_norm": 0.8214831948280334, "learning_rate": 4.98140625e-06, "loss": 0.0119, "step": 18125 }, { "epoch": 8.306636155606407, "grad_norm": 0.6320903897285461, "learning_rate": 4.9775e-06, "loss": 0.0114, "step": 18150 }, { "epoch": 8.31807780320366, "grad_norm": 3.323302984237671, "learning_rate": 4.9735937500000005e-06, "loss": 0.0122, "step": 18175 }, { "epoch": 8.329519450800916, "grad_norm": 1.6427133083343506, "learning_rate": 4.9696875e-06, "loss": 0.016, "step": 18200 }, { "epoch": 8.34096109839817, "grad_norm": 0.11769402027130127, "learning_rate": 4.96578125e-06, "loss": 0.0115, "step": 18225 }, { "epoch": 8.352402745995423, "grad_norm": 1.0071938037872314, "learning_rate": 4.961875000000001e-06, "loss": 0.0111, "step": 18250 }, { "epoch": 8.363844393592677, "grad_norm": 1.382653832435608, "learning_rate": 4.95796875e-06, "loss": 0.0152, "step": 18275 }, { "epoch": 8.37528604118993, "grad_norm": 0.6369820833206177, "learning_rate": 4.9540625e-06, "loss": 0.0164, "step": 18300 }, { "epoch": 8.386727688787186, "grad_norm": 3.145285129547119, "learning_rate": 4.95015625e-06, "loss": 0.0107, "step": 18325 }, { "epoch": 8.39816933638444, "grad_norm": 0.9794672131538391, "learning_rate": 4.94625e-06, "loss": 0.0111, "step": 18350 }, { "epoch": 8.409610983981693, "grad_norm": 
1.0672450065612793, "learning_rate": 4.942343750000001e-06, "loss": 0.0132, "step": 18375 }, { "epoch": 8.421052631578947, "grad_norm": 0.14489297568798065, "learning_rate": 4.9384375e-06, "loss": 0.0161, "step": 18400 }, { "epoch": 8.4324942791762, "grad_norm": 2.3339426517486572, "learning_rate": 4.9345312500000004e-06, "loss": 0.0175, "step": 18425 }, { "epoch": 8.443935926773456, "grad_norm": 2.02839994430542, "learning_rate": 4.930625000000001e-06, "loss": 0.0145, "step": 18450 }, { "epoch": 8.45537757437071, "grad_norm": 3.019855260848999, "learning_rate": 4.92671875e-06, "loss": 0.0133, "step": 18475 }, { "epoch": 8.466819221967963, "grad_norm": 1.5375008583068848, "learning_rate": 4.9228125e-06, "loss": 0.0091, "step": 18500 }, { "epoch": 8.478260869565217, "grad_norm": 0.10762526839971542, "learning_rate": 4.91890625e-06, "loss": 0.0161, "step": 18525 }, { "epoch": 8.48970251716247, "grad_norm": 0.3452286124229431, "learning_rate": 4.915e-06, "loss": 0.0177, "step": 18550 }, { "epoch": 8.501144164759726, "grad_norm": 2.135416269302368, "learning_rate": 4.91109375e-06, "loss": 0.0112, "step": 18575 }, { "epoch": 8.51258581235698, "grad_norm": 0.7354816198348999, "learning_rate": 4.9071875e-06, "loss": 0.0103, "step": 18600 }, { "epoch": 8.524027459954233, "grad_norm": 3.338163375854492, "learning_rate": 4.9032812500000005e-06, "loss": 0.0191, "step": 18625 }, { "epoch": 8.535469107551487, "grad_norm": 1.048393726348877, "learning_rate": 4.899375000000001e-06, "loss": 0.0127, "step": 18650 }, { "epoch": 8.546910755148742, "grad_norm": 1.4512662887573242, "learning_rate": 4.89546875e-06, "loss": 0.009, "step": 18675 }, { "epoch": 8.558352402745996, "grad_norm": 0.29056400060653687, "learning_rate": 4.8915625e-06, "loss": 0.0204, "step": 18700 }, { "epoch": 8.56979405034325, "grad_norm": 3.6008384227752686, "learning_rate": 4.88765625e-06, "loss": 0.0109, "step": 18725 }, { "epoch": 8.581235697940503, "grad_norm": 0.9403023719787598, "learning_rate": 4.8837500000000005e-06, "loss": 0.0142, "step": 18750 }, { "epoch": 8.592677345537757, "grad_norm": 0.5917396545410156, "learning_rate": 4.87984375e-06, "loss": 0.0085, "step": 18775 }, { "epoch": 8.604118993135012, "grad_norm": 0.8383995294570923, "learning_rate": 4.8759375e-06, "loss": 0.0212, "step": 18800 }, { "epoch": 8.615560640732266, "grad_norm": 0.8528295755386353, "learning_rate": 4.872031250000001e-06, "loss": 0.0074, "step": 18825 }, { "epoch": 8.62700228832952, "grad_norm": 2.9630656242370605, "learning_rate": 4.868125e-06, "loss": 0.0175, "step": 18850 }, { "epoch": 8.638443935926773, "grad_norm": 5.270595550537109, "learning_rate": 4.86421875e-06, "loss": 0.0164, "step": 18875 }, { "epoch": 8.649885583524027, "grad_norm": 1.7384530305862427, "learning_rate": 4.8603125e-06, "loss": 0.0192, "step": 18900 }, { "epoch": 8.661327231121282, "grad_norm": 4.940834999084473, "learning_rate": 4.85640625e-06, "loss": 0.012, "step": 18925 }, { "epoch": 8.672768878718536, "grad_norm": 0.3905220031738281, "learning_rate": 4.8525e-06, "loss": 0.0171, "step": 18950 }, { "epoch": 8.68421052631579, "grad_norm": 2.7816128730773926, "learning_rate": 4.84859375e-06, "loss": 0.0111, "step": 18975 }, { "epoch": 8.695652173913043, "grad_norm": 0.6524660587310791, "learning_rate": 4.8446875e-06, "loss": 0.0134, "step": 19000 }, { "epoch": 8.707093821510298, "grad_norm": 5.369323253631592, "learning_rate": 4.840781250000001e-06, "loss": 0.0196, "step": 19025 }, { "epoch": 8.718535469107552, "grad_norm": 0.35658735036849976, "learning_rate": 
4.836875e-06, "loss": 0.0116, "step": 19050 }, { "epoch": 8.729977116704806, "grad_norm": 0.7237716913223267, "learning_rate": 4.8329687500000005e-06, "loss": 0.0123, "step": 19075 }, { "epoch": 8.74141876430206, "grad_norm": 2.99265193939209, "learning_rate": 4.8290625e-06, "loss": 0.012, "step": 19100 }, { "epoch": 8.752860411899313, "grad_norm": 2.613100528717041, "learning_rate": 4.82515625e-06, "loss": 0.0122, "step": 19125 }, { "epoch": 8.764302059496568, "grad_norm": 0.6266801357269287, "learning_rate": 4.82125e-06, "loss": 0.0148, "step": 19150 }, { "epoch": 8.775743707093822, "grad_norm": 3.5433642864227295, "learning_rate": 4.81734375e-06, "loss": 0.0161, "step": 19175 }, { "epoch": 8.787185354691076, "grad_norm": 0.1925951987504959, "learning_rate": 4.8134375000000004e-06, "loss": 0.0112, "step": 19200 }, { "epoch": 8.79862700228833, "grad_norm": 3.5308215618133545, "learning_rate": 4.80953125e-06, "loss": 0.0108, "step": 19225 }, { "epoch": 8.810068649885583, "grad_norm": 2.4649088382720947, "learning_rate": 4.805625e-06, "loss": 0.0157, "step": 19250 }, { "epoch": 8.821510297482838, "grad_norm": 2.442873954772949, "learning_rate": 4.801718750000001e-06, "loss": 0.013, "step": 19275 }, { "epoch": 8.832951945080092, "grad_norm": 0.9463144540786743, "learning_rate": 4.797812500000001e-06, "loss": 0.011, "step": 19300 }, { "epoch": 8.844393592677346, "grad_norm": 0.19932988286018372, "learning_rate": 4.7939062499999995e-06, "loss": 0.0105, "step": 19325 }, { "epoch": 8.8558352402746, "grad_norm": 0.8660555481910706, "learning_rate": 4.79e-06, "loss": 0.0157, "step": 19350 }, { "epoch": 8.867276887871853, "grad_norm": 0.7722224593162537, "learning_rate": 4.78609375e-06, "loss": 0.0108, "step": 19375 }, { "epoch": 8.878718535469108, "grad_norm": 2.1887614727020264, "learning_rate": 4.7823437500000005e-06, "loss": 0.0212, "step": 19400 }, { "epoch": 8.890160183066362, "grad_norm": 0.8951042294502258, "learning_rate": 4.7784375e-06, "loss": 0.0113, "step": 19425 }, { "epoch": 8.901601830663616, "grad_norm": 0.24026672542095184, "learning_rate": 4.77453125e-06, "loss": 0.0159, "step": 19450 }, { "epoch": 8.91304347826087, "grad_norm": 0.1646479368209839, "learning_rate": 4.770625e-06, "loss": 0.0125, "step": 19475 }, { "epoch": 8.924485125858123, "grad_norm": 4.403901100158691, "learning_rate": 4.76671875e-06, "loss": 0.0105, "step": 19500 }, { "epoch": 8.935926773455378, "grad_norm": 4.077794551849365, "learning_rate": 4.7628125e-06, "loss": 0.01, "step": 19525 }, { "epoch": 8.947368421052632, "grad_norm": 1.0000375509262085, "learning_rate": 4.758906250000001e-06, "loss": 0.0256, "step": 19550 }, { "epoch": 8.958810068649885, "grad_norm": 6.668058395385742, "learning_rate": 4.755e-06, "loss": 0.012, "step": 19575 }, { "epoch": 8.97025171624714, "grad_norm": 1.8313168287277222, "learning_rate": 4.7510937500000005e-06, "loss": 0.0155, "step": 19600 }, { "epoch": 8.981693363844393, "grad_norm": 0.25155580043792725, "learning_rate": 4.7471875e-06, "loss": 0.0121, "step": 19625 }, { "epoch": 8.993135011441648, "grad_norm": 0.5707370638847351, "learning_rate": 4.74328125e-06, "loss": 0.0117, "step": 19650 }, { "epoch": 9.004576659038902, "grad_norm": 0.34548330307006836, "learning_rate": 4.739375e-06, "loss": 0.0148, "step": 19675 }, { "epoch": 9.016018306636155, "grad_norm": 0.7761284708976746, "learning_rate": 4.73546875e-06, "loss": 0.0066, "step": 19700 }, { "epoch": 9.027459954233409, "grad_norm": 1.4383254051208496, "learning_rate": 4.7315625000000005e-06, "loss": 0.0103, "step": 
19725 }, { "epoch": 9.038901601830664, "grad_norm": 2.025021553039551, "learning_rate": 4.72765625e-06, "loss": 0.0057, "step": 19750 }, { "epoch": 9.050343249427918, "grad_norm": 1.3105952739715576, "learning_rate": 4.72375e-06, "loss": 0.0179, "step": 19775 }, { "epoch": 9.061784897025172, "grad_norm": 0.26737406849861145, "learning_rate": 4.719843750000001e-06, "loss": 0.0105, "step": 19800 }, { "epoch": 9.073226544622425, "grad_norm": 0.30581921339035034, "learning_rate": 4.715937500000001e-06, "loss": 0.0134, "step": 19825 }, { "epoch": 9.084668192219679, "grad_norm": 1.771240234375, "learning_rate": 4.71203125e-06, "loss": 0.0071, "step": 19850 }, { "epoch": 9.096109839816934, "grad_norm": 0.09394315630197525, "learning_rate": 4.708125e-06, "loss": 0.0105, "step": 19875 }, { "epoch": 9.107551487414188, "grad_norm": 3.1190526485443115, "learning_rate": 4.70421875e-06, "loss": 0.0075, "step": 19900 }, { "epoch": 9.118993135011442, "grad_norm": 0.17495498061180115, "learning_rate": 4.700312500000001e-06, "loss": 0.0099, "step": 19925 }, { "epoch": 9.130434782608695, "grad_norm": 0.05427069589495659, "learning_rate": 4.69640625e-06, "loss": 0.0051, "step": 19950 }, { "epoch": 9.141876430205949, "grad_norm": 0.20918408036231995, "learning_rate": 4.6925e-06, "loss": 0.0096, "step": 19975 }, { "epoch": 9.153318077803204, "grad_norm": 0.15386857092380524, "learning_rate": 4.688593750000001e-06, "loss": 0.0088, "step": 20000 }, { "epoch": 9.153318077803204, "eval_loss": 0.1703590452671051, "eval_runtime": 8483.1509, "eval_samples_per_second": 1.122, "eval_steps_per_second": 0.14, "eval_wer": 0.08397325142381776, "step": 20000 }, { "epoch": 9.164759725400458, "grad_norm": 0.9295946955680847, "learning_rate": 4.6846875e-06, "loss": 0.0087, "step": 20025 }, { "epoch": 9.176201372997712, "grad_norm": 1.8225586414337158, "learning_rate": 4.68078125e-06, "loss": 0.0061, "step": 20050 }, { "epoch": 9.187643020594965, "grad_norm": 0.5476183295249939, "learning_rate": 4.676875e-06, "loss": 0.018, "step": 20075 }, { "epoch": 9.199084668192219, "grad_norm": 2.473881244659424, "learning_rate": 4.67296875e-06, "loss": 0.0097, "step": 20100 }, { "epoch": 9.210526315789474, "grad_norm": 0.7805386781692505, "learning_rate": 4.6690625e-06, "loss": 0.0195, "step": 20125 }, { "epoch": 9.221967963386728, "grad_norm": 1.7499024868011475, "learning_rate": 4.66515625e-06, "loss": 0.0086, "step": 20150 }, { "epoch": 9.233409610983982, "grad_norm": 2.100869655609131, "learning_rate": 4.6612500000000005e-06, "loss": 0.0114, "step": 20175 }, { "epoch": 9.244851258581235, "grad_norm": 0.04648475721478462, "learning_rate": 4.657343750000001e-06, "loss": 0.0054, "step": 20200 }, { "epoch": 9.256292906178489, "grad_norm": 0.2908037602901459, "learning_rate": 4.6534375e-06, "loss": 0.0102, "step": 20225 }, { "epoch": 9.267734553775744, "grad_norm": 1.554908037185669, "learning_rate": 4.64953125e-06, "loss": 0.0093, "step": 20250 }, { "epoch": 9.279176201372998, "grad_norm": 1.785403847694397, "learning_rate": 4.645625e-06, "loss": 0.0159, "step": 20275 }, { "epoch": 9.290617848970252, "grad_norm": 0.08504261821508408, "learning_rate": 4.6417187500000004e-06, "loss": 0.0101, "step": 20300 }, { "epoch": 9.302059496567505, "grad_norm": 1.2090202569961548, "learning_rate": 4.6378125e-06, "loss": 0.0146, "step": 20325 }, { "epoch": 9.31350114416476, "grad_norm": 2.689305543899536, "learning_rate": 4.63390625e-06, "loss": 0.0049, "step": 20350 }, { "epoch": 9.324942791762014, "grad_norm": 0.4883344769477844, "learning_rate": 
4.6300000000000006e-06, "loss": 0.0123, "step": 20375 }, { "epoch": 9.336384439359268, "grad_norm": 0.607967734336853, "learning_rate": 4.62609375e-06, "loss": 0.0061, "step": 20400 }, { "epoch": 9.347826086956522, "grad_norm": 0.579464852809906, "learning_rate": 4.6221875e-06, "loss": 0.0103, "step": 20425 }, { "epoch": 9.359267734553775, "grad_norm": 2.0376808643341064, "learning_rate": 4.618281250000001e-06, "loss": 0.0086, "step": 20450 }, { "epoch": 9.37070938215103, "grad_norm": 0.6122703552246094, "learning_rate": 4.614375e-06, "loss": 0.0136, "step": 20475 }, { "epoch": 9.382151029748284, "grad_norm": 1.618090271949768, "learning_rate": 4.61046875e-06, "loss": 0.0139, "step": 20500 }, { "epoch": 9.393592677345538, "grad_norm": 1.3755525350570679, "learning_rate": 4.6065625e-06, "loss": 0.013, "step": 20525 }, { "epoch": 9.405034324942791, "grad_norm": 0.018447596579790115, "learning_rate": 4.60265625e-06, "loss": 0.0102, "step": 20550 }, { "epoch": 9.416475972540045, "grad_norm": 0.09019108861684799, "learning_rate": 4.598750000000001e-06, "loss": 0.0147, "step": 20575 }, { "epoch": 9.4279176201373, "grad_norm": 0.024939313530921936, "learning_rate": 4.59484375e-06, "loss": 0.0079, "step": 20600 }, { "epoch": 9.439359267734554, "grad_norm": 0.5434083342552185, "learning_rate": 4.5909375000000005e-06, "loss": 0.0097, "step": 20625 }, { "epoch": 9.450800915331808, "grad_norm": 0.13215604424476624, "learning_rate": 4.587031250000001e-06, "loss": 0.006, "step": 20650 }, { "epoch": 9.462242562929061, "grad_norm": 3.517838716506958, "learning_rate": 4.583125e-06, "loss": 0.0187, "step": 20675 }, { "epoch": 9.473684210526315, "grad_norm": 0.052679643034935, "learning_rate": 4.57921875e-06, "loss": 0.0055, "step": 20700 }, { "epoch": 9.48512585812357, "grad_norm": 2.106519937515259, "learning_rate": 4.5753125e-06, "loss": 0.0189, "step": 20725 }, { "epoch": 9.496567505720824, "grad_norm": 1.0505872964859009, "learning_rate": 4.57140625e-06, "loss": 0.0128, "step": 20750 }, { "epoch": 9.508009153318078, "grad_norm": 0.10401804000139236, "learning_rate": 4.5675e-06, "loss": 0.0179, "step": 20775 }, { "epoch": 9.519450800915331, "grad_norm": 1.3083796501159668, "learning_rate": 4.56359375e-06, "loss": 0.0103, "step": 20800 }, { "epoch": 9.530892448512585, "grad_norm": 2.332017660140991, "learning_rate": 4.5596875000000005e-06, "loss": 0.0085, "step": 20825 }, { "epoch": 9.54233409610984, "grad_norm": 3.1442549228668213, "learning_rate": 4.555781250000001e-06, "loss": 0.0108, "step": 20850 }, { "epoch": 9.553775743707094, "grad_norm": 1.25956392288208, "learning_rate": 4.5518749999999995e-06, "loss": 0.0141, "step": 20875 }, { "epoch": 9.565217391304348, "grad_norm": 1.9368778467178345, "learning_rate": 4.54796875e-06, "loss": 0.0099, "step": 20900 }, { "epoch": 9.576659038901601, "grad_norm": 0.2511462867259979, "learning_rate": 4.5440625e-06, "loss": 0.0132, "step": 20925 }, { "epoch": 9.588100686498855, "grad_norm": 1.3322951793670654, "learning_rate": 4.5401562500000005e-06, "loss": 0.0098, "step": 20950 }, { "epoch": 9.59954233409611, "grad_norm": 0.9744560718536377, "learning_rate": 4.53625e-06, "loss": 0.0137, "step": 20975 }, { "epoch": 9.610983981693364, "grad_norm": 0.9821468591690063, "learning_rate": 4.53234375e-06, "loss": 0.0074, "step": 21000 }, { "epoch": 9.622425629290618, "grad_norm": 0.606266438961029, "learning_rate": 4.528437500000001e-06, "loss": 0.0161, "step": 21025 }, { "epoch": 9.633867276887871, "grad_norm": 0.3212910592556, "learning_rate": 4.52453125e-06, "loss": 
0.014, "step": 21050 }, { "epoch": 9.645308924485127, "grad_norm": 0.3966047465801239, "learning_rate": 4.5206250000000004e-06, "loss": 0.0181, "step": 21075 }, { "epoch": 9.65675057208238, "grad_norm": 1.5205986499786377, "learning_rate": 4.51671875e-06, "loss": 0.0052, "step": 21100 }, { "epoch": 9.668192219679634, "grad_norm": 1.7500886917114258, "learning_rate": 4.5128125e-06, "loss": 0.0166, "step": 21125 }, { "epoch": 9.679633867276888, "grad_norm": 3.436781883239746, "learning_rate": 4.50890625e-06, "loss": 0.0062, "step": 21150 }, { "epoch": 9.691075514874141, "grad_norm": 0.9921854138374329, "learning_rate": 4.505e-06, "loss": 0.0216, "step": 21175 }, { "epoch": 9.702517162471397, "grad_norm": 2.330374002456665, "learning_rate": 4.50109375e-06, "loss": 0.0122, "step": 21200 }, { "epoch": 9.71395881006865, "grad_norm": 0.20422367751598358, "learning_rate": 4.497187500000001e-06, "loss": 0.0112, "step": 21225 }, { "epoch": 9.725400457665904, "grad_norm": 2.015188694000244, "learning_rate": 4.49328125e-06, "loss": 0.0098, "step": 21250 }, { "epoch": 9.736842105263158, "grad_norm": 0.27854761481285095, "learning_rate": 4.4893750000000005e-06, "loss": 0.0127, "step": 21275 }, { "epoch": 9.748283752860411, "grad_norm": 2.6081674098968506, "learning_rate": 4.48546875e-06, "loss": 0.0076, "step": 21300 }, { "epoch": 9.759725400457667, "grad_norm": 0.4660798907279968, "learning_rate": 4.4815625e-06, "loss": 0.0147, "step": 21325 }, { "epoch": 9.77116704805492, "grad_norm": 1.435490369796753, "learning_rate": 4.47765625e-06, "loss": 0.008, "step": 21350 }, { "epoch": 9.782608695652174, "grad_norm": 0.19726777076721191, "learning_rate": 4.47375e-06, "loss": 0.0177, "step": 21375 }, { "epoch": 9.794050343249427, "grad_norm": 0.22597679495811462, "learning_rate": 4.4698437500000005e-06, "loss": 0.0098, "step": 21400 }, { "epoch": 9.805491990846681, "grad_norm": 0.26929301023483276, "learning_rate": 4.4659375e-06, "loss": 0.0161, "step": 21425 }, { "epoch": 9.816933638443937, "grad_norm": 0.3112774193286896, "learning_rate": 4.46203125e-06, "loss": 0.0067, "step": 21450 }, { "epoch": 9.82837528604119, "grad_norm": 2.671537399291992, "learning_rate": 4.458125000000001e-06, "loss": 0.0192, "step": 21475 }, { "epoch": 9.839816933638444, "grad_norm": 0.5832556486129761, "learning_rate": 4.45421875e-06, "loss": 0.0082, "step": 21500 }, { "epoch": 9.851258581235697, "grad_norm": 1.2362496852874756, "learning_rate": 4.4503125e-06, "loss": 0.0117, "step": 21525 }, { "epoch": 9.862700228832953, "grad_norm": 1.704158067703247, "learning_rate": 4.44640625e-06, "loss": 0.0156, "step": 21550 }, { "epoch": 9.874141876430206, "grad_norm": 0.9261170029640198, "learning_rate": 4.4425e-06, "loss": 0.0132, "step": 21575 }, { "epoch": 9.88558352402746, "grad_norm": 0.10364066809415817, "learning_rate": 4.4385937500000006e-06, "loss": 0.0095, "step": 21600 }, { "epoch": 9.897025171624714, "grad_norm": 0.9242740273475647, "learning_rate": 4.4346875e-06, "loss": 0.0144, "step": 21625 }, { "epoch": 9.908466819221967, "grad_norm": 1.9973706007003784, "learning_rate": 4.43078125e-06, "loss": 0.0099, "step": 21650 }, { "epoch": 9.919908466819223, "grad_norm": 1.7673425674438477, "learning_rate": 4.426875000000001e-06, "loss": 0.0169, "step": 21675 }, { "epoch": 9.931350114416476, "grad_norm": 0.7870113253593445, "learning_rate": 4.42296875e-06, "loss": 0.0091, "step": 21700 }, { "epoch": 9.94279176201373, "grad_norm": 0.3301905393600464, "learning_rate": 4.4190625e-06, "loss": 0.0103, "step": 21725 }, { "epoch": 
9.954233409610984, "grad_norm": 0.2258094996213913, "learning_rate": 4.41515625e-06, "loss": 0.0069, "step": 21750 }, { "epoch": 9.965675057208237, "grad_norm": 0.19677838683128357, "learning_rate": 4.41125e-06, "loss": 0.0177, "step": 21775 }, { "epoch": 9.977116704805493, "grad_norm": 1.1019150018692017, "learning_rate": 4.40734375e-06, "loss": 0.0079, "step": 21800 }, { "epoch": 9.988558352402746, "grad_norm": 0.2076110690832138, "learning_rate": 4.4034375e-06, "loss": 0.0137, "step": 21825 }, { "epoch": 10.0, "grad_norm": 1.9538120031356812, "learning_rate": 4.3995312500000005e-06, "loss": 0.017, "step": 21850 }, { "epoch": 10.011441647597254, "grad_norm": 0.2267533540725708, "learning_rate": 4.395625000000001e-06, "loss": 0.0073, "step": 21875 }, { "epoch": 10.022883295194507, "grad_norm": 1.6176233291625977, "learning_rate": 4.39171875e-06, "loss": 0.0126, "step": 21900 }, { "epoch": 10.034324942791763, "grad_norm": 0.5193654894828796, "learning_rate": 4.3878125e-06, "loss": 0.0064, "step": 21925 }, { "epoch": 10.045766590389016, "grad_norm": 5.242427825927734, "learning_rate": 4.3840625e-06, "loss": 0.0202, "step": 21950 }, { "epoch": 10.05720823798627, "grad_norm": 0.4088348150253296, "learning_rate": 4.38015625e-06, "loss": 0.0069, "step": 21975 }, { "epoch": 10.068649885583524, "grad_norm": 3.1071178913116455, "learning_rate": 4.376250000000001e-06, "loss": 0.0208, "step": 22000 }, { "epoch": 10.080091533180777, "grad_norm": 0.3789709508419037, "learning_rate": 4.37234375e-06, "loss": 0.007, "step": 22025 }, { "epoch": 10.091533180778033, "grad_norm": 3.275552272796631, "learning_rate": 4.3684375e-06, "loss": 0.0111, "step": 22050 }, { "epoch": 10.102974828375286, "grad_norm": 1.909187912940979, "learning_rate": 4.36453125e-06, "loss": 0.0074, "step": 22075 }, { "epoch": 10.11441647597254, "grad_norm": 7.340714454650879, "learning_rate": 4.360625e-06, "loss": 0.017, "step": 22100 }, { "epoch": 10.125858123569794, "grad_norm": 1.7521164417266846, "learning_rate": 4.356718750000001e-06, "loss": 0.0073, "step": 22125 }, { "epoch": 10.137299771167047, "grad_norm": 3.9338464736938477, "learning_rate": 4.3528125e-06, "loss": 0.0117, "step": 22150 }, { "epoch": 10.148741418764303, "grad_norm": 0.340256005525589, "learning_rate": 4.34890625e-06, "loss": 0.0073, "step": 22175 }, { "epoch": 10.160183066361556, "grad_norm": 0.7079922556877136, "learning_rate": 4.345000000000001e-06, "loss": 0.0073, "step": 22200 }, { "epoch": 10.17162471395881, "grad_norm": 0.1859007626771927, "learning_rate": 4.34109375e-06, "loss": 0.0069, "step": 22225 }, { "epoch": 10.183066361556063, "grad_norm": 4.099189281463623, "learning_rate": 4.3371875e-06, "loss": 0.0179, "step": 22250 }, { "epoch": 10.194508009153319, "grad_norm": 1.2318094968795776, "learning_rate": 4.33328125e-06, "loss": 0.0054, "step": 22275 }, { "epoch": 10.205949656750573, "grad_norm": 3.928300380706787, "learning_rate": 4.329375e-06, "loss": 0.0084, "step": 22300 }, { "epoch": 10.217391304347826, "grad_norm": 0.12189070135354996, "learning_rate": 4.32546875e-06, "loss": 0.0038, "step": 22325 }, { "epoch": 10.22883295194508, "grad_norm": 1.143131136894226, "learning_rate": 4.3215625e-06, "loss": 0.0118, "step": 22350 }, { "epoch": 10.240274599542333, "grad_norm": 8.372136116027832, "learning_rate": 4.3176562500000005e-06, "loss": 0.0067, "step": 22375 }, { "epoch": 10.251716247139589, "grad_norm": 6.539907932281494, "learning_rate": 4.313750000000001e-06, "loss": 0.0137, "step": 22400 }, { "epoch": 10.263157894736842, "grad_norm": 
0.09404875338077545, "learning_rate": 4.30984375e-06, "loss": 0.0069, "step": 22425 }, { "epoch": 10.274599542334096, "grad_norm": 3.818504810333252, "learning_rate": 4.3059375e-06, "loss": 0.0167, "step": 22450 }, { "epoch": 10.28604118993135, "grad_norm": 0.5479367971420288, "learning_rate": 4.30203125e-06, "loss": 0.0074, "step": 22475 }, { "epoch": 10.297482837528603, "grad_norm": 0.2958333492279053, "learning_rate": 4.2981250000000005e-06, "loss": 0.0057, "step": 22500 }, { "epoch": 10.308924485125859, "grad_norm": 0.12461014091968536, "learning_rate": 4.29421875e-06, "loss": 0.0063, "step": 22525 }, { "epoch": 10.320366132723112, "grad_norm": 3.3684613704681396, "learning_rate": 4.2903125e-06, "loss": 0.0119, "step": 22550 }, { "epoch": 10.331807780320366, "grad_norm": 0.1971171647310257, "learning_rate": 4.286406250000001e-06, "loss": 0.0073, "step": 22575 }, { "epoch": 10.34324942791762, "grad_norm": 5.749772548675537, "learning_rate": 4.2825e-06, "loss": 0.012, "step": 22600 }, { "epoch": 10.354691075514873, "grad_norm": 0.1977299004793167, "learning_rate": 4.27859375e-06, "loss": 0.007, "step": 22625 }, { "epoch": 10.366132723112129, "grad_norm": 0.04828539118170738, "learning_rate": 4.2746875e-06, "loss": 0.008, "step": 22650 }, { "epoch": 10.377574370709382, "grad_norm": 1.033084511756897, "learning_rate": 4.27078125e-06, "loss": 0.0072, "step": 22675 }, { "epoch": 10.389016018306636, "grad_norm": 2.2727484703063965, "learning_rate": 4.266875e-06, "loss": 0.0145, "step": 22700 }, { "epoch": 10.40045766590389, "grad_norm": 1.0028413534164429, "learning_rate": 4.26296875e-06, "loss": 0.0078, "step": 22725 }, { "epoch": 10.411899313501145, "grad_norm": 0.4780726730823517, "learning_rate": 4.2590625e-06, "loss": 0.0087, "step": 22750 }, { "epoch": 10.423340961098399, "grad_norm": 0.41152331233024597, "learning_rate": 4.255156250000001e-06, "loss": 0.007, "step": 22775 }, { "epoch": 10.434782608695652, "grad_norm": 3.5015735626220703, "learning_rate": 4.25125e-06, "loss": 0.0166, "step": 22800 }, { "epoch": 10.446224256292906, "grad_norm": 1.6998751163482666, "learning_rate": 4.2473437500000005e-06, "loss": 0.0067, "step": 22825 }, { "epoch": 10.45766590389016, "grad_norm": 6.103700160980225, "learning_rate": 4.243437500000001e-06, "loss": 0.0167, "step": 22850 }, { "epoch": 10.469107551487415, "grad_norm": 0.23333023488521576, "learning_rate": 4.23953125e-06, "loss": 0.009, "step": 22875 }, { "epoch": 10.480549199084669, "grad_norm": 1.7737101316452026, "learning_rate": 4.235625e-06, "loss": 0.0126, "step": 22900 }, { "epoch": 10.491990846681922, "grad_norm": 0.034998368471860886, "learning_rate": 4.23171875e-06, "loss": 0.005, "step": 22925 }, { "epoch": 10.503432494279176, "grad_norm": 5.770680904388428, "learning_rate": 4.2278125000000004e-06, "loss": 0.0211, "step": 22950 }, { "epoch": 10.51487414187643, "grad_norm": 4.836666584014893, "learning_rate": 4.22390625e-06, "loss": 0.0073, "step": 22975 }, { "epoch": 10.526315789473685, "grad_norm": 8.53127384185791, "learning_rate": 4.22e-06, "loss": 0.0169, "step": 23000 }, { "epoch": 10.537757437070939, "grad_norm": 1.372205138206482, "learning_rate": 4.216093750000001e-06, "loss": 0.0066, "step": 23025 }, { "epoch": 10.549199084668192, "grad_norm": 2.9175825119018555, "learning_rate": 4.212187500000001e-06, "loss": 0.0088, "step": 23050 }, { "epoch": 10.560640732265446, "grad_norm": 0.11073864251375198, "learning_rate": 4.2082812499999995e-06, "loss": 0.0076, "step": 23075 }, { "epoch": 10.5720823798627, "grad_norm": 
3.333414316177368, "learning_rate": 4.204375e-06, "loss": 0.0127, "step": 23100 }, { "epoch": 10.583524027459955, "grad_norm": 0.2508285939693451, "learning_rate": 4.20046875e-06, "loss": 0.0066, "step": 23125 }, { "epoch": 10.594965675057209, "grad_norm": 1.784462809562683, "learning_rate": 4.1965625000000005e-06, "loss": 0.0133, "step": 23150 }, { "epoch": 10.606407322654462, "grad_norm": 0.4733440577983856, "learning_rate": 4.19265625e-06, "loss": 0.0041, "step": 23175 }, { "epoch": 10.617848970251716, "grad_norm": 2.352760076522827, "learning_rate": 4.18875e-06, "loss": 0.0107, "step": 23200 }, { "epoch": 10.62929061784897, "grad_norm": 0.504758358001709, "learning_rate": 4.184843750000001e-06, "loss": 0.0076, "step": 23225 }, { "epoch": 10.640732265446225, "grad_norm": 6.352942943572998, "learning_rate": 4.1809375e-06, "loss": 0.0116, "step": 23250 }, { "epoch": 10.652173913043478, "grad_norm": 3.3511691093444824, "learning_rate": 4.17703125e-06, "loss": 0.0073, "step": 23275 }, { "epoch": 10.663615560640732, "grad_norm": 8.50075626373291, "learning_rate": 4.173125e-06, "loss": 0.0099, "step": 23300 }, { "epoch": 10.675057208237986, "grad_norm": 0.607912003993988, "learning_rate": 4.16921875e-06, "loss": 0.007, "step": 23325 }, { "epoch": 10.68649885583524, "grad_norm": 0.051545530557632446, "learning_rate": 4.1653125e-06, "loss": 0.0103, "step": 23350 }, { "epoch": 10.697940503432495, "grad_norm": 1.3820303678512573, "learning_rate": 4.16140625e-06, "loss": 0.0084, "step": 23375 }, { "epoch": 10.709382151029748, "grad_norm": 4.683422565460205, "learning_rate": 4.1575000000000004e-06, "loss": 0.01, "step": 23400 }, { "epoch": 10.720823798627002, "grad_norm": 0.5495042204856873, "learning_rate": 4.153593750000001e-06, "loss": 0.0085, "step": 23425 }, { "epoch": 10.732265446224256, "grad_norm": 0.5360108017921448, "learning_rate": 4.1496875e-06, "loss": 0.0101, "step": 23450 }, { "epoch": 10.743707093821511, "grad_norm": 0.42514368891716003, "learning_rate": 4.1457812500000006e-06, "loss": 0.0078, "step": 23475 }, { "epoch": 10.755148741418765, "grad_norm": 2.4318857192993164, "learning_rate": 4.141875e-06, "loss": 0.0087, "step": 23500 }, { "epoch": 10.766590389016018, "grad_norm": 3.4442293643951416, "learning_rate": 4.13796875e-06, "loss": 0.0087, "step": 23525 }, { "epoch": 10.778032036613272, "grad_norm": 0.7716177701950073, "learning_rate": 4.1340625e-06, "loss": 0.0104, "step": 23550 }, { "epoch": 10.789473684210526, "grad_norm": 0.05760778859257698, "learning_rate": 4.13015625e-06, "loss": 0.0064, "step": 23575 }, { "epoch": 10.800915331807781, "grad_norm": 3.2319798469543457, "learning_rate": 4.1262500000000005e-06, "loss": 0.0088, "step": 23600 }, { "epoch": 10.812356979405035, "grad_norm": 0.23007997870445251, "learning_rate": 4.12234375e-06, "loss": 0.0062, "step": 23625 }, { "epoch": 10.823798627002288, "grad_norm": 1.1692849397659302, "learning_rate": 4.1184375e-06, "loss": 0.0101, "step": 23650 }, { "epoch": 10.835240274599542, "grad_norm": 1.282559871673584, "learning_rate": 4.114531250000001e-06, "loss": 0.0074, "step": 23675 }, { "epoch": 10.846681922196796, "grad_norm": 0.7795631289482117, "learning_rate": 4.110625e-06, "loss": 0.0076, "step": 23700 }, { "epoch": 10.858123569794051, "grad_norm": 0.9309841990470886, "learning_rate": 4.10671875e-06, "loss": 0.0055, "step": 23725 }, { "epoch": 10.869565217391305, "grad_norm": 3.0026588439941406, "learning_rate": 4.1028125e-06, "loss": 0.0115, "step": 23750 }, { "epoch": 10.881006864988558, "grad_norm": 
0.11742173880338669, "learning_rate": 4.09890625e-06, "loss": 0.0082, "step": 23775 }, { "epoch": 10.892448512585812, "grad_norm": 3.019906997680664, "learning_rate": 4.095000000000001e-06, "loss": 0.0116, "step": 23800 }, { "epoch": 10.903890160183066, "grad_norm": 0.7403866648674011, "learning_rate": 4.09109375e-06, "loss": 0.0125, "step": 23825 }, { "epoch": 10.915331807780321, "grad_norm": 0.7617065906524658, "learning_rate": 4.0871875e-06, "loss": 0.0159, "step": 23850 }, { "epoch": 10.926773455377575, "grad_norm": 2.5809476375579834, "learning_rate": 4.083281250000001e-06, "loss": 0.0085, "step": 23875 }, { "epoch": 10.938215102974828, "grad_norm": 2.147615432739258, "learning_rate": 4.079375e-06, "loss": 0.0115, "step": 23900 }, { "epoch": 10.949656750572082, "grad_norm": 1.0493861436843872, "learning_rate": 4.07546875e-06, "loss": 0.005, "step": 23925 }, { "epoch": 10.961098398169337, "grad_norm": 3.730088472366333, "learning_rate": 4.0715625e-06, "loss": 0.0086, "step": 23950 }, { "epoch": 10.972540045766591, "grad_norm": 0.13982118666172028, "learning_rate": 4.06765625e-06, "loss": 0.0037, "step": 23975 }, { "epoch": 10.983981693363845, "grad_norm": 0.18219667673110962, "learning_rate": 4.06375e-06, "loss": 0.0088, "step": 24000 }, { "epoch": 10.995423340961098, "grad_norm": 0.3765556812286377, "learning_rate": 4.05984375e-06, "loss": 0.0092, "step": 24025 }, { "epoch": 11.006864988558352, "grad_norm": 1.3313030004501343, "learning_rate": 4.0559375000000005e-06, "loss": 0.0119, "step": 24050 }, { "epoch": 11.018306636155607, "grad_norm": 0.1510874629020691, "learning_rate": 4.0521875e-06, "loss": 0.0059, "step": 24075 }, { "epoch": 11.02974828375286, "grad_norm": 0.3305507302284241, "learning_rate": 4.04828125e-06, "loss": 0.0068, "step": 24100 }, { "epoch": 11.041189931350115, "grad_norm": 0.8198592662811279, "learning_rate": 4.0443750000000006e-06, "loss": 0.008, "step": 24125 }, { "epoch": 11.052631578947368, "grad_norm": 0.08008773624897003, "learning_rate": 4.04046875e-06, "loss": 0.0122, "step": 24150 }, { "epoch": 11.064073226544622, "grad_norm": 2.9105420112609863, "learning_rate": 4.0365625e-06, "loss": 0.0093, "step": 24175 }, { "epoch": 11.075514874141877, "grad_norm": 0.3481699526309967, "learning_rate": 4.032656250000001e-06, "loss": 0.0081, "step": 24200 }, { "epoch": 11.08695652173913, "grad_norm": 0.2750926613807678, "learning_rate": 4.02875e-06, "loss": 0.0083, "step": 24225 }, { "epoch": 11.098398169336384, "grad_norm": 0.17569704353809357, "learning_rate": 4.02484375e-06, "loss": 0.0059, "step": 24250 }, { "epoch": 11.109839816933638, "grad_norm": 0.19646963477134705, "learning_rate": 4.0209375e-06, "loss": 0.0083, "step": 24275 }, { "epoch": 11.121281464530892, "grad_norm": 0.13096636533737183, "learning_rate": 4.01703125e-06, "loss": 0.0097, "step": 24300 }, { "epoch": 11.132723112128147, "grad_norm": 2.2364015579223633, "learning_rate": 4.013125e-06, "loss": 0.0087, "step": 24325 }, { "epoch": 11.1441647597254, "grad_norm": 1.0948302745819092, "learning_rate": 4.00921875e-06, "loss": 0.0051, "step": 24350 }, { "epoch": 11.155606407322654, "grad_norm": 0.04499274492263794, "learning_rate": 4.0053125000000005e-06, "loss": 0.0053, "step": 24375 }, { "epoch": 11.167048054919908, "grad_norm": 0.4982542097568512, "learning_rate": 4.001406250000001e-06, "loss": 0.0112, "step": 24400 }, { "epoch": 11.178489702517162, "grad_norm": 0.4417966604232788, "learning_rate": 3.9975e-06, "loss": 0.0062, "step": 24425 }, { "epoch": 11.189931350114417, "grad_norm": 
0.5443983674049377, "learning_rate": 3.99359375e-06, "loss": 0.0156, "step": 24450 }, { "epoch": 11.20137299771167, "grad_norm": 0.047877777367830276, "learning_rate": 3.9896875e-06, "loss": 0.0089, "step": 24475 }, { "epoch": 11.212814645308924, "grad_norm": 1.5229582786560059, "learning_rate": 3.98578125e-06, "loss": 0.0076, "step": 24500 }, { "epoch": 11.224256292906178, "grad_norm": 2.30202579498291, "learning_rate": 3.981875e-06, "loss": 0.009, "step": 24525 }, { "epoch": 11.235697940503432, "grad_norm": 0.12078336626291275, "learning_rate": 3.97796875e-06, "loss": 0.0117, "step": 24550 }, { "epoch": 11.247139588100687, "grad_norm": 0.1738824099302292, "learning_rate": 3.9740625000000005e-06, "loss": 0.0044, "step": 24575 }, { "epoch": 11.25858123569794, "grad_norm": 1.9112942218780518, "learning_rate": 3.97015625e-06, "loss": 0.0133, "step": 24600 }, { "epoch": 11.270022883295194, "grad_norm": 0.04181580990552902, "learning_rate": 3.96625e-06, "loss": 0.003, "step": 24625 }, { "epoch": 11.281464530892448, "grad_norm": 0.026247529312968254, "learning_rate": 3.96234375e-06, "loss": 0.0071, "step": 24650 }, { "epoch": 11.292906178489703, "grad_norm": 4.70066499710083, "learning_rate": 3.9584375e-06, "loss": 0.0079, "step": 24675 }, { "epoch": 11.304347826086957, "grad_norm": 0.1226392611861229, "learning_rate": 3.9545312500000005e-06, "loss": 0.0109, "step": 24700 }, { "epoch": 11.31578947368421, "grad_norm": 0.5699432492256165, "learning_rate": 3.950625e-06, "loss": 0.003, "step": 24725 }, { "epoch": 11.327231121281464, "grad_norm": 0.01484320405870676, "learning_rate": 3.94671875e-06, "loss": 0.0135, "step": 24750 }, { "epoch": 11.338672768878718, "grad_norm": 0.035592660307884216, "learning_rate": 3.942812500000001e-06, "loss": 0.004, "step": 24775 }, { "epoch": 11.350114416475973, "grad_norm": 0.1858692467212677, "learning_rate": 3.93890625e-06, "loss": 0.0052, "step": 24800 }, { "epoch": 11.361556064073227, "grad_norm": 2.503647804260254, "learning_rate": 3.9350000000000004e-06, "loss": 0.0066, "step": 24825 }, { "epoch": 11.37299771167048, "grad_norm": 3.71572208404541, "learning_rate": 3.93109375e-06, "loss": 0.0097, "step": 24850 }, { "epoch": 11.384439359267734, "grad_norm": 1.2233408689498901, "learning_rate": 3.9271875e-06, "loss": 0.0081, "step": 24875 }, { "epoch": 11.395881006864988, "grad_norm": 2.118540048599243, "learning_rate": 3.92328125e-06, "loss": 0.0153, "step": 24900 }, { "epoch": 11.407322654462243, "grad_norm": 0.17355529963970184, "learning_rate": 3.919375e-06, "loss": 0.0057, "step": 24925 }, { "epoch": 11.418764302059497, "grad_norm": 1.1891402006149292, "learning_rate": 3.91546875e-06, "loss": 0.0106, "step": 24950 }, { "epoch": 11.43020594965675, "grad_norm": 0.3441114127635956, "learning_rate": 3.911562500000001e-06, "loss": 0.0047, "step": 24975 }, { "epoch": 11.441647597254004, "grad_norm": 0.44860488176345825, "learning_rate": 3.90765625e-06, "loss": 0.0065, "step": 25000 }, { "epoch": 11.441647597254004, "eval_loss": 0.18233540654182434, "eval_runtime": 8812.6554, "eval_samples_per_second": 1.08, "eval_steps_per_second": 0.135, "eval_wer": 0.08493312855954438, "step": 25000 }, { "epoch": 11.453089244851258, "grad_norm": 0.595927894115448, "learning_rate": 3.9037500000000005e-06, "loss": 0.0035, "step": 25025 }, { "epoch": 11.464530892448513, "grad_norm": 1.4440901279449463, "learning_rate": 3.89984375e-06, "loss": 0.0138, "step": 25050 }, { "epoch": 11.475972540045767, "grad_norm": 1.2529171705245972, "learning_rate": 3.8959375e-06, "loss": 0.0089, 
"step": 25075 }, { "epoch": 11.48741418764302, "grad_norm": 0.06299307197332382, "learning_rate": 3.89203125e-06, "loss": 0.0069, "step": 25100 }, { "epoch": 11.498855835240274, "grad_norm": 0.726309061050415, "learning_rate": 3.888125e-06, "loss": 0.0094, "step": 25125 }, { "epoch": 11.51029748283753, "grad_norm": 0.694255530834198, "learning_rate": 3.8842187500000005e-06, "loss": 0.0137, "step": 25150 }, { "epoch": 11.521739130434783, "grad_norm": 0.09387504309415817, "learning_rate": 3.8803125e-06, "loss": 0.0026, "step": 25175 }, { "epoch": 11.533180778032037, "grad_norm": 0.9417803883552551, "learning_rate": 3.87640625e-06, "loss": 0.0115, "step": 25200 }, { "epoch": 11.54462242562929, "grad_norm": 0.05203927680850029, "learning_rate": 3.872500000000001e-06, "loss": 0.007, "step": 25225 }, { "epoch": 11.556064073226544, "grad_norm": 0.41137346625328064, "learning_rate": 3.86859375e-06, "loss": 0.0062, "step": 25250 }, { "epoch": 11.5675057208238, "grad_norm": 0.1848648637533188, "learning_rate": 3.8646875e-06, "loss": 0.0062, "step": 25275 }, { "epoch": 11.578947368421053, "grad_norm": 2.573845863342285, "learning_rate": 3.86078125e-06, "loss": 0.0126, "step": 25300 }, { "epoch": 11.590389016018307, "grad_norm": 0.6269115805625916, "learning_rate": 3.856875e-06, "loss": 0.0109, "step": 25325 }, { "epoch": 11.60183066361556, "grad_norm": 0.9002649188041687, "learning_rate": 3.8529687500000006e-06, "loss": 0.0099, "step": 25350 }, { "epoch": 11.613272311212814, "grad_norm": 1.8748010396957397, "learning_rate": 3.8490625e-06, "loss": 0.0068, "step": 25375 }, { "epoch": 11.62471395881007, "grad_norm": 0.1886737048625946, "learning_rate": 3.84515625e-06, "loss": 0.0096, "step": 25400 }, { "epoch": 11.636155606407323, "grad_norm": 0.5853599905967712, "learning_rate": 3.841250000000001e-06, "loss": 0.0073, "step": 25425 }, { "epoch": 11.647597254004577, "grad_norm": 1.6265490055084229, "learning_rate": 3.83734375e-06, "loss": 0.0176, "step": 25450 }, { "epoch": 11.65903890160183, "grad_norm": 1.750231146812439, "learning_rate": 3.8334375e-06, "loss": 0.0059, "step": 25475 }, { "epoch": 11.670480549199084, "grad_norm": 2.2551820278167725, "learning_rate": 3.82953125e-06, "loss": 0.0064, "step": 25500 }, { "epoch": 11.68192219679634, "grad_norm": 0.14926180243492126, "learning_rate": 3.825625e-06, "loss": 0.0087, "step": 25525 }, { "epoch": 11.693363844393593, "grad_norm": 2.3310084342956543, "learning_rate": 3.82171875e-06, "loss": 0.0094, "step": 25550 }, { "epoch": 11.704805491990847, "grad_norm": 2.0139739513397217, "learning_rate": 3.8178125e-06, "loss": 0.0068, "step": 25575 }, { "epoch": 11.7162471395881, "grad_norm": 1.094116449356079, "learning_rate": 3.8139062500000005e-06, "loss": 0.0122, "step": 25600 }, { "epoch": 11.727688787185354, "grad_norm": 2.4732375144958496, "learning_rate": 3.8100000000000004e-06, "loss": 0.0079, "step": 25625 }, { "epoch": 11.73913043478261, "grad_norm": 0.3952226936817169, "learning_rate": 3.8060937500000003e-06, "loss": 0.0104, "step": 25650 }, { "epoch": 11.750572082379863, "grad_norm": 0.37850210070610046, "learning_rate": 3.8021874999999998e-06, "loss": 0.0058, "step": 25675 }, { "epoch": 11.762013729977117, "grad_norm": 0.36700963973999023, "learning_rate": 3.79828125e-06, "loss": 0.0098, "step": 25700 }, { "epoch": 11.77345537757437, "grad_norm": 0.6098679900169373, "learning_rate": 3.794375e-06, "loss": 0.0066, "step": 25725 }, { "epoch": 11.784897025171624, "grad_norm": 0.09302989393472672, "learning_rate": 3.79046875e-06, "loss": 0.0127, 
"step": 25750 }, { "epoch": 11.79633867276888, "grad_norm": 0.0692160502076149, "learning_rate": 3.7865625000000002e-06, "loss": 0.0081, "step": 25775 }, { "epoch": 11.807780320366133, "grad_norm": 1.0740209817886353, "learning_rate": 3.78265625e-06, "loss": 0.0119, "step": 25800 }, { "epoch": 11.819221967963387, "grad_norm": 2.747967481613159, "learning_rate": 3.7787500000000005e-06, "loss": 0.0105, "step": 25825 }, { "epoch": 11.83066361556064, "grad_norm": 0.026785267516970634, "learning_rate": 3.7748437500000004e-06, "loss": 0.0089, "step": 25850 }, { "epoch": 11.842105263157894, "grad_norm": 0.5013160705566406, "learning_rate": 3.7709375000000007e-06, "loss": 0.009, "step": 25875 }, { "epoch": 11.85354691075515, "grad_norm": 0.7497411370277405, "learning_rate": 3.7670312499999997e-06, "loss": 0.0087, "step": 25900 }, { "epoch": 11.864988558352403, "grad_norm": 3.126556634902954, "learning_rate": 3.763125e-06, "loss": 0.0056, "step": 25925 }, { "epoch": 11.876430205949656, "grad_norm": 0.9904767274856567, "learning_rate": 3.75921875e-06, "loss": 0.0092, "step": 25950 }, { "epoch": 11.88787185354691, "grad_norm": 1.5207469463348389, "learning_rate": 3.7553125000000003e-06, "loss": 0.0041, "step": 25975 }, { "epoch": 11.899313501144166, "grad_norm": 0.04306778684258461, "learning_rate": 3.7514062500000002e-06, "loss": 0.0107, "step": 26000 }, { "epoch": 11.91075514874142, "grad_norm": 1.214351773262024, "learning_rate": 3.7475e-06, "loss": 0.0079, "step": 26025 }, { "epoch": 11.922196796338673, "grad_norm": 2.3525550365448, "learning_rate": 3.7435937500000004e-06, "loss": 0.01, "step": 26050 }, { "epoch": 11.933638443935926, "grad_norm": 0.725584864616394, "learning_rate": 3.7396875000000004e-06, "loss": 0.0062, "step": 26075 }, { "epoch": 11.94508009153318, "grad_norm": 0.4120253622531891, "learning_rate": 3.73578125e-06, "loss": 0.0068, "step": 26100 }, { "epoch": 11.956521739130435, "grad_norm": 0.03465963155031204, "learning_rate": 3.731875e-06, "loss": 0.0038, "step": 26125 }, { "epoch": 11.96796338672769, "grad_norm": 0.050099629908800125, "learning_rate": 3.72796875e-06, "loss": 0.0079, "step": 26150 }, { "epoch": 11.979405034324943, "grad_norm": 3.9544880390167236, "learning_rate": 3.7240625e-06, "loss": 0.0079, "step": 26175 }, { "epoch": 11.990846681922196, "grad_norm": 1.6744534969329834, "learning_rate": 3.7201562500000003e-06, "loss": 0.0076, "step": 26200 }, { "epoch": 12.00228832951945, "grad_norm": 0.20202918350696564, "learning_rate": 3.71625e-06, "loss": 0.0107, "step": 26225 }, { "epoch": 12.013729977116705, "grad_norm": 0.07729358226060867, "learning_rate": 3.7123437500000005e-06, "loss": 0.0046, "step": 26250 }, { "epoch": 12.025171624713959, "grad_norm": 1.54712975025177, "learning_rate": 3.7084375000000004e-06, "loss": 0.0047, "step": 26275 }, { "epoch": 12.036613272311213, "grad_norm": 0.42515042424201965, "learning_rate": 3.70453125e-06, "loss": 0.0053, "step": 26300 }, { "epoch": 12.048054919908466, "grad_norm": 0.3717302978038788, "learning_rate": 3.700625e-06, "loss": 0.0084, "step": 26325 }, { "epoch": 12.05949656750572, "grad_norm": 0.28201761841773987, "learning_rate": 3.69671875e-06, "loss": 0.0034, "step": 26350 }, { "epoch": 12.070938215102975, "grad_norm": 0.051573362201452255, "learning_rate": 3.6928125e-06, "loss": 0.0053, "step": 26375 }, { "epoch": 12.082379862700229, "grad_norm": 0.1747036576271057, "learning_rate": 3.68890625e-06, "loss": 0.0064, "step": 26400 }, { "epoch": 12.093821510297483, "grad_norm": 1.0776088237762451, "learning_rate": 
3.6850000000000003e-06, "loss": 0.0084, "step": 26425 }, { "epoch": 12.105263157894736, "grad_norm": 0.2853906452655792, "learning_rate": 3.68109375e-06, "loss": 0.0053, "step": 26450 }, { "epoch": 12.116704805491992, "grad_norm": 0.8596778512001038, "learning_rate": 3.6771875000000005e-06, "loss": 0.01, "step": 26475 }, { "epoch": 12.128146453089245, "grad_norm": 0.16126525402069092, "learning_rate": 3.6732812500000004e-06, "loss": 0.0053, "step": 26500 }, { "epoch": 12.139588100686499, "grad_norm": 0.7302467823028564, "learning_rate": 3.669375e-06, "loss": 0.006, "step": 26525 }, { "epoch": 12.151029748283753, "grad_norm": 2.4233593940734863, "learning_rate": 3.66546875e-06, "loss": 0.0056, "step": 26550 }, { "epoch": 12.162471395881006, "grad_norm": 0.20938943326473236, "learning_rate": 3.6615625e-06, "loss": 0.0085, "step": 26575 }, { "epoch": 12.173913043478262, "grad_norm": 0.06558073312044144, "learning_rate": 3.65765625e-06, "loss": 0.003, "step": 26600 }, { "epoch": 12.185354691075515, "grad_norm": 0.8649306893348694, "learning_rate": 3.6537500000000004e-06, "loss": 0.0096, "step": 26625 }, { "epoch": 12.196796338672769, "grad_norm": 0.17014870047569275, "learning_rate": 3.6498437500000003e-06, "loss": 0.0041, "step": 26650 }, { "epoch": 12.208237986270023, "grad_norm": 1.4718366861343384, "learning_rate": 3.6459375e-06, "loss": 0.0069, "step": 26675 }, { "epoch": 12.219679633867276, "grad_norm": 0.4427139163017273, "learning_rate": 3.6420312500000005e-06, "loss": 0.004, "step": 26700 }, { "epoch": 12.231121281464532, "grad_norm": 1.431381106376648, "learning_rate": 3.638125e-06, "loss": 0.0117, "step": 26725 }, { "epoch": 12.242562929061785, "grad_norm": 2.899378776550293, "learning_rate": 3.63421875e-06, "loss": 0.0075, "step": 26750 }, { "epoch": 12.254004576659039, "grad_norm": 1.0545241832733154, "learning_rate": 3.6303125000000002e-06, "loss": 0.0061, "step": 26775 }, { "epoch": 12.265446224256292, "grad_norm": 0.0755414217710495, "learning_rate": 3.62640625e-06, "loss": 0.0044, "step": 26800 }, { "epoch": 12.276887871853546, "grad_norm": 1.607385516166687, "learning_rate": 3.6225e-06, "loss": 0.0103, "step": 26825 }, { "epoch": 12.288329519450802, "grad_norm": 0.6529345512390137, "learning_rate": 3.6185937500000004e-06, "loss": 0.0061, "step": 26850 }, { "epoch": 12.299771167048055, "grad_norm": 0.8069061040878296, "learning_rate": 3.6146875000000003e-06, "loss": 0.0032, "step": 26875 }, { "epoch": 12.311212814645309, "grad_norm": 0.03794765844941139, "learning_rate": 3.6107812500000006e-06, "loss": 0.0028, "step": 26900 }, { "epoch": 12.322654462242562, "grad_norm": 5.785488128662109, "learning_rate": 3.6068749999999997e-06, "loss": 0.0133, "step": 26925 }, { "epoch": 12.334096109839816, "grad_norm": 0.1751006692647934, "learning_rate": 3.60296875e-06, "loss": 0.0046, "step": 26950 }, { "epoch": 12.345537757437071, "grad_norm": 1.2459888458251953, "learning_rate": 3.5990625e-06, "loss": 0.0079, "step": 26975 }, { "epoch": 12.356979405034325, "grad_norm": 0.1333468109369278, "learning_rate": 3.5951562500000002e-06, "loss": 0.0085, "step": 27000 }, { "epoch": 12.368421052631579, "grad_norm": 0.4747946858406067, "learning_rate": 3.59125e-06, "loss": 0.0057, "step": 27025 }, { "epoch": 12.379862700228832, "grad_norm": 0.2733854651451111, "learning_rate": 3.5873437500000005e-06, "loss": 0.0038, "step": 27050 }, { "epoch": 12.391304347826088, "grad_norm": 0.7390388250350952, "learning_rate": 3.5834375000000004e-06, "loss": 0.0139, "step": 27075 }, { "epoch": 12.402745995423341, 
"grad_norm": 0.022354505956172943, "learning_rate": 3.5795312500000003e-06, "loss": 0.0056, "step": 27100 }, { "epoch": 12.414187643020595, "grad_norm": 0.614753246307373, "learning_rate": 3.5756250000000006e-06, "loss": 0.0059, "step": 27125 }, { "epoch": 12.425629290617849, "grad_norm": 4.490765571594238, "learning_rate": 3.57171875e-06, "loss": 0.0051, "step": 27150 }, { "epoch": 12.437070938215102, "grad_norm": 0.7734677791595459, "learning_rate": 3.5678125e-06, "loss": 0.0066, "step": 27175 }, { "epoch": 12.448512585812358, "grad_norm": 2.6567184925079346, "learning_rate": 3.56390625e-06, "loss": 0.0073, "step": 27200 }, { "epoch": 12.459954233409611, "grad_norm": 0.1563418060541153, "learning_rate": 3.5600000000000002e-06, "loss": 0.01, "step": 27225 }, { "epoch": 12.471395881006865, "grad_norm": 1.2115215063095093, "learning_rate": 3.55609375e-06, "loss": 0.0073, "step": 27250 }, { "epoch": 12.482837528604119, "grad_norm": 0.9921894669532776, "learning_rate": 3.5521875000000005e-06, "loss": 0.0117, "step": 27275 }, { "epoch": 12.494279176201372, "grad_norm": 0.2649920582771301, "learning_rate": 3.5482812500000004e-06, "loss": 0.0086, "step": 27300 }, { "epoch": 12.505720823798628, "grad_norm": 5.724328517913818, "learning_rate": 3.5443750000000003e-06, "loss": 0.006, "step": 27325 }, { "epoch": 12.517162471395881, "grad_norm": 2.260976552963257, "learning_rate": 3.5404687499999997e-06, "loss": 0.0073, "step": 27350 }, { "epoch": 12.528604118993135, "grad_norm": 0.6492946147918701, "learning_rate": 3.5365625e-06, "loss": 0.0089, "step": 27375 }, { "epoch": 12.540045766590389, "grad_norm": 0.03157801553606987, "learning_rate": 3.53265625e-06, "loss": 0.0057, "step": 27400 }, { "epoch": 12.551487414187642, "grad_norm": 0.7684985995292664, "learning_rate": 3.5287500000000003e-06, "loss": 0.0073, "step": 27425 }, { "epoch": 12.562929061784898, "grad_norm": 2.1785857677459717, "learning_rate": 3.5248437500000002e-06, "loss": 0.0084, "step": 27450 }, { "epoch": 12.574370709382151, "grad_norm": 0.08991169929504395, "learning_rate": 3.5209375e-06, "loss": 0.009, "step": 27475 }, { "epoch": 12.585812356979405, "grad_norm": 0.21213358640670776, "learning_rate": 3.5170312500000004e-06, "loss": 0.0073, "step": 27500 }, { "epoch": 12.597254004576659, "grad_norm": 0.030096199363470078, "learning_rate": 3.5131250000000004e-06, "loss": 0.0081, "step": 27525 }, { "epoch": 12.608695652173914, "grad_norm": 0.2712863087654114, "learning_rate": 3.50921875e-06, "loss": 0.0082, "step": 27550 }, { "epoch": 12.620137299771168, "grad_norm": 0.9672313332557678, "learning_rate": 3.50546875e-06, "loss": 0.0099, "step": 27575 }, { "epoch": 12.631578947368421, "grad_norm": 3.9991157054901123, "learning_rate": 3.5015625000000004e-06, "loss": 0.0082, "step": 27600 }, { "epoch": 12.643020594965675, "grad_norm": 0.16638922691345215, "learning_rate": 3.4976562500000003e-06, "loss": 0.0118, "step": 27625 }, { "epoch": 12.654462242562929, "grad_norm": 0.1447831094264984, "learning_rate": 3.4937500000000006e-06, "loss": 0.0057, "step": 27650 }, { "epoch": 12.665903890160184, "grad_norm": 0.45069146156311035, "learning_rate": 3.4898437499999997e-06, "loss": 0.0062, "step": 27675 }, { "epoch": 12.677345537757438, "grad_norm": 0.13740800321102142, "learning_rate": 3.4859375e-06, "loss": 0.003, "step": 27700 }, { "epoch": 12.688787185354691, "grad_norm": 0.3269636929035187, "learning_rate": 3.48203125e-06, "loss": 0.0071, "step": 27725 }, { "epoch": 12.700228832951945, "grad_norm": 0.13175521790981293, "learning_rate": 
3.4781250000000003e-06, "loss": 0.0033, "step": 27750 }, { "epoch": 12.711670480549198, "grad_norm": 0.15867213904857635, "learning_rate": 3.47421875e-06, "loss": 0.0075, "step": 27775 }, { "epoch": 12.723112128146454, "grad_norm": 0.5604462027549744, "learning_rate": 3.4703125000000005e-06, "loss": 0.0072, "step": 27800 }, { "epoch": 12.734553775743708, "grad_norm": 0.3909308910369873, "learning_rate": 3.4664062500000004e-06, "loss": 0.0091, "step": 27825 }, { "epoch": 12.745995423340961, "grad_norm": 0.067705899477005, "learning_rate": 3.4625000000000003e-06, "loss": 0.0038, "step": 27850 }, { "epoch": 12.757437070938215, "grad_norm": 0.19309298694133759, "learning_rate": 3.4585937499999998e-06, "loss": 0.0129, "step": 27875 }, { "epoch": 12.768878718535468, "grad_norm": 0.3463464677333832, "learning_rate": 3.4546875e-06, "loss": 0.0018, "step": 27900 }, { "epoch": 12.780320366132724, "grad_norm": 1.135886311531067, "learning_rate": 3.45078125e-06, "loss": 0.0126, "step": 27925 }, { "epoch": 12.791762013729977, "grad_norm": 0.049147848039865494, "learning_rate": 3.446875e-06, "loss": 0.0064, "step": 27950 }, { "epoch": 12.803203661327231, "grad_norm": 0.950079619884491, "learning_rate": 3.4429687500000003e-06, "loss": 0.011, "step": 27975 }, { "epoch": 12.814645308924485, "grad_norm": 0.05525101721286774, "learning_rate": 3.4390625e-06, "loss": 0.0042, "step": 28000 }, { "epoch": 12.826086956521738, "grad_norm": 1.1997219324111938, "learning_rate": 3.4351562500000005e-06, "loss": 0.0116, "step": 28025 }, { "epoch": 12.837528604118994, "grad_norm": 0.24805450439453125, "learning_rate": 3.4312500000000004e-06, "loss": 0.0087, "step": 28050 }, { "epoch": 12.848970251716247, "grad_norm": 1.1105051040649414, "learning_rate": 3.42734375e-06, "loss": 0.0092, "step": 28075 }, { "epoch": 12.860411899313501, "grad_norm": 1.5007377862930298, "learning_rate": 3.4234374999999998e-06, "loss": 0.0058, "step": 28100 }, { "epoch": 12.871853546910755, "grad_norm": 0.14772702753543854, "learning_rate": 3.41953125e-06, "loss": 0.0139, "step": 28125 }, { "epoch": 12.883295194508008, "grad_norm": 0.04660077020525932, "learning_rate": 3.415625e-06, "loss": 0.0037, "step": 28150 }, { "epoch": 12.894736842105264, "grad_norm": 1.8650418519973755, "learning_rate": 3.4117187500000003e-06, "loss": 0.0057, "step": 28175 }, { "epoch": 12.906178489702517, "grad_norm": 0.058051276952028275, "learning_rate": 3.4078125000000002e-06, "loss": 0.005, "step": 28200 }, { "epoch": 12.917620137299771, "grad_norm": 1.6015323400497437, "learning_rate": 3.40390625e-06, "loss": 0.012, "step": 28225 }, { "epoch": 12.929061784897025, "grad_norm": 0.13523660600185394, "learning_rate": 3.4000000000000005e-06, "loss": 0.0069, "step": 28250 }, { "epoch": 12.940503432494278, "grad_norm": 0.5378448963165283, "learning_rate": 3.3960937500000004e-06, "loss": 0.0097, "step": 28275 }, { "epoch": 12.951945080091534, "grad_norm": 0.6862388849258423, "learning_rate": 3.3921875e-06, "loss": 0.0028, "step": 28300 }, { "epoch": 12.963386727688787, "grad_norm": 0.750220000743866, "learning_rate": 3.3882812499999998e-06, "loss": 0.0085, "step": 28325 }, { "epoch": 12.974828375286041, "grad_norm": 0.15125234425067902, "learning_rate": 3.384375e-06, "loss": 0.0045, "step": 28350 }, { "epoch": 12.986270022883295, "grad_norm": 0.5914090871810913, "learning_rate": 3.38046875e-06, "loss": 0.0086, "step": 28375 }, { "epoch": 12.99771167048055, "grad_norm": 0.7181547284126282, "learning_rate": 3.3765625000000003e-06, "loss": 0.0056, "step": 28400 }, { "epoch": 
13.009153318077804, "grad_norm": 1.999975323677063, "learning_rate": 3.3726562500000002e-06, "loss": 0.0043, "step": 28425 }, { "epoch": 13.020594965675057, "grad_norm": 0.09092377871274948, "learning_rate": 3.3687500000000006e-06, "loss": 0.0088, "step": 28450 }, { "epoch": 13.03203661327231, "grad_norm": 1.7949730157852173, "learning_rate": 3.3648437500000005e-06, "loss": 0.0076, "step": 28475 }, { "epoch": 13.043478260869565, "grad_norm": 2.5737016201019287, "learning_rate": 3.3609375e-06, "loss": 0.0078, "step": 28500 }, { "epoch": 13.05491990846682, "grad_norm": 2.6186556816101074, "learning_rate": 3.35703125e-06, "loss": 0.0082, "step": 28525 }, { "epoch": 13.066361556064074, "grad_norm": 0.3996107280254364, "learning_rate": 3.353125e-06, "loss": 0.007, "step": 28550 }, { "epoch": 13.077803203661327, "grad_norm": 3.384303092956543, "learning_rate": 3.34921875e-06, "loss": 0.0053, "step": 28575 }, { "epoch": 13.08924485125858, "grad_norm": 0.09174101799726486, "learning_rate": 3.3453125e-06, "loss": 0.0058, "step": 28600 }, { "epoch": 13.100686498855834, "grad_norm": 0.8262161612510681, "learning_rate": 3.3414062500000003e-06, "loss": 0.0038, "step": 28625 }, { "epoch": 13.11212814645309, "grad_norm": 1.045690894126892, "learning_rate": 3.3375000000000002e-06, "loss": 0.0078, "step": 28650 }, { "epoch": 13.123569794050344, "grad_norm": 0.028259994462132454, "learning_rate": 3.3335937500000006e-06, "loss": 0.0052, "step": 28675 }, { "epoch": 13.135011441647597, "grad_norm": 0.6078764200210571, "learning_rate": 3.3296874999999996e-06, "loss": 0.0053, "step": 28700 }, { "epoch": 13.14645308924485, "grad_norm": 0.02156892977654934, "learning_rate": 3.32578125e-06, "loss": 0.0043, "step": 28725 }, { "epoch": 13.157894736842104, "grad_norm": 0.9901741743087769, "learning_rate": 3.321875e-06, "loss": 0.0077, "step": 28750 }, { "epoch": 13.16933638443936, "grad_norm": 0.023360157385468483, "learning_rate": 3.31796875e-06, "loss": 0.0083, "step": 28775 }, { "epoch": 13.180778032036613, "grad_norm": 0.7396934628486633, "learning_rate": 3.3140625e-06, "loss": 0.0062, "step": 28800 }, { "epoch": 13.192219679633867, "grad_norm": 3.4653725624084473, "learning_rate": 3.3101562500000004e-06, "loss": 0.0084, "step": 28825 }, { "epoch": 13.20366132723112, "grad_norm": 3.071186065673828, "learning_rate": 3.3062500000000003e-06, "loss": 0.0053, "step": 28850 }, { "epoch": 13.215102974828376, "grad_norm": 0.08804110437631607, "learning_rate": 3.3023437500000002e-06, "loss": 0.0035, "step": 28875 }, { "epoch": 13.22654462242563, "grad_norm": 1.8675472736358643, "learning_rate": 3.2984375000000006e-06, "loss": 0.008, "step": 28900 }, { "epoch": 13.237986270022883, "grad_norm": 0.025203507393598557, "learning_rate": 3.29453125e-06, "loss": 0.0031, "step": 28925 }, { "epoch": 13.249427917620137, "grad_norm": 1.8199517726898193, "learning_rate": 3.290625e-06, "loss": 0.0057, "step": 28950 }, { "epoch": 13.26086956521739, "grad_norm": 0.5931240320205688, "learning_rate": 3.28671875e-06, "loss": 0.0054, "step": 28975 }, { "epoch": 13.272311212814646, "grad_norm": 4.243343353271484, "learning_rate": 3.2828125e-06, "loss": 0.0056, "step": 29000 }, { "epoch": 13.2837528604119, "grad_norm": 2.2064199447631836, "learning_rate": 3.27890625e-06, "loss": 0.0076, "step": 29025 }, { "epoch": 13.295194508009153, "grad_norm": 1.488349199295044, "learning_rate": 3.2750000000000004e-06, "loss": 0.0058, "step": 29050 }, { "epoch": 13.306636155606407, "grad_norm": 1.03605318069458, "learning_rate": 3.2710937500000003e-06, 
"loss": 0.0124, "step": 29075 }, { "epoch": 13.31807780320366, "grad_norm": 0.011979938484728336, "learning_rate": 3.2671875000000006e-06, "loss": 0.004, "step": 29100 }, { "epoch": 13.329519450800916, "grad_norm": 1.6582856178283691, "learning_rate": 3.2632812499999997e-06, "loss": 0.0048, "step": 29125 }, { "epoch": 13.34096109839817, "grad_norm": 0.08125101029872894, "learning_rate": 3.259375e-06, "loss": 0.0029, "step": 29150 }, { "epoch": 13.352402745995423, "grad_norm": 0.055700212717056274, "learning_rate": 3.25546875e-06, "loss": 0.0077, "step": 29175 }, { "epoch": 13.363844393592677, "grad_norm": 2.3434126377105713, "learning_rate": 3.2515625000000003e-06, "loss": 0.0061, "step": 29200 }, { "epoch": 13.37528604118993, "grad_norm": 0.864936888217926, "learning_rate": 3.24765625e-06, "loss": 0.0077, "step": 29225 }, { "epoch": 13.386727688787186, "grad_norm": 0.18443617224693298, "learning_rate": 3.24375e-06, "loss": 0.0075, "step": 29250 }, { "epoch": 13.39816933638444, "grad_norm": 0.3636978566646576, "learning_rate": 3.2398437500000004e-06, "loss": 0.0082, "step": 29275 }, { "epoch": 13.409610983981693, "grad_norm": 1.2357699871063232, "learning_rate": 3.2359375000000003e-06, "loss": 0.0051, "step": 29300 }, { "epoch": 13.421052631578947, "grad_norm": 0.017450567334890366, "learning_rate": 3.2320312499999998e-06, "loss": 0.0065, "step": 29325 }, { "epoch": 13.4324942791762, "grad_norm": 3.0198428630828857, "learning_rate": 3.2281249999999997e-06, "loss": 0.0045, "step": 29350 }, { "epoch": 13.443935926773456, "grad_norm": 0.5470199584960938, "learning_rate": 3.22421875e-06, "loss": 0.0083, "step": 29375 }, { "epoch": 13.45537757437071, "grad_norm": 0.028531597927212715, "learning_rate": 3.2203125e-06, "loss": 0.0035, "step": 29400 }, { "epoch": 13.466819221967963, "grad_norm": 0.09060712903738022, "learning_rate": 3.2164062500000003e-06, "loss": 0.004, "step": 29425 }, { "epoch": 13.478260869565217, "grad_norm": 1.947334885597229, "learning_rate": 3.2125e-06, "loss": 0.0028, "step": 29450 }, { "epoch": 13.48970251716247, "grad_norm": 0.03363508731126785, "learning_rate": 3.2085937500000005e-06, "loss": 0.0037, "step": 29475 }, { "epoch": 13.501144164759726, "grad_norm": 0.13988135755062103, "learning_rate": 3.2046875000000004e-06, "loss": 0.0074, "step": 29500 }, { "epoch": 13.51258581235698, "grad_norm": 0.209013894200325, "learning_rate": 3.2007812500000003e-06, "loss": 0.0082, "step": 29525 }, { "epoch": 13.524027459954233, "grad_norm": 2.497392177581787, "learning_rate": 3.1968749999999998e-06, "loss": 0.0066, "step": 29550 }, { "epoch": 13.535469107551487, "grad_norm": 1.2079094648361206, "learning_rate": 3.19296875e-06, "loss": 0.0046, "step": 29575 }, { "epoch": 13.546910755148742, "grad_norm": 0.19309692084789276, "learning_rate": 3.1890625e-06, "loss": 0.0052, "step": 29600 }, { "epoch": 13.558352402745996, "grad_norm": 0.04407721385359764, "learning_rate": 3.18515625e-06, "loss": 0.0058, "step": 29625 }, { "epoch": 13.56979405034325, "grad_norm": 1.9256988763809204, "learning_rate": 3.1812500000000002e-06, "loss": 0.0023, "step": 29650 }, { "epoch": 13.581235697940503, "grad_norm": 0.39643150568008423, "learning_rate": 3.17734375e-06, "loss": 0.0034, "step": 29675 }, { "epoch": 13.592677345537757, "grad_norm": 0.11968174576759338, "learning_rate": 3.1734375000000005e-06, "loss": 0.0038, "step": 29700 }, { "epoch": 13.604118993135012, "grad_norm": 0.04074479639530182, "learning_rate": 3.1695312500000004e-06, "loss": 0.0069, "step": 29725 }, { "epoch": 13.615560640732266, 
"grad_norm": 1.1040339469909668, "learning_rate": 3.165625e-06, "loss": 0.0075, "step": 29750 }, { "epoch": 13.62700228832952, "grad_norm": 1.6253503561019897, "learning_rate": 3.1617187499999998e-06, "loss": 0.0056, "step": 29775 }, { "epoch": 13.638443935926773, "grad_norm": 2.6330320835113525, "learning_rate": 3.1578125e-06, "loss": 0.0057, "step": 29800 }, { "epoch": 13.649885583524027, "grad_norm": 0.08415306359529495, "learning_rate": 3.15390625e-06, "loss": 0.0048, "step": 29825 }, { "epoch": 13.661327231121282, "grad_norm": 0.025448646396398544, "learning_rate": 3.1500000000000003e-06, "loss": 0.0049, "step": 29850 }, { "epoch": 13.672768878718536, "grad_norm": 1.2036207914352417, "learning_rate": 3.1460937500000002e-06, "loss": 0.0052, "step": 29875 }, { "epoch": 13.68421052631579, "grad_norm": 0.21050342917442322, "learning_rate": 3.1421875e-06, "loss": 0.0069, "step": 29900 }, { "epoch": 13.695652173913043, "grad_norm": 0.2702687680721283, "learning_rate": 3.1382812500000005e-06, "loss": 0.0045, "step": 29925 }, { "epoch": 13.707093821510298, "grad_norm": 2.3359804153442383, "learning_rate": 3.134375e-06, "loss": 0.0025, "step": 29950 }, { "epoch": 13.718535469107552, "grad_norm": 0.027840740978717804, "learning_rate": 3.13046875e-06, "loss": 0.0073, "step": 29975 }, { "epoch": 13.729977116704806, "grad_norm": 0.01418315339833498, "learning_rate": 3.1265625e-06, "loss": 0.006, "step": 30000 }, { "epoch": 13.729977116704806, "eval_loss": 0.18081426620483398, "eval_runtime": 8816.313, "eval_samples_per_second": 1.08, "eval_steps_per_second": 0.135, "eval_wer": 0.08088564663723044, "step": 30000 }, { "epoch": 13.74141876430206, "grad_norm": 0.07010439038276672, "learning_rate": 3.12265625e-06, "loss": 0.0053, "step": 30025 }, { "epoch": 13.752860411899313, "grad_norm": Infinity, "learning_rate": 3.11890625e-06, "loss": 0.0105, "step": 30050 }, { "epoch": 13.764302059496568, "grad_norm": 0.34050726890563965, "learning_rate": 3.1150000000000002e-06, "loss": 0.0056, "step": 30075 }, { "epoch": 13.775743707093822, "grad_norm": 1.29059898853302, "learning_rate": 3.11109375e-06, "loss": 0.0017, "step": 30100 }, { "epoch": 13.787185354691076, "grad_norm": 1.7795320749282837, "learning_rate": 3.1071875e-06, "loss": 0.0079, "step": 30125 }, { "epoch": 13.79862700228833, "grad_norm": 0.03241199627518654, "learning_rate": 3.10328125e-06, "loss": 0.0061, "step": 30150 }, { "epoch": 13.810068649885583, "grad_norm": 0.031430721282958984, "learning_rate": 3.0993750000000003e-06, "loss": 0.0058, "step": 30175 }, { "epoch": 13.821510297482838, "grad_norm": 3.567770004272461, "learning_rate": 3.09546875e-06, "loss": 0.0073, "step": 30200 }, { "epoch": 13.832951945080092, "grad_norm": 0.15421247482299805, "learning_rate": 3.0915625e-06, "loss": 0.0079, "step": 30225 }, { "epoch": 13.844393592677346, "grad_norm": 0.20840105414390564, "learning_rate": 3.08765625e-06, "loss": 0.0069, "step": 30250 }, { "epoch": 13.8558352402746, "grad_norm": 1.5622609853744507, "learning_rate": 3.0837500000000003e-06, "loss": 0.0045, "step": 30275 }, { "epoch": 13.867276887871853, "grad_norm": 0.27918922901153564, "learning_rate": 3.0798437500000002e-06, "loss": 0.0027, "step": 30300 }, { "epoch": 13.878718535469108, "grad_norm": 0.18607428669929504, "learning_rate": 3.0759375e-06, "loss": 0.0048, "step": 30325 }, { "epoch": 13.890160183066362, "grad_norm": 0.03345849737524986, "learning_rate": 3.07203125e-06, "loss": 0.0051, "step": 30350 }, { "epoch": 13.901601830663616, "grad_norm": 0.06558558344841003, 
"learning_rate": 3.0681250000000004e-06, "loss": 0.0085, "step": 30375 }, { "epoch": 13.91304347826087, "grad_norm": 0.4423581659793854, "learning_rate": 3.0642187500000003e-06, "loss": 0.0044, "step": 30400 }, { "epoch": 13.924485125858123, "grad_norm": 3.977522611618042, "learning_rate": 3.0603125e-06, "loss": 0.0039, "step": 30425 }, { "epoch": 13.935926773455378, "grad_norm": 0.18562455475330353, "learning_rate": 3.05640625e-06, "loss": 0.005, "step": 30450 }, { "epoch": 13.947368421052632, "grad_norm": 1.438119888305664, "learning_rate": 3.0525e-06, "loss": 0.0093, "step": 30475 }, { "epoch": 13.958810068649885, "grad_norm": 0.20377586781978607, "learning_rate": 3.0485937500000003e-06, "loss": 0.0025, "step": 30500 }, { "epoch": 13.97025171624714, "grad_norm": 0.15750333666801453, "learning_rate": 3.0446875000000002e-06, "loss": 0.0071, "step": 30525 }, { "epoch": 13.981693363844393, "grad_norm": 1.8309283256530762, "learning_rate": 3.04078125e-06, "loss": 0.0062, "step": 30550 }, { "epoch": 13.993135011441648, "grad_norm": 0.43493324518203735, "learning_rate": 3.036875e-06, "loss": 0.0048, "step": 30575 }, { "epoch": 14.004576659038902, "grad_norm": 1.2056841850280762, "learning_rate": 3.0329687500000004e-06, "loss": 0.0048, "step": 30600 }, { "epoch": 14.016018306636155, "grad_norm": 0.04575463384389877, "learning_rate": 3.0290625000000003e-06, "loss": 0.0014, "step": 30625 }, { "epoch": 14.027459954233409, "grad_norm": 0.7106139063835144, "learning_rate": 3.02515625e-06, "loss": 0.0041, "step": 30650 }, { "epoch": 14.038901601830664, "grad_norm": 0.08353018015623093, "learning_rate": 3.02125e-06, "loss": 0.0021, "step": 30675 }, { "epoch": 14.050343249427918, "grad_norm": 0.04171194136142731, "learning_rate": 3.0173437500000004e-06, "loss": 0.0074, "step": 30700 }, { "epoch": 14.061784897025172, "grad_norm": 0.07023908942937851, "learning_rate": 3.0134375000000003e-06, "loss": 0.0032, "step": 30725 }, { "epoch": 14.073226544622425, "grad_norm": 0.15139324963092804, "learning_rate": 3.0095312500000002e-06, "loss": 0.0019, "step": 30750 }, { "epoch": 14.084668192219679, "grad_norm": 0.05846810340881348, "learning_rate": 3.005625e-06, "loss": 0.0033, "step": 30775 }, { "epoch": 14.096109839816934, "grad_norm": 0.23001690208911896, "learning_rate": 3.00171875e-06, "loss": 0.0054, "step": 30800 }, { "epoch": 14.107551487414188, "grad_norm": 0.012840249575674534, "learning_rate": 2.9978125000000004e-06, "loss": 0.0022, "step": 30825 }, { "epoch": 14.118993135011442, "grad_norm": 0.8020305037498474, "learning_rate": 2.99390625e-06, "loss": 0.0028, "step": 30850 }, { "epoch": 14.130434782608695, "grad_norm": 0.054987985640764236, "learning_rate": 2.99e-06, "loss": 0.0079, "step": 30875 }, { "epoch": 14.141876430205949, "grad_norm": 0.8349403738975525, "learning_rate": 2.98609375e-06, "loss": 0.006, "step": 30900 }, { "epoch": 14.153318077803204, "grad_norm": 0.011226288042962551, "learning_rate": 2.9821875000000004e-06, "loss": 0.0056, "step": 30925 }, { "epoch": 14.164759725400458, "grad_norm": 3.176738977432251, "learning_rate": 2.97828125e-06, "loss": 0.0082, "step": 30950 }, { "epoch": 14.176201372997712, "grad_norm": 0.01673993095755577, "learning_rate": 2.9743750000000002e-06, "loss": 0.0049, "step": 30975 }, { "epoch": 14.187643020594965, "grad_norm": 0.470841646194458, "learning_rate": 2.97046875e-06, "loss": 0.0061, "step": 31000 }, { "epoch": 14.199084668192219, "grad_norm": 0.15515857934951782, "learning_rate": 2.9665625000000004e-06, "loss": 0.0021, "step": 31025 }, { "epoch": 
14.210526315789474, "grad_norm": 1.633768081665039, "learning_rate": 2.96265625e-06, "loss": 0.0047, "step": 31050 }, { "epoch": 14.221967963386728, "grad_norm": 0.45998334884643555, "learning_rate": 2.9587500000000003e-06, "loss": 0.0018, "step": 31075 }, { "epoch": 14.233409610983982, "grad_norm": 0.0303165502846241, "learning_rate": 2.95484375e-06, "loss": 0.0104, "step": 31100 }, { "epoch": 14.244851258581235, "grad_norm": 1.0592862367630005, "learning_rate": 2.9509375e-06, "loss": 0.0041, "step": 31125 }, { "epoch": 14.256292906178489, "grad_norm": 0.057001568377017975, "learning_rate": 2.9470312500000004e-06, "loss": 0.007, "step": 31150 }, { "epoch": 14.267734553775744, "grad_norm": 0.4782808721065521, "learning_rate": 2.943125e-06, "loss": 0.0044, "step": 31175 }, { "epoch": 14.279176201372998, "grad_norm": 0.37128251791000366, "learning_rate": 2.93921875e-06, "loss": 0.0067, "step": 31200 }, { "epoch": 14.290617848970252, "grad_norm": 0.012815974652767181, "learning_rate": 2.9353125e-06, "loss": 0.0031, "step": 31225 }, { "epoch": 14.302059496567505, "grad_norm": 0.9134680032730103, "learning_rate": 2.9314062500000004e-06, "loss": 0.0065, "step": 31250 }, { "epoch": 14.31350114416476, "grad_norm": 0.8373939990997314, "learning_rate": 2.9275e-06, "loss": 0.0037, "step": 31275 }, { "epoch": 14.324942791762014, "grad_norm": 0.3114294707775116, "learning_rate": 2.9235937500000003e-06, "loss": 0.005, "step": 31300 }, { "epoch": 14.336384439359268, "grad_norm": 0.5236983895301819, "learning_rate": 2.9196875e-06, "loss": 0.0051, "step": 31325 }, { "epoch": 14.347826086956522, "grad_norm": 1.0224878787994385, "learning_rate": 2.9157812500000005e-06, "loss": 0.0046, "step": 31350 }, { "epoch": 14.359267734553775, "grad_norm": 0.22550596296787262, "learning_rate": 2.911875e-06, "loss": 0.0073, "step": 31375 }, { "epoch": 14.37070938215103, "grad_norm": 0.0469031035900116, "learning_rate": 2.9079687500000003e-06, "loss": 0.0063, "step": 31400 }, { "epoch": 14.382151029748284, "grad_norm": 1.458096981048584, "learning_rate": 2.9040625e-06, "loss": 0.0043, "step": 31425 }, { "epoch": 14.393592677345538, "grad_norm": 0.5893502235412598, "learning_rate": 2.90015625e-06, "loss": 0.0079, "step": 31450 }, { "epoch": 14.405034324942791, "grad_norm": 1.4577136039733887, "learning_rate": 2.89625e-06, "loss": 0.0035, "step": 31475 }, { "epoch": 14.416475972540045, "grad_norm": 0.29451343417167664, "learning_rate": 2.89234375e-06, "loss": 0.0082, "step": 31500 }, { "epoch": 14.4279176201373, "grad_norm": 0.9730453491210938, "learning_rate": 2.8884375000000002e-06, "loss": 0.0051, "step": 31525 }, { "epoch": 14.439359267734554, "grad_norm": 1.110335111618042, "learning_rate": 2.88453125e-06, "loss": 0.007, "step": 31550 }, { "epoch": 14.450800915331808, "grad_norm": 0.7208927273750305, "learning_rate": 2.880625e-06, "loss": 0.0036, "step": 31575 }, { "epoch": 14.462242562929061, "grad_norm": 0.6832392811775208, "learning_rate": 2.87671875e-06, "loss": 0.0047, "step": 31600 }, { "epoch": 14.473684210526315, "grad_norm": 0.02059379778802395, "learning_rate": 2.8728125000000003e-06, "loss": 0.0122, "step": 31625 }, { "epoch": 14.48512585812357, "grad_norm": 0.8392793536186218, "learning_rate": 2.86890625e-06, "loss": 0.0055, "step": 31650 }, { "epoch": 14.496567505720824, "grad_norm": 0.03922909498214722, "learning_rate": 2.865e-06, "loss": 0.0028, "step": 31675 }, { "epoch": 14.508009153318078, "grad_norm": 0.0962461307644844, "learning_rate": 2.86109375e-06, "loss": 0.0036, "step": 31700 }, { "epoch": 
14.519450800915331, "grad_norm": 0.23972932994365692, "learning_rate": 2.8571875000000003e-06, "loss": 0.0034, "step": 31725 }, { "epoch": 14.530892448512585, "grad_norm": 0.2644689381122589, "learning_rate": 2.8532812500000002e-06, "loss": 0.0027, "step": 31750 }, { "epoch": 14.54233409610984, "grad_norm": 0.4280746877193451, "learning_rate": 2.849375e-06, "loss": 0.0023, "step": 31775 }, { "epoch": 14.553775743707094, "grad_norm": 1.2397422790527344, "learning_rate": 2.84546875e-06, "loss": 0.0085, "step": 31800 }, { "epoch": 14.565217391304348, "grad_norm": 0.335915744304657, "learning_rate": 2.8415625e-06, "loss": 0.0029, "step": 31825 }, { "epoch": 14.576659038901601, "grad_norm": 0.05976617708802223, "learning_rate": 2.8376562500000003e-06, "loss": 0.0069, "step": 31850 }, { "epoch": 14.588100686498855, "grad_norm": 0.10103817284107208, "learning_rate": 2.83375e-06, "loss": 0.0031, "step": 31875 }, { "epoch": 14.59954233409611, "grad_norm": 0.5501534342765808, "learning_rate": 2.82984375e-06, "loss": 0.0078, "step": 31900 }, { "epoch": 14.610983981693364, "grad_norm": 0.028689030557870865, "learning_rate": 2.8259375e-06, "loss": 0.0059, "step": 31925 }, { "epoch": 14.622425629290618, "grad_norm": 0.9441660642623901, "learning_rate": 2.8220312500000003e-06, "loss": 0.0088, "step": 31950 }, { "epoch": 14.633867276887871, "grad_norm": 1.6239631175994873, "learning_rate": 2.8181250000000002e-06, "loss": 0.0028, "step": 31975 }, { "epoch": 14.645308924485127, "grad_norm": 1.817855715751648, "learning_rate": 2.81421875e-06, "loss": 0.0075, "step": 32000 }, { "epoch": 14.65675057208238, "grad_norm": 0.09565707296133041, "learning_rate": 2.8103125e-06, "loss": 0.0046, "step": 32025 }, { "epoch": 14.668192219679634, "grad_norm": 0.32869425415992737, "learning_rate": 2.8064062500000004e-06, "loss": 0.0071, "step": 32050 }, { "epoch": 14.679633867276888, "grad_norm": 3.4210045337677, "learning_rate": 2.8025000000000003e-06, "loss": 0.003, "step": 32075 }, { "epoch": 14.691075514874141, "grad_norm": 0.8280455470085144, "learning_rate": 2.79859375e-06, "loss": 0.0046, "step": 32100 }, { "epoch": 14.702517162471397, "grad_norm": 0.034207556396722794, "learning_rate": 2.7946875e-06, "loss": 0.0042, "step": 32125 }, { "epoch": 14.71395881006865, "grad_norm": 0.0863451212644577, "learning_rate": 2.79078125e-06, "loss": 0.0045, "step": 32150 }, { "epoch": 14.725400457665904, "grad_norm": 0.07515043020248413, "learning_rate": 2.7868750000000003e-06, "loss": 0.0027, "step": 32175 }, { "epoch": 14.736842105263158, "grad_norm": 0.1660388708114624, "learning_rate": 2.78296875e-06, "loss": 0.0074, "step": 32200 }, { "epoch": 14.748283752860411, "grad_norm": 0.04122938960790634, "learning_rate": 2.7790625e-06, "loss": 0.0054, "step": 32225 }, { "epoch": 14.759725400457667, "grad_norm": 0.055084895342588425, "learning_rate": 2.77515625e-06, "loss": 0.0067, "step": 32250 }, { "epoch": 14.77116704805492, "grad_norm": 5.71760368347168, "learning_rate": 2.7712500000000004e-06, "loss": 0.0088, "step": 32275 }, { "epoch": 14.782608695652174, "grad_norm": 0.09588690847158432, "learning_rate": 2.76734375e-06, "loss": 0.0088, "step": 32300 }, { "epoch": 14.794050343249427, "grad_norm": 2.44734787940979, "learning_rate": 2.7634375e-06, "loss": 0.0047, "step": 32325 }, { "epoch": 14.805491990846681, "grad_norm": 0.3692518472671509, "learning_rate": 2.75953125e-06, "loss": 0.0073, "step": 32350 }, { "epoch": 14.816933638443937, "grad_norm": 0.04715866595506668, "learning_rate": 2.7556250000000004e-06, "loss": 0.0051, 
"step": 32375 }, { "epoch": 14.82837528604119, "grad_norm": 0.09502308070659637, "learning_rate": 2.7517187500000003e-06, "loss": 0.0068, "step": 32400 }, { "epoch": 14.839816933638444, "grad_norm": 0.07894061505794525, "learning_rate": 2.7478125000000002e-06, "loss": 0.0043, "step": 32425 }, { "epoch": 14.851258581235697, "grad_norm": 1.3253856897354126, "learning_rate": 2.74390625e-06, "loss": 0.0031, "step": 32450 }, { "epoch": 14.862700228832953, "grad_norm": 0.5947267413139343, "learning_rate": 2.74e-06, "loss": 0.005, "step": 32475 }, { "epoch": 14.874141876430206, "grad_norm": 2.0173184871673584, "learning_rate": 2.7360937500000004e-06, "loss": 0.0077, "step": 32500 }, { "epoch": 14.88558352402746, "grad_norm": 0.2204510122537613, "learning_rate": 2.7321875e-06, "loss": 0.0016, "step": 32525 }, { "epoch": 14.897025171624714, "grad_norm": 0.20291882753372192, "learning_rate": 2.72828125e-06, "loss": 0.0061, "step": 32550 }, { "epoch": 14.908466819221967, "grad_norm": 0.008917185477912426, "learning_rate": 2.724375e-06, "loss": 0.0043, "step": 32575 }, { "epoch": 14.919908466819223, "grad_norm": 2.0818703174591064, "learning_rate": 2.7204687500000004e-06, "loss": 0.0098, "step": 32600 }, { "epoch": 14.931350114416476, "grad_norm": 0.08606406301259995, "learning_rate": 2.7165625e-06, "loss": 0.0046, "step": 32625 }, { "epoch": 14.94279176201373, "grad_norm": 0.05883662402629852, "learning_rate": 2.71265625e-06, "loss": 0.0035, "step": 32650 }, { "epoch": 14.954233409610984, "grad_norm": 3.288303852081299, "learning_rate": 2.70875e-06, "loss": 0.0051, "step": 32675 }, { "epoch": 14.965675057208237, "grad_norm": 0.11650009453296661, "learning_rate": 2.7048437500000004e-06, "loss": 0.0056, "step": 32700 }, { "epoch": 14.977116704805493, "grad_norm": 0.01182722020894289, "learning_rate": 2.7009375e-06, "loss": 0.0042, "step": 32725 }, { "epoch": 14.988558352402746, "grad_norm": 0.04978015646338463, "learning_rate": 2.6970312500000003e-06, "loss": 0.0042, "step": 32750 }, { "epoch": 15.0, "grad_norm": 3.184007167816162, "learning_rate": 2.693125e-06, "loss": 0.0069, "step": 32775 }, { "epoch": 15.011441647597254, "grad_norm": 0.5160917639732361, "learning_rate": 2.68921875e-06, "loss": 0.0055, "step": 32800 }, { "epoch": 15.022883295194507, "grad_norm": 1.0696347951889038, "learning_rate": 2.6853125e-06, "loss": 0.0075, "step": 32825 }, { "epoch": 15.034324942791763, "grad_norm": 0.22492730617523193, "learning_rate": 2.68140625e-06, "loss": 0.0037, "step": 32850 }, { "epoch": 15.045766590389016, "grad_norm": 2.2279930114746094, "learning_rate": 2.6775e-06, "loss": 0.0041, "step": 32875 }, { "epoch": 15.05720823798627, "grad_norm": 0.32741251587867737, "learning_rate": 2.67359375e-06, "loss": 0.0044, "step": 32900 }, { "epoch": 15.068649885583524, "grad_norm": 2.456855058670044, "learning_rate": 2.6696875e-06, "loss": 0.0077, "step": 32925 }, { "epoch": 15.080091533180777, "grad_norm": 0.026868492364883423, "learning_rate": 2.66578125e-06, "loss": 0.0031, "step": 32950 }, { "epoch": 15.091533180778033, "grad_norm": 1.7259018421173096, "learning_rate": 2.6618750000000002e-06, "loss": 0.0053, "step": 32975 }, { "epoch": 15.102974828375286, "grad_norm": 0.5136904716491699, "learning_rate": 2.65796875e-06, "loss": 0.0036, "step": 33000 }, { "epoch": 15.11441647597254, "grad_norm": 3.579509973526001, "learning_rate": 2.6540625000000005e-06, "loss": 0.0055, "step": 33025 }, { "epoch": 15.125858123569794, "grad_norm": 0.518195390701294, "learning_rate": 2.65015625e-06, "loss": 0.0033, "step": 33050 
}, { "epoch": 15.137299771167047, "grad_norm": 3.6283934116363525, "learning_rate": 2.6462500000000003e-06, "loss": 0.0111, "step": 33075 }, { "epoch": 15.148741418764303, "grad_norm": 0.06827004998922348, "learning_rate": 2.64234375e-06, "loss": 0.0036, "step": 33100 }, { "epoch": 15.160183066361556, "grad_norm": 0.32024258375167847, "learning_rate": 2.6384375e-06, "loss": 0.006, "step": 33125 }, { "epoch": 15.17162471395881, "grad_norm": 1.311583161354065, "learning_rate": 2.63453125e-06, "loss": 0.0038, "step": 33150 }, { "epoch": 15.183066361556063, "grad_norm": 0.03487209603190422, "learning_rate": 2.630625e-06, "loss": 0.0055, "step": 33175 }, { "epoch": 15.194508009153319, "grad_norm": 0.9429818987846375, "learning_rate": 2.6267187500000002e-06, "loss": 0.0021, "step": 33200 }, { "epoch": 15.205949656750573, "grad_norm": 4.24558162689209, "learning_rate": 2.6228125e-06, "loss": 0.0083, "step": 33225 }, { "epoch": 15.217391304347826, "grad_norm": 0.026341550052165985, "learning_rate": 2.61890625e-06, "loss": 0.0048, "step": 33250 }, { "epoch": 15.22883295194508, "grad_norm": 0.0364416167140007, "learning_rate": 2.615e-06, "loss": 0.0046, "step": 33275 }, { "epoch": 15.240274599542333, "grad_norm": 0.10698997229337692, "learning_rate": 2.6110937500000003e-06, "loss": 0.003, "step": 33300 }, { "epoch": 15.251716247139589, "grad_norm": 0.024570699781179428, "learning_rate": 2.6071875e-06, "loss": 0.0041, "step": 33325 }, { "epoch": 15.263157894736842, "grad_norm": 0.021379169076681137, "learning_rate": 2.60328125e-06, "loss": 0.0037, "step": 33350 }, { "epoch": 15.274599542334096, "grad_norm": 2.5097668170928955, "learning_rate": 2.599375e-06, "loss": 0.0023, "step": 33375 }, { "epoch": 15.28604118993135, "grad_norm": 0.9193539619445801, "learning_rate": 2.5954687500000003e-06, "loss": 0.0038, "step": 33400 }, { "epoch": 15.297482837528603, "grad_norm": 2.8234310150146484, "learning_rate": 2.5915625000000002e-06, "loss": 0.0094, "step": 33425 }, { "epoch": 15.308924485125859, "grad_norm": 0.37913912534713745, "learning_rate": 2.58765625e-06, "loss": 0.0042, "step": 33450 }, { "epoch": 15.320366132723112, "grad_norm": 2.2030346393585205, "learning_rate": 2.58375e-06, "loss": 0.005, "step": 33475 }, { "epoch": 15.331807780320366, "grad_norm": 0.0667339488863945, "learning_rate": 2.57984375e-06, "loss": 0.0042, "step": 33500 }, { "epoch": 15.34324942791762, "grad_norm": 4.066973686218262, "learning_rate": 2.5759375000000003e-06, "loss": 0.0102, "step": 33525 }, { "epoch": 15.354691075514873, "grad_norm": 0.03947918862104416, "learning_rate": 2.5720312499999998e-06, "loss": 0.0037, "step": 33550 }, { "epoch": 15.366132723112129, "grad_norm": 1.3296524286270142, "learning_rate": 2.568125e-06, "loss": 0.0043, "step": 33575 }, { "epoch": 15.377574370709382, "grad_norm": 2.8236894607543945, "learning_rate": 2.56421875e-06, "loss": 0.0043, "step": 33600 }, { "epoch": 15.389016018306636, "grad_norm": 0.08753054589033127, "learning_rate": 2.5603125000000003e-06, "loss": 0.0046, "step": 33625 }, { "epoch": 15.40045766590389, "grad_norm": 1.5815887451171875, "learning_rate": 2.5564062500000002e-06, "loss": 0.0043, "step": 33650 }, { "epoch": 15.411899313501145, "grad_norm": 0.03917712718248367, "learning_rate": 2.5525e-06, "loss": 0.0114, "step": 33675 }, { "epoch": 15.423340961098399, "grad_norm": 0.08201280236244202, "learning_rate": 2.54859375e-06, "loss": 0.0051, "step": 33700 }, { "epoch": 15.434782608695652, "grad_norm": 0.1986863762140274, "learning_rate": 2.5446875000000004e-06, "loss": 
0.0052, "step": 33725 }, { "epoch": 15.446224256292906, "grad_norm": 0.040176644921302795, "learning_rate": 2.5407812500000003e-06, "loss": 0.0032, "step": 33750 }, { "epoch": 15.45766590389016, "grad_norm": 7.200075149536133, "learning_rate": 2.536875e-06, "loss": 0.0101, "step": 33775 }, { "epoch": 15.469107551487415, "grad_norm": 0.03656776621937752, "learning_rate": 2.53296875e-06, "loss": 0.0033, "step": 33800 }, { "epoch": 15.480549199084669, "grad_norm": 0.037042662501335144, "learning_rate": 2.5290625e-06, "loss": 0.0014, "step": 33825 }, { "epoch": 15.491990846681922, "grad_norm": 4.016299247741699, "learning_rate": 2.5251562500000003e-06, "loss": 0.0065, "step": 33850 }, { "epoch": 15.503432494279176, "grad_norm": 0.4147491157054901, "learning_rate": 2.52125e-06, "loss": 0.0019, "step": 33875 }, { "epoch": 15.51487414187643, "grad_norm": 0.9178475737571716, "learning_rate": 2.51734375e-06, "loss": 0.0024, "step": 33900 }, { "epoch": 15.526315789473685, "grad_norm": 0.034964669495821, "learning_rate": 2.5134375e-06, "loss": 0.004, "step": 33925 }, { "epoch": 15.537757437070939, "grad_norm": 0.03211408108472824, "learning_rate": 2.5095312500000004e-06, "loss": 0.0024, "step": 33950 }, { "epoch": 15.549199084668192, "grad_norm": 0.013405973091721535, "learning_rate": 2.505625e-06, "loss": 0.0027, "step": 33975 }, { "epoch": 15.560640732265446, "grad_norm": 0.05987042188644409, "learning_rate": 2.50171875e-06, "loss": 0.0024, "step": 34000 }, { "epoch": 15.5720823798627, "grad_norm": 0.522374153137207, "learning_rate": 2.4978125e-06, "loss": 0.0053, "step": 34025 }, { "epoch": 15.583524027459955, "grad_norm": 0.20688433945178986, "learning_rate": 2.4939062500000004e-06, "loss": 0.0015, "step": 34050 }, { "epoch": 15.594965675057209, "grad_norm": 0.04718019440770149, "learning_rate": 2.49e-06, "loss": 0.0032, "step": 34075 }, { "epoch": 15.606407322654462, "grad_norm": 0.9375022053718567, "learning_rate": 2.48609375e-06, "loss": 0.0025, "step": 34100 }, { "epoch": 15.617848970251716, "grad_norm": 0.08237750083208084, "learning_rate": 2.4821875e-06, "loss": 0.0052, "step": 34125 }, { "epoch": 15.62929061784897, "grad_norm": 0.6917111277580261, "learning_rate": 2.47828125e-06, "loss": 0.0033, "step": 34150 }, { "epoch": 15.640732265446225, "grad_norm": 0.3365626037120819, "learning_rate": 2.474375e-06, "loss": 0.0042, "step": 34175 }, { "epoch": 15.652173913043478, "grad_norm": 1.8583805561065674, "learning_rate": 2.4704687500000002e-06, "loss": 0.0029, "step": 34200 }, { "epoch": 15.663615560640732, "grad_norm": 2.5219223499298096, "learning_rate": 2.4665625e-06, "loss": 0.0078, "step": 34225 }, { "epoch": 15.675057208237986, "grad_norm": 0.016824573278427124, "learning_rate": 2.46265625e-06, "loss": 0.0036, "step": 34250 }, { "epoch": 15.68649885583524, "grad_norm": 0.07802023738622665, "learning_rate": 2.4587500000000004e-06, "loss": 0.0056, "step": 34275 }, { "epoch": 15.697940503432495, "grad_norm": 0.1118958592414856, "learning_rate": 2.45484375e-06, "loss": 0.002, "step": 34300 }, { "epoch": 15.709382151029748, "grad_norm": 8.526473999023438, "learning_rate": 2.4509375e-06, "loss": 0.0072, "step": 34325 }, { "epoch": 15.720823798627002, "grad_norm": 0.126836359500885, "learning_rate": 2.44703125e-06, "loss": 0.0041, "step": 34350 }, { "epoch": 15.732265446224256, "grad_norm": 0.0682033970952034, "learning_rate": 2.4431250000000004e-06, "loss": 0.0052, "step": 34375 }, { "epoch": 15.743707093821511, "grad_norm": 0.019434375688433647, "learning_rate": 2.43921875e-06, "loss": 
0.0037, "step": 34400 }, { "epoch": 15.755148741418765, "grad_norm": 0.08618365228176117, "learning_rate": 2.4353125000000002e-06, "loss": 0.0041, "step": 34425 }, { "epoch": 15.766590389016018, "grad_norm": 0.10395698994398117, "learning_rate": 2.43140625e-06, "loss": 0.0023, "step": 34450 }, { "epoch": 15.778032036613272, "grad_norm": 5.901650428771973, "learning_rate": 2.4275e-06, "loss": 0.006, "step": 34475 }, { "epoch": 15.789473684210526, "grad_norm": 0.018643943592905998, "learning_rate": 2.42359375e-06, "loss": 0.0066, "step": 34500 }, { "epoch": 15.800915331807781, "grad_norm": 0.06916037201881409, "learning_rate": 2.4196875000000003e-06, "loss": 0.0101, "step": 34525 }, { "epoch": 15.812356979405035, "grad_norm": 1.2917554378509521, "learning_rate": 2.41578125e-06, "loss": 0.0021, "step": 34550 }, { "epoch": 15.823798627002288, "grad_norm": 0.06819707155227661, "learning_rate": 2.411875e-06, "loss": 0.0011, "step": 34575 }, { "epoch": 15.835240274599542, "grad_norm": 1.3246757984161377, "learning_rate": 2.40796875e-06, "loss": 0.0047, "step": 34600 }, { "epoch": 15.846681922196796, "grad_norm": 4.656325817108154, "learning_rate": 2.4040625e-06, "loss": 0.0085, "step": 34625 }, { "epoch": 15.858123569794051, "grad_norm": 0.16503220796585083, "learning_rate": 2.4001562500000002e-06, "loss": 0.0034, "step": 34650 }, { "epoch": 15.869565217391305, "grad_norm": 5.600546360015869, "learning_rate": 2.39625e-06, "loss": 0.0069, "step": 34675 }, { "epoch": 15.881006864988558, "grad_norm": 2.4486541748046875, "learning_rate": 2.39234375e-06, "loss": 0.0049, "step": 34700 }, { "epoch": 15.892448512585812, "grad_norm": 2.8362202644348145, "learning_rate": 2.3884375e-06, "loss": 0.0097, "step": 34725 }, { "epoch": 15.903890160183066, "grad_norm": 0.054218728095293045, "learning_rate": 2.3845312500000003e-06, "loss": 0.0029, "step": 34750 }, { "epoch": 15.915331807780321, "grad_norm": 0.13189098238945007, "learning_rate": 2.380625e-06, "loss": 0.0046, "step": 34775 }, { "epoch": 15.926773455377575, "grad_norm": 0.12270741164684296, "learning_rate": 2.37671875e-06, "loss": 0.0049, "step": 34800 }, { "epoch": 15.938215102974828, "grad_norm": 0.668196439743042, "learning_rate": 2.3728125e-06, "loss": 0.0041, "step": 34825 }, { "epoch": 15.949656750572082, "grad_norm": 1.3448288440704346, "learning_rate": 2.3689062500000003e-06, "loss": 0.0046, "step": 34850 }, { "epoch": 15.961098398169337, "grad_norm": 3.1953134536743164, "learning_rate": 2.3650000000000002e-06, "loss": 0.0074, "step": 34875 }, { "epoch": 15.972540045766591, "grad_norm": 0.15811976790428162, "learning_rate": 2.36109375e-06, "loss": 0.0027, "step": 34900 }, { "epoch": 15.983981693363845, "grad_norm": 3.5737857818603516, "learning_rate": 2.3571875e-06, "loss": 0.0055, "step": 34925 }, { "epoch": 15.995423340961098, "grad_norm": 0.05535993352532387, "learning_rate": 2.35328125e-06, "loss": 0.0063, "step": 34950 }, { "epoch": 16.006864988558352, "grad_norm": 1.1041736602783203, "learning_rate": 2.3493750000000003e-06, "loss": 0.0048, "step": 34975 }, { "epoch": 16.018306636155607, "grad_norm": 0.04761320725083351, "learning_rate": 2.34546875e-06, "loss": 0.0055, "step": 35000 }, { "epoch": 16.018306636155607, "eval_loss": 0.18108440935611725, "eval_runtime": 8415.279, "eval_samples_per_second": 1.132, "eval_steps_per_second": 0.142, "eval_wer": 0.07901388622256351, "step": 35000 }, { "epoch": 16.02974828375286, "grad_norm": 0.12240791320800781, "learning_rate": 2.3415625e-06, "loss": 0.0044, "step": 35025 }, { "epoch": 
16.041189931350115, "grad_norm": 2.9069013595581055, "learning_rate": 2.33765625e-06, "loss": 0.0045, "step": 35050 }, { "epoch": 16.05263157894737, "grad_norm": 0.8656070232391357, "learning_rate": 2.3337500000000003e-06, "loss": 0.0059, "step": 35075 }, { "epoch": 16.06407322654462, "grad_norm": 0.5997121930122375, "learning_rate": 2.3298437500000002e-06, "loss": 0.0042, "step": 35100 }, { "epoch": 16.075514874141877, "grad_norm": 0.8254514932632446, "learning_rate": 2.3259375e-06, "loss": 0.0043, "step": 35125 }, { "epoch": 16.08695652173913, "grad_norm": 0.009766053408384323, "learning_rate": 2.32203125e-06, "loss": 0.0013, "step": 35150 }, { "epoch": 16.098398169336384, "grad_norm": 0.0496329627931118, "learning_rate": 2.3181250000000004e-06, "loss": 0.0063, "step": 35175 }, { "epoch": 16.10983981693364, "grad_norm": 0.6537144184112549, "learning_rate": 2.314375e-06, "loss": 0.0022, "step": 35200 }, { "epoch": 16.12128146453089, "grad_norm": 0.0378439836204052, "learning_rate": 2.31046875e-06, "loss": 0.0068, "step": 35225 }, { "epoch": 16.132723112128147, "grad_norm": 1.9880270957946777, "learning_rate": 2.3065625e-06, "loss": 0.0025, "step": 35250 }, { "epoch": 16.1441647597254, "grad_norm": 0.08200810104608536, "learning_rate": 2.3026562500000003e-06, "loss": 0.0037, "step": 35275 }, { "epoch": 16.155606407322654, "grad_norm": 0.946167528629303, "learning_rate": 2.2987500000000002e-06, "loss": 0.0033, "step": 35300 }, { "epoch": 16.16704805491991, "grad_norm": 0.17309901118278503, "learning_rate": 2.29484375e-06, "loss": 0.0047, "step": 35325 }, { "epoch": 16.17848970251716, "grad_norm": 0.1726033240556717, "learning_rate": 2.2909375e-06, "loss": 0.0062, "step": 35350 }, { "epoch": 16.189931350114417, "grad_norm": 2.284392833709717, "learning_rate": 2.28703125e-06, "loss": 0.0063, "step": 35375 }, { "epoch": 16.20137299771167, "grad_norm": 0.004915393423289061, "learning_rate": 2.2831250000000003e-06, "loss": 0.0016, "step": 35400 }, { "epoch": 16.212814645308924, "grad_norm": 0.2136439085006714, "learning_rate": 2.27921875e-06, "loss": 0.0089, "step": 35425 }, { "epoch": 16.22425629290618, "grad_norm": 1.3364521265029907, "learning_rate": 2.2753125e-06, "loss": 0.0025, "step": 35450 }, { "epoch": 16.23569794050343, "grad_norm": 0.08016708493232727, "learning_rate": 2.27140625e-06, "loss": 0.0025, "step": 35475 }, { "epoch": 16.247139588100687, "grad_norm": 0.007787854410707951, "learning_rate": 2.2675000000000003e-06, "loss": 0.0017, "step": 35500 }, { "epoch": 16.25858123569794, "grad_norm": 0.277810662984848, "learning_rate": 2.2635937500000002e-06, "loss": 0.0066, "step": 35525 }, { "epoch": 16.270022883295194, "grad_norm": 0.0882570669054985, "learning_rate": 2.2596875e-06, "loss": 0.0015, "step": 35550 }, { "epoch": 16.28146453089245, "grad_norm": 2.559431314468384, "learning_rate": 2.25578125e-06, "loss": 0.0049, "step": 35575 }, { "epoch": 16.2929061784897, "grad_norm": 1.0875974893569946, "learning_rate": 2.2518750000000004e-06, "loss": 0.0028, "step": 35600 }, { "epoch": 16.304347826086957, "grad_norm": 0.17312218248844147, "learning_rate": 2.2479687500000003e-06, "loss": 0.0047, "step": 35625 }, { "epoch": 16.31578947368421, "grad_norm": 2.8001294136047363, "learning_rate": 2.2440625e-06, "loss": 0.007, "step": 35650 }, { "epoch": 16.327231121281464, "grad_norm": 2.398991107940674, "learning_rate": 2.24015625e-06, "loss": 0.0047, "step": 35675 }, { "epoch": 16.33867276887872, "grad_norm": 0.014586808159947395, "learning_rate": 2.23625e-06, "loss": 0.0039, "step": 35700 
}, { "epoch": 16.35011441647597, "grad_norm": 0.048909373581409454, "learning_rate": 2.2323437500000003e-06, "loss": 0.0086, "step": 35725 }, { "epoch": 16.361556064073227, "grad_norm": 0.01723467744886875, "learning_rate": 2.2284374999999998e-06, "loss": 0.0032, "step": 35750 }, { "epoch": 16.37299771167048, "grad_norm": 0.7270344495773315, "learning_rate": 2.22453125e-06, "loss": 0.0039, "step": 35775 }, { "epoch": 16.384439359267734, "grad_norm": 0.03530869632959366, "learning_rate": 2.220625e-06, "loss": 0.0012, "step": 35800 }, { "epoch": 16.39588100686499, "grad_norm": 0.6865436434745789, "learning_rate": 2.2167187500000003e-06, "loss": 0.0077, "step": 35825 }, { "epoch": 16.40732265446224, "grad_norm": 0.5618290901184082, "learning_rate": 2.2128125e-06, "loss": 0.0043, "step": 35850 }, { "epoch": 16.418764302059497, "grad_norm": 0.2456822246313095, "learning_rate": 2.20890625e-06, "loss": 0.0066, "step": 35875 }, { "epoch": 16.430205949656752, "grad_norm": 0.016302816569805145, "learning_rate": 2.205e-06, "loss": 0.0035, "step": 35900 }, { "epoch": 16.441647597254004, "grad_norm": 0.07421453297138214, "learning_rate": 2.2010937500000004e-06, "loss": 0.005, "step": 35925 }, { "epoch": 16.45308924485126, "grad_norm": 0.03416196256875992, "learning_rate": 2.1971875e-06, "loss": 0.005, "step": 35950 }, { "epoch": 16.46453089244851, "grad_norm": 0.11165349185466766, "learning_rate": 2.19328125e-06, "loss": 0.0038, "step": 35975 }, { "epoch": 16.475972540045767, "grad_norm": 0.07029126584529877, "learning_rate": 2.189375e-06, "loss": 0.0026, "step": 36000 }, { "epoch": 16.487414187643022, "grad_norm": 0.10349828004837036, "learning_rate": 2.18546875e-06, "loss": 0.0062, "step": 36025 }, { "epoch": 16.498855835240274, "grad_norm": 0.0035689908545464277, "learning_rate": 2.1815625000000003e-06, "loss": 0.003, "step": 36050 }, { "epoch": 16.51029748283753, "grad_norm": 1.769261121749878, "learning_rate": 2.17765625e-06, "loss": 0.0035, "step": 36075 }, { "epoch": 16.52173913043478, "grad_norm": 0.5530456900596619, "learning_rate": 2.17375e-06, "loss": 0.0021, "step": 36100 }, { "epoch": 16.533180778032037, "grad_norm": 0.10640926659107208, "learning_rate": 2.16984375e-06, "loss": 0.0055, "step": 36125 }, { "epoch": 16.544622425629292, "grad_norm": 0.05374550074338913, "learning_rate": 2.1659375000000004e-06, "loss": 0.0025, "step": 36150 }, { "epoch": 16.556064073226544, "grad_norm": 0.6702244281768799, "learning_rate": 2.16203125e-06, "loss": 0.0074, "step": 36175 }, { "epoch": 16.5675057208238, "grad_norm": 0.17198871076107025, "learning_rate": 2.158125e-06, "loss": 0.0037, "step": 36200 }, { "epoch": 16.57894736842105, "grad_norm": 2.446089506149292, "learning_rate": 2.15421875e-06, "loss": 0.0039, "step": 36225 }, { "epoch": 16.590389016018307, "grad_norm": 0.016241425648331642, "learning_rate": 2.1503125000000004e-06, "loss": 0.0028, "step": 36250 }, { "epoch": 16.601830663615562, "grad_norm": 0.010558122768998146, "learning_rate": 2.14640625e-06, "loss": 0.0064, "step": 36275 }, { "epoch": 16.613272311212814, "grad_norm": 0.3437139391899109, "learning_rate": 2.1425000000000002e-06, "loss": 0.0022, "step": 36300 }, { "epoch": 16.62471395881007, "grad_norm": 0.07143080234527588, "learning_rate": 2.13859375e-06, "loss": 0.0036, "step": 36325 }, { "epoch": 16.63615560640732, "grad_norm": 0.9997221827507019, "learning_rate": 2.1346875e-06, "loss": 0.0058, "step": 36350 }, { "epoch": 16.647597254004577, "grad_norm": 0.14317162334918976, "learning_rate": 2.13078125e-06, "loss": 0.0064, 
"step": 36375 }, { "epoch": 16.659038901601832, "grad_norm": 0.9893584847450256, "learning_rate": 2.126875e-06, "loss": 0.002, "step": 36400 }, { "epoch": 16.670480549199084, "grad_norm": 0.4199976325035095, "learning_rate": 2.12296875e-06, "loss": 0.0042, "step": 36425 }, { "epoch": 16.68192219679634, "grad_norm": 0.0734599307179451, "learning_rate": 2.1190625e-06, "loss": 0.0023, "step": 36450 }, { "epoch": 16.69336384439359, "grad_norm": 0.03368838503956795, "learning_rate": 2.11515625e-06, "loss": 0.003, "step": 36475 }, { "epoch": 16.704805491990847, "grad_norm": 0.014497213065624237, "learning_rate": 2.11125e-06, "loss": 0.0028, "step": 36500 }, { "epoch": 16.716247139588102, "grad_norm": 0.25502026081085205, "learning_rate": 2.1073437500000002e-06, "loss": 0.0054, "step": 36525 }, { "epoch": 16.727688787185354, "grad_norm": 4.784895896911621, "learning_rate": 2.1034375e-06, "loss": 0.0015, "step": 36550 }, { "epoch": 16.73913043478261, "grad_norm": 0.014250733889639378, "learning_rate": 2.09953125e-06, "loss": 0.0064, "step": 36575 }, { "epoch": 16.75057208237986, "grad_norm": 1.0211448669433594, "learning_rate": 2.095625e-06, "loss": 0.0038, "step": 36600 }, { "epoch": 16.762013729977117, "grad_norm": 0.2190571427345276, "learning_rate": 2.0917187500000003e-06, "loss": 0.0033, "step": 36625 }, { "epoch": 16.773455377574372, "grad_norm": 0.04402073845267296, "learning_rate": 2.0878125e-06, "loss": 0.0035, "step": 36650 }, { "epoch": 16.784897025171624, "grad_norm": 0.02722327969968319, "learning_rate": 2.08390625e-06, "loss": 0.0057, "step": 36675 }, { "epoch": 16.79633867276888, "grad_norm": 0.009370029903948307, "learning_rate": 2.08e-06, "loss": 0.0019, "step": 36700 }, { "epoch": 16.80778032036613, "grad_norm": 0.590143084526062, "learning_rate": 2.07609375e-06, "loss": 0.0043, "step": 36725 }, { "epoch": 16.819221967963387, "grad_norm": 0.025572996586561203, "learning_rate": 2.0721875000000002e-06, "loss": 0.0029, "step": 36750 }, { "epoch": 16.830663615560642, "grad_norm": 0.8651687502861023, "learning_rate": 2.06828125e-06, "loss": 0.0045, "step": 36775 }, { "epoch": 16.842105263157894, "grad_norm": 0.01061153132468462, "learning_rate": 2.064375e-06, "loss": 0.0047, "step": 36800 }, { "epoch": 16.85354691075515, "grad_norm": 3.5046908855438232, "learning_rate": 2.06046875e-06, "loss": 0.0063, "step": 36825 }, { "epoch": 16.8649885583524, "grad_norm": 0.026393836364150047, "learning_rate": 2.0565625000000003e-06, "loss": 0.0011, "step": 36850 }, { "epoch": 16.876430205949656, "grad_norm": 0.06727100163698196, "learning_rate": 2.05265625e-06, "loss": 0.0076, "step": 36875 }, { "epoch": 16.887871853546912, "grad_norm": 0.012005898170173168, "learning_rate": 2.04875e-06, "loss": 0.0052, "step": 36900 }, { "epoch": 16.899313501144164, "grad_norm": 1.2424042224884033, "learning_rate": 2.04484375e-06, "loss": 0.0055, "step": 36925 }, { "epoch": 16.91075514874142, "grad_norm": 1.8606563806533813, "learning_rate": 2.0409375000000003e-06, "loss": 0.0029, "step": 36950 }, { "epoch": 16.922196796338675, "grad_norm": 0.24449869990348816, "learning_rate": 2.0370312500000002e-06, "loss": 0.0058, "step": 36975 }, { "epoch": 16.933638443935926, "grad_norm": 0.9247862696647644, "learning_rate": 2.033125e-06, "loss": 0.0034, "step": 37000 }, { "epoch": 16.945080091533182, "grad_norm": 0.025322571396827698, "learning_rate": 2.02921875e-06, "loss": 0.0109, "step": 37025 }, { "epoch": 16.956521739130434, "grad_norm": 0.010746942833065987, "learning_rate": 2.0253125e-06, "loss": 0.0026, "step": 
37050 }, { "epoch": 16.96796338672769, "grad_norm": 0.5656152367591858, "learning_rate": 2.0214062500000003e-06, "loss": 0.0054, "step": 37075 }, { "epoch": 16.97940503432494, "grad_norm": 0.3019123375415802, "learning_rate": 2.0174999999999997e-06, "loss": 0.0031, "step": 37100 }, { "epoch": 16.990846681922196, "grad_norm": 0.03569486737251282, "learning_rate": 2.01359375e-06, "loss": 0.0063, "step": 37125 }, { "epoch": 17.002288329519452, "grad_norm": 0.03166543319821358, "learning_rate": 2.0096875e-06, "loss": 0.0057, "step": 37150 }, { "epoch": 17.013729977116704, "grad_norm": 0.2332111895084381, "learning_rate": 2.0057812500000003e-06, "loss": 0.0062, "step": 37175 }, { "epoch": 17.02517162471396, "grad_norm": 0.04952903091907501, "learning_rate": 2.0018749999999998e-06, "loss": 0.0051, "step": 37200 }, { "epoch": 17.036613272311214, "grad_norm": 0.39649686217308044, "learning_rate": 1.99796875e-06, "loss": 0.0029, "step": 37225 }, { "epoch": 17.048054919908466, "grad_norm": 1.2585179805755615, "learning_rate": 1.9940625e-06, "loss": 0.0022, "step": 37250 }, { "epoch": 17.05949656750572, "grad_norm": 0.010012681595981121, "learning_rate": 1.9901562500000003e-06, "loss": 0.0022, "step": 37275 }, { "epoch": 17.070938215102974, "grad_norm": 0.09166552871465683, "learning_rate": 1.9862500000000003e-06, "loss": 0.0023, "step": 37300 }, { "epoch": 17.08237986270023, "grad_norm": 0.0458868145942688, "learning_rate": 1.98234375e-06, "loss": 0.0015, "step": 37325 }, { "epoch": 17.093821510297484, "grad_norm": 1.38352370262146, "learning_rate": 1.9784375e-06, "loss": 0.0125, "step": 37350 }, { "epoch": 17.105263157894736, "grad_norm": 0.2531681954860687, "learning_rate": 1.97453125e-06, "loss": 0.0035, "step": 37375 }, { "epoch": 17.11670480549199, "grad_norm": 0.09584701806306839, "learning_rate": 1.9706250000000003e-06, "loss": 0.0043, "step": 37400 }, { "epoch": 17.128146453089244, "grad_norm": 1.453792929649353, "learning_rate": 1.9667187499999998e-06, "loss": 0.0029, "step": 37425 }, { "epoch": 17.1395881006865, "grad_norm": 0.16350281238555908, "learning_rate": 1.96296875e-06, "loss": 0.008, "step": 37450 }, { "epoch": 17.151029748283754, "grad_norm": 0.028230059891939163, "learning_rate": 1.9590625e-06, "loss": 0.0025, "step": 37475 }, { "epoch": 17.162471395881006, "grad_norm": 0.5208398103713989, "learning_rate": 1.9551562500000003e-06, "loss": 0.0056, "step": 37500 }, { "epoch": 17.17391304347826, "grad_norm": 1.2869021892547607, "learning_rate": 1.95125e-06, "loss": 0.0015, "step": 37525 }, { "epoch": 17.185354691075514, "grad_norm": 0.06982695311307907, "learning_rate": 1.94734375e-06, "loss": 0.0035, "step": 37550 }, { "epoch": 17.19679633867277, "grad_norm": 0.09721732884645462, "learning_rate": 1.9434375e-06, "loss": 0.0016, "step": 37575 }, { "epoch": 17.208237986270024, "grad_norm": 0.3428354263305664, "learning_rate": 1.9395312500000003e-06, "loss": 0.0038, "step": 37600 }, { "epoch": 17.219679633867276, "grad_norm": 0.01983836106956005, "learning_rate": 1.9356249999999998e-06, "loss": 0.0009, "step": 37625 }, { "epoch": 17.23112128146453, "grad_norm": 0.8901821970939636, "learning_rate": 1.93171875e-06, "loss": 0.0041, "step": 37650 }, { "epoch": 17.242562929061783, "grad_norm": 0.0027956163976341486, "learning_rate": 1.9278125e-06, "loss": 0.0027, "step": 37675 }, { "epoch": 17.25400457665904, "grad_norm": 0.01692274399101734, "learning_rate": 1.9239062500000003e-06, "loss": 0.0065, "step": 37700 }, { "epoch": 17.265446224256294, "grad_norm": 0.28875893354415894, 
"learning_rate": 1.92e-06, "loss": 0.0017, "step": 37725 }, { "epoch": 17.276887871853546, "grad_norm": 1.0983160734176636, "learning_rate": 1.91609375e-06, "loss": 0.0037, "step": 37750 }, { "epoch": 17.2883295194508, "grad_norm": 0.22145585715770721, "learning_rate": 1.9121875e-06, "loss": 0.0016, "step": 37775 }, { "epoch": 17.299771167048053, "grad_norm": 0.7165852189064026, "learning_rate": 1.90828125e-06, "loss": 0.0078, "step": 37800 }, { "epoch": 17.31121281464531, "grad_norm": 0.013279288075864315, "learning_rate": 1.9043750000000003e-06, "loss": 0.0021, "step": 37825 }, { "epoch": 17.322654462242564, "grad_norm": 0.8722370266914368, "learning_rate": 1.90046875e-06, "loss": 0.0048, "step": 37850 }, { "epoch": 17.334096109839816, "grad_norm": 0.011238808743655682, "learning_rate": 1.8965625e-06, "loss": 0.0035, "step": 37875 }, { "epoch": 17.34553775743707, "grad_norm": 0.42500200867652893, "learning_rate": 1.8926562500000002e-06, "loss": 0.0042, "step": 37900 }, { "epoch": 17.356979405034323, "grad_norm": 0.6305665969848633, "learning_rate": 1.8887500000000003e-06, "loss": 0.0032, "step": 37925 }, { "epoch": 17.36842105263158, "grad_norm": 0.06624335050582886, "learning_rate": 1.88484375e-06, "loss": 0.0022, "step": 37950 }, { "epoch": 17.379862700228834, "grad_norm": 0.29211392998695374, "learning_rate": 1.8809375000000001e-06, "loss": 0.0046, "step": 37975 }, { "epoch": 17.391304347826086, "grad_norm": 1.3960460424423218, "learning_rate": 1.87703125e-06, "loss": 0.0026, "step": 38000 }, { "epoch": 17.40274599542334, "grad_norm": 0.03258596733212471, "learning_rate": 1.8731250000000002e-06, "loss": 0.0021, "step": 38025 }, { "epoch": 17.414187643020593, "grad_norm": 1.3049372434616089, "learning_rate": 1.8692187499999999e-06, "loss": 0.0067, "step": 38050 }, { "epoch": 17.42562929061785, "grad_norm": 2.3617146015167236, "learning_rate": 1.8653125e-06, "loss": 0.004, "step": 38075 }, { "epoch": 17.437070938215104, "grad_norm": 0.8157466650009155, "learning_rate": 1.86140625e-06, "loss": 0.0071, "step": 38100 }, { "epoch": 17.448512585812356, "grad_norm": 8.144794464111328, "learning_rate": 1.8575000000000002e-06, "loss": 0.0028, "step": 38125 }, { "epoch": 17.45995423340961, "grad_norm": 0.06484866887331009, "learning_rate": 1.85359375e-06, "loss": 0.0027, "step": 38150 }, { "epoch": 17.471395881006863, "grad_norm": 0.03647042438387871, "learning_rate": 1.8496875e-06, "loss": 0.0037, "step": 38175 }, { "epoch": 17.48283752860412, "grad_norm": 0.6224740147590637, "learning_rate": 1.8457812500000001e-06, "loss": 0.0054, "step": 38200 }, { "epoch": 17.494279176201374, "grad_norm": 1.2678624391555786, "learning_rate": 1.8418750000000003e-06, "loss": 0.0039, "step": 38225 }, { "epoch": 17.505720823798626, "grad_norm": 0.17118775844573975, "learning_rate": 1.83796875e-06, "loss": 0.0077, "step": 38250 }, { "epoch": 17.51716247139588, "grad_norm": 0.05411826819181442, "learning_rate": 1.8340625e-06, "loss": 0.0023, "step": 38275 }, { "epoch": 17.528604118993137, "grad_norm": 0.36477145552635193, "learning_rate": 1.8301562500000002e-06, "loss": 0.0016, "step": 38300 }, { "epoch": 17.54004576659039, "grad_norm": 0.23823069036006927, "learning_rate": 1.82625e-06, "loss": 0.0021, "step": 38325 }, { "epoch": 17.551487414187644, "grad_norm": 2.0546512603759766, "learning_rate": 1.82234375e-06, "loss": 0.0023, "step": 38350 }, { "epoch": 17.562929061784896, "grad_norm": 0.18325604498386383, "learning_rate": 1.8184375e-06, "loss": 0.0012, "step": 38375 }, { "epoch": 17.57437070938215, 
"grad_norm": 1.7281999588012695, "learning_rate": 1.81453125e-06, "loss": 0.0029, "step": 38400 }, { "epoch": 17.585812356979407, "grad_norm": 0.08062928915023804, "learning_rate": 1.8106250000000001e-06, "loss": 0.0023, "step": 38425 }, { "epoch": 17.59725400457666, "grad_norm": 0.18261617422103882, "learning_rate": 1.8067187500000003e-06, "loss": 0.0013, "step": 38450 }, { "epoch": 17.608695652173914, "grad_norm": 0.25390270352363586, "learning_rate": 1.8028125e-06, "loss": 0.002, "step": 38475 }, { "epoch": 17.620137299771166, "grad_norm": 0.6369209289550781, "learning_rate": 1.79890625e-06, "loss": 0.0038, "step": 38500 }, { "epoch": 17.63157894736842, "grad_norm": 0.11403697729110718, "learning_rate": 1.7950000000000002e-06, "loss": 0.0022, "step": 38525 }, { "epoch": 17.643020594965677, "grad_norm": 1.623634934425354, "learning_rate": 1.7910937500000003e-06, "loss": 0.003, "step": 38550 }, { "epoch": 17.65446224256293, "grad_norm": 0.5470871925354004, "learning_rate": 1.7871875e-06, "loss": 0.0023, "step": 38575 }, { "epoch": 17.665903890160184, "grad_norm": 0.719538152217865, "learning_rate": 1.78328125e-06, "loss": 0.0053, "step": 38600 }, { "epoch": 17.677345537757436, "grad_norm": 0.019781069830060005, "learning_rate": 1.7793750000000002e-06, "loss": 0.0039, "step": 38625 }, { "epoch": 17.68878718535469, "grad_norm": 0.6072456240653992, "learning_rate": 1.7754687500000001e-06, "loss": 0.0046, "step": 38650 }, { "epoch": 17.700228832951947, "grad_norm": 0.056006353348493576, "learning_rate": 1.7715625e-06, "loss": 0.0022, "step": 38675 }, { "epoch": 17.7116704805492, "grad_norm": 0.2006232738494873, "learning_rate": 1.76765625e-06, "loss": 0.001, "step": 38700 }, { "epoch": 17.723112128146454, "grad_norm": 0.18129004538059235, "learning_rate": 1.76375e-06, "loss": 0.0051, "step": 38725 }, { "epoch": 17.734553775743706, "grad_norm": 1.298149585723877, "learning_rate": 1.7598437500000002e-06, "loss": 0.0047, "step": 38750 }, { "epoch": 17.74599542334096, "grad_norm": 0.2892336845397949, "learning_rate": 1.7559374999999999e-06, "loss": 0.0025, "step": 38775 }, { "epoch": 17.757437070938217, "grad_norm": 0.41350576281547546, "learning_rate": 1.75203125e-06, "loss": 0.0049, "step": 38800 }, { "epoch": 17.76887871853547, "grad_norm": 0.7944715619087219, "learning_rate": 1.748125e-06, "loss": 0.0039, "step": 38825 }, { "epoch": 17.780320366132724, "grad_norm": 0.14121921360492706, "learning_rate": 1.7442187500000002e-06, "loss": 0.0026, "step": 38850 }, { "epoch": 17.791762013729976, "grad_norm": 1.7939224243164062, "learning_rate": 1.7403125e-06, "loss": 0.004, "step": 38875 }, { "epoch": 17.80320366132723, "grad_norm": 2.083294630050659, "learning_rate": 1.73640625e-06, "loss": 0.0053, "step": 38900 }, { "epoch": 17.814645308924486, "grad_norm": 0.061971742659807205, "learning_rate": 1.7325000000000001e-06, "loss": 0.0028, "step": 38925 }, { "epoch": 17.82608695652174, "grad_norm": 0.05892719700932503, "learning_rate": 1.7285937500000003e-06, "loss": 0.0021, "step": 38950 }, { "epoch": 17.837528604118994, "grad_norm": 0.02188616804778576, "learning_rate": 1.7246875e-06, "loss": 0.0028, "step": 38975 }, { "epoch": 17.848970251716246, "grad_norm": 0.34985095262527466, "learning_rate": 1.72078125e-06, "loss": 0.0092, "step": 39000 }, { "epoch": 17.8604118993135, "grad_norm": 0.16679978370666504, "learning_rate": 1.7168750000000002e-06, "loss": 0.0032, "step": 39025 }, { "epoch": 17.871853546910756, "grad_norm": 0.7575101256370544, "learning_rate": 1.71296875e-06, "loss": 0.0046, "step": 
39050 }, { "epoch": 17.88329519450801, "grad_norm": 0.0028128561098128557, "learning_rate": 1.7090625000000002e-06, "loss": 0.0015, "step": 39075 }, { "epoch": 17.894736842105264, "grad_norm": 0.6744664907455444, "learning_rate": 1.70515625e-06, "loss": 0.0034, "step": 39100 }, { "epoch": 17.906178489702516, "grad_norm": 0.3818642795085907, "learning_rate": 1.70125e-06, "loss": 0.0026, "step": 39125 }, { "epoch": 17.91762013729977, "grad_norm": 0.0200780238956213, "learning_rate": 1.6973437500000001e-06, "loss": 0.0055, "step": 39150 }, { "epoch": 17.929061784897026, "grad_norm": 0.07669597119092941, "learning_rate": 1.6934375000000003e-06, "loss": 0.0033, "step": 39175 }, { "epoch": 17.94050343249428, "grad_norm": 0.09770254790782928, "learning_rate": 1.68953125e-06, "loss": 0.0052, "step": 39200 }, { "epoch": 17.951945080091534, "grad_norm": 0.27160772681236267, "learning_rate": 1.685625e-06, "loss": 0.0014, "step": 39225 }, { "epoch": 17.963386727688786, "grad_norm": 0.18114528059959412, "learning_rate": 1.681875e-06, "loss": 0.0043, "step": 39250 }, { "epoch": 17.97482837528604, "grad_norm": 0.5439978241920471, "learning_rate": 1.6779687500000002e-06, "loss": 0.0066, "step": 39275 }, { "epoch": 17.986270022883296, "grad_norm": 0.06256906688213348, "learning_rate": 1.6740625e-06, "loss": 0.004, "step": 39300 }, { "epoch": 17.997711670480548, "grad_norm": 0.009369060397148132, "learning_rate": 1.67015625e-06, "loss": 0.0023, "step": 39325 }, { "epoch": 18.009153318077804, "grad_norm": 0.01397709734737873, "learning_rate": 1.6662500000000001e-06, "loss": 0.0029, "step": 39350 }, { "epoch": 18.020594965675055, "grad_norm": 0.04984962195158005, "learning_rate": 1.6623437500000003e-06, "loss": 0.0048, "step": 39375 }, { "epoch": 18.03203661327231, "grad_norm": 0.015082244761288166, "learning_rate": 1.6584375e-06, "loss": 0.0015, "step": 39400 }, { "epoch": 18.043478260869566, "grad_norm": 0.12404351681470871, "learning_rate": 1.65453125e-06, "loss": 0.0012, "step": 39425 }, { "epoch": 18.054919908466818, "grad_norm": 0.01762317679822445, "learning_rate": 1.6506250000000002e-06, "loss": 0.0021, "step": 39450 }, { "epoch": 18.066361556064074, "grad_norm": 0.02278951182961464, "learning_rate": 1.64671875e-06, "loss": 0.0024, "step": 39475 }, { "epoch": 18.07780320366133, "grad_norm": 0.023347679525613785, "learning_rate": 1.6428125e-06, "loss": 0.0015, "step": 39500 }, { "epoch": 18.08924485125858, "grad_norm": 0.007021172437816858, "learning_rate": 1.63890625e-06, "loss": 0.0024, "step": 39525 }, { "epoch": 18.100686498855836, "grad_norm": 0.03417914733290672, "learning_rate": 1.635e-06, "loss": 0.0035, "step": 39550 }, { "epoch": 18.112128146453088, "grad_norm": 0.011898049153387547, "learning_rate": 1.6310937500000001e-06, "loss": 0.001, "step": 39575 }, { "epoch": 18.123569794050344, "grad_norm": 0.5622138381004333, "learning_rate": 1.6271875000000002e-06, "loss": 0.0031, "step": 39600 }, { "epoch": 18.1350114416476, "grad_norm": 0.044228821992874146, "learning_rate": 1.62328125e-06, "loss": 0.0024, "step": 39625 }, { "epoch": 18.14645308924485, "grad_norm": 0.016900882124900818, "learning_rate": 1.619375e-06, "loss": 0.0057, "step": 39650 }, { "epoch": 18.157894736842106, "grad_norm": 0.01069649588316679, "learning_rate": 1.6154687500000002e-06, "loss": 0.0048, "step": 39675 }, { "epoch": 18.169336384439358, "grad_norm": 0.028538674116134644, "learning_rate": 1.6115625000000003e-06, "loss": 0.0038, "step": 39700 }, { "epoch": 18.180778032036613, "grad_norm": 0.9430875182151794, 
"learning_rate": 1.60765625e-06, "loss": 0.0008, "step": 39725 }, { "epoch": 18.19221967963387, "grad_norm": 0.020247552543878555, "learning_rate": 1.60375e-06, "loss": 0.0032, "step": 39750 }, { "epoch": 18.20366132723112, "grad_norm": 0.02668643184006214, "learning_rate": 1.5998437500000002e-06, "loss": 0.0024, "step": 39775 }, { "epoch": 18.215102974828376, "grad_norm": 0.01583932898938656, "learning_rate": 1.5959375000000001e-06, "loss": 0.0028, "step": 39800 }, { "epoch": 18.226544622425628, "grad_norm": 0.03633393347263336, "learning_rate": 1.59203125e-06, "loss": 0.0034, "step": 39825 }, { "epoch": 18.237986270022883, "grad_norm": 0.7006585597991943, "learning_rate": 1.588125e-06, "loss": 0.0008, "step": 39850 }, { "epoch": 18.24942791762014, "grad_norm": 0.06800740212202072, "learning_rate": 1.58421875e-06, "loss": 0.0017, "step": 39875 }, { "epoch": 18.26086956521739, "grad_norm": 0.10307973623275757, "learning_rate": 1.5803125000000002e-06, "loss": 0.002, "step": 39900 }, { "epoch": 18.272311212814646, "grad_norm": 0.04992588236927986, "learning_rate": 1.5764062499999999e-06, "loss": 0.0009, "step": 39925 }, { "epoch": 18.283752860411898, "grad_norm": 0.04925895854830742, "learning_rate": 1.5725e-06, "loss": 0.002, "step": 39950 }, { "epoch": 18.295194508009153, "grad_norm": 0.07469247281551361, "learning_rate": 1.56859375e-06, "loss": 0.0021, "step": 39975 }, { "epoch": 18.30663615560641, "grad_norm": 0.02463546395301819, "learning_rate": 1.5646875000000002e-06, "loss": 0.0031, "step": 40000 }, { "epoch": 18.30663615560641, "eval_loss": 0.1906909942626953, "eval_runtime": 8655.9655, "eval_samples_per_second": 1.1, "eval_steps_per_second": 0.138, "eval_wer": 0.07843795994112754, "step": 40000 }, { "epoch": 18.31807780320366, "grad_norm": 3.2581143379211426, "learning_rate": 1.5607812500000001e-06, "loss": 0.0034, "step": 40025 }, { "epoch": 18.329519450800916, "grad_norm": 1.4978753328323364, "learning_rate": 1.556875e-06, "loss": 0.0088, "step": 40050 }, { "epoch": 18.340961098398168, "grad_norm": 0.3464537262916565, "learning_rate": 1.5529687500000001e-06, "loss": 0.0042, "step": 40075 }, { "epoch": 18.352402745995423, "grad_norm": 1.6809250116348267, "learning_rate": 1.5490625e-06, "loss": 0.0026, "step": 40100 }, { "epoch": 18.36384439359268, "grad_norm": 0.006601128727197647, "learning_rate": 1.5451562500000002e-06, "loss": 0.0014, "step": 40125 }, { "epoch": 18.37528604118993, "grad_norm": 0.1751963049173355, "learning_rate": 1.54125e-06, "loss": 0.0074, "step": 40150 }, { "epoch": 18.386727688787186, "grad_norm": 0.02184423804283142, "learning_rate": 1.53734375e-06, "loss": 0.0037, "step": 40175 }, { "epoch": 18.398169336384438, "grad_norm": 0.06314431130886078, "learning_rate": 1.5334375e-06, "loss": 0.0033, "step": 40200 }, { "epoch": 18.409610983981693, "grad_norm": 0.04851851612329483, "learning_rate": 1.52953125e-06, "loss": 0.0027, "step": 40225 }, { "epoch": 18.42105263157895, "grad_norm": 0.07981372624635696, "learning_rate": 1.5256250000000001e-06, "loss": 0.0029, "step": 40250 }, { "epoch": 18.4324942791762, "grad_norm": 1.5549519062042236, "learning_rate": 1.52171875e-06, "loss": 0.0035, "step": 40275 }, { "epoch": 18.443935926773456, "grad_norm": 0.01691371761262417, "learning_rate": 1.5178125000000001e-06, "loss": 0.0038, "step": 40300 }, { "epoch": 18.455377574370708, "grad_norm": 0.005776215344667435, "learning_rate": 1.51390625e-06, "loss": 0.005, "step": 40325 }, { "epoch": 18.466819221967963, "grad_norm": 0.018735315650701523, "learning_rate": 
1.5100000000000002e-06, "loss": 0.0027, "step": 40350 }, { "epoch": 18.47826086956522, "grad_norm": 0.1537744551897049, "learning_rate": 1.50609375e-06, "loss": 0.0015, "step": 40375 }, { "epoch": 18.48970251716247, "grad_norm": 0.013893947005271912, "learning_rate": 1.5021875000000002e-06, "loss": 0.0021, "step": 40400 }, { "epoch": 18.501144164759726, "grad_norm": 3.427546501159668, "learning_rate": 1.49828125e-06, "loss": 0.0035, "step": 40425 }, { "epoch": 18.512585812356978, "grad_norm": 0.08010926097631454, "learning_rate": 1.4943750000000002e-06, "loss": 0.0015, "step": 40450 }, { "epoch": 18.524027459954233, "grad_norm": 0.6961889863014221, "learning_rate": 1.49046875e-06, "loss": 0.0011, "step": 40475 }, { "epoch": 18.53546910755149, "grad_norm": 0.05514024570584297, "learning_rate": 1.4865625e-06, "loss": 0.0052, "step": 40500 }, { "epoch": 18.54691075514874, "grad_norm": 0.6718109846115112, "learning_rate": 1.48265625e-06, "loss": 0.0008, "step": 40525 }, { "epoch": 18.558352402745996, "grad_norm": 1.6376979351043701, "learning_rate": 1.47875e-06, "loss": 0.0027, "step": 40550 }, { "epoch": 18.569794050343248, "grad_norm": 0.016544288024306297, "learning_rate": 1.47484375e-06, "loss": 0.0005, "step": 40575 }, { "epoch": 18.581235697940503, "grad_norm": 0.010087869130074978, "learning_rate": 1.4709375e-06, "loss": 0.0069, "step": 40600 }, { "epoch": 18.59267734553776, "grad_norm": 2.078171968460083, "learning_rate": 1.46703125e-06, "loss": 0.0017, "step": 40625 }, { "epoch": 18.60411899313501, "grad_norm": 0.0357007198035717, "learning_rate": 1.463125e-06, "loss": 0.0044, "step": 40650 }, { "epoch": 18.615560640732266, "grad_norm": 1.593570590019226, "learning_rate": 1.45921875e-06, "loss": 0.0026, "step": 40675 }, { "epoch": 18.62700228832952, "grad_norm": 1.5735076665878296, "learning_rate": 1.4553125e-06, "loss": 0.0012, "step": 40700 }, { "epoch": 18.638443935926773, "grad_norm": 0.033659450709819794, "learning_rate": 1.45140625e-06, "loss": 0.0017, "step": 40725 }, { "epoch": 18.64988558352403, "grad_norm": 0.09848388284444809, "learning_rate": 1.4475000000000001e-06, "loss": 0.0064, "step": 40750 }, { "epoch": 18.66132723112128, "grad_norm": 0.0877099335193634, "learning_rate": 1.4435937500000002e-06, "loss": 0.008, "step": 40775 }, { "epoch": 18.672768878718536, "grad_norm": 0.01506600622087717, "learning_rate": 1.4396875000000001e-06, "loss": 0.0012, "step": 40800 }, { "epoch": 18.68421052631579, "grad_norm": 0.02003307454288006, "learning_rate": 1.43578125e-06, "loss": 0.0058, "step": 40825 }, { "epoch": 18.695652173913043, "grad_norm": 1.6251393556594849, "learning_rate": 1.4318750000000002e-06, "loss": 0.0041, "step": 40850 }, { "epoch": 18.7070938215103, "grad_norm": 0.028162766247987747, "learning_rate": 1.42796875e-06, "loss": 0.0014, "step": 40875 }, { "epoch": 18.71853546910755, "grad_norm": 0.014050312340259552, "learning_rate": 1.4240625e-06, "loss": 0.0022, "step": 40900 }, { "epoch": 18.729977116704806, "grad_norm": 0.028012041002511978, "learning_rate": 1.42015625e-06, "loss": 0.0026, "step": 40925 }, { "epoch": 18.74141876430206, "grad_norm": 0.010212202556431293, "learning_rate": 1.41625e-06, "loss": 0.0069, "step": 40950 }, { "epoch": 18.752860411899313, "grad_norm": 0.009661396034061909, "learning_rate": 1.4123437500000001e-06, "loss": 0.0019, "step": 40975 }, { "epoch": 18.76430205949657, "grad_norm": 0.25984981656074524, "learning_rate": 1.4084375e-06, "loss": 0.004, "step": 41000 }, { "epoch": 18.77574370709382, "grad_norm": 0.10415000468492508, 
"learning_rate": 1.4045312500000001e-06, "loss": 0.0015, "step": 41025 }, { "epoch": 18.787185354691076, "grad_norm": 0.016817409545183182, "learning_rate": 1.400625e-06, "loss": 0.0044, "step": 41050 }, { "epoch": 18.79862700228833, "grad_norm": 1.5429880619049072, "learning_rate": 1.3967187500000002e-06, "loss": 0.0023, "step": 41075 }, { "epoch": 18.810068649885583, "grad_norm": 0.7825852632522583, "learning_rate": 1.3928125e-06, "loss": 0.0028, "step": 41100 }, { "epoch": 18.82151029748284, "grad_norm": 1.5415157079696655, "learning_rate": 1.3889062500000002e-06, "loss": 0.0052, "step": 41125 }, { "epoch": 18.83295194508009, "grad_norm": 0.047702062875032425, "learning_rate": 1.385e-06, "loss": 0.007, "step": 41150 }, { "epoch": 18.844393592677346, "grad_norm": 0.14654336869716644, "learning_rate": 1.3810937500000002e-06, "loss": 0.0018, "step": 41175 }, { "epoch": 18.8558352402746, "grad_norm": 0.04059933125972748, "learning_rate": 1.3771875e-06, "loss": 0.002, "step": 41200 }, { "epoch": 18.867276887871853, "grad_norm": 0.28603577613830566, "learning_rate": 1.37328125e-06, "loss": 0.0024, "step": 41225 }, { "epoch": 18.87871853546911, "grad_norm": 0.023345595225691795, "learning_rate": 1.369375e-06, "loss": 0.0029, "step": 41250 }, { "epoch": 18.89016018306636, "grad_norm": 0.12155888229608536, "learning_rate": 1.36546875e-06, "loss": 0.005, "step": 41275 }, { "epoch": 18.901601830663616, "grad_norm": 4.1245436668396, "learning_rate": 1.3615625e-06, "loss": 0.0044, "step": 41300 }, { "epoch": 18.91304347826087, "grad_norm": 0.08341874182224274, "learning_rate": 1.35765625e-06, "loss": 0.0017, "step": 41325 }, { "epoch": 18.924485125858123, "grad_norm": 0.03105013258755207, "learning_rate": 1.35375e-06, "loss": 0.0019, "step": 41350 }, { "epoch": 18.93592677345538, "grad_norm": 1.42904531955719, "learning_rate": 1.34984375e-06, "loss": 0.0026, "step": 41375 }, { "epoch": 18.94736842105263, "grad_norm": 0.05006731301546097, "learning_rate": 1.3459375000000002e-06, "loss": 0.0033, "step": 41400 }, { "epoch": 18.958810068649885, "grad_norm": 3.949215888977051, "learning_rate": 1.34203125e-06, "loss": 0.0041, "step": 41425 }, { "epoch": 18.97025171624714, "grad_norm": 0.007967664860188961, "learning_rate": 1.3381250000000002e-06, "loss": 0.0034, "step": 41450 }, { "epoch": 18.981693363844393, "grad_norm": 0.014341895468533039, "learning_rate": 1.3342187500000001e-06, "loss": 0.0016, "step": 41475 }, { "epoch": 18.993135011441648, "grad_norm": 1.7901051044464111, "learning_rate": 1.3303125000000002e-06, "loss": 0.0078, "step": 41500 }, { "epoch": 19.0045766590389, "grad_norm": 0.24964918196201324, "learning_rate": 1.3264062500000001e-06, "loss": 0.0028, "step": 41525 }, { "epoch": 19.016018306636155, "grad_norm": 0.47201448678970337, "learning_rate": 1.3225e-06, "loss": 0.0011, "step": 41550 }, { "epoch": 19.02745995423341, "grad_norm": 1.1855088472366333, "learning_rate": 1.31859375e-06, "loss": 0.0039, "step": 41575 }, { "epoch": 19.038901601830663, "grad_norm": 2.4581384658813477, "learning_rate": 1.3146875e-06, "loss": 0.0025, "step": 41600 }, { "epoch": 19.050343249427918, "grad_norm": 0.010398473590612411, "learning_rate": 1.31078125e-06, "loss": 0.0049, "step": 41625 }, { "epoch": 19.06178489702517, "grad_norm": 0.014186563901603222, "learning_rate": 1.306875e-06, "loss": 0.0016, "step": 41650 }, { "epoch": 19.073226544622425, "grad_norm": 0.005698268301784992, "learning_rate": 1.30296875e-06, "loss": 0.0036, "step": 41675 }, { "epoch": 19.08466819221968, "grad_norm": 
0.09281117469072342, "learning_rate": 1.2990625000000001e-06, "loss": 0.0016, "step": 41700 }, { "epoch": 19.096109839816933, "grad_norm": 0.008674722164869308, "learning_rate": 1.29515625e-06, "loss": 0.003, "step": 41725 }, { "epoch": 19.107551487414188, "grad_norm": 0.0041849990375339985, "learning_rate": 1.2912500000000001e-06, "loss": 0.0018, "step": 41750 }, { "epoch": 19.11899313501144, "grad_norm": 0.05465414375066757, "learning_rate": 1.28734375e-06, "loss": 0.0038, "step": 41775 }, { "epoch": 19.130434782608695, "grad_norm": 0.011361594311892986, "learning_rate": 1.2834375000000002e-06, "loss": 0.0012, "step": 41800 }, { "epoch": 19.14187643020595, "grad_norm": 0.01416735164821148, "learning_rate": 1.27953125e-06, "loss": 0.0024, "step": 41825 }, { "epoch": 19.153318077803203, "grad_norm": 0.08474656194448471, "learning_rate": 1.2756250000000002e-06, "loss": 0.0008, "step": 41850 }, { "epoch": 19.164759725400458, "grad_norm": 0.14660042524337769, "learning_rate": 1.27171875e-06, "loss": 0.0047, "step": 41875 }, { "epoch": 19.17620137299771, "grad_norm": 0.04085688665509224, "learning_rate": 1.2678125e-06, "loss": 0.002, "step": 41900 }, { "epoch": 19.187643020594965, "grad_norm": 1.1031817197799683, "learning_rate": 1.2639062499999999e-06, "loss": 0.0035, "step": 41925 }, { "epoch": 19.19908466819222, "grad_norm": 0.11019287258386612, "learning_rate": 1.26e-06, "loss": 0.0021, "step": 41950 }, { "epoch": 19.210526315789473, "grad_norm": 0.02955048531293869, "learning_rate": 1.25609375e-06, "loss": 0.0046, "step": 41975 }, { "epoch": 19.221967963386728, "grad_norm": 1.8626301288604736, "learning_rate": 1.2521875e-06, "loss": 0.0024, "step": 42000 }, { "epoch": 19.233409610983983, "grad_norm": 0.6446524262428284, "learning_rate": 1.2482812500000001e-06, "loss": 0.002, "step": 42025 }, { "epoch": 19.244851258581235, "grad_norm": 0.00395909184589982, "learning_rate": 1.244375e-06, "loss": 0.0005, "step": 42050 }, { "epoch": 19.25629290617849, "grad_norm": 0.018985895439982414, "learning_rate": 1.2404687500000002e-06, "loss": 0.0011, "step": 42075 }, { "epoch": 19.267734553775743, "grad_norm": 1.6570340394973755, "learning_rate": 1.2365625e-06, "loss": 0.0025, "step": 42100 }, { "epoch": 19.279176201372998, "grad_norm": 3.0952064990997314, "learning_rate": 1.2326562500000002e-06, "loss": 0.0042, "step": 42125 }, { "epoch": 19.290617848970253, "grad_norm": 0.18994177877902985, "learning_rate": 1.22875e-06, "loss": 0.0017, "step": 42150 }, { "epoch": 19.302059496567505, "grad_norm": 1.1406517028808594, "learning_rate": 1.2248437500000002e-06, "loss": 0.0022, "step": 42175 }, { "epoch": 19.31350114416476, "grad_norm": 0.340487003326416, "learning_rate": 1.2209375000000001e-06, "loss": 0.0026, "step": 42200 }, { "epoch": 19.324942791762012, "grad_norm": 0.9763627648353577, "learning_rate": 1.21703125e-06, "loss": 0.0027, "step": 42225 }, { "epoch": 19.336384439359268, "grad_norm": 0.016768965870141983, "learning_rate": 1.213125e-06, "loss": 0.0028, "step": 42250 }, { "epoch": 19.347826086956523, "grad_norm": 0.1443735659122467, "learning_rate": 1.20921875e-06, "loss": 0.004, "step": 42275 }, { "epoch": 19.359267734553775, "grad_norm": 0.05310703441500664, "learning_rate": 1.2053125e-06, "loss": 0.0036, "step": 42300 }, { "epoch": 19.37070938215103, "grad_norm": 0.057933103293180466, "learning_rate": 1.20140625e-06, "loss": 0.0038, "step": 42325 }, { "epoch": 19.382151029748282, "grad_norm": 0.2718435823917389, "learning_rate": 1.1975e-06, "loss": 0.0013, "step": 42350 }, { "epoch": 
19.393592677345538, "grad_norm": 0.05369953438639641, "learning_rate": 1.19359375e-06, "loss": 0.003, "step": 42375 }, { "epoch": 19.405034324942793, "grad_norm": 0.011975161731243134, "learning_rate": 1.1896875e-06, "loss": 0.0015, "step": 42400 }, { "epoch": 19.416475972540045, "grad_norm": 0.015088324435055256, "learning_rate": 1.1857812500000001e-06, "loss": 0.0022, "step": 42425 }, { "epoch": 19.4279176201373, "grad_norm": 0.007857023738324642, "learning_rate": 1.181875e-06, "loss": 0.0013, "step": 42450 }, { "epoch": 19.439359267734552, "grad_norm": 0.02740396372973919, "learning_rate": 1.1779687500000001e-06, "loss": 0.0058, "step": 42475 }, { "epoch": 19.450800915331808, "grad_norm": 0.0039176614955067635, "learning_rate": 1.1740625e-06, "loss": 0.001, "step": 42500 }, { "epoch": 19.462242562929063, "grad_norm": 0.06353799253702164, "learning_rate": 1.1701562500000002e-06, "loss": 0.0043, "step": 42525 }, { "epoch": 19.473684210526315, "grad_norm": 0.05456888675689697, "learning_rate": 1.16625e-06, "loss": 0.0018, "step": 42550 }, { "epoch": 19.48512585812357, "grad_norm": 0.007915114052593708, "learning_rate": 1.16234375e-06, "loss": 0.0015, "step": 42575 }, { "epoch": 19.496567505720822, "grad_norm": 0.0682164877653122, "learning_rate": 1.1584375e-06, "loss": 0.003, "step": 42600 }, { "epoch": 19.508009153318078, "grad_norm": 0.22733402252197266, "learning_rate": 1.15453125e-06, "loss": 0.0022, "step": 42625 }, { "epoch": 19.519450800915333, "grad_norm": 0.04920228570699692, "learning_rate": 1.150625e-06, "loss": 0.0018, "step": 42650 }, { "epoch": 19.530892448512585, "grad_norm": 0.19921521842479706, "learning_rate": 1.14671875e-06, "loss": 0.0106, "step": 42675 }, { "epoch": 19.54233409610984, "grad_norm": 0.01079315971583128, "learning_rate": 1.1428125000000001e-06, "loss": 0.002, "step": 42700 }, { "epoch": 19.553775743707092, "grad_norm": 0.01297928486019373, "learning_rate": 1.13890625e-06, "loss": 0.0026, "step": 42725 }, { "epoch": 19.565217391304348, "grad_norm": 0.01211167499423027, "learning_rate": 1.1350000000000001e-06, "loss": 0.0026, "step": 42750 }, { "epoch": 19.576659038901603, "grad_norm": 0.03415808454155922, "learning_rate": 1.13109375e-06, "loss": 0.0049, "step": 42775 }, { "epoch": 19.588100686498855, "grad_norm": 0.8120661377906799, "learning_rate": 1.1271875000000002e-06, "loss": 0.0014, "step": 42800 }, { "epoch": 19.59954233409611, "grad_norm": 0.012082367204129696, "learning_rate": 1.12328125e-06, "loss": 0.0008, "step": 42825 }, { "epoch": 19.610983981693362, "grad_norm": 0.007694719359278679, "learning_rate": 1.1193750000000002e-06, "loss": 0.0021, "step": 42850 }, { "epoch": 19.622425629290618, "grad_norm": 0.07108388841152191, "learning_rate": 1.11546875e-06, "loss": 0.0036, "step": 42875 }, { "epoch": 19.633867276887873, "grad_norm": 1.9279260635375977, "learning_rate": 1.1115625e-06, "loss": 0.005, "step": 42900 }, { "epoch": 19.645308924485125, "grad_norm": 0.026831557974219322, "learning_rate": 1.1076562500000001e-06, "loss": 0.0038, "step": 42925 }, { "epoch": 19.65675057208238, "grad_norm": 0.09153138846158981, "learning_rate": 1.10375e-06, "loss": 0.0013, "step": 42950 }, { "epoch": 19.668192219679632, "grad_norm": 0.01675252988934517, "learning_rate": 1.09984375e-06, "loss": 0.0041, "step": 42975 }, { "epoch": 19.679633867276888, "grad_norm": 0.3901120722293854, "learning_rate": 1.0959375e-06, "loss": 0.0029, "step": 43000 }, { "epoch": 19.691075514874143, "grad_norm": 0.10046473145484924, "learning_rate": 1.09203125e-06, "loss": 0.0028, 
"step": 43025 }, { "epoch": 19.702517162471395, "grad_norm": 0.03431469574570656, "learning_rate": 1.088125e-06, "loss": 0.0035, "step": 43050 }, { "epoch": 19.71395881006865, "grad_norm": 0.964876115322113, "learning_rate": 1.08421875e-06, "loss": 0.0073, "step": 43075 }, { "epoch": 19.725400457665906, "grad_norm": 0.3534577786922455, "learning_rate": 1.0803125e-06, "loss": 0.0008, "step": 43100 }, { "epoch": 19.736842105263158, "grad_norm": 1.4753586053848267, "learning_rate": 1.07640625e-06, "loss": 0.0036, "step": 43125 }, { "epoch": 19.748283752860413, "grad_norm": 0.038761936128139496, "learning_rate": 1.0725000000000001e-06, "loss": 0.0007, "step": 43150 }, { "epoch": 19.759725400457665, "grad_norm": 1.100974440574646, "learning_rate": 1.06859375e-06, "loss": 0.0028, "step": 43175 }, { "epoch": 19.77116704805492, "grad_norm": 0.006787677761167288, "learning_rate": 1.0646875000000001e-06, "loss": 0.0018, "step": 43200 }, { "epoch": 19.782608695652176, "grad_norm": 0.008989253081381321, "learning_rate": 1.06078125e-06, "loss": 0.0051, "step": 43225 }, { "epoch": 19.794050343249427, "grad_norm": 0.6210397481918335, "learning_rate": 1.0568750000000002e-06, "loss": 0.0031, "step": 43250 }, { "epoch": 19.805491990846683, "grad_norm": 1.2203654050827026, "learning_rate": 1.0531250000000002e-06, "loss": 0.0029, "step": 43275 }, { "epoch": 19.816933638443935, "grad_norm": 0.12051892280578613, "learning_rate": 1.04921875e-06, "loss": 0.0027, "step": 43300 }, { "epoch": 19.82837528604119, "grad_norm": 0.0844549685716629, "learning_rate": 1.0453125000000002e-06, "loss": 0.0017, "step": 43325 }, { "epoch": 19.839816933638446, "grad_norm": 0.18122392892837524, "learning_rate": 1.0414062500000001e-06, "loss": 0.0029, "step": 43350 }, { "epoch": 19.851258581235697, "grad_norm": 0.14276260137557983, "learning_rate": 1.0375e-06, "loss": 0.0038, "step": 43375 }, { "epoch": 19.862700228832953, "grad_norm": 0.017156146466732025, "learning_rate": 1.03359375e-06, "loss": 0.0016, "step": 43400 }, { "epoch": 19.874141876430205, "grad_norm": 0.5428557395935059, "learning_rate": 1.0296875e-06, "loss": 0.0016, "step": 43425 }, { "epoch": 19.88558352402746, "grad_norm": 0.18533864617347717, "learning_rate": 1.02578125e-06, "loss": 0.0032, "step": 43450 }, { "epoch": 19.897025171624716, "grad_norm": 0.14326012134552002, "learning_rate": 1.021875e-06, "loss": 0.004, "step": 43475 }, { "epoch": 19.908466819221967, "grad_norm": 0.03617006167769432, "learning_rate": 1.01796875e-06, "loss": 0.0036, "step": 43500 }, { "epoch": 19.919908466819223, "grad_norm": 0.05975247547030449, "learning_rate": 1.0140625e-06, "loss": 0.0051, "step": 43525 }, { "epoch": 19.931350114416475, "grad_norm": 0.1550580859184265, "learning_rate": 1.01015625e-06, "loss": 0.0023, "step": 43550 }, { "epoch": 19.94279176201373, "grad_norm": 0.08554903417825699, "learning_rate": 1.0062500000000001e-06, "loss": 0.0035, "step": 43575 }, { "epoch": 19.954233409610985, "grad_norm": 0.009217420592904091, "learning_rate": 1.00234375e-06, "loss": 0.0009, "step": 43600 }, { "epoch": 19.965675057208237, "grad_norm": 0.09155456721782684, "learning_rate": 9.984375000000001e-07, "loss": 0.0043, "step": 43625 }, { "epoch": 19.977116704805493, "grad_norm": 0.022363737225532532, "learning_rate": 9.9453125e-07, "loss": 0.0015, "step": 43650 }, { "epoch": 19.988558352402745, "grad_norm": 0.09595005214214325, "learning_rate": 9.906250000000002e-07, "loss": 0.0014, "step": 43675 }, { "epoch": 20.0, "grad_norm": 3.715383291244507, "learning_rate": 9.8671875e-07, 
"loss": 0.0043, "step": 43700 }, { "epoch": 20.011441647597255, "grad_norm": 0.06310781091451645, "learning_rate": 9.828125e-07, "loss": 0.0021, "step": 43725 }, { "epoch": 20.022883295194507, "grad_norm": 0.023597639054059982, "learning_rate": 9.789062499999999e-07, "loss": 0.004, "step": 43750 }, { "epoch": 20.034324942791763, "grad_norm": 0.01473512314260006, "learning_rate": 9.75e-07, "loss": 0.0027, "step": 43775 }, { "epoch": 20.045766590389015, "grad_norm": 0.02906249277293682, "learning_rate": 9.7109375e-07, "loss": 0.003, "step": 43800 }, { "epoch": 20.05720823798627, "grad_norm": 1.1926935911178589, "learning_rate": 9.671875e-07, "loss": 0.0027, "step": 43825 }, { "epoch": 20.068649885583525, "grad_norm": 0.03131160885095596, "learning_rate": 9.632812500000001e-07, "loss": 0.0051, "step": 43850 }, { "epoch": 20.080091533180777, "grad_norm": 0.25488337874412537, "learning_rate": 9.59375e-07, "loss": 0.0021, "step": 43875 }, { "epoch": 20.091533180778033, "grad_norm": 1.750841498374939, "learning_rate": 9.554687500000001e-07, "loss": 0.0026, "step": 43900 }, { "epoch": 20.102974828375284, "grad_norm": 0.01565762422978878, "learning_rate": 9.515625e-07, "loss": 0.0018, "step": 43925 }, { "epoch": 20.11441647597254, "grad_norm": 4.598333358764648, "learning_rate": 9.476562500000001e-07, "loss": 0.0068, "step": 43950 }, { "epoch": 20.125858123569795, "grad_norm": 0.00931946188211441, "learning_rate": 9.4375e-07, "loss": 0.0018, "step": 43975 }, { "epoch": 20.137299771167047, "grad_norm": 0.22226235270500183, "learning_rate": 9.398437500000001e-07, "loss": 0.0026, "step": 44000 }, { "epoch": 20.148741418764303, "grad_norm": 0.0068317013792693615, "learning_rate": 9.359375e-07, "loss": 0.0022, "step": 44025 }, { "epoch": 20.160183066361554, "grad_norm": 1.8505562543869019, "learning_rate": 9.320312500000001e-07, "loss": 0.0022, "step": 44050 }, { "epoch": 20.17162471395881, "grad_norm": 0.05139665678143501, "learning_rate": 9.28125e-07, "loss": 0.0029, "step": 44075 }, { "epoch": 20.183066361556065, "grad_norm": 3.548659563064575, "learning_rate": 9.242187500000001e-07, "loss": 0.0041, "step": 44100 }, { "epoch": 20.194508009153317, "grad_norm": 0.03986160829663277, "learning_rate": 9.203125e-07, "loss": 0.0018, "step": 44125 }, { "epoch": 20.205949656750573, "grad_norm": 0.8038386702537537, "learning_rate": 9.1640625e-07, "loss": 0.0058, "step": 44150 }, { "epoch": 20.217391304347824, "grad_norm": 0.013702603988349438, "learning_rate": 9.124999999999999e-07, "loss": 0.0009, "step": 44175 }, { "epoch": 20.22883295194508, "grad_norm": 0.10714434087276459, "learning_rate": 9.085937500000001e-07, "loss": 0.006, "step": 44200 }, { "epoch": 20.240274599542335, "grad_norm": 0.03697048872709274, "learning_rate": 9.046875e-07, "loss": 0.0012, "step": 44225 }, { "epoch": 20.251716247139587, "grad_norm": 0.14525456726551056, "learning_rate": 9.007812500000001e-07, "loss": 0.0017, "step": 44250 }, { "epoch": 20.263157894736842, "grad_norm": 0.035972073674201965, "learning_rate": 8.96875e-07, "loss": 0.0017, "step": 44275 }, { "epoch": 20.274599542334094, "grad_norm": 6.62619161605835, "learning_rate": 8.929687500000001e-07, "loss": 0.0062, "step": 44300 }, { "epoch": 20.28604118993135, "grad_norm": 0.011360355652868748, "learning_rate": 8.890625e-07, "loss": 0.0041, "step": 44325 }, { "epoch": 20.297482837528605, "grad_norm": 2.798712968826294, "learning_rate": 8.8515625e-07, "loss": 0.0055, "step": 44350 }, { "epoch": 20.308924485125857, "grad_norm": 0.016145195811986923, "learning_rate": 
8.812499999999999e-07, "loss": 0.0013, "step": 44375 }, { "epoch": 20.320366132723112, "grad_norm": 0.06629187613725662, "learning_rate": 8.7734375e-07, "loss": 0.002, "step": 44400 }, { "epoch": 20.331807780320368, "grad_norm": 0.0672740787267685, "learning_rate": 8.734375000000002e-07, "loss": 0.0009, "step": 44425 }, { "epoch": 20.34324942791762, "grad_norm": 0.1897992044687271, "learning_rate": 8.695312500000001e-07, "loss": 0.0042, "step": 44450 }, { "epoch": 20.354691075514875, "grad_norm": 0.007385300472378731, "learning_rate": 8.656250000000001e-07, "loss": 0.0014, "step": 44475 }, { "epoch": 20.366132723112127, "grad_norm": 4.40465784072876, "learning_rate": 8.6171875e-07, "loss": 0.0059, "step": 44500 }, { "epoch": 20.377574370709382, "grad_norm": 1.0056349039077759, "learning_rate": 8.578125000000001e-07, "loss": 0.0013, "step": 44525 }, { "epoch": 20.389016018306638, "grad_norm": 0.8912267684936523, "learning_rate": 8.5390625e-07, "loss": 0.0035, "step": 44550 }, { "epoch": 20.40045766590389, "grad_norm": 0.06678758561611176, "learning_rate": 8.500000000000001e-07, "loss": 0.0013, "step": 44575 }, { "epoch": 20.411899313501145, "grad_norm": 0.008592250756919384, "learning_rate": 8.4609375e-07, "loss": 0.0046, "step": 44600 }, { "epoch": 20.423340961098397, "grad_norm": 0.05794551596045494, "learning_rate": 8.421875000000001e-07, "loss": 0.0026, "step": 44625 }, { "epoch": 20.434782608695652, "grad_norm": 0.23960883915424347, "learning_rate": 8.3828125e-07, "loss": 0.0037, "step": 44650 }, { "epoch": 20.446224256292908, "grad_norm": 0.003144345246255398, "learning_rate": 8.343750000000001e-07, "loss": 0.0014, "step": 44675 }, { "epoch": 20.45766590389016, "grad_norm": 0.009685901924967766, "learning_rate": 8.3046875e-07, "loss": 0.0029, "step": 44700 }, { "epoch": 20.469107551487415, "grad_norm": 0.003924787510186434, "learning_rate": 8.265625000000001e-07, "loss": 0.0014, "step": 44725 }, { "epoch": 20.480549199084667, "grad_norm": 2.6838042736053467, "learning_rate": 8.2265625e-07, "loss": 0.0036, "step": 44750 }, { "epoch": 20.491990846681922, "grad_norm": 0.028770767152309418, "learning_rate": 8.187500000000001e-07, "loss": 0.0009, "step": 44775 }, { "epoch": 20.503432494279178, "grad_norm": 4.129835605621338, "learning_rate": 8.1484375e-07, "loss": 0.0053, "step": 44800 }, { "epoch": 20.51487414187643, "grad_norm": 0.04465533420443535, "learning_rate": 8.109375e-07, "loss": 0.0014, "step": 44825 }, { "epoch": 20.526315789473685, "grad_norm": 0.30295035243034363, "learning_rate": 8.070312499999999e-07, "loss": 0.0079, "step": 44850 }, { "epoch": 20.537757437070937, "grad_norm": 0.11164679378271103, "learning_rate": 8.03125e-07, "loss": 0.0027, "step": 44875 }, { "epoch": 20.549199084668192, "grad_norm": 0.44406571984291077, "learning_rate": 7.992187499999999e-07, "loss": 0.0027, "step": 44900 }, { "epoch": 20.560640732265448, "grad_norm": 0.017478272318840027, "learning_rate": 7.953125000000001e-07, "loss": 0.0033, "step": 44925 }, { "epoch": 20.5720823798627, "grad_norm": 0.3167213499546051, "learning_rate": 7.9140625e-07, "loss": 0.003, "step": 44950 }, { "epoch": 20.583524027459955, "grad_norm": 0.007678564637899399, "learning_rate": 7.875000000000001e-07, "loss": 0.0011, "step": 44975 }, { "epoch": 20.594965675057207, "grad_norm": 0.2628444731235504, "learning_rate": 7.8359375e-07, "loss": 0.0011, "step": 45000 }, { "epoch": 20.594965675057207, "eval_loss": 0.1852322667837143, "eval_runtime": 8654.2293, "eval_samples_per_second": 1.1, "eval_steps_per_second": 0.138, 
"eval_wer": 0.07709413195111026, "step": 45000 }, { "epoch": 20.606407322654462, "grad_norm": 0.14695879817008972, "learning_rate": 7.796875e-07, "loss": 0.0014, "step": 45025 }, { "epoch": 20.617848970251718, "grad_norm": 0.7955935597419739, "learning_rate": 7.7578125e-07, "loss": 0.005, "step": 45050 }, { "epoch": 20.62929061784897, "grad_norm": 0.6086607575416565, "learning_rate": 7.71875e-07, "loss": 0.0021, "step": 45075 }, { "epoch": 20.640732265446225, "grad_norm": 0.0721694827079773, "learning_rate": 7.6796875e-07, "loss": 0.0022, "step": 45100 }, { "epoch": 20.652173913043477, "grad_norm": 1.4729714393615723, "learning_rate": 7.640625e-07, "loss": 0.0009, "step": 45125 }, { "epoch": 20.663615560640732, "grad_norm": 0.06502003967761993, "learning_rate": 7.601562500000001e-07, "loss": 0.0023, "step": 45150 }, { "epoch": 20.675057208237988, "grad_norm": 0.026072153821587563, "learning_rate": 7.562500000000001e-07, "loss": 0.0032, "step": 45175 }, { "epoch": 20.68649885583524, "grad_norm": 0.16814200580120087, "learning_rate": 7.5234375e-07, "loss": 0.0017, "step": 45200 }, { "epoch": 20.697940503432495, "grad_norm": 0.058718692511320114, "learning_rate": 7.484375e-07, "loss": 0.0021, "step": 45225 }, { "epoch": 20.709382151029747, "grad_norm": 0.561021089553833, "learning_rate": 7.4453125e-07, "loss": 0.002, "step": 45250 }, { "epoch": 20.720823798627002, "grad_norm": 0.010046838782727718, "learning_rate": 7.40625e-07, "loss": 0.0008, "step": 45275 }, { "epoch": 20.732265446224257, "grad_norm": 0.008695059455931187, "learning_rate": 7.3671875e-07, "loss": 0.0039, "step": 45300 }, { "epoch": 20.74370709382151, "grad_norm": 0.07938475906848907, "learning_rate": 7.328125e-07, "loss": 0.0035, "step": 45325 }, { "epoch": 20.755148741418765, "grad_norm": 0.28721311688423157, "learning_rate": 7.2890625e-07, "loss": 0.0034, "step": 45350 }, { "epoch": 20.766590389016017, "grad_norm": 0.0074570090509951115, "learning_rate": 7.25e-07, "loss": 0.0017, "step": 45375 }, { "epoch": 20.778032036613272, "grad_norm": 2.3674185276031494, "learning_rate": 7.210937500000001e-07, "loss": 0.0027, "step": 45400 }, { "epoch": 20.789473684210527, "grad_norm": 0.35104066133499146, "learning_rate": 7.171875000000001e-07, "loss": 0.0006, "step": 45425 }, { "epoch": 20.80091533180778, "grad_norm": 0.007390952203422785, "learning_rate": 7.132812500000001e-07, "loss": 0.0027, "step": 45450 }, { "epoch": 20.812356979405035, "grad_norm": 0.004167315550148487, "learning_rate": 7.093750000000001e-07, "loss": 0.0015, "step": 45475 }, { "epoch": 20.82379862700229, "grad_norm": 0.17244768142700195, "learning_rate": 7.054687500000001e-07, "loss": 0.0013, "step": 45500 }, { "epoch": 20.835240274599542, "grad_norm": 1.4878789186477661, "learning_rate": 7.015625e-07, "loss": 0.0012, "step": 45525 }, { "epoch": 20.846681922196797, "grad_norm": 0.06206430867314339, "learning_rate": 6.9765625e-07, "loss": 0.0022, "step": 45550 }, { "epoch": 20.85812356979405, "grad_norm": 0.026217838749289513, "learning_rate": 6.9375e-07, "loss": 0.0023, "step": 45575 }, { "epoch": 20.869565217391305, "grad_norm": 0.0358724370598793, "learning_rate": 6.8984375e-07, "loss": 0.0021, "step": 45600 }, { "epoch": 20.88100686498856, "grad_norm": 0.013543572276830673, "learning_rate": 6.859375e-07, "loss": 0.002, "step": 45625 }, { "epoch": 20.892448512585812, "grad_norm": 0.06264956295490265, "learning_rate": 6.820312500000001e-07, "loss": 0.0029, "step": 45650 }, { "epoch": 20.903890160183067, "grad_norm": 0.26473236083984375, "learning_rate": 
6.781250000000001e-07, "loss": 0.0022, "step": 45675 }, { "epoch": 20.91533180778032, "grad_norm": 0.427604615688324, "learning_rate": 6.7421875e-07, "loss": 0.0023, "step": 45700 }, { "epoch": 20.926773455377575, "grad_norm": 0.005621651653200388, "learning_rate": 6.703125e-07, "loss": 0.0013, "step": 45725 }, { "epoch": 20.93821510297483, "grad_norm": 0.7664169073104858, "learning_rate": 6.6640625e-07, "loss": 0.0027, "step": 45750 }, { "epoch": 20.949656750572082, "grad_norm": 0.00759545061737299, "learning_rate": 6.625e-07, "loss": 0.0029, "step": 45775 }, { "epoch": 20.961098398169337, "grad_norm": 4.22957706451416, "learning_rate": 6.5859375e-07, "loss": 0.006, "step": 45800 }, { "epoch": 20.97254004576659, "grad_norm": 0.006260227877646685, "learning_rate": 6.546875e-07, "loss": 0.0013, "step": 45825 }, { "epoch": 20.983981693363845, "grad_norm": 0.10211384296417236, "learning_rate": 6.5078125e-07, "loss": 0.0005, "step": 45850 }, { "epoch": 20.9954233409611, "grad_norm": 0.024102549999952316, "learning_rate": 6.46875e-07, "loss": 0.0012, "step": 45875 }, { "epoch": 21.006864988558352, "grad_norm": 0.012642141431570053, "learning_rate": 6.4296875e-07, "loss": 0.0021, "step": 45900 }, { "epoch": 21.018306636155607, "grad_norm": 0.009443446062505245, "learning_rate": 6.390625e-07, "loss": 0.0011, "step": 45925 }, { "epoch": 21.02974828375286, "grad_norm": 0.01953458972275257, "learning_rate": 6.3515625e-07, "loss": 0.0015, "step": 45950 }, { "epoch": 21.041189931350115, "grad_norm": 0.0077831498347222805, "learning_rate": 6.312500000000001e-07, "loss": 0.0036, "step": 45975 }, { "epoch": 21.05263157894737, "grad_norm": 0.01449274830520153, "learning_rate": 6.273437500000001e-07, "loss": 0.0036, "step": 46000 }, { "epoch": 21.06407322654462, "grad_norm": 0.005314666777849197, "learning_rate": 6.234375e-07, "loss": 0.0017, "step": 46025 }, { "epoch": 21.075514874141877, "grad_norm": 0.10090406984090805, "learning_rate": 6.1953125e-07, "loss": 0.0043, "step": 46050 }, { "epoch": 21.08695652173913, "grad_norm": 0.055214133113622665, "learning_rate": 6.15625e-07, "loss": 0.004, "step": 46075 }, { "epoch": 21.098398169336384, "grad_norm": 0.01377058681100607, "learning_rate": 6.1171875e-07, "loss": 0.0013, "step": 46100 }, { "epoch": 21.10983981693364, "grad_norm": 0.10504399240016937, "learning_rate": 6.078125000000001e-07, "loss": 0.0012, "step": 46125 }, { "epoch": 21.12128146453089, "grad_norm": 0.322175532579422, "learning_rate": 6.039062500000001e-07, "loss": 0.0014, "step": 46150 }, { "epoch": 21.132723112128147, "grad_norm": 0.3188753128051758, "learning_rate": 6.000000000000001e-07, "loss": 0.0008, "step": 46175 }, { "epoch": 21.1441647597254, "grad_norm": 0.022006217390298843, "learning_rate": 5.960937500000001e-07, "loss": 0.005, "step": 46200 }, { "epoch": 21.155606407322654, "grad_norm": 0.003229463240131736, "learning_rate": 5.921875e-07, "loss": 0.0005, "step": 46225 }, { "epoch": 21.16704805491991, "grad_norm": 0.010743632912635803, "learning_rate": 5.8828125e-07, "loss": 0.0014, "step": 46250 }, { "epoch": 21.17848970251716, "grad_norm": 0.009482132270932198, "learning_rate": 5.84375e-07, "loss": 0.0028, "step": 46275 }, { "epoch": 21.189931350114417, "grad_norm": 0.059968553483486176, "learning_rate": 5.8046875e-07, "loss": 0.0014, "step": 46300 }, { "epoch": 21.20137299771167, "grad_norm": 1.5181567668914795, "learning_rate": 5.765625e-07, "loss": 0.0017, "step": 46325 }, { "epoch": 21.212814645308924, "grad_norm": 0.14058434963226318, "learning_rate": 5.7265625e-07, 
"loss": 0.0049, "step": 46350 }, { "epoch": 21.22425629290618, "grad_norm": 0.8637038469314575, "learning_rate": 5.687500000000001e-07, "loss": 0.0012, "step": 46375 }, { "epoch": 21.23569794050343, "grad_norm": 0.010258904658257961, "learning_rate": 5.6484375e-07, "loss": 0.0036, "step": 46400 }, { "epoch": 21.247139588100687, "grad_norm": 1.7938023805618286, "learning_rate": 5.609375e-07, "loss": 0.0013, "step": 46425 }, { "epoch": 21.25858123569794, "grad_norm": 0.8823245763778687, "learning_rate": 5.5703125e-07, "loss": 0.0019, "step": 46450 }, { "epoch": 21.270022883295194, "grad_norm": 0.007951987907290459, "learning_rate": 5.53125e-07, "loss": 0.0009, "step": 46475 }, { "epoch": 21.28146453089245, "grad_norm": 0.2045639455318451, "learning_rate": 5.4921875e-07, "loss": 0.0042, "step": 46500 }, { "epoch": 21.2929061784897, "grad_norm": 0.0039049233309924603, "learning_rate": 5.453125e-07, "loss": 0.001, "step": 46525 }, { "epoch": 21.304347826086957, "grad_norm": 1.208660364151001, "learning_rate": 5.4140625e-07, "loss": 0.0012, "step": 46550 }, { "epoch": 21.31578947368421, "grad_norm": 0.019521431997418404, "learning_rate": 5.374999999999999e-07, "loss": 0.0023, "step": 46575 }, { "epoch": 21.327231121281464, "grad_norm": 1.630520224571228, "learning_rate": 5.3359375e-07, "loss": 0.0039, "step": 46600 }, { "epoch": 21.33867276887872, "grad_norm": 0.061988189816474915, "learning_rate": 5.296875000000001e-07, "loss": 0.0016, "step": 46625 }, { "epoch": 21.35011441647597, "grad_norm": 0.019955461844801903, "learning_rate": 5.257812500000001e-07, "loss": 0.0055, "step": 46650 }, { "epoch": 21.361556064073227, "grad_norm": 0.05144397169351578, "learning_rate": 5.218750000000001e-07, "loss": 0.002, "step": 46675 }, { "epoch": 21.37299771167048, "grad_norm": 0.011034272611141205, "learning_rate": 5.179687500000001e-07, "loss": 0.0015, "step": 46700 }, { "epoch": 21.384439359267734, "grad_norm": 2.798609972000122, "learning_rate": 5.140625e-07, "loss": 0.0029, "step": 46725 }, { "epoch": 21.39588100686499, "grad_norm": 0.014240765944123268, "learning_rate": 5.1015625e-07, "loss": 0.0014, "step": 46750 }, { "epoch": 21.40732265446224, "grad_norm": 0.3318498134613037, "learning_rate": 5.0625e-07, "loss": 0.0017, "step": 46775 }, { "epoch": 21.418764302059497, "grad_norm": 1.4699923992156982, "learning_rate": 5.0234375e-07, "loss": 0.0022, "step": 46800 }, { "epoch": 21.430205949656752, "grad_norm": 0.009622704237699509, "learning_rate": 4.984375e-07, "loss": 0.0021, "step": 46825 }, { "epoch": 21.441647597254004, "grad_norm": 0.009581634774804115, "learning_rate": 4.945312500000001e-07, "loss": 0.0018, "step": 46850 }, { "epoch": 21.45308924485126, "grad_norm": 0.015993405133485794, "learning_rate": 4.906250000000001e-07, "loss": 0.0004, "step": 46875 }, { "epoch": 21.46453089244851, "grad_norm": 0.4137093126773834, "learning_rate": 4.8671875e-07, "loss": 0.0011, "step": 46900 }, { "epoch": 21.475972540045767, "grad_norm": 0.007416998501867056, "learning_rate": 4.828125e-07, "loss": 0.0033, "step": 46925 }, { "epoch": 21.487414187643022, "grad_norm": 3.9601035118103027, "learning_rate": 4.7890625e-07, "loss": 0.0024, "step": 46950 }, { "epoch": 21.498855835240274, "grad_norm": 0.008925327099859715, "learning_rate": 4.75e-07, "loss": 0.0011, "step": 46975 }, { "epoch": 21.51029748283753, "grad_norm": 0.1291629821062088, "learning_rate": 4.7109375e-07, "loss": 0.0014, "step": 47000 }, { "epoch": 21.52173913043478, "grad_norm": 0.010579300113022327, "learning_rate": 4.6718750000000003e-07, 
"loss": 0.0037, "step": 47025 }, { "epoch": 21.533180778032037, "grad_norm": 0.01654091291129589, "learning_rate": 4.6328125e-07, "loss": 0.0024, "step": 47050 }, { "epoch": 21.544622425629292, "grad_norm": 0.006487524602562189, "learning_rate": 4.59375e-07, "loss": 0.0017, "step": 47075 }, { "epoch": 21.556064073226544, "grad_norm": 2.063014030456543, "learning_rate": 4.5546875e-07, "loss": 0.0025, "step": 47100 }, { "epoch": 21.5675057208238, "grad_norm": 0.030676255002617836, "learning_rate": 4.5156249999999996e-07, "loss": 0.0009, "step": 47125 }, { "epoch": 21.57894736842105, "grad_norm": 0.20296336710453033, "learning_rate": 4.4765625e-07, "loss": 0.003, "step": 47150 }, { "epoch": 21.590389016018307, "grad_norm": 0.163761705160141, "learning_rate": 4.4375e-07, "loss": 0.0018, "step": 47175 }, { "epoch": 21.601830663615562, "grad_norm": 0.28630122542381287, "learning_rate": 4.3984375e-07, "loss": 0.0025, "step": 47200 }, { "epoch": 21.613272311212814, "grad_norm": 0.005543631501495838, "learning_rate": 4.3593750000000006e-07, "loss": 0.0028, "step": 47225 }, { "epoch": 21.62471395881007, "grad_norm": 0.016815751791000366, "learning_rate": 4.3203125000000007e-07, "loss": 0.0019, "step": 47250 }, { "epoch": 21.63615560640732, "grad_norm": 3.2668004035949707, "learning_rate": 4.2812500000000003e-07, "loss": 0.0029, "step": 47275 }, { "epoch": 21.647597254004577, "grad_norm": 0.3930434584617615, "learning_rate": 4.2421875000000004e-07, "loss": 0.0016, "step": 47300 }, { "epoch": 21.659038901601832, "grad_norm": 0.01607697270810604, "learning_rate": 4.2031250000000005e-07, "loss": 0.0042, "step": 47325 }, { "epoch": 21.670480549199084, "grad_norm": 0.012362928129732609, "learning_rate": 4.1640625000000006e-07, "loss": 0.0015, "step": 47350 }, { "epoch": 21.68192219679634, "grad_norm": 0.19018390774726868, "learning_rate": 4.125e-07, "loss": 0.0017, "step": 47375 }, { "epoch": 21.69336384439359, "grad_norm": 0.03577955439686775, "learning_rate": 4.0859375000000003e-07, "loss": 0.0013, "step": 47400 }, { "epoch": 21.704805491990847, "grad_norm": 1.102500319480896, "learning_rate": 4.0468750000000004e-07, "loss": 0.0023, "step": 47425 }, { "epoch": 21.716247139588102, "grad_norm": 0.018670858815312386, "learning_rate": 4.0078125000000005e-07, "loss": 0.0004, "step": 47450 }, { "epoch": 21.727688787185354, "grad_norm": 0.012698213569819927, "learning_rate": 3.96875e-07, "loss": 0.002, "step": 47475 }, { "epoch": 21.73913043478261, "grad_norm": 0.01263683382421732, "learning_rate": 3.9296875e-07, "loss": 0.0017, "step": 47500 }, { "epoch": 21.75057208237986, "grad_norm": 2.5224130153656006, "learning_rate": 3.8906250000000003e-07, "loss": 0.0011, "step": 47525 }, { "epoch": 21.762013729977117, "grad_norm": 0.004295944701880217, "learning_rate": 3.853125e-07, "loss": 0.0047, "step": 47550 }, { "epoch": 21.773455377574372, "grad_norm": 0.013171784579753876, "learning_rate": 3.8140625000000003e-07, "loss": 0.003, "step": 47575 }, { "epoch": 21.784897025171624, "grad_norm": 0.003711508121341467, "learning_rate": 3.7750000000000004e-07, "loss": 0.0015, "step": 47600 }, { "epoch": 21.79633867276888, "grad_norm": 0.0017702268669381738, "learning_rate": 3.7359375000000005e-07, "loss": 0.0019, "step": 47625 }, { "epoch": 21.80778032036613, "grad_norm": 0.010418115183711052, "learning_rate": 3.696875e-07, "loss": 0.0021, "step": 47650 }, { "epoch": 21.819221967963387, "grad_norm": 4.765320777893066, "learning_rate": 3.6578125e-07, "loss": 0.0013, "step": 47675 }, { "epoch": 21.830663615560642, 
"grad_norm": 0.008512184955179691, "learning_rate": 3.6187500000000003e-07, "loss": 0.0039, "step": 47700 }, { "epoch": 21.842105263157894, "grad_norm": 0.021925855427980423, "learning_rate": 3.5796875000000004e-07, "loss": 0.0002, "step": 47725 }, { "epoch": 21.85354691075515, "grad_norm": 0.013098538853228092, "learning_rate": 3.540625e-07, "loss": 0.0031, "step": 47750 }, { "epoch": 21.8649885583524, "grad_norm": 0.09969348460435867, "learning_rate": 3.5015625e-07, "loss": 0.0015, "step": 47775 }, { "epoch": 21.876430205949656, "grad_norm": 0.022677918896079063, "learning_rate": 3.4625e-07, "loss": 0.0004, "step": 47800 }, { "epoch": 21.887871853546912, "grad_norm": 0.01546409074217081, "learning_rate": 3.4234375e-07, "loss": 0.0051, "step": 47825 }, { "epoch": 21.899313501144164, "grad_norm": 1.5987523794174194, "learning_rate": 3.384375e-07, "loss": 0.0059, "step": 47850 }, { "epoch": 21.91075514874142, "grad_norm": 0.01283286977559328, "learning_rate": 3.3453125000000005e-07, "loss": 0.0011, "step": 47875 }, { "epoch": 21.922196796338675, "grad_norm": 0.005729961208999157, "learning_rate": 3.30625e-07, "loss": 0.0033, "step": 47900 }, { "epoch": 21.933638443935926, "grad_norm": 0.027057841420173645, "learning_rate": 3.2671875e-07, "loss": 0.0028, "step": 47925 }, { "epoch": 21.945080091533182, "grad_norm": 1.7322996854782104, "learning_rate": 3.2281250000000003e-07, "loss": 0.0065, "step": 47950 }, { "epoch": 21.956521739130434, "grad_norm": 0.036504894495010376, "learning_rate": 3.1890625000000004e-07, "loss": 0.0014, "step": 47975 }, { "epoch": 21.96796338672769, "grad_norm": 0.018779082223773003, "learning_rate": 3.15e-07, "loss": 0.0016, "step": 48000 }, { "epoch": 21.97940503432494, "grad_norm": 0.006957768462598324, "learning_rate": 3.1109375e-07, "loss": 0.0009, "step": 48025 }, { "epoch": 21.990846681922196, "grad_norm": 0.689453125, "learning_rate": 3.071875e-07, "loss": 0.0018, "step": 48050 }, { "epoch": 22.002288329519452, "grad_norm": 0.5969713926315308, "learning_rate": 3.0328125e-07, "loss": 0.0034, "step": 48075 }, { "epoch": 22.013729977116704, "grad_norm": 0.013607698492705822, "learning_rate": 2.99375e-07, "loss": 0.0024, "step": 48100 }, { "epoch": 22.02517162471396, "grad_norm": 0.43130818009376526, "learning_rate": 2.9546875e-07, "loss": 0.0028, "step": 48125 }, { "epoch": 22.036613272311214, "grad_norm": 0.00448969379067421, "learning_rate": 2.915625e-07, "loss": 0.0011, "step": 48150 }, { "epoch": 22.048054919908466, "grad_norm": 0.3551826775074005, "learning_rate": 2.8765625e-07, "loss": 0.0021, "step": 48175 }, { "epoch": 22.05949656750572, "grad_norm": 0.013653366826474667, "learning_rate": 2.8375000000000004e-07, "loss": 0.0006, "step": 48200 }, { "epoch": 22.070938215102974, "grad_norm": 0.6076807379722595, "learning_rate": 2.7984375000000005e-07, "loss": 0.0013, "step": 48225 }, { "epoch": 22.08237986270023, "grad_norm": 0.02086722105741501, "learning_rate": 2.759375e-07, "loss": 0.0017, "step": 48250 }, { "epoch": 22.093821510297484, "grad_norm": 0.26312702894210815, "learning_rate": 2.7203125e-07, "loss": 0.0008, "step": 48275 }, { "epoch": 22.105263157894736, "grad_norm": 0.03195678070187569, "learning_rate": 2.6812500000000003e-07, "loss": 0.0023, "step": 48300 }, { "epoch": 22.11670480549199, "grad_norm": 0.6387341618537903, "learning_rate": 2.6421875000000004e-07, "loss": 0.0009, "step": 48325 }, { "epoch": 22.128146453089244, "grad_norm": 0.007027031853795052, "learning_rate": 2.603125e-07, "loss": 0.001, "step": 48350 }, { "epoch": 
22.1395881006865, "grad_norm": 0.25314944982528687, "learning_rate": 2.5640625e-07, "loss": 0.001, "step": 48375 }, { "epoch": 22.151029748283754, "grad_norm": 0.018648987635970116, "learning_rate": 2.525e-07, "loss": 0.0021, "step": 48400 }, { "epoch": 22.162471395881006, "grad_norm": 0.836776614189148, "learning_rate": 2.4859375e-07, "loss": 0.0065, "step": 48425 }, { "epoch": 22.17391304347826, "grad_norm": 0.03116459771990776, "learning_rate": 2.446875e-07, "loss": 0.0004, "step": 48450 }, { "epoch": 22.185354691075514, "grad_norm": 0.09086679667234421, "learning_rate": 2.4078125e-07, "loss": 0.0026, "step": 48475 }, { "epoch": 22.19679633867277, "grad_norm": 0.012594183906912804, "learning_rate": 2.3687500000000003e-07, "loss": 0.0022, "step": 48500 }, { "epoch": 22.208237986270024, "grad_norm": 0.015545814298093319, "learning_rate": 2.3296875000000002e-07, "loss": 0.006, "step": 48525 }, { "epoch": 22.219679633867276, "grad_norm": 0.10434073954820633, "learning_rate": 2.2906250000000003e-07, "loss": 0.0013, "step": 48550 }, { "epoch": 22.23112128146453, "grad_norm": 0.48425793647766113, "learning_rate": 2.2515625000000001e-07, "loss": 0.0032, "step": 48575 }, { "epoch": 22.242562929061783, "grad_norm": 0.16917268931865692, "learning_rate": 2.2125000000000003e-07, "loss": 0.0023, "step": 48600 }, { "epoch": 22.25400457665904, "grad_norm": 0.14635077118873596, "learning_rate": 2.1734375e-07, "loss": 0.0059, "step": 48625 }, { "epoch": 22.265446224256294, "grad_norm": 0.015277581289410591, "learning_rate": 2.1343750000000002e-07, "loss": 0.0008, "step": 48650 }, { "epoch": 22.276887871853546, "grad_norm": 0.5106106400489807, "learning_rate": 2.0953125e-07, "loss": 0.0006, "step": 48675 }, { "epoch": 22.2883295194508, "grad_norm": 0.0175749771296978, "learning_rate": 2.05625e-07, "loss": 0.0007, "step": 48700 }, { "epoch": 22.299771167048053, "grad_norm": 0.26815265417099, "learning_rate": 2.0171875e-07, "loss": 0.0005, "step": 48725 }, { "epoch": 22.31121281464531, "grad_norm": 0.003872194793075323, "learning_rate": 1.9781249999999999e-07, "loss": 0.0009, "step": 48750 }, { "epoch": 22.322654462242564, "grad_norm": 0.12963524460792542, "learning_rate": 1.9390625000000002e-07, "loss": 0.0048, "step": 48775 }, { "epoch": 22.334096109839816, "grad_norm": 0.015200987458229065, "learning_rate": 1.9e-07, "loss": 0.0019, "step": 48800 }, { "epoch": 22.34553775743707, "grad_norm": 0.07606189697980881, "learning_rate": 1.8609375000000002e-07, "loss": 0.0018, "step": 48825 }, { "epoch": 22.356979405034323, "grad_norm": 0.006143725011497736, "learning_rate": 1.821875e-07, "loss": 0.0004, "step": 48850 }, { "epoch": 22.36842105263158, "grad_norm": 0.423241525888443, "learning_rate": 1.7828125e-07, "loss": 0.0015, "step": 48875 }, { "epoch": 22.379862700228834, "grad_norm": 0.004006936214864254, "learning_rate": 1.7437500000000002e-07, "loss": 0.0013, "step": 48900 }, { "epoch": 22.391304347826086, "grad_norm": 0.8981619477272034, "learning_rate": 1.7046875e-07, "loss": 0.0039, "step": 48925 }, { "epoch": 22.40274599542334, "grad_norm": 0.0719260722398758, "learning_rate": 1.6656250000000002e-07, "loss": 0.0024, "step": 48950 }, { "epoch": 22.414187643020593, "grad_norm": 0.20282389223575592, "learning_rate": 1.6265625e-07, "loss": 0.0021, "step": 48975 }, { "epoch": 22.42562929061785, "grad_norm": 0.016420679166913033, "learning_rate": 1.5875e-07, "loss": 0.0015, "step": 49000 }, { "epoch": 22.437070938215104, "grad_norm": 0.4998544156551361, "learning_rate": 1.5484375e-07, "loss": 0.0021, "step": 
49025 }, { "epoch": 22.448512585812356, "grad_norm": 0.01461728848516941, "learning_rate": 1.509375e-07, "loss": 0.0009, "step": 49050 }, { "epoch": 22.45995423340961, "grad_norm": 0.18887458741664886, "learning_rate": 1.4703125000000002e-07, "loss": 0.0006, "step": 49075 }, { "epoch": 22.471395881006863, "grad_norm": 0.005098717287182808, "learning_rate": 1.43125e-07, "loss": 0.0015, "step": 49100 }, { "epoch": 22.48283752860412, "grad_norm": 1.1259791851043701, "learning_rate": 1.3921875000000002e-07, "loss": 0.0018, "step": 49125 }, { "epoch": 22.494279176201374, "grad_norm": 0.007398266810923815, "learning_rate": 1.353125e-07, "loss": 0.0028, "step": 49150 }, { "epoch": 22.505720823798626, "grad_norm": 0.02189668081700802, "learning_rate": 1.3140624999999999e-07, "loss": 0.0015, "step": 49175 }, { "epoch": 22.51716247139588, "grad_norm": 0.009463733993470669, "learning_rate": 1.2750000000000002e-07, "loss": 0.0013, "step": 49200 }, { "epoch": 22.528604118993137, "grad_norm": 0.33468306064605713, "learning_rate": 1.2359375e-07, "loss": 0.0041, "step": 49225 }, { "epoch": 22.54004576659039, "grad_norm": 0.0073712230660021305, "learning_rate": 1.1968750000000002e-07, "loss": 0.0012, "step": 49250 }, { "epoch": 22.551487414187644, "grad_norm": 0.04863705486059189, "learning_rate": 1.1593750000000001e-07, "loss": 0.0027, "step": 49275 }, { "epoch": 22.562929061784896, "grad_norm": 0.03433695808053017, "learning_rate": 1.1203125e-07, "loss": 0.0021, "step": 49300 }, { "epoch": 22.57437070938215, "grad_norm": 0.33728480339050293, "learning_rate": 1.08125e-07, "loss": 0.0017, "step": 49325 }, { "epoch": 22.585812356979407, "grad_norm": 0.013134556822478771, "learning_rate": 1.0421875e-07, "loss": 0.0012, "step": 49350 }, { "epoch": 22.59725400457666, "grad_norm": 1.8899306058883667, "learning_rate": 1.0031249999999999e-07, "loss": 0.0022, "step": 49375 }, { "epoch": 22.608695652173914, "grad_norm": 0.007395836524665356, "learning_rate": 9.640625e-08, "loss": 0.0019, "step": 49400 }, { "epoch": 22.620137299771166, "grad_norm": 0.5640972852706909, "learning_rate": 9.250000000000001e-08, "loss": 0.0087, "step": 49425 }, { "epoch": 22.63157894736842, "grad_norm": 0.004151779692620039, "learning_rate": 8.859375e-08, "loss": 0.0005, "step": 49450 }, { "epoch": 22.643020594965677, "grad_norm": 0.016862498596310616, "learning_rate": 8.46875e-08, "loss": 0.0036, "step": 49475 }, { "epoch": 22.65446224256293, "grad_norm": 0.005251753143966198, "learning_rate": 8.078125000000001e-08, "loss": 0.0003, "step": 49500 }, { "epoch": 22.665903890160184, "grad_norm": 0.017828907817602158, "learning_rate": 7.6875e-08, "loss": 0.0019, "step": 49525 }, { "epoch": 22.677345537757436, "grad_norm": 0.005758681800216436, "learning_rate": 7.296875e-08, "loss": 0.0013, "step": 49550 }, { "epoch": 22.68878718535469, "grad_norm": 0.5709327459335327, "learning_rate": 6.906250000000001e-08, "loss": 0.0022, "step": 49575 }, { "epoch": 22.700228832951947, "grad_norm": 0.0059375884011387825, "learning_rate": 6.515625e-08, "loss": 0.0013, "step": 49600 }, { "epoch": 22.7116704805492, "grad_norm": 0.40407395362854004, "learning_rate": 6.125e-08, "loss": 0.0018, "step": 49625 }, { "epoch": 22.723112128146454, "grad_norm": 0.020242607221007347, "learning_rate": 5.734375000000001e-08, "loss": 0.0016, "step": 49650 }, { "epoch": 22.734553775743706, "grad_norm": 1.3027188777923584, "learning_rate": 5.3437500000000005e-08, "loss": 0.0024, "step": 49675 }, { "epoch": 22.74599542334096, "grad_norm": 0.013483814895153046, "learning_rate": 
4.953125e-08, "loss": 0.002, "step": 49700 }, { "epoch": 22.757437070938217, "grad_norm": 0.04510790854692459, "learning_rate": 4.5625e-08, "loss": 0.0017, "step": 49725 }, { "epoch": 22.76887871853547, "grad_norm": 0.007019104436039925, "learning_rate": 4.1718750000000004e-08, "loss": 0.0017, "step": 49750 }, { "epoch": 22.780320366132724, "grad_norm": 0.22716917097568512, "learning_rate": 3.78125e-08, "loss": 0.0023, "step": 49775 }, { "epoch": 22.791762013729976, "grad_norm": 0.15175634622573853, "learning_rate": 3.390625e-08, "loss": 0.0007, "step": 49800 }, { "epoch": 22.80320366132723, "grad_norm": 0.03198052570223808, "learning_rate": 3e-08, "loss": 0.0031, "step": 49825 }, { "epoch": 22.814645308924486, "grad_norm": 1.113047480583191, "learning_rate": 2.609375e-08, "loss": 0.0026, "step": 49850 }, { "epoch": 22.82608695652174, "grad_norm": 0.09628685563802719, "learning_rate": 2.2187500000000003e-08, "loss": 0.0016, "step": 49875 }, { "epoch": 22.837528604118994, "grad_norm": 1.8827425241470337, "learning_rate": 1.828125e-08, "loss": 0.0014, "step": 49900 }, { "epoch": 22.848970251716246, "grad_norm": 0.06867147237062454, "learning_rate": 1.4375e-08, "loss": 0.0007, "step": 49925 }, { "epoch": 22.8604118993135, "grad_norm": 0.19449754059314728, "learning_rate": 1.046875e-08, "loss": 0.001, "step": 49950 }, { "epoch": 22.871853546910756, "grad_norm": 0.09240012615919113, "learning_rate": 6.5625e-09, "loss": 0.0023, "step": 49975 }, { "epoch": 22.88329519450801, "grad_norm": 0.10083791613578796, "learning_rate": 2.65625e-09, "loss": 0.0003, "step": 50000 }, { "epoch": 22.88329519450801, "eval_loss": 0.18483442068099976, "eval_runtime": 8938.196, "eval_samples_per_second": 1.065, "eval_steps_per_second": 0.133, "eval_wer": 0.07559032443847187, "step": 50000 } ], "logging_steps": 25, "max_steps": 50000, "num_input_tokens_seen": 0, "num_train_epochs": 23, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7176996653970227e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }