{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.405580947798893, "eval_steps": 4000, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0060139523694972335, "grad_norm": 2.5106329917907715, "learning_rate": 4.99007697859033e-05, "loss": 3.082, "step": 100 }, { "epoch": 0.012027904738994467, "grad_norm": 2.184039831161499, "learning_rate": 4.9800537246411676e-05, "loss": 1.8328, "step": 200 }, { "epoch": 0.0180418571084917, "grad_norm": 2.2055342197418213, "learning_rate": 4.970030470692006e-05, "loss": 1.6836, "step": 300 }, { "epoch": 0.024055809477988934, "grad_norm": 2.0695838928222656, "learning_rate": 4.960007216742843e-05, "loss": 1.6098, "step": 400 }, { "epoch": 0.03006976184748617, "grad_norm": 1.9105968475341797, "learning_rate": 4.949983962793682e-05, "loss": 1.516, "step": 500 }, { "epoch": 0.0360837142169834, "grad_norm": 1.882120132446289, "learning_rate": 4.9399607088445195e-05, "loss": 1.4876, "step": 600 }, { "epoch": 0.042097666586480634, "grad_norm": 1.937444806098938, "learning_rate": 4.9299374548953576e-05, "loss": 1.4493, "step": 700 }, { "epoch": 0.04811161895597787, "grad_norm": 2.1085917949676514, "learning_rate": 4.919914200946195e-05, "loss": 1.4128, "step": 800 }, { "epoch": 0.0541255713254751, "grad_norm": 1.9272162914276123, "learning_rate": 4.909890946997033e-05, "loss": 1.4105, "step": 900 }, { "epoch": 0.06013952369497234, "grad_norm": 1.818624496459961, "learning_rate": 4.899867693047871e-05, "loss": 1.3912, "step": 1000 }, { "epoch": 0.06615347606446957, "grad_norm": 1.644021987915039, "learning_rate": 4.8898444390987094e-05, "loss": 1.3397, "step": 1100 }, { "epoch": 0.0721674284339668, "grad_norm": 1.7671422958374023, "learning_rate": 4.8798211851495476e-05, "loss": 1.3873, "step": 1200 }, { "epoch": 0.07818138080346404, "grad_norm": 2.032087564468384, "learning_rate": 4.869797931200385e-05, "loss": 1.3112, "step": 1300 }, { "epoch": 0.08419533317296127, "grad_norm": 1.7175960540771484, "learning_rate": 4.859774677251223e-05, "loss": 1.3089, "step": 1400 }, { "epoch": 0.09020928554245851, "grad_norm": 1.7282887697219849, "learning_rate": 4.8497514233020606e-05, "loss": 1.2983, "step": 1500 }, { "epoch": 0.09622323791195574, "grad_norm": 1.836075782775879, "learning_rate": 4.839728169352899e-05, "loss": 1.3092, "step": 1600 }, { "epoch": 0.10223719028145298, "grad_norm": 1.931219220161438, "learning_rate": 4.829704915403737e-05, "loss": 1.2528, "step": 1700 }, { "epoch": 0.1082511426509502, "grad_norm": 1.9112814664840698, "learning_rate": 4.819681661454575e-05, "loss": 1.25, "step": 1800 }, { "epoch": 0.11426509502044743, "grad_norm": 2.1694140434265137, "learning_rate": 4.809658407505413e-05, "loss": 1.253, "step": 1900 }, { "epoch": 0.12027904738994467, "grad_norm": 1.9220771789550781, "learning_rate": 4.7996351535562506e-05, "loss": 1.2278, "step": 2000 }, { "epoch": 0.12629299975944192, "grad_norm": 1.7120180130004883, "learning_rate": 4.789611899607089e-05, "loss": 1.2483, "step": 2100 }, { "epoch": 0.13230695212893914, "grad_norm": 1.917104959487915, "learning_rate": 4.779588645657926e-05, "loss": 1.2384, "step": 2200 }, { "epoch": 0.13832090449843637, "grad_norm": 2.0323758125305176, "learning_rate": 4.7695653917087644e-05, "loss": 1.2292, "step": 2300 }, { "epoch": 0.1443348568679336, "grad_norm": 1.779888391494751, "learning_rate": 4.7595421377596025e-05, "loss": 1.2258, "step": 2400 }, { "epoch": 0.15034880923743085, "grad_norm": 1.7667876482009888, "learning_rate": 4.7495188838104406e-05, "loss": 1.1875, "step": 2500 }, { "epoch": 0.15636276160692808, "grad_norm": 1.8692760467529297, "learning_rate": 4.739495629861279e-05, "loss": 1.2082, "step": 2600 }, { "epoch": 0.1623767139764253, "grad_norm": 1.8578811883926392, "learning_rate": 4.729472375912116e-05, "loss": 1.1939, "step": 2700 }, { "epoch": 0.16839066634592254, "grad_norm": 1.8867884874343872, "learning_rate": 4.7194491219629543e-05, "loss": 1.2108, "step": 2800 }, { "epoch": 0.17440461871541976, "grad_norm": 1.72930109500885, "learning_rate": 4.709425868013792e-05, "loss": 1.1943, "step": 2900 }, { "epoch": 0.18041857108491702, "grad_norm": 1.7612700462341309, "learning_rate": 4.69940261406463e-05, "loss": 1.1818, "step": 3000 }, { "epoch": 0.18643252345441425, "grad_norm": 1.6063175201416016, "learning_rate": 4.689379360115468e-05, "loss": 1.1926, "step": 3100 }, { "epoch": 0.19244647582391147, "grad_norm": 1.7698125839233398, "learning_rate": 4.679356106166306e-05, "loss": 1.2029, "step": 3200 }, { "epoch": 0.1984604281934087, "grad_norm": 1.6680766344070435, "learning_rate": 4.6693328522171443e-05, "loss": 1.185, "step": 3300 }, { "epoch": 0.20447438056290596, "grad_norm": 1.7791478633880615, "learning_rate": 4.659309598267982e-05, "loss": 1.1663, "step": 3400 }, { "epoch": 0.21048833293240318, "grad_norm": 1.6491518020629883, "learning_rate": 4.64928634431882e-05, "loss": 1.1492, "step": 3500 }, { "epoch": 0.2165022853019004, "grad_norm": 1.6312377452850342, "learning_rate": 4.6392630903696574e-05, "loss": 1.1696, "step": 3600 }, { "epoch": 0.22251623767139764, "grad_norm": 1.7078893184661865, "learning_rate": 4.6292398364204955e-05, "loss": 1.139, "step": 3700 }, { "epoch": 0.22853019004089486, "grad_norm": 2.0885114669799805, "learning_rate": 4.619216582471334e-05, "loss": 1.1399, "step": 3800 }, { "epoch": 0.23454414241039212, "grad_norm": 1.8433787822723389, "learning_rate": 4.609193328522172e-05, "loss": 1.1376, "step": 3900 }, { "epoch": 0.24055809477988935, "grad_norm": 1.527872085571289, "learning_rate": 4.59917007457301e-05, "loss": 1.1094, "step": 4000 }, { "epoch": 0.24055809477988935, "eval_cer": 0.7748897683360889, "eval_loss": 1.0861139297485352, "eval_runtime": 5003.1551, "eval_samples_per_second": 3.326, "eval_steps_per_second": 0.416, "step": 4000 }, { "epoch": 0.24657204714938658, "grad_norm": 1.4162925481796265, "learning_rate": 4.5891468206238474e-05, "loss": 1.1353, "step": 4100 }, { "epoch": 0.25258599951888383, "grad_norm": 1.5863301753997803, "learning_rate": 4.5791235666746855e-05, "loss": 1.1257, "step": 4200 }, { "epoch": 0.25859995188838103, "grad_norm": 1.6928209066390991, "learning_rate": 4.569100312725523e-05, "loss": 1.1086, "step": 4300 }, { "epoch": 0.2646139042578783, "grad_norm": 1.2880722284317017, "learning_rate": 4.559077058776362e-05, "loss": 1.119, "step": 4400 }, { "epoch": 0.2706278566273755, "grad_norm": 1.6384778022766113, "learning_rate": 4.549053804827199e-05, "loss": 1.1474, "step": 4500 }, { "epoch": 0.27664180899687274, "grad_norm": 1.7176462411880493, "learning_rate": 4.5390305508780374e-05, "loss": 1.0925, "step": 4600 }, { "epoch": 0.28265576136637, "grad_norm": 1.6025973558425903, "learning_rate": 4.529007296928875e-05, "loss": 1.112, "step": 4700 }, { "epoch": 0.2886697137358672, "grad_norm": 1.7415896654129028, "learning_rate": 4.518984042979713e-05, "loss": 1.1218, "step": 4800 }, { "epoch": 0.29468366610536445, "grad_norm": 1.310402750968933, "learning_rate": 4.508960789030551e-05, "loss": 1.0879, "step": 4900 }, { "epoch": 0.3006976184748617, "grad_norm": 1.7458600997924805, "learning_rate": 4.498937535081389e-05, "loss": 1.1088, "step": 5000 }, { "epoch": 0.3067115708443589, "grad_norm": 1.8199197053909302, "learning_rate": 4.4889142811322274e-05, "loss": 1.0987, "step": 5100 }, { "epoch": 0.31272552321385616, "grad_norm": 1.5931302309036255, "learning_rate": 4.478891027183065e-05, "loss": 1.0802, "step": 5200 }, { "epoch": 0.31873947558335336, "grad_norm": 1.5596023797988892, "learning_rate": 4.468867773233903e-05, "loss": 1.1009, "step": 5300 }, { "epoch": 0.3247534279528506, "grad_norm": 1.7147523164749146, "learning_rate": 4.4588445192847404e-05, "loss": 1.1144, "step": 5400 }, { "epoch": 0.33076738032234787, "grad_norm": 1.7172225713729858, "learning_rate": 4.4488212653355786e-05, "loss": 1.0928, "step": 5500 }, { "epoch": 0.33678133269184507, "grad_norm": 1.5675067901611328, "learning_rate": 4.438798011386417e-05, "loss": 1.0421, "step": 5600 }, { "epoch": 0.3427952850613423, "grad_norm": 1.755364179611206, "learning_rate": 4.428774757437255e-05, "loss": 1.0897, "step": 5700 }, { "epoch": 0.3488092374308395, "grad_norm": 1.5678260326385498, "learning_rate": 4.418751503488093e-05, "loss": 1.0703, "step": 5800 }, { "epoch": 0.3548231898003368, "grad_norm": 1.8641386032104492, "learning_rate": 4.4087282495389304e-05, "loss": 1.07, "step": 5900 }, { "epoch": 0.36083714216983404, "grad_norm": 1.571837306022644, "learning_rate": 4.3987049955897686e-05, "loss": 1.0493, "step": 6000 }, { "epoch": 0.36685109453933124, "grad_norm": 1.5046846866607666, "learning_rate": 4.388681741640606e-05, "loss": 1.0935, "step": 6100 }, { "epoch": 0.3728650469088285, "grad_norm": 1.88533353805542, "learning_rate": 4.378658487691444e-05, "loss": 1.0801, "step": 6200 }, { "epoch": 0.3788789992783257, "grad_norm": 1.5626581907272339, "learning_rate": 4.368635233742282e-05, "loss": 1.0442, "step": 6300 }, { "epoch": 0.38489295164782295, "grad_norm": 1.4634822607040405, "learning_rate": 4.3586119797931204e-05, "loss": 1.0552, "step": 6400 }, { "epoch": 0.3909069040173202, "grad_norm": 1.8044239282608032, "learning_rate": 4.3485887258439586e-05, "loss": 1.035, "step": 6500 }, { "epoch": 0.3969208563868174, "grad_norm": 1.6868014335632324, "learning_rate": 4.338565471894796e-05, "loss": 1.0497, "step": 6600 }, { "epoch": 0.40293480875631466, "grad_norm": 1.6949145793914795, "learning_rate": 4.328542217945634e-05, "loss": 1.0369, "step": 6700 }, { "epoch": 0.4089487611258119, "grad_norm": 1.7868014574050903, "learning_rate": 4.3186191965359634e-05, "loss": 1.0385, "step": 6800 }, { "epoch": 0.4149627134953091, "grad_norm": 1.550017237663269, "learning_rate": 4.3085959425868015e-05, "loss": 1.0326, "step": 6900 }, { "epoch": 0.42097666586480637, "grad_norm": 1.431362509727478, "learning_rate": 4.298572688637639e-05, "loss": 1.0154, "step": 7000 }, { "epoch": 0.42699061823430356, "grad_norm": 1.946357011795044, "learning_rate": 4.288549434688478e-05, "loss": 1.0532, "step": 7100 }, { "epoch": 0.4330045706038008, "grad_norm": 1.6147353649139404, "learning_rate": 4.278526180739316e-05, "loss": 1.0633, "step": 7200 }, { "epoch": 0.4390185229732981, "grad_norm": 1.655043363571167, "learning_rate": 4.2685029267901534e-05, "loss": 1.0558, "step": 7300 }, { "epoch": 0.4450324753427953, "grad_norm": 1.6090620756149292, "learning_rate": 4.2584796728409915e-05, "loss": 1.0289, "step": 7400 }, { "epoch": 0.45104642771229253, "grad_norm": 1.8263063430786133, "learning_rate": 4.248456418891829e-05, "loss": 1.0275, "step": 7500 }, { "epoch": 0.45706038008178973, "grad_norm": 1.6156238317489624, "learning_rate": 4.238433164942667e-05, "loss": 0.9976, "step": 7600 }, { "epoch": 0.463074332451287, "grad_norm": 1.4604992866516113, "learning_rate": 4.228409910993505e-05, "loss": 1.0607, "step": 7700 }, { "epoch": 0.46908828482078424, "grad_norm": 1.5745257139205933, "learning_rate": 4.2183866570443434e-05, "loss": 1.0297, "step": 7800 }, { "epoch": 0.47510223719028144, "grad_norm": 1.852102518081665, "learning_rate": 4.2083634030951815e-05, "loss": 1.0173, "step": 7900 }, { "epoch": 0.4811161895597787, "grad_norm": 1.6735659837722778, "learning_rate": 4.198340149146019e-05, "loss": 1.0145, "step": 8000 }, { "epoch": 0.4811161895597787, "eval_cer": 0.7706982683703557, "eval_loss": 0.9832878708839417, "eval_runtime": 5013.5334, "eval_samples_per_second": 3.319, "eval_steps_per_second": 0.415, "step": 8000 }, { "epoch": 0.4871301419292759, "grad_norm": 1.7805702686309814, "learning_rate": 4.188316895196857e-05, "loss": 1.0279, "step": 8100 }, { "epoch": 0.49314409429877315, "grad_norm": 1.4825282096862793, "learning_rate": 4.1782936412476945e-05, "loss": 1.0124, "step": 8200 }, { "epoch": 0.4991580466682704, "grad_norm": 1.5925073623657227, "learning_rate": 4.168270387298533e-05, "loss": 1.0026, "step": 8300 }, { "epoch": 0.5051719990377677, "grad_norm": 1.5965441465377808, "learning_rate": 4.158247133349371e-05, "loss": 1.0238, "step": 8400 }, { "epoch": 0.5111859514072649, "grad_norm": 1.5980632305145264, "learning_rate": 4.148223879400209e-05, "loss": 0.9909, "step": 8500 }, { "epoch": 0.5171999037767621, "grad_norm": 1.7301024198532104, "learning_rate": 4.1382006254510464e-05, "loss": 1.0172, "step": 8600 }, { "epoch": 0.5232138561462594, "grad_norm": 1.6884981393814087, "learning_rate": 4.1281773715018845e-05, "loss": 1.0326, "step": 8700 }, { "epoch": 0.5292278085157566, "grad_norm": 1.6702126264572144, "learning_rate": 4.118154117552723e-05, "loss": 1.016, "step": 8800 }, { "epoch": 0.5352417608852538, "grad_norm": 1.5463213920593262, "learning_rate": 4.108231096143052e-05, "loss": 1.0282, "step": 8900 }, { "epoch": 0.541255713254751, "grad_norm": 1.9977515935897827, "learning_rate": 4.09820784219389e-05, "loss": 1.0027, "step": 9000 }, { "epoch": 0.5472696656242483, "grad_norm": 1.4702314138412476, "learning_rate": 4.088184588244728e-05, "loss": 1.0138, "step": 9100 }, { "epoch": 0.5532836179937455, "grad_norm": 1.5981062650680542, "learning_rate": 4.078161334295566e-05, "loss": 1.0333, "step": 9200 }, { "epoch": 0.5592975703632427, "grad_norm": 1.4159762859344482, "learning_rate": 4.068138080346404e-05, "loss": 0.9956, "step": 9300 }, { "epoch": 0.56531152273274, "grad_norm": 1.4920343160629272, "learning_rate": 4.058114826397242e-05, "loss": 0.9978, "step": 9400 }, { "epoch": 0.5713254751022372, "grad_norm": 1.7335036993026733, "learning_rate": 4.048091572448079e-05, "loss": 1.0337, "step": 9500 }, { "epoch": 0.5773394274717344, "grad_norm": 1.4422550201416016, "learning_rate": 4.0380683184989175e-05, "loss": 1.0185, "step": 9600 }, { "epoch": 0.5833533798412317, "grad_norm": 1.776282787322998, "learning_rate": 4.0280450645497556e-05, "loss": 0.9941, "step": 9700 }, { "epoch": 0.5893673322107289, "grad_norm": 1.806261420249939, "learning_rate": 4.018021810600594e-05, "loss": 0.9772, "step": 9800 }, { "epoch": 0.5953812845802261, "grad_norm": 1.6673755645751953, "learning_rate": 4.007998556651432e-05, "loss": 0.9904, "step": 9900 }, { "epoch": 0.6013952369497234, "grad_norm": 1.5652821063995361, "learning_rate": 3.997975302702269e-05, "loss": 1.0008, "step": 10000 }, { "epoch": 0.6074091893192206, "grad_norm": 1.568590760231018, "learning_rate": 3.9879520487531075e-05, "loss": 1.0402, "step": 10100 }, { "epoch": 0.6134231416887178, "grad_norm": 1.505454659461975, "learning_rate": 3.977928794803945e-05, "loss": 1.0009, "step": 10200 }, { "epoch": 0.619437094058215, "grad_norm": 1.4691787958145142, "learning_rate": 3.967905540854783e-05, "loss": 1.0115, "step": 10300 }, { "epoch": 0.6254510464277123, "grad_norm": 1.5172970294952393, "learning_rate": 3.957882286905621e-05, "loss": 1.0209, "step": 10400 }, { "epoch": 0.6314649987972095, "grad_norm": 1.8390883207321167, "learning_rate": 3.947859032956459e-05, "loss": 1.0101, "step": 10500 }, { "epoch": 0.6374789511667067, "grad_norm": 1.5624459981918335, "learning_rate": 3.9378357790072975e-05, "loss": 1.0085, "step": 10600 }, { "epoch": 0.643492903536204, "grad_norm": 1.4316383600234985, "learning_rate": 3.927812525058135e-05, "loss": 1.0025, "step": 10700 }, { "epoch": 0.6495068559057012, "grad_norm": 1.5541032552719116, "learning_rate": 3.917789271108973e-05, "loss": 0.9938, "step": 10800 }, { "epoch": 0.6555208082751984, "grad_norm": 1.7416138648986816, "learning_rate": 3.907866249699302e-05, "loss": 0.9923, "step": 10900 }, { "epoch": 0.6615347606446957, "grad_norm": 1.7461607456207275, "learning_rate": 3.8978429957501404e-05, "loss": 0.9983, "step": 11000 }, { "epoch": 0.6675487130141929, "grad_norm": 1.5705294609069824, "learning_rate": 3.887819741800978e-05, "loss": 0.9809, "step": 11100 }, { "epoch": 0.6735626653836901, "grad_norm": 1.802960753440857, "learning_rate": 3.877796487851817e-05, "loss": 0.9675, "step": 11200 }, { "epoch": 0.6795766177531873, "grad_norm": 1.70058274269104, "learning_rate": 3.867773233902655e-05, "loss": 0.966, "step": 11300 }, { "epoch": 0.6855905701226847, "grad_norm": 1.5391656160354614, "learning_rate": 3.857749979953492e-05, "loss": 0.9412, "step": 11400 }, { "epoch": 0.6916045224921819, "grad_norm": 1.8226732015609741, "learning_rate": 3.8477267260043304e-05, "loss": 0.96, "step": 11500 }, { "epoch": 0.697618474861679, "grad_norm": 1.3753610849380493, "learning_rate": 3.837703472055168e-05, "loss": 0.9665, "step": 11600 }, { "epoch": 0.7036324272311764, "grad_norm": 1.6595444679260254, "learning_rate": 3.827680218106006e-05, "loss": 0.9851, "step": 11700 }, { "epoch": 0.7096463796006736, "grad_norm": 1.6920074224472046, "learning_rate": 3.817656964156844e-05, "loss": 0.983, "step": 11800 }, { "epoch": 0.7156603319701708, "grad_norm": 1.4369592666625977, "learning_rate": 3.807633710207682e-05, "loss": 0.9418, "step": 11900 }, { "epoch": 0.7216742843396681, "grad_norm": 1.7257956266403198, "learning_rate": 3.7976104562585204e-05, "loss": 0.9877, "step": 12000 }, { "epoch": 0.7216742843396681, "eval_cer": 0.7783561907148705, "eval_loss": 0.9317989349365234, "eval_runtime": 5220.1413, "eval_samples_per_second": 3.188, "eval_steps_per_second": 0.398, "step": 12000 }, { "epoch": 0.7276882367091653, "grad_norm": 1.6520166397094727, "learning_rate": 3.787587202309358e-05, "loss": 0.9415, "step": 12100 }, { "epoch": 0.7337021890786625, "grad_norm": 1.7703155279159546, "learning_rate": 3.777563948360196e-05, "loss": 0.9653, "step": 12200 }, { "epoch": 0.7397161414481598, "grad_norm": 1.4374829530715942, "learning_rate": 3.7675406944110334e-05, "loss": 0.9541, "step": 12300 }, { "epoch": 0.745730093817657, "grad_norm": 1.790893793106079, "learning_rate": 3.7575174404618716e-05, "loss": 0.9951, "step": 12400 }, { "epoch": 0.7517440461871542, "grad_norm": 1.6874678134918213, "learning_rate": 3.74749418651271e-05, "loss": 0.9704, "step": 12500 }, { "epoch": 0.7577579985566514, "grad_norm": 1.7682390213012695, "learning_rate": 3.737470932563548e-05, "loss": 0.9461, "step": 12600 }, { "epoch": 0.7637719509261487, "grad_norm": 1.6911418437957764, "learning_rate": 3.727447678614386e-05, "loss": 0.9952, "step": 12700 }, { "epoch": 0.7697859032956459, "grad_norm": 1.727547526359558, "learning_rate": 3.7174244246652234e-05, "loss": 0.9442, "step": 12800 }, { "epoch": 0.7757998556651431, "grad_norm": 1.4808145761489868, "learning_rate": 3.7074011707160616e-05, "loss": 0.9742, "step": 12900 }, { "epoch": 0.7818138080346404, "grad_norm": 1.876441240310669, "learning_rate": 3.697377916766899e-05, "loss": 0.9766, "step": 13000 }, { "epoch": 0.7878277604041376, "grad_norm": 1.4900315999984741, "learning_rate": 3.687354662817737e-05, "loss": 0.9612, "step": 13100 }, { "epoch": 0.7938417127736348, "grad_norm": 1.4132503271102905, "learning_rate": 3.677331408868575e-05, "loss": 0.9651, "step": 13200 }, { "epoch": 0.7998556651431321, "grad_norm": 1.6306148767471313, "learning_rate": 3.6673081549194134e-05, "loss": 0.9368, "step": 13300 }, { "epoch": 0.8058696175126293, "grad_norm": 1.7364792823791504, "learning_rate": 3.657284900970251e-05, "loss": 0.9427, "step": 13400 }, { "epoch": 0.8118835698821265, "grad_norm": 1.6632161140441895, "learning_rate": 3.647261647021089e-05, "loss": 0.9505, "step": 13500 }, { "epoch": 0.8178975222516238, "grad_norm": 1.5362128019332886, "learning_rate": 3.637238393071927e-05, "loss": 0.9692, "step": 13600 }, { "epoch": 0.823911474621121, "grad_norm": 1.3290611505508423, "learning_rate": 3.6272151391227646e-05, "loss": 0.9585, "step": 13700 }, { "epoch": 0.8299254269906182, "grad_norm": 1.6647266149520874, "learning_rate": 3.617191885173603e-05, "loss": 0.9725, "step": 13800 }, { "epoch": 0.8359393793601154, "grad_norm": 1.736165165901184, "learning_rate": 3.607168631224441e-05, "loss": 0.9611, "step": 13900 }, { "epoch": 0.8419533317296127, "grad_norm": 1.6238832473754883, "learning_rate": 3.597145377275279e-05, "loss": 0.9698, "step": 14000 }, { "epoch": 0.8479672840991099, "grad_norm": 1.741194486618042, "learning_rate": 3.5871221233261165e-05, "loss": 0.9338, "step": 14100 }, { "epoch": 0.8539812364686071, "grad_norm": 1.7224496603012085, "learning_rate": 3.5770988693769546e-05, "loss": 0.9715, "step": 14200 }, { "epoch": 0.8599951888381044, "grad_norm": 1.872253179550171, "learning_rate": 3.5671758479672845e-05, "loss": 0.9656, "step": 14300 }, { "epoch": 0.8660091412076016, "grad_norm": 1.5153071880340576, "learning_rate": 3.557152594018122e-05, "loss": 0.984, "step": 14400 }, { "epoch": 0.8720230935770988, "grad_norm": 1.667662262916565, "learning_rate": 3.54712934006896e-05, "loss": 0.9386, "step": 14500 }, { "epoch": 0.8780370459465962, "grad_norm": 1.7471551895141602, "learning_rate": 3.537106086119798e-05, "loss": 0.9486, "step": 14600 }, { "epoch": 0.8840509983160934, "grad_norm": 1.5477312803268433, "learning_rate": 3.5270828321706364e-05, "loss": 0.9451, "step": 14700 }, { "epoch": 0.8900649506855906, "grad_norm": 1.5079952478408813, "learning_rate": 3.517059578221474e-05, "loss": 0.9381, "step": 14800 }, { "epoch": 0.8960789030550877, "grad_norm": 1.5696821212768555, "learning_rate": 3.507036324272312e-05, "loss": 0.9418, "step": 14900 }, { "epoch": 0.9020928554245851, "grad_norm": 1.552612066268921, "learning_rate": 3.4970130703231494e-05, "loss": 0.9596, "step": 15000 }, { "epoch": 0.9081068077940823, "grad_norm": 1.7802802324295044, "learning_rate": 3.4869898163739876e-05, "loss": 0.9298, "step": 15100 }, { "epoch": 0.9141207601635795, "grad_norm": 1.5589861869812012, "learning_rate": 3.476966562424826e-05, "loss": 0.9621, "step": 15200 }, { "epoch": 0.9201347125330768, "grad_norm": 1.3050284385681152, "learning_rate": 3.466943308475664e-05, "loss": 0.9315, "step": 15300 }, { "epoch": 0.926148664902574, "grad_norm": 1.5091936588287354, "learning_rate": 3.456920054526502e-05, "loss": 0.9107, "step": 15400 }, { "epoch": 0.9321626172720712, "grad_norm": 1.5565592050552368, "learning_rate": 3.4468968005773394e-05, "loss": 0.9552, "step": 15500 }, { "epoch": 0.9381765696415685, "grad_norm": 1.7788596153259277, "learning_rate": 3.4368735466281775e-05, "loss": 0.9459, "step": 15600 }, { "epoch": 0.9441905220110657, "grad_norm": 1.8757565021514893, "learning_rate": 3.426850292679015e-05, "loss": 0.9254, "step": 15700 }, { "epoch": 0.9502044743805629, "grad_norm": 1.6978222131729126, "learning_rate": 3.416827038729853e-05, "loss": 0.9083, "step": 15800 }, { "epoch": 0.9562184267500602, "grad_norm": 1.6449016332626343, "learning_rate": 3.406803784780692e-05, "loss": 0.9767, "step": 15900 }, { "epoch": 0.9622323791195574, "grad_norm": 1.8677603006362915, "learning_rate": 3.3967805308315294e-05, "loss": 0.9469, "step": 16000 }, { "epoch": 0.9622323791195574, "eval_cer": 0.7716098871709525, "eval_loss": 0.8905403017997742, "eval_runtime": 5089.7823, "eval_samples_per_second": 3.269, "eval_steps_per_second": 0.409, "step": 16000 }, { "epoch": 0.9682463314890546, "grad_norm": 1.6178405284881592, "learning_rate": 3.3867572768823675e-05, "loss": 0.9441, "step": 16100 }, { "epoch": 0.9742602838585518, "grad_norm": 1.7407509088516235, "learning_rate": 3.376734022933205e-05, "loss": 0.9264, "step": 16200 }, { "epoch": 0.9802742362280491, "grad_norm": 1.5831618309020996, "learning_rate": 3.366710768984043e-05, "loss": 0.9583, "step": 16300 }, { "epoch": 0.9862881885975463, "grad_norm": 1.6853969097137451, "learning_rate": 3.3566875150348806e-05, "loss": 0.9226, "step": 16400 }, { "epoch": 0.9923021409670435, "grad_norm": 1.8264875411987305, "learning_rate": 3.3466642610857194e-05, "loss": 0.9492, "step": 16500 }, { "epoch": 0.9983160933365408, "grad_norm": 1.5579068660736084, "learning_rate": 3.336641007136557e-05, "loss": 0.8872, "step": 16600 }, { "epoch": 1.004330045706038, "grad_norm": 1.7848278284072876, "learning_rate": 3.326617753187395e-05, "loss": 0.8717, "step": 16700 }, { "epoch": 1.0103439980755353, "grad_norm": 1.5830621719360352, "learning_rate": 3.316694731777725e-05, "loss": 0.9032, "step": 16800 }, { "epoch": 1.0163579504450324, "grad_norm": 1.546217441558838, "learning_rate": 3.3066714778285623e-05, "loss": 0.9418, "step": 16900 }, { "epoch": 1.0223719028145297, "grad_norm": 1.4024384021759033, "learning_rate": 3.2966482238794005e-05, "loss": 0.9075, "step": 17000 }, { "epoch": 1.028385855184027, "grad_norm": 1.6523361206054688, "learning_rate": 3.286624969930238e-05, "loss": 0.9088, "step": 17100 }, { "epoch": 1.0343998075535241, "grad_norm": 1.4941192865371704, "learning_rate": 3.276601715981076e-05, "loss": 0.8703, "step": 17200 }, { "epoch": 1.0404137599230214, "grad_norm": 1.6586402654647827, "learning_rate": 3.266578462031914e-05, "loss": 0.9171, "step": 17300 }, { "epoch": 1.0464277122925187, "grad_norm": 1.5614475011825562, "learning_rate": 3.2565552080827523e-05, "loss": 0.8819, "step": 17400 }, { "epoch": 1.0524416646620158, "grad_norm": 1.6588680744171143, "learning_rate": 3.2465319541335905e-05, "loss": 0.9165, "step": 17500 }, { "epoch": 1.0584556170315131, "grad_norm": 1.4571171998977661, "learning_rate": 3.236508700184428e-05, "loss": 0.9056, "step": 17600 }, { "epoch": 1.0644695694010102, "grad_norm": 1.7484580278396606, "learning_rate": 3.226485446235266e-05, "loss": 0.9072, "step": 17700 }, { "epoch": 1.0704835217705075, "grad_norm": 1.388741374015808, "learning_rate": 3.2164621922861035e-05, "loss": 0.8645, "step": 17800 }, { "epoch": 1.0764974741400049, "grad_norm": 1.5871518850326538, "learning_rate": 3.206438938336942e-05, "loss": 0.908, "step": 17900 }, { "epoch": 1.0825114265095022, "grad_norm": 1.4603219032287598, "learning_rate": 3.19641568438778e-05, "loss": 0.8714, "step": 18000 }, { "epoch": 1.0885253788789993, "grad_norm": 1.443608283996582, "learning_rate": 3.186392430438618e-05, "loss": 0.8879, "step": 18100 }, { "epoch": 1.0945393312484966, "grad_norm": 1.4648326635360718, "learning_rate": 3.1763691764894554e-05, "loss": 0.923, "step": 18200 }, { "epoch": 1.1005532836179936, "grad_norm": 1.9082708358764648, "learning_rate": 3.1663459225402935e-05, "loss": 0.8898, "step": 18300 }, { "epoch": 1.106567235987491, "grad_norm": 1.740161418914795, "learning_rate": 3.1563226685911317e-05, "loss": 0.8913, "step": 18400 }, { "epoch": 1.1125811883569883, "grad_norm": 1.4581352472305298, "learning_rate": 3.146299414641969e-05, "loss": 0.9202, "step": 18500 }, { "epoch": 1.1185951407264854, "grad_norm": 1.5199153423309326, "learning_rate": 3.136276160692808e-05, "loss": 0.9032, "step": 18600 }, { "epoch": 1.1246090930959827, "grad_norm": 1.4630061388015747, "learning_rate": 3.1262529067436454e-05, "loss": 0.8771, "step": 18700 }, { "epoch": 1.13062304546548, "grad_norm": 1.7790659666061401, "learning_rate": 3.1162296527944835e-05, "loss": 0.8673, "step": 18800 }, { "epoch": 1.136636997834977, "grad_norm": 1.610372543334961, "learning_rate": 3.106206398845321e-05, "loss": 0.9019, "step": 18900 }, { "epoch": 1.1426509502044744, "grad_norm": 1.847385048866272, "learning_rate": 3.096183144896159e-05, "loss": 0.8907, "step": 19000 }, { "epoch": 1.1486649025739717, "grad_norm": 1.664432168006897, "learning_rate": 3.086159890946997e-05, "loss": 0.8745, "step": 19100 }, { "epoch": 1.1546788549434688, "grad_norm": 1.5932984352111816, "learning_rate": 3.0761366369978354e-05, "loss": 0.8697, "step": 19200 }, { "epoch": 1.160692807312966, "grad_norm": 1.4511469602584839, "learning_rate": 3.0662136155881646e-05, "loss": 0.8748, "step": 19300 }, { "epoch": 1.1667067596824634, "grad_norm": 1.9143450260162354, "learning_rate": 3.056190361639003e-05, "loss": 0.9045, "step": 19400 }, { "epoch": 1.1727207120519605, "grad_norm": 1.3927329778671265, "learning_rate": 3.046167107689841e-05, "loss": 0.8927, "step": 19500 }, { "epoch": 1.1787346644214578, "grad_norm": 1.8178434371948242, "learning_rate": 3.0361438537406783e-05, "loss": 0.9258, "step": 19600 }, { "epoch": 1.1847486167909551, "grad_norm": 1.6006417274475098, "learning_rate": 3.0261205997915165e-05, "loss": 0.886, "step": 19700 }, { "epoch": 1.1907625691604522, "grad_norm": 1.6698856353759766, "learning_rate": 3.0160973458423543e-05, "loss": 0.9188, "step": 19800 }, { "epoch": 1.1967765215299495, "grad_norm": 1.5293818712234497, "learning_rate": 3.0060740918931924e-05, "loss": 0.896, "step": 19900 }, { "epoch": 1.2027904738994466, "grad_norm": 1.6966098546981812, "learning_rate": 2.9960508379440305e-05, "loss": 0.8677, "step": 20000 }, { "epoch": 1.2027904738994466, "eval_cer": 0.7689350483770306, "eval_loss": 0.8620118498802185, "eval_runtime": 5134.4813, "eval_samples_per_second": 3.241, "eval_steps_per_second": 0.405, "step": 20000 }, { "epoch": 1.208804426268944, "grad_norm": 1.6066211462020874, "learning_rate": 2.9860275839948683e-05, "loss": 0.8648, "step": 20100 }, { "epoch": 1.2148183786384412, "grad_norm": 1.4755396842956543, "learning_rate": 2.9760043300457065e-05, "loss": 0.8683, "step": 20200 }, { "epoch": 1.2208323310079385, "grad_norm": 1.3547738790512085, "learning_rate": 2.965981076096544e-05, "loss": 0.8786, "step": 20300 }, { "epoch": 1.2268462833774356, "grad_norm": 1.6254231929779053, "learning_rate": 2.955957822147382e-05, "loss": 0.8758, "step": 20400 }, { "epoch": 1.232860235746933, "grad_norm": 1.5725833177566528, "learning_rate": 2.94593456819822e-05, "loss": 0.8831, "step": 20500 }, { "epoch": 1.23887418811643, "grad_norm": 1.6321443319320679, "learning_rate": 2.935911314249058e-05, "loss": 0.868, "step": 20600 }, { "epoch": 1.2448881404859273, "grad_norm": 1.544110894203186, "learning_rate": 2.925888060299896e-05, "loss": 0.9033, "step": 20700 }, { "epoch": 1.2509020928554246, "grad_norm": 1.5690948963165283, "learning_rate": 2.915864806350734e-05, "loss": 0.8736, "step": 20800 }, { "epoch": 1.256916045224922, "grad_norm": 1.3950625658035278, "learning_rate": 2.905841552401572e-05, "loss": 0.9012, "step": 20900 }, { "epoch": 1.262929997594419, "grad_norm": 1.4699276685714722, "learning_rate": 2.8958182984524095e-05, "loss": 0.8545, "step": 21000 }, { "epoch": 1.2689439499639164, "grad_norm": 1.607750654220581, "learning_rate": 2.8857950445032476e-05, "loss": 0.9101, "step": 21100 }, { "epoch": 1.2749579023334134, "grad_norm": 2.1910347938537598, "learning_rate": 2.8757717905540854e-05, "loss": 0.8803, "step": 21200 }, { "epoch": 1.2809718547029108, "grad_norm": 1.604390025138855, "learning_rate": 2.8657485366049236e-05, "loss": 0.8751, "step": 21300 }, { "epoch": 1.286985807072408, "grad_norm": 1.5971423387527466, "learning_rate": 2.8557252826557617e-05, "loss": 0.8543, "step": 21400 }, { "epoch": 1.2929997594419051, "grad_norm": 1.6325972080230713, "learning_rate": 2.8457020287065995e-05, "loss": 0.8612, "step": 21500 }, { "epoch": 1.2990137118114025, "grad_norm": 1.7952935695648193, "learning_rate": 2.8356787747574376e-05, "loss": 0.8399, "step": 21600 }, { "epoch": 1.3050276641808995, "grad_norm": 1.683236002922058, "learning_rate": 2.825655520808275e-05, "loss": 0.8585, "step": 21700 }, { "epoch": 1.3110416165503969, "grad_norm": 1.630666971206665, "learning_rate": 2.8156322668591136e-05, "loss": 0.8891, "step": 21800 }, { "epoch": 1.3170555689198942, "grad_norm": 1.7404346466064453, "learning_rate": 2.805609012909951e-05, "loss": 0.8514, "step": 21900 }, { "epoch": 1.3230695212893915, "grad_norm": 1.4314298629760742, "learning_rate": 2.795585758960789e-05, "loss": 0.8561, "step": 22000 }, { "epoch": 1.3290834736588886, "grad_norm": 1.7691779136657715, "learning_rate": 2.785562505011627e-05, "loss": 0.8474, "step": 22100 }, { "epoch": 1.3350974260283859, "grad_norm": 1.7155267000198364, "learning_rate": 2.775539251062465e-05, "loss": 0.8289, "step": 22200 }, { "epoch": 1.341111378397883, "grad_norm": 1.7087023258209229, "learning_rate": 2.7655159971133032e-05, "loss": 0.8544, "step": 22300 }, { "epoch": 1.3471253307673803, "grad_norm": 1.617749571800232, "learning_rate": 2.755492743164141e-05, "loss": 0.8781, "step": 22400 }, { "epoch": 1.3531392831368776, "grad_norm": 1.6493247747421265, "learning_rate": 2.745469489214979e-05, "loss": 0.8392, "step": 22500 }, { "epoch": 1.359153235506375, "grad_norm": 1.809634804725647, "learning_rate": 2.7354462352658166e-05, "loss": 0.8721, "step": 22600 }, { "epoch": 1.365167187875872, "grad_norm": 1.3698049783706665, "learning_rate": 2.7254229813166547e-05, "loss": 0.8533, "step": 22700 }, { "epoch": 1.3711811402453693, "grad_norm": 1.7568131685256958, "learning_rate": 2.7153997273674925e-05, "loss": 0.886, "step": 22800 }, { "epoch": 1.3771950926148664, "grad_norm": 1.867412805557251, "learning_rate": 2.7053764734183307e-05, "loss": 0.8637, "step": 22900 }, { "epoch": 1.3832090449843637, "grad_norm": 2.0730977058410645, "learning_rate": 2.6953532194691688e-05, "loss": 0.8626, "step": 23000 }, { "epoch": 1.389222997353861, "grad_norm": 1.8011558055877686, "learning_rate": 2.6853299655200066e-05, "loss": 0.8784, "step": 23100 }, { "epoch": 1.3952369497233583, "grad_norm": 1.6936458349227905, "learning_rate": 2.6753067115708447e-05, "loss": 0.8634, "step": 23200 }, { "epoch": 1.4012509020928554, "grad_norm": 1.7492289543151855, "learning_rate": 2.665383690161174e-05, "loss": 0.8678, "step": 23300 }, { "epoch": 1.4072648544623527, "grad_norm": 1.8972880840301514, "learning_rate": 2.655360436212012e-05, "loss": 0.8939, "step": 23400 }, { "epoch": 1.4132788068318498, "grad_norm": 1.6961406469345093, "learning_rate": 2.64533718226285e-05, "loss": 0.8727, "step": 23500 }, { "epoch": 1.4192927592013471, "grad_norm": 1.583854079246521, "learning_rate": 2.635313928313688e-05, "loss": 0.8332, "step": 23600 }, { "epoch": 1.4253067115708444, "grad_norm": 1.6541253328323364, "learning_rate": 2.6252906743645255e-05, "loss": 0.8798, "step": 23700 }, { "epoch": 1.4313206639403415, "grad_norm": 1.7607979774475098, "learning_rate": 2.6152674204153636e-05, "loss": 0.8472, "step": 23800 }, { "epoch": 1.4373346163098388, "grad_norm": 1.5591400861740112, "learning_rate": 2.605244166466202e-05, "loss": 0.8355, "step": 23900 }, { "epoch": 1.4433485686793361, "grad_norm": 1.4700669050216675, "learning_rate": 2.5952209125170395e-05, "loss": 0.8521, "step": 24000 }, { "epoch": 1.4433485686793361, "eval_cer": 0.7597912847133936, "eval_loss": 0.8321590423583984, "eval_runtime": 5084.7673, "eval_samples_per_second": 3.273, "eval_steps_per_second": 0.409, "step": 24000 }, { "epoch": 1.4493625210488332, "grad_norm": 1.4155552387237549, "learning_rate": 2.5851976585678777e-05, "loss": 0.8797, "step": 24100 }, { "epoch": 1.4553764734183305, "grad_norm": 1.4764596223831177, "learning_rate": 2.5751744046187155e-05, "loss": 0.8657, "step": 24200 }, { "epoch": 1.4613904257878279, "grad_norm": 1.463333010673523, "learning_rate": 2.5651511506695536e-05, "loss": 0.8746, "step": 24300 }, { "epoch": 1.467404378157325, "grad_norm": 1.5392202138900757, "learning_rate": 2.555127896720391e-05, "loss": 0.8512, "step": 24400 }, { "epoch": 1.4734183305268223, "grad_norm": 1.8480241298675537, "learning_rate": 2.5451046427712295e-05, "loss": 0.8562, "step": 24500 }, { "epoch": 1.4794322828963193, "grad_norm": 1.7533873319625854, "learning_rate": 2.5350813888220677e-05, "loss": 0.8534, "step": 24600 }, { "epoch": 1.4854462352658167, "grad_norm": 1.6647679805755615, "learning_rate": 2.525058134872905e-05, "loss": 0.8335, "step": 24700 }, { "epoch": 1.491460187635314, "grad_norm": 1.8899763822555542, "learning_rate": 2.5150348809237433e-05, "loss": 0.8486, "step": 24800 }, { "epoch": 1.4974741400048113, "grad_norm": 1.7569955587387085, "learning_rate": 2.505011626974581e-05, "loss": 0.8415, "step": 24900 }, { "epoch": 1.5034880923743084, "grad_norm": 1.5141854286193848, "learning_rate": 2.4949883730254192e-05, "loss": 0.8107, "step": 25000 }, { "epoch": 1.5095020447438057, "grad_norm": 1.8239057064056396, "learning_rate": 2.484965119076257e-05, "loss": 0.8841, "step": 25100 }, { "epoch": 1.5155159971133028, "grad_norm": 1.433118224143982, "learning_rate": 2.474941865127095e-05, "loss": 0.8408, "step": 25200 }, { "epoch": 1.5215299494828, "grad_norm": 1.6874032020568848, "learning_rate": 2.4650188437174247e-05, "loss": 0.8387, "step": 25300 }, { "epoch": 1.5275439018522974, "grad_norm": 1.4172905683517456, "learning_rate": 2.4549955897682625e-05, "loss": 0.8357, "step": 25400 }, { "epoch": 1.5335578542217947, "grad_norm": 1.4848599433898926, "learning_rate": 2.4449723358191003e-05, "loss": 0.8451, "step": 25500 }, { "epoch": 1.5395718065912918, "grad_norm": 1.4670342206954956, "learning_rate": 2.4349490818699384e-05, "loss": 0.8505, "step": 25600 }, { "epoch": 1.5455857589607889, "grad_norm": 1.5914552211761475, "learning_rate": 2.4249258279207762e-05, "loss": 0.8635, "step": 25700 }, { "epoch": 1.5515997113302862, "grad_norm": 1.5550841093063354, "learning_rate": 2.414902573971614e-05, "loss": 0.8763, "step": 25800 }, { "epoch": 1.5576136636997835, "grad_norm": 1.5907316207885742, "learning_rate": 2.4048793200224525e-05, "loss": 0.8664, "step": 25900 }, { "epoch": 1.5636276160692808, "grad_norm": 1.4494388103485107, "learning_rate": 2.3948560660732903e-05, "loss": 0.819, "step": 26000 }, { "epoch": 1.5696415684387781, "grad_norm": 1.599004864692688, "learning_rate": 2.384832812124128e-05, "loss": 0.8363, "step": 26100 }, { "epoch": 1.5756555208082752, "grad_norm": 1.887817621231079, "learning_rate": 2.3748095581749662e-05, "loss": 0.8845, "step": 26200 }, { "epoch": 1.5816694731777723, "grad_norm": 1.3124005794525146, "learning_rate": 2.364786304225804e-05, "loss": 0.858, "step": 26300 }, { "epoch": 1.5876834255472696, "grad_norm": 1.6560554504394531, "learning_rate": 2.3547630502766418e-05, "loss": 0.8553, "step": 26400 }, { "epoch": 1.593697377916767, "grad_norm": 1.5678675174713135, "learning_rate": 2.34473979632748e-05, "loss": 0.8488, "step": 26500 }, { "epoch": 1.5997113302862642, "grad_norm": 1.4168376922607422, "learning_rate": 2.334716542378318e-05, "loss": 0.8263, "step": 26600 }, { "epoch": 1.6057252826557615, "grad_norm": 1.6189205646514893, "learning_rate": 2.324693288429156e-05, "loss": 0.8489, "step": 26700 }, { "epoch": 1.6117392350252586, "grad_norm": 1.7498302459716797, "learning_rate": 2.3146700344799936e-05, "loss": 0.8069, "step": 26800 }, { "epoch": 1.6177531873947557, "grad_norm": 1.5609160661697388, "learning_rate": 2.3046467805308318e-05, "loss": 0.8337, "step": 26900 }, { "epoch": 1.623767139764253, "grad_norm": 1.7673338651657104, "learning_rate": 2.2946235265816696e-05, "loss": 0.8264, "step": 27000 }, { "epoch": 1.6297810921337503, "grad_norm": 1.593299150466919, "learning_rate": 2.2846002726325074e-05, "loss": 0.828, "step": 27100 }, { "epoch": 1.6357950445032476, "grad_norm": 1.5802645683288574, "learning_rate": 2.274677251222837e-05, "loss": 0.8428, "step": 27200 }, { "epoch": 1.6418089968727447, "grad_norm": 1.7584878206253052, "learning_rate": 2.264653997273675e-05, "loss": 0.8378, "step": 27300 }, { "epoch": 1.647822949242242, "grad_norm": 1.5360692739486694, "learning_rate": 2.254630743324513e-05, "loss": 0.8288, "step": 27400 }, { "epoch": 1.6538369016117391, "grad_norm": 1.5635976791381836, "learning_rate": 2.244607489375351e-05, "loss": 0.8208, "step": 27500 }, { "epoch": 1.6598508539812364, "grad_norm": 1.778735876083374, "learning_rate": 2.2345842354261888e-05, "loss": 0.847, "step": 27600 }, { "epoch": 1.6658648063507338, "grad_norm": 1.5961335897445679, "learning_rate": 2.224560981477027e-05, "loss": 0.8059, "step": 27700 }, { "epoch": 1.671878758720231, "grad_norm": 1.396517038345337, "learning_rate": 2.2145377275278647e-05, "loss": 0.8406, "step": 27800 }, { "epoch": 1.6778927110897282, "grad_norm": 1.554319977760315, "learning_rate": 2.2045144735787025e-05, "loss": 0.836, "step": 27900 }, { "epoch": 1.6839066634592252, "grad_norm": 1.5663318634033203, "learning_rate": 2.1944912196295406e-05, "loss": 0.8386, "step": 28000 }, { "epoch": 1.6839066634592252, "eval_cer": 0.7604478286805494, "eval_loss": 0.8103429079055786, "eval_runtime": 5152.8312, "eval_samples_per_second": 3.229, "eval_steps_per_second": 0.404, "step": 28000 }, { "epoch": 1.6899206158287225, "grad_norm": 1.7991820573806763, "learning_rate": 2.1844679656803784e-05, "loss": 0.8428, "step": 28100 }, { "epoch": 1.6959345681982199, "grad_norm": 1.6566849946975708, "learning_rate": 2.1744447117312162e-05, "loss": 0.8331, "step": 28200 }, { "epoch": 1.7019485205677172, "grad_norm": 1.650564432144165, "learning_rate": 2.1644214577820547e-05, "loss": 0.8379, "step": 28300 }, { "epoch": 1.7079624729372145, "grad_norm": 1.6355592012405396, "learning_rate": 2.1543982038328925e-05, "loss": 0.8576, "step": 28400 }, { "epoch": 1.7139764253067116, "grad_norm": 1.7112095355987549, "learning_rate": 2.1443749498837303e-05, "loss": 0.8173, "step": 28500 }, { "epoch": 1.7199903776762087, "grad_norm": 1.7781462669372559, "learning_rate": 2.1343516959345684e-05, "loss": 0.8292, "step": 28600 }, { "epoch": 1.726004330045706, "grad_norm": 1.708770513534546, "learning_rate": 2.1243284419854062e-05, "loss": 0.8312, "step": 28700 }, { "epoch": 1.7320182824152033, "grad_norm": 1.869710087776184, "learning_rate": 2.114305188036244e-05, "loss": 0.8529, "step": 28800 }, { "epoch": 1.7380322347847006, "grad_norm": 1.4506940841674805, "learning_rate": 2.104281934087082e-05, "loss": 0.8454, "step": 28900 }, { "epoch": 1.744046187154198, "grad_norm": 1.5264636278152466, "learning_rate": 2.0942586801379203e-05, "loss": 0.8281, "step": 29000 }, { "epoch": 1.750060139523695, "grad_norm": 1.9614264965057373, "learning_rate": 2.084235426188758e-05, "loss": 0.8328, "step": 29100 }, { "epoch": 1.756074091893192, "grad_norm": 1.4223591089248657, "learning_rate": 2.074212172239596e-05, "loss": 0.8142, "step": 29200 }, { "epoch": 1.7620880442626894, "grad_norm": 1.6863304376602173, "learning_rate": 2.064188918290434e-05, "loss": 0.8306, "step": 29300 }, { "epoch": 1.7681019966321867, "grad_norm": 1.5096672773361206, "learning_rate": 2.0541656643412718e-05, "loss": 0.7964, "step": 29400 }, { "epoch": 1.774115949001684, "grad_norm": 1.9181997776031494, "learning_rate": 2.0441424103921096e-05, "loss": 0.8221, "step": 29500 }, { "epoch": 1.780129901371181, "grad_norm": 1.9824730157852173, "learning_rate": 2.0341191564429478e-05, "loss": 0.8379, "step": 29600 }, { "epoch": 1.7861438537406784, "grad_norm": 1.4306327104568481, "learning_rate": 2.0240959024937855e-05, "loss": 0.805, "step": 29700 }, { "epoch": 1.7921578061101755, "grad_norm": 1.6249910593032837, "learning_rate": 2.0140726485446237e-05, "loss": 0.8145, "step": 29800 }, { "epoch": 1.7981717584796728, "grad_norm": 1.551161766052246, "learning_rate": 2.0040493945954618e-05, "loss": 0.8436, "step": 29900 }, { "epoch": 1.8041857108491701, "grad_norm": 1.5218690633773804, "learning_rate": 1.9940261406462996e-05, "loss": 0.8181, "step": 30000 }, { "epoch": 1.8101996632186674, "grad_norm": 1.5932899713516235, "learning_rate": 1.9840028866971374e-05, "loss": 0.8478, "step": 30100 }, { "epoch": 1.8162136155881645, "grad_norm": 1.4991642236709595, "learning_rate": 1.9739796327479752e-05, "loss": 0.8254, "step": 30200 }, { "epoch": 1.8222275679576616, "grad_norm": 1.647438883781433, "learning_rate": 1.9639563787988133e-05, "loss": 0.8342, "step": 30300 }, { "epoch": 1.828241520327159, "grad_norm": 1.6653351783752441, "learning_rate": 1.953933124849651e-05, "loss": 0.8249, "step": 30400 }, { "epoch": 1.8342554726966562, "grad_norm": 1.6969921588897705, "learning_rate": 1.943909870900489e-05, "loss": 0.8544, "step": 30500 }, { "epoch": 1.8402694250661535, "grad_norm": 1.7201420068740845, "learning_rate": 1.9338866169513274e-05, "loss": 0.8323, "step": 30600 }, { "epoch": 1.8462833774356509, "grad_norm": 1.7479013204574585, "learning_rate": 1.9238633630021652e-05, "loss": 0.8307, "step": 30700 }, { "epoch": 1.852297329805148, "grad_norm": 1.6966118812561035, "learning_rate": 1.913840109053003e-05, "loss": 0.8291, "step": 30800 }, { "epoch": 1.858311282174645, "grad_norm": 1.6381275653839111, "learning_rate": 1.903816855103841e-05, "loss": 0.8121, "step": 30900 }, { "epoch": 1.8643252345441423, "grad_norm": 1.4601831436157227, "learning_rate": 1.893793601154679e-05, "loss": 0.8174, "step": 31000 }, { "epoch": 1.8703391869136397, "grad_norm": 1.8310879468917847, "learning_rate": 1.8837703472055167e-05, "loss": 0.8023, "step": 31100 }, { "epoch": 1.876353139283137, "grad_norm": 1.4689913988113403, "learning_rate": 1.8738473257958463e-05, "loss": 0.8035, "step": 31200 }, { "epoch": 1.8823670916526343, "grad_norm": 1.4330099821090698, "learning_rate": 1.8638240718466844e-05, "loss": 0.8185, "step": 31300 }, { "epoch": 1.8883810440221314, "grad_norm": 1.8419737815856934, "learning_rate": 1.8538008178975225e-05, "loss": 0.8144, "step": 31400 }, { "epoch": 1.8943949963916284, "grad_norm": 1.2664531469345093, "learning_rate": 1.8437775639483603e-05, "loss": 0.816, "step": 31500 }, { "epoch": 1.9004089487611258, "grad_norm": 1.6704432964324951, "learning_rate": 1.833754309999198e-05, "loss": 0.817, "step": 31600 }, { "epoch": 1.906422901130623, "grad_norm": 1.7487777471542358, "learning_rate": 1.8237310560500363e-05, "loss": 0.8041, "step": 31700 }, { "epoch": 1.9124368535001204, "grad_norm": 1.2405571937561035, "learning_rate": 1.813707802100874e-05, "loss": 0.825, "step": 31800 }, { "epoch": 1.9184508058696175, "grad_norm": 1.543731689453125, "learning_rate": 1.803684548151712e-05, "loss": 0.8147, "step": 31900 }, { "epoch": 1.9244647582391148, "grad_norm": 1.5452948808670044, "learning_rate": 1.79366129420255e-05, "loss": 0.7973, "step": 32000 }, { "epoch": 1.9244647582391148, "eval_cer": 0.7489049827996835, "eval_loss": 0.7830217480659485, "eval_runtime": 5092.4712, "eval_samples_per_second": 3.268, "eval_steps_per_second": 0.408, "step": 32000 }, { "epoch": 1.9304787106086119, "grad_norm": 1.640507459640503, "learning_rate": 1.7836380402533878e-05, "loss": 0.831, "step": 32100 }, { "epoch": 1.9364926629781092, "grad_norm": 1.919505000114441, "learning_rate": 1.773614786304226e-05, "loss": 0.8216, "step": 32200 }, { "epoch": 1.9425066153476065, "grad_norm": 1.5534350872039795, "learning_rate": 1.763591532355064e-05, "loss": 0.8157, "step": 32300 }, { "epoch": 1.9485205677171038, "grad_norm": 1.5943905115127563, "learning_rate": 1.753568278405902e-05, "loss": 0.8026, "step": 32400 }, { "epoch": 1.954534520086601, "grad_norm": 1.788720965385437, "learning_rate": 1.7435450244567397e-05, "loss": 0.7798, "step": 32500 }, { "epoch": 1.9605484724560982, "grad_norm": 1.707412600517273, "learning_rate": 1.7335217705075778e-05, "loss": 0.8125, "step": 32600 }, { "epoch": 1.9665624248255953, "grad_norm": 1.6318702697753906, "learning_rate": 1.7234985165584156e-05, "loss": 0.8184, "step": 32700 }, { "epoch": 1.9725763771950926, "grad_norm": 1.6191486120224, "learning_rate": 1.7134752626092534e-05, "loss": 0.8056, "step": 32800 }, { "epoch": 1.97859032956459, "grad_norm": 1.6070526838302612, "learning_rate": 1.7034520086600915e-05, "loss": 0.8134, "step": 32900 }, { "epoch": 1.9846042819340872, "grad_norm": 1.3369784355163574, "learning_rate": 1.6934287547109297e-05, "loss": 0.8086, "step": 33000 }, { "epoch": 1.9906182343035843, "grad_norm": 1.7080943584442139, "learning_rate": 1.6834055007617674e-05, "loss": 0.8304, "step": 33100 }, { "epoch": 1.9966321866730814, "grad_norm": 1.3051142692565918, "learning_rate": 1.673482479352097e-05, "loss": 0.778, "step": 33200 }, { "epoch": 2.0026461390425787, "grad_norm": 1.9086428880691528, "learning_rate": 1.6634592254029348e-05, "loss": 0.7972, "step": 33300 }, { "epoch": 2.008660091412076, "grad_norm": 1.6656805276870728, "learning_rate": 1.653435971453773e-05, "loss": 0.7701, "step": 33400 }, { "epoch": 2.0146740437815733, "grad_norm": 1.5769175291061401, "learning_rate": 1.6434127175046107e-05, "loss": 0.7602, "step": 33500 }, { "epoch": 2.0206879961510706, "grad_norm": 1.6363704204559326, "learning_rate": 1.6333894635554485e-05, "loss": 0.7826, "step": 33600 }, { "epoch": 2.0267019485205675, "grad_norm": 1.784424066543579, "learning_rate": 1.6233662096062867e-05, "loss": 0.7805, "step": 33700 }, { "epoch": 2.032715900890065, "grad_norm": 1.5795265436172485, "learning_rate": 1.6133429556571248e-05, "loss": 0.7289, "step": 33800 }, { "epoch": 2.038729853259562, "grad_norm": 1.382318377494812, "learning_rate": 1.6033197017079626e-05, "loss": 0.7869, "step": 33900 }, { "epoch": 2.0447438056290594, "grad_norm": 2.0357506275177, "learning_rate": 1.5932964477588004e-05, "loss": 0.7477, "step": 34000 }, { "epoch": 2.0507577579985568, "grad_norm": 1.6945205926895142, "learning_rate": 1.5832731938096385e-05, "loss": 0.7582, "step": 34100 }, { "epoch": 2.056771710368054, "grad_norm": 1.4343385696411133, "learning_rate": 1.5732499398604763e-05, "loss": 0.7536, "step": 34200 }, { "epoch": 2.062785662737551, "grad_norm": 1.7783600091934204, "learning_rate": 1.563226685911314e-05, "loss": 0.8331, "step": 34300 }, { "epoch": 2.0687996151070482, "grad_norm": 1.8440674543380737, "learning_rate": 1.5532034319621522e-05, "loss": 0.7352, "step": 34400 }, { "epoch": 2.0748135674765456, "grad_norm": 1.8098615407943726, "learning_rate": 1.54318017801299e-05, "loss": 0.7951, "step": 34500 }, { "epoch": 2.080827519846043, "grad_norm": 1.521584391593933, "learning_rate": 1.5331569240638282e-05, "loss": 0.7684, "step": 34600 }, { "epoch": 2.08684147221554, "grad_norm": 1.738142490386963, "learning_rate": 1.5231336701146661e-05, "loss": 0.7589, "step": 34700 }, { "epoch": 2.0928554245850375, "grad_norm": 1.5031851530075073, "learning_rate": 1.5131104161655041e-05, "loss": 0.7508, "step": 34800 }, { "epoch": 2.0988693769545343, "grad_norm": 1.8703136444091797, "learning_rate": 1.503087162216342e-05, "loss": 0.803, "step": 34900 }, { "epoch": 2.1048833293240317, "grad_norm": 1.6643913984298706, "learning_rate": 1.4930639082671799e-05, "loss": 0.7689, "step": 35000 }, { "epoch": 2.110897281693529, "grad_norm": 1.4073503017425537, "learning_rate": 1.4830406543180178e-05, "loss": 0.7575, "step": 35100 }, { "epoch": 2.1169112340630263, "grad_norm": 1.6007989645004272, "learning_rate": 1.4731176329083474e-05, "loss": 0.7317, "step": 35200 }, { "epoch": 2.1229251864325236, "grad_norm": 1.7965283393859863, "learning_rate": 1.4630943789591854e-05, "loss": 0.7142, "step": 35300 }, { "epoch": 2.1289391388020205, "grad_norm": 1.3655446767807007, "learning_rate": 1.4530711250100235e-05, "loss": 0.7613, "step": 35400 }, { "epoch": 2.1349530911715178, "grad_norm": 1.779159426689148, "learning_rate": 1.4430478710608613e-05, "loss": 0.7827, "step": 35500 }, { "epoch": 2.140967043541015, "grad_norm": 1.9253307580947876, "learning_rate": 1.4330246171116993e-05, "loss": 0.7899, "step": 35600 }, { "epoch": 2.1469809959105124, "grad_norm": 1.3449054956436157, "learning_rate": 1.4230013631625372e-05, "loss": 0.7494, "step": 35700 }, { "epoch": 2.1529949482800097, "grad_norm": 1.9044090509414673, "learning_rate": 1.412978109213375e-05, "loss": 0.7714, "step": 35800 }, { "epoch": 2.159008900649507, "grad_norm": 1.674017071723938, "learning_rate": 1.402954855264213e-05, "loss": 0.7478, "step": 35900 }, { "epoch": 2.1650228530190043, "grad_norm": 1.8109982013702393, "learning_rate": 1.392931601315051e-05, "loss": 0.7541, "step": 36000 }, { "epoch": 2.1650228530190043, "eval_cer": 0.7396310458108724, "eval_loss": 0.7524659633636475, "eval_runtime": 5154.2928, "eval_samples_per_second": 3.228, "eval_steps_per_second": 0.404, "step": 36000 }, { "epoch": 2.171036805388501, "grad_norm": 1.316389560699463, "learning_rate": 1.3829083473658887e-05, "loss": 0.7647, "step": 36100 }, { "epoch": 2.1770507577579985, "grad_norm": 1.7067075967788696, "learning_rate": 1.372885093416727e-05, "loss": 0.7635, "step": 36200 }, { "epoch": 2.183064710127496, "grad_norm": 1.8793973922729492, "learning_rate": 1.3628618394675648e-05, "loss": 0.7313, "step": 36300 }, { "epoch": 2.189078662496993, "grad_norm": 1.769338607788086, "learning_rate": 1.3528385855184028e-05, "loss": 0.7573, "step": 36400 }, { "epoch": 2.1950926148664904, "grad_norm": 1.6032990217208862, "learning_rate": 1.3428153315692408e-05, "loss": 0.76, "step": 36500 }, { "epoch": 2.2011065672359873, "grad_norm": 1.5864907503128052, "learning_rate": 1.3327920776200786e-05, "loss": 0.7587, "step": 36600 }, { "epoch": 2.2071205196054846, "grad_norm": 1.2785674333572388, "learning_rate": 1.3227688236709165e-05, "loss": 0.7403, "step": 36700 }, { "epoch": 2.213134471974982, "grad_norm": 1.4437572956085205, "learning_rate": 1.3127455697217545e-05, "loss": 0.7358, "step": 36800 }, { "epoch": 2.2191484243444792, "grad_norm": 1.8562610149383545, "learning_rate": 1.3027223157725923e-05, "loss": 0.7429, "step": 36900 }, { "epoch": 2.2251623767139765, "grad_norm": 1.6878858804702759, "learning_rate": 1.2926990618234306e-05, "loss": 0.7526, "step": 37000 }, { "epoch": 2.231176329083474, "grad_norm": 1.9118757247924805, "learning_rate": 1.2826758078742684e-05, "loss": 0.7513, "step": 37100 }, { "epoch": 2.2371902814529707, "grad_norm": 1.3607146739959717, "learning_rate": 1.272752786464598e-05, "loss": 0.747, "step": 37200 }, { "epoch": 2.243204233822468, "grad_norm": 1.8414541482925415, "learning_rate": 1.2627295325154359e-05, "loss": 0.757, "step": 37300 }, { "epoch": 2.2492181861919653, "grad_norm": 1.5014030933380127, "learning_rate": 1.2527062785662739e-05, "loss": 0.7704, "step": 37400 }, { "epoch": 2.2552321385614627, "grad_norm": 1.850203514099121, "learning_rate": 1.2426830246171117e-05, "loss": 0.7532, "step": 37500 }, { "epoch": 2.26124609093096, "grad_norm": 1.9308381080627441, "learning_rate": 1.2326597706679498e-05, "loss": 0.7658, "step": 37600 }, { "epoch": 2.267260043300457, "grad_norm": 1.8409243822097778, "learning_rate": 1.2226365167187876e-05, "loss": 0.7307, "step": 37700 }, { "epoch": 2.273273995669954, "grad_norm": 1.7760223150253296, "learning_rate": 1.2126132627696256e-05, "loss": 0.7548, "step": 37800 }, { "epoch": 2.2792879480394515, "grad_norm": 1.4862762689590454, "learning_rate": 1.2025900088204635e-05, "loss": 0.7196, "step": 37900 }, { "epoch": 2.2853019004089488, "grad_norm": 1.6604909896850586, "learning_rate": 1.1925667548713015e-05, "loss": 0.7306, "step": 38000 }, { "epoch": 2.291315852778446, "grad_norm": 1.6279034614562988, "learning_rate": 1.1825435009221395e-05, "loss": 0.7683, "step": 38100 }, { "epoch": 2.2973298051479434, "grad_norm": 1.5816621780395508, "learning_rate": 1.1725202469729774e-05, "loss": 0.723, "step": 38200 }, { "epoch": 2.3033437575174407, "grad_norm": 1.7849699258804321, "learning_rate": 1.1624969930238152e-05, "loss": 0.7831, "step": 38300 }, { "epoch": 2.3093577098869376, "grad_norm": 1.851671814918518, "learning_rate": 1.1524737390746534e-05, "loss": 0.7162, "step": 38400 }, { "epoch": 2.315371662256435, "grad_norm": 1.5026549100875854, "learning_rate": 1.1424504851254912e-05, "loss": 0.7803, "step": 38500 }, { "epoch": 2.321385614625932, "grad_norm": 1.6620761156082153, "learning_rate": 1.1324272311763291e-05, "loss": 0.733, "step": 38600 }, { "epoch": 2.3273995669954295, "grad_norm": 1.2507511377334595, "learning_rate": 1.1224039772271671e-05, "loss": 0.733, "step": 38700 }, { "epoch": 2.333413519364927, "grad_norm": 1.938541293144226, "learning_rate": 1.112380723278005e-05, "loss": 0.7499, "step": 38800 }, { "epoch": 2.3394274717344237, "grad_norm": 1.796823263168335, "learning_rate": 1.102357469328843e-05, "loss": 0.7399, "step": 38900 }, { "epoch": 2.345441424103921, "grad_norm": 1.834004521369934, "learning_rate": 1.0923342153796808e-05, "loss": 0.7322, "step": 39000 }, { "epoch": 2.3514553764734183, "grad_norm": 1.7822822332382202, "learning_rate": 1.0823109614305188e-05, "loss": 0.7315, "step": 39100 }, { "epoch": 2.3574693288429156, "grad_norm": 1.7883449792861938, "learning_rate": 1.0722877074813569e-05, "loss": 0.7205, "step": 39200 }, { "epoch": 2.363483281212413, "grad_norm": 1.966545581817627, "learning_rate": 1.0623646860716865e-05, "loss": 0.768, "step": 39300 }, { "epoch": 2.3694972335819102, "grad_norm": 1.722288966178894, "learning_rate": 1.0523414321225243e-05, "loss": 0.7554, "step": 39400 }, { "epoch": 2.375511185951407, "grad_norm": 1.7346769571304321, "learning_rate": 1.0423181781733622e-05, "loss": 0.7158, "step": 39500 }, { "epoch": 2.3815251383209044, "grad_norm": 1.603703498840332, "learning_rate": 1.0322949242242002e-05, "loss": 0.7467, "step": 39600 }, { "epoch": 2.3875390906904017, "grad_norm": 1.8487290143966675, "learning_rate": 1.0222716702750382e-05, "loss": 0.7492, "step": 39700 }, { "epoch": 2.393553043059899, "grad_norm": 1.7814853191375732, "learning_rate": 1.0122484163258761e-05, "loss": 0.7153, "step": 39800 }, { "epoch": 2.3995669954293963, "grad_norm": 1.745309591293335, "learning_rate": 1.002225162376714e-05, "loss": 0.7417, "step": 39900 }, { "epoch": 2.405580947798893, "grad_norm": 1.8568013906478882, "learning_rate": 9.92201908427552e-06, "loss": 0.7176, "step": 40000 }, { "epoch": 2.405580947798893, "eval_cer": 0.7346033545787555, "eval_loss": 0.7291049957275391, "eval_runtime": 5180.4578, "eval_samples_per_second": 3.212, "eval_steps_per_second": 0.402, "step": 40000 } ], "logging_steps": 100, "max_steps": 49884, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 4000, "total_flos": 4.154444709101568e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }