diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,14433 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6899028305872412, + "eval_steps": 100, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0008449514152936206, + "grad_norm": 1.3914175033569336, + "learning_rate": 4.99991550485847e-05, + "loss": 0.0856, + "step": 2 + }, + { + "epoch": 0.0016899028305872412, + "grad_norm": 1.3959804773330688, + "learning_rate": 4.999831009716942e-05, + "loss": 0.0982, + "step": 4 + }, + { + "epoch": 0.0025348542458808617, + "grad_norm": 1.4670644998550415, + "learning_rate": 4.999746514575412e-05, + "loss": 0.0918, + "step": 6 + }, + { + "epoch": 0.0033798056611744824, + "grad_norm": 2.241302728652954, + "learning_rate": 4.999662019433883e-05, + "loss": 0.1214, + "step": 8 + }, + { + "epoch": 0.004224757076468103, + "grad_norm": 2.0347797870635986, + "learning_rate": 4.999577524292353e-05, + "loss": 0.1111, + "step": 10 + }, + { + "epoch": 0.005069708491761723, + "grad_norm": 1.1951134204864502, + "learning_rate": 4.999493029150824e-05, + "loss": 0.0817, + "step": 12 + }, + { + "epoch": 0.005914659907055344, + "grad_norm": 1.2443560361862183, + "learning_rate": 4.9994085340092946e-05, + "loss": 0.0903, + "step": 14 + }, + { + "epoch": 0.006759611322348965, + "grad_norm": 1.4616291522979736, + "learning_rate": 4.999324038867765e-05, + "loss": 0.0968, + "step": 16 + }, + { + "epoch": 0.0076045627376425855, + "grad_norm": 1.999237298965454, + "learning_rate": 4.999239543726236e-05, + "loss": 0.1221, + "step": 18 + }, + { + "epoch": 0.008449514152936205, + "grad_norm": 1.4877630472183228, + "learning_rate": 4.999155048584707e-05, + "loss": 0.1137, + "step": 20 + }, + { + "epoch": 0.009294465568229826, + "grad_norm": 1.4436724185943604, + "learning_rate": 4.999070553443177e-05, + "loss": 0.1129, + "step": 22 + }, + { + "epoch": 0.010139416983523447, + "grad_norm": 2.1934313774108887, + "learning_rate": 4.998986058301648e-05, + "loss": 0.1052, + "step": 24 + }, + { + "epoch": 0.010984368398817067, + "grad_norm": 1.8377941846847534, + "learning_rate": 4.998901563160118e-05, + "loss": 0.1381, + "step": 26 + }, + { + "epoch": 0.011829319814110688, + "grad_norm": 2.0095016956329346, + "learning_rate": 4.998817068018589e-05, + "loss": 0.0852, + "step": 28 + }, + { + "epoch": 0.012674271229404309, + "grad_norm": 3.2389862537384033, + "learning_rate": 4.99873257287706e-05, + "loss": 0.1161, + "step": 30 + }, + { + "epoch": 0.01351922264469793, + "grad_norm": 2.737915277481079, + "learning_rate": 4.9986480777355304e-05, + "loss": 0.1713, + "step": 32 + }, + { + "epoch": 0.01436417405999155, + "grad_norm": 1.6804777383804321, + "learning_rate": 4.998563582594001e-05, + "loss": 0.1234, + "step": 34 + }, + { + "epoch": 0.015209125475285171, + "grad_norm": 1.8924976587295532, + "learning_rate": 4.998479087452472e-05, + "loss": 0.0826, + "step": 36 + }, + { + "epoch": 0.01605407689057879, + "grad_norm": 1.8899160623550415, + "learning_rate": 4.998394592310942e-05, + "loss": 0.1116, + "step": 38 + }, + { + "epoch": 0.01689902830587241, + "grad_norm": 1.7245818376541138, + "learning_rate": 4.998310097169413e-05, + "loss": 0.0863, + "step": 40 + }, + { + "epoch": 0.017743979721166033, + "grad_norm": 1.1223626136779785, + "learning_rate": 4.998225602027883e-05, + "loss": 0.0823, + "step": 42 + }, + { + "epoch": 0.018588931136459652, + "grad_norm": 2.4463398456573486, + "learning_rate": 4.998141106886354e-05, + "loss": 0.1465, + "step": 44 + }, + { + "epoch": 0.019433882551753275, + "grad_norm": 1.4499152898788452, + "learning_rate": 4.998056611744825e-05, + "loss": 0.1043, + "step": 46 + }, + { + "epoch": 0.020278833967046894, + "grad_norm": 2.1130013465881348, + "learning_rate": 4.9979721166032955e-05, + "loss": 0.153, + "step": 48 + }, + { + "epoch": 0.021123785382340516, + "grad_norm": 1.4567383527755737, + "learning_rate": 4.997887621461766e-05, + "loss": 0.1481, + "step": 50 + }, + { + "epoch": 0.021968736797634135, + "grad_norm": 1.4559706449508667, + "learning_rate": 4.997803126320237e-05, + "loss": 0.0989, + "step": 52 + }, + { + "epoch": 0.022813688212927757, + "grad_norm": 1.9101276397705078, + "learning_rate": 4.997718631178707e-05, + "loss": 0.1458, + "step": 54 + }, + { + "epoch": 0.023658639628221376, + "grad_norm": 1.50411856174469, + "learning_rate": 4.9976341360371784e-05, + "loss": 0.0991, + "step": 56 + }, + { + "epoch": 0.024503591043515, + "grad_norm": 1.0242832899093628, + "learning_rate": 4.9975496408956484e-05, + "loss": 0.0766, + "step": 58 + }, + { + "epoch": 0.025348542458808618, + "grad_norm": 2.2057089805603027, + "learning_rate": 4.997465145754119e-05, + "loss": 0.1176, + "step": 60 + }, + { + "epoch": 0.02619349387410224, + "grad_norm": 1.3259694576263428, + "learning_rate": 4.99738065061259e-05, + "loss": 0.1631, + "step": 62 + }, + { + "epoch": 0.02703844528939586, + "grad_norm": 1.2836776971817017, + "learning_rate": 4.9972961554710606e-05, + "loss": 0.08, + "step": 64 + }, + { + "epoch": 0.02788339670468948, + "grad_norm": 1.250314712524414, + "learning_rate": 4.997211660329531e-05, + "loss": 0.0944, + "step": 66 + }, + { + "epoch": 0.0287283481199831, + "grad_norm": 1.753688097000122, + "learning_rate": 4.997127165188002e-05, + "loss": 0.133, + "step": 68 + }, + { + "epoch": 0.029573299535276723, + "grad_norm": 1.5159249305725098, + "learning_rate": 4.997042670046472e-05, + "loss": 0.0975, + "step": 70 + }, + { + "epoch": 0.030418250950570342, + "grad_norm": 1.78278386592865, + "learning_rate": 4.9969581749049434e-05, + "loss": 0.1093, + "step": 72 + }, + { + "epoch": 0.03126320236586396, + "grad_norm": 1.725652813911438, + "learning_rate": 4.9968736797634135e-05, + "loss": 0.1074, + "step": 74 + }, + { + "epoch": 0.03210815378115758, + "grad_norm": 1.6478575468063354, + "learning_rate": 4.996789184621884e-05, + "loss": 0.1279, + "step": 76 + }, + { + "epoch": 0.032953105196451206, + "grad_norm": 1.2018022537231445, + "learning_rate": 4.996704689480355e-05, + "loss": 0.0749, + "step": 78 + }, + { + "epoch": 0.03379805661174482, + "grad_norm": 0.8992096185684204, + "learning_rate": 4.9966201943388256e-05, + "loss": 0.0656, + "step": 80 + }, + { + "epoch": 0.034643008027038444, + "grad_norm": 2.0089211463928223, + "learning_rate": 4.9965356991972964e-05, + "loss": 0.1403, + "step": 82 + }, + { + "epoch": 0.035487959442332066, + "grad_norm": 1.9890964031219482, + "learning_rate": 4.996451204055767e-05, + "loss": 0.0917, + "step": 84 + }, + { + "epoch": 0.03633291085762569, + "grad_norm": 1.6564279794692993, + "learning_rate": 4.996366708914237e-05, + "loss": 0.127, + "step": 86 + }, + { + "epoch": 0.037177862272919304, + "grad_norm": 2.2462258338928223, + "learning_rate": 4.9962822137727085e-05, + "loss": 0.1866, + "step": 88 + }, + { + "epoch": 0.03802281368821293, + "grad_norm": 2.442894697189331, + "learning_rate": 4.9961977186311786e-05, + "loss": 0.096, + "step": 90 + }, + { + "epoch": 0.03886776510350655, + "grad_norm": 1.7473067045211792, + "learning_rate": 4.996113223489649e-05, + "loss": 0.1065, + "step": 92 + }, + { + "epoch": 0.03971271651880017, + "grad_norm": 1.4837480783462524, + "learning_rate": 4.99602872834812e-05, + "loss": 0.0878, + "step": 94 + }, + { + "epoch": 0.04055766793409379, + "grad_norm": 0.9949487447738647, + "learning_rate": 4.995944233206591e-05, + "loss": 0.0795, + "step": 96 + }, + { + "epoch": 0.04140261934938741, + "grad_norm": 1.6431764364242554, + "learning_rate": 4.9958597380650614e-05, + "loss": 0.1184, + "step": 98 + }, + { + "epoch": 0.04224757076468103, + "grad_norm": 1.0905040502548218, + "learning_rate": 4.995775242923532e-05, + "loss": 0.086, + "step": 100 + }, + { + "epoch": 0.04224757076468103, + "eval_accuracy": 0.6362303845490602, + "eval_cer": 0.11421438464320686, + "eval_loss": 0.22910405695438385, + "eval_runtime": 887.866, + "eval_samples_per_second": 13.063, + "eval_steps_per_second": 0.409, + "step": 100 + }, + { + "epoch": 0.043092522179974654, + "grad_norm": 1.3749598264694214, + "learning_rate": 4.995690747782002e-05, + "loss": 0.1197, + "step": 102 + }, + { + "epoch": 0.04393747359526827, + "grad_norm": 1.572817325592041, + "learning_rate": 4.9956062526404736e-05, + "loss": 0.0924, + "step": 104 + }, + { + "epoch": 0.04478242501056189, + "grad_norm": 1.5053339004516602, + "learning_rate": 4.9955217574989436e-05, + "loss": 0.0926, + "step": 106 + }, + { + "epoch": 0.045627376425855515, + "grad_norm": 1.816696047782898, + "learning_rate": 4.995437262357415e-05, + "loss": 0.143, + "step": 108 + }, + { + "epoch": 0.04647232784114914, + "grad_norm": 1.8459056615829468, + "learning_rate": 4.995352767215885e-05, + "loss": 0.1229, + "step": 110 + }, + { + "epoch": 0.04731727925644275, + "grad_norm": 1.3267372846603394, + "learning_rate": 4.995268272074356e-05, + "loss": 0.0874, + "step": 112 + }, + { + "epoch": 0.048162230671736375, + "grad_norm": 1.7917542457580566, + "learning_rate": 4.9951837769328265e-05, + "loss": 0.1485, + "step": 114 + }, + { + "epoch": 0.04900718208703, + "grad_norm": 1.7345539331436157, + "learning_rate": 4.995099281791297e-05, + "loss": 0.09, + "step": 116 + }, + { + "epoch": 0.04985213350232361, + "grad_norm": 1.5779085159301758, + "learning_rate": 4.995014786649768e-05, + "loss": 0.1262, + "step": 118 + }, + { + "epoch": 0.050697084917617236, + "grad_norm": 1.391013503074646, + "learning_rate": 4.994930291508239e-05, + "loss": 0.1145, + "step": 120 + }, + { + "epoch": 0.05154203633291086, + "grad_norm": 1.6034536361694336, + "learning_rate": 4.994845796366709e-05, + "loss": 0.0881, + "step": 122 + }, + { + "epoch": 0.05238698774820448, + "grad_norm": 1.5294889211654663, + "learning_rate": 4.99476130122518e-05, + "loss": 0.0793, + "step": 124 + }, + { + "epoch": 0.053231939163498096, + "grad_norm": 1.197351336479187, + "learning_rate": 4.99467680608365e-05, + "loss": 0.091, + "step": 126 + }, + { + "epoch": 0.05407689057879172, + "grad_norm": 1.323849081993103, + "learning_rate": 4.994592310942121e-05, + "loss": 0.0888, + "step": 128 + }, + { + "epoch": 0.05492184199408534, + "grad_norm": 1.7171475887298584, + "learning_rate": 4.9945078158005916e-05, + "loss": 0.1436, + "step": 130 + }, + { + "epoch": 0.05576679340937896, + "grad_norm": 1.7138563394546509, + "learning_rate": 4.994423320659062e-05, + "loss": 0.09, + "step": 132 + }, + { + "epoch": 0.05661174482467258, + "grad_norm": 1.6496665477752686, + "learning_rate": 4.994338825517533e-05, + "loss": 0.1415, + "step": 134 + }, + { + "epoch": 0.0574566962399662, + "grad_norm": 1.3270729780197144, + "learning_rate": 4.994254330376004e-05, + "loss": 0.1073, + "step": 136 + }, + { + "epoch": 0.058301647655259824, + "grad_norm": 1.2084906101226807, + "learning_rate": 4.994169835234474e-05, + "loss": 0.0861, + "step": 138 + }, + { + "epoch": 0.059146599070553446, + "grad_norm": 1.885323405265808, + "learning_rate": 4.994085340092945e-05, + "loss": 0.138, + "step": 140 + }, + { + "epoch": 0.05999155048584706, + "grad_norm": 1.3070194721221924, + "learning_rate": 4.994000844951415e-05, + "loss": 0.0725, + "step": 142 + }, + { + "epoch": 0.060836501901140684, + "grad_norm": 1.494089961051941, + "learning_rate": 4.993916349809886e-05, + "loss": 0.1076, + "step": 144 + }, + { + "epoch": 0.061681453316434307, + "grad_norm": 1.5598512887954712, + "learning_rate": 4.993831854668357e-05, + "loss": 0.1068, + "step": 146 + }, + { + "epoch": 0.06252640473172792, + "grad_norm": 2.4986348152160645, + "learning_rate": 4.9937473595268274e-05, + "loss": 0.1439, + "step": 148 + }, + { + "epoch": 0.06337135614702155, + "grad_norm": 1.6942882537841797, + "learning_rate": 4.993662864385298e-05, + "loss": 0.0694, + "step": 150 + }, + { + "epoch": 0.06421630756231517, + "grad_norm": 1.8440594673156738, + "learning_rate": 4.993578369243769e-05, + "loss": 0.1387, + "step": 152 + }, + { + "epoch": 0.06506125897760878, + "grad_norm": 1.647815227508545, + "learning_rate": 4.993493874102239e-05, + "loss": 0.0997, + "step": 154 + }, + { + "epoch": 0.06590621039290241, + "grad_norm": 1.5283992290496826, + "learning_rate": 4.99340937896071e-05, + "loss": 0.0803, + "step": 156 + }, + { + "epoch": 0.06675116180819603, + "grad_norm": 1.7466899156570435, + "learning_rate": 4.99332488381918e-05, + "loss": 0.1309, + "step": 158 + }, + { + "epoch": 0.06759611322348964, + "grad_norm": 1.4426119327545166, + "learning_rate": 4.993240388677651e-05, + "loss": 0.107, + "step": 160 + }, + { + "epoch": 0.06844106463878327, + "grad_norm": 1.6664427518844604, + "learning_rate": 4.993155893536122e-05, + "loss": 0.1763, + "step": 162 + }, + { + "epoch": 0.06928601605407689, + "grad_norm": 1.8783625364303589, + "learning_rate": 4.9930713983945925e-05, + "loss": 0.0942, + "step": 164 + }, + { + "epoch": 0.07013096746937052, + "grad_norm": 1.3738420009613037, + "learning_rate": 4.992986903253063e-05, + "loss": 0.1128, + "step": 166 + }, + { + "epoch": 0.07097591888466413, + "grad_norm": 1.2888435125350952, + "learning_rate": 4.992902408111534e-05, + "loss": 0.08, + "step": 168 + }, + { + "epoch": 0.07182087029995775, + "grad_norm": 0.921699047088623, + "learning_rate": 4.992817912970004e-05, + "loss": 0.1028, + "step": 170 + }, + { + "epoch": 0.07266582171525138, + "grad_norm": 1.8012775182724, + "learning_rate": 4.9927334178284754e-05, + "loss": 0.1012, + "step": 172 + }, + { + "epoch": 0.07351077313054499, + "grad_norm": 1.061102271080017, + "learning_rate": 4.9926489226869454e-05, + "loss": 0.1341, + "step": 174 + }, + { + "epoch": 0.07435572454583861, + "grad_norm": 1.1755284070968628, + "learning_rate": 4.992564427545416e-05, + "loss": 0.0944, + "step": 176 + }, + { + "epoch": 0.07520067596113224, + "grad_norm": 1.4010897874832153, + "learning_rate": 4.992479932403887e-05, + "loss": 0.0736, + "step": 178 + }, + { + "epoch": 0.07604562737642585, + "grad_norm": 1.391518235206604, + "learning_rate": 4.9923954372623576e-05, + "loss": 0.0622, + "step": 180 + }, + { + "epoch": 0.07689057879171948, + "grad_norm": 1.2992569208145142, + "learning_rate": 4.992310942120828e-05, + "loss": 0.0982, + "step": 182 + }, + { + "epoch": 0.0777355302070131, + "grad_norm": 1.4223698377609253, + "learning_rate": 4.992226446979299e-05, + "loss": 0.098, + "step": 184 + }, + { + "epoch": 0.07858048162230671, + "grad_norm": 1.5545382499694824, + "learning_rate": 4.992141951837769e-05, + "loss": 0.171, + "step": 186 + }, + { + "epoch": 0.07942543303760034, + "grad_norm": 2.143627882003784, + "learning_rate": 4.9920574566962404e-05, + "loss": 0.1158, + "step": 188 + }, + { + "epoch": 0.08027038445289396, + "grad_norm": 1.5633560419082642, + "learning_rate": 4.9919729615547105e-05, + "loss": 0.0865, + "step": 190 + }, + { + "epoch": 0.08111533586818757, + "grad_norm": 1.6810214519500732, + "learning_rate": 4.991888466413181e-05, + "loss": 0.0905, + "step": 192 + }, + { + "epoch": 0.0819602872834812, + "grad_norm": 1.0320245027542114, + "learning_rate": 4.991803971271652e-05, + "loss": 0.0748, + "step": 194 + }, + { + "epoch": 0.08280523869877482, + "grad_norm": 1.9010602235794067, + "learning_rate": 4.9917194761301226e-05, + "loss": 0.1433, + "step": 196 + }, + { + "epoch": 0.08365019011406843, + "grad_norm": 2.0801427364349365, + "learning_rate": 4.9916349809885934e-05, + "loss": 0.1286, + "step": 198 + }, + { + "epoch": 0.08449514152936206, + "grad_norm": 1.6263489723205566, + "learning_rate": 4.991550485847064e-05, + "loss": 0.097, + "step": 200 + }, + { + "epoch": 0.08449514152936206, + "eval_accuracy": 0.6549405069839628, + "eval_cer": 0.10543680584348562, + "eval_loss": 0.21902646124362946, + "eval_runtime": 857.1192, + "eval_samples_per_second": 13.531, + "eval_steps_per_second": 0.424, + "step": 200 + }, + { + "epoch": 0.08534009294465568, + "grad_norm": 1.3326414823532104, + "learning_rate": 4.991465990705534e-05, + "loss": 0.1085, + "step": 202 + }, + { + "epoch": 0.08618504435994931, + "grad_norm": 1.6693871021270752, + "learning_rate": 4.9913814955640055e-05, + "loss": 0.1202, + "step": 204 + }, + { + "epoch": 0.08702999577524292, + "grad_norm": 1.5326752662658691, + "learning_rate": 4.9912970004224756e-05, + "loss": 0.1062, + "step": 206 + }, + { + "epoch": 0.08787494719053654, + "grad_norm": 2.1729543209075928, + "learning_rate": 4.991212505280947e-05, + "loss": 0.1443, + "step": 208 + }, + { + "epoch": 0.08871989860583017, + "grad_norm": 1.343558669090271, + "learning_rate": 4.991128010139417e-05, + "loss": 0.1014, + "step": 210 + }, + { + "epoch": 0.08956485002112378, + "grad_norm": 1.2149333953857422, + "learning_rate": 4.991043514997888e-05, + "loss": 0.09, + "step": 212 + }, + { + "epoch": 0.0904098014364174, + "grad_norm": 1.063955307006836, + "learning_rate": 4.9909590198563584e-05, + "loss": 0.1024, + "step": 214 + }, + { + "epoch": 0.09125475285171103, + "grad_norm": 1.5514568090438843, + "learning_rate": 4.990874524714829e-05, + "loss": 0.0901, + "step": 216 + }, + { + "epoch": 0.09209970426700465, + "grad_norm": 1.8420741558074951, + "learning_rate": 4.9907900295733e-05, + "loss": 0.1084, + "step": 218 + }, + { + "epoch": 0.09294465568229827, + "grad_norm": 1.7071248292922974, + "learning_rate": 4.9907055344317706e-05, + "loss": 0.0838, + "step": 220 + }, + { + "epoch": 0.09378960709759189, + "grad_norm": 1.5390372276306152, + "learning_rate": 4.9906210392902406e-05, + "loss": 0.1133, + "step": 222 + }, + { + "epoch": 0.0946345585128855, + "grad_norm": 1.734567403793335, + "learning_rate": 4.990536544148712e-05, + "loss": 0.0862, + "step": 224 + }, + { + "epoch": 0.09547950992817913, + "grad_norm": 0.9405034780502319, + "learning_rate": 4.990452049007182e-05, + "loss": 0.0941, + "step": 226 + }, + { + "epoch": 0.09632446134347275, + "grad_norm": 1.2817813158035278, + "learning_rate": 4.990367553865653e-05, + "loss": 0.073, + "step": 228 + }, + { + "epoch": 0.09716941275876637, + "grad_norm": 2.075343132019043, + "learning_rate": 4.9902830587241235e-05, + "loss": 0.132, + "step": 230 + }, + { + "epoch": 0.09801436417406, + "grad_norm": 1.2112518548965454, + "learning_rate": 4.990198563582594e-05, + "loss": 0.0918, + "step": 232 + }, + { + "epoch": 0.09885931558935361, + "grad_norm": 0.846361517906189, + "learning_rate": 4.990114068441065e-05, + "loss": 0.063, + "step": 234 + }, + { + "epoch": 0.09970426700464723, + "grad_norm": 1.6504607200622559, + "learning_rate": 4.990029573299536e-05, + "loss": 0.0915, + "step": 236 + }, + { + "epoch": 0.10054921841994086, + "grad_norm": 1.5142329931259155, + "learning_rate": 4.989945078158006e-05, + "loss": 0.1099, + "step": 238 + }, + { + "epoch": 0.10139416983523447, + "grad_norm": 2.2613418102264404, + "learning_rate": 4.989860583016477e-05, + "loss": 0.1245, + "step": 240 + }, + { + "epoch": 0.1022391212505281, + "grad_norm": 2.134218454360962, + "learning_rate": 4.989776087874947e-05, + "loss": 0.1116, + "step": 242 + }, + { + "epoch": 0.10308407266582172, + "grad_norm": 1.6032145023345947, + "learning_rate": 4.989691592733418e-05, + "loss": 0.085, + "step": 244 + }, + { + "epoch": 0.10392902408111533, + "grad_norm": 1.2227692604064941, + "learning_rate": 4.9896070975918886e-05, + "loss": 0.0886, + "step": 246 + }, + { + "epoch": 0.10477397549640896, + "grad_norm": 2.2352206707000732, + "learning_rate": 4.989522602450359e-05, + "loss": 0.1255, + "step": 248 + }, + { + "epoch": 0.10561892691170258, + "grad_norm": 0.9914106726646423, + "learning_rate": 4.98943810730883e-05, + "loss": 0.0814, + "step": 250 + }, + { + "epoch": 0.10646387832699619, + "grad_norm": 2.0763142108917236, + "learning_rate": 4.989353612167301e-05, + "loss": 0.0949, + "step": 252 + }, + { + "epoch": 0.10730882974228982, + "grad_norm": 1.0693895816802979, + "learning_rate": 4.989269117025771e-05, + "loss": 0.0659, + "step": 254 + }, + { + "epoch": 0.10815378115758344, + "grad_norm": 1.8804770708084106, + "learning_rate": 4.989184621884242e-05, + "loss": 0.1019, + "step": 256 + }, + { + "epoch": 0.10899873257287707, + "grad_norm": 1.8545231819152832, + "learning_rate": 4.989100126742712e-05, + "loss": 0.1008, + "step": 258 + }, + { + "epoch": 0.10984368398817068, + "grad_norm": 1.1952356100082397, + "learning_rate": 4.989015631601183e-05, + "loss": 0.1363, + "step": 260 + }, + { + "epoch": 0.1106886354034643, + "grad_norm": 1.715725064277649, + "learning_rate": 4.988931136459654e-05, + "loss": 0.1249, + "step": 262 + }, + { + "epoch": 0.11153358681875793, + "grad_norm": 1.5759917497634888, + "learning_rate": 4.9888466413181244e-05, + "loss": 0.0784, + "step": 264 + }, + { + "epoch": 0.11237853823405154, + "grad_norm": 1.252929925918579, + "learning_rate": 4.988762146176595e-05, + "loss": 0.073, + "step": 266 + }, + { + "epoch": 0.11322348964934516, + "grad_norm": 1.0508784055709839, + "learning_rate": 4.988677651035066e-05, + "loss": 0.0835, + "step": 268 + }, + { + "epoch": 0.11406844106463879, + "grad_norm": 1.2772610187530518, + "learning_rate": 4.988593155893536e-05, + "loss": 0.0727, + "step": 270 + }, + { + "epoch": 0.1149133924799324, + "grad_norm": 1.5705053806304932, + "learning_rate": 4.988508660752007e-05, + "loss": 0.1377, + "step": 272 + }, + { + "epoch": 0.11575834389522602, + "grad_norm": 2.1583268642425537, + "learning_rate": 4.988424165610477e-05, + "loss": 0.1126, + "step": 274 + }, + { + "epoch": 0.11660329531051965, + "grad_norm": 1.3954452276229858, + "learning_rate": 4.988339670468948e-05, + "loss": 0.0855, + "step": 276 + }, + { + "epoch": 0.11744824672581326, + "grad_norm": 2.0817723274230957, + "learning_rate": 4.988255175327419e-05, + "loss": 0.1132, + "step": 278 + }, + { + "epoch": 0.11829319814110689, + "grad_norm": 1.833322525024414, + "learning_rate": 4.9881706801858895e-05, + "loss": 0.1117, + "step": 280 + }, + { + "epoch": 0.11913814955640051, + "grad_norm": 1.5989530086517334, + "learning_rate": 4.98808618504436e-05, + "loss": 0.1265, + "step": 282 + }, + { + "epoch": 0.11998310097169412, + "grad_norm": 1.8421043157577515, + "learning_rate": 4.988001689902831e-05, + "loss": 0.1372, + "step": 284 + }, + { + "epoch": 0.12082805238698775, + "grad_norm": 1.897548794746399, + "learning_rate": 4.987917194761301e-05, + "loss": 0.1103, + "step": 286 + }, + { + "epoch": 0.12167300380228137, + "grad_norm": 1.4291915893554688, + "learning_rate": 4.9878326996197724e-05, + "loss": 0.0847, + "step": 288 + }, + { + "epoch": 0.12251795521757498, + "grad_norm": 1.1558469533920288, + "learning_rate": 4.9877482044782424e-05, + "loss": 0.0889, + "step": 290 + }, + { + "epoch": 0.12336290663286861, + "grad_norm": 1.7630038261413574, + "learning_rate": 4.987663709336713e-05, + "loss": 0.126, + "step": 292 + }, + { + "epoch": 0.12420785804816223, + "grad_norm": 1.0064209699630737, + "learning_rate": 4.987579214195184e-05, + "loss": 0.086, + "step": 294 + }, + { + "epoch": 0.12505280946345584, + "grad_norm": 1.59539794921875, + "learning_rate": 4.9874947190536546e-05, + "loss": 0.1184, + "step": 296 + }, + { + "epoch": 0.12589776087874946, + "grad_norm": 1.7932195663452148, + "learning_rate": 4.987410223912125e-05, + "loss": 0.1376, + "step": 298 + }, + { + "epoch": 0.1267427122940431, + "grad_norm": 1.8148212432861328, + "learning_rate": 4.987325728770596e-05, + "loss": 0.081, + "step": 300 + }, + { + "epoch": 0.1267427122940431, + "eval_accuracy": 0.6697706501120883, + "eval_cer": 0.096782730665067, + "eval_loss": 0.2164023518562317, + "eval_runtime": 849.0391, + "eval_samples_per_second": 13.66, + "eval_steps_per_second": 0.428, + "step": 300 + }, + { + "epoch": 0.12758766370933672, + "grad_norm": 1.354212760925293, + "learning_rate": 4.987241233629066e-05, + "loss": 0.1006, + "step": 302 + }, + { + "epoch": 0.12843261512463033, + "grad_norm": 2.113490104675293, + "learning_rate": 4.9871567384875374e-05, + "loss": 0.1404, + "step": 304 + }, + { + "epoch": 0.12927756653992395, + "grad_norm": 1.6982043981552124, + "learning_rate": 4.9870722433460075e-05, + "loss": 0.2001, + "step": 306 + }, + { + "epoch": 0.13012251795521756, + "grad_norm": 1.6756244897842407, + "learning_rate": 4.986987748204479e-05, + "loss": 0.085, + "step": 308 + }, + { + "epoch": 0.1309674693705112, + "grad_norm": 1.2098358869552612, + "learning_rate": 4.986903253062949e-05, + "loss": 0.0603, + "step": 310 + }, + { + "epoch": 0.13181242078580482, + "grad_norm": 1.7302310466766357, + "learning_rate": 4.9868187579214196e-05, + "loss": 0.0816, + "step": 312 + }, + { + "epoch": 0.13265737220109844, + "grad_norm": 1.3803563117980957, + "learning_rate": 4.9867342627798904e-05, + "loss": 0.0795, + "step": 314 + }, + { + "epoch": 0.13350232361639205, + "grad_norm": 1.4253759384155273, + "learning_rate": 4.986649767638361e-05, + "loss": 0.1219, + "step": 316 + }, + { + "epoch": 0.13434727503168567, + "grad_norm": 1.8579727411270142, + "learning_rate": 4.986565272496832e-05, + "loss": 0.117, + "step": 318 + }, + { + "epoch": 0.13519222644697929, + "grad_norm": 1.3731677532196045, + "learning_rate": 4.9864807773553025e-05, + "loss": 0.0697, + "step": 320 + }, + { + "epoch": 0.13603717786227293, + "grad_norm": 1.7941423654556274, + "learning_rate": 4.9863962822137726e-05, + "loss": 0.1499, + "step": 322 + }, + { + "epoch": 0.13688212927756654, + "grad_norm": 1.8242301940917969, + "learning_rate": 4.986311787072244e-05, + "loss": 0.1148, + "step": 324 + }, + { + "epoch": 0.13772708069286016, + "grad_norm": 1.8088881969451904, + "learning_rate": 4.986227291930714e-05, + "loss": 0.1117, + "step": 326 + }, + { + "epoch": 0.13857203210815378, + "grad_norm": 1.3591686487197876, + "learning_rate": 4.986142796789185e-05, + "loss": 0.1057, + "step": 328 + }, + { + "epoch": 0.1394169835234474, + "grad_norm": 1.6867038011550903, + "learning_rate": 4.9860583016476554e-05, + "loss": 0.1065, + "step": 330 + }, + { + "epoch": 0.14026193493874103, + "grad_norm": 1.2499358654022217, + "learning_rate": 4.985973806506126e-05, + "loss": 0.1251, + "step": 332 + }, + { + "epoch": 0.14110688635403465, + "grad_norm": 1.7259184122085571, + "learning_rate": 4.985889311364597e-05, + "loss": 0.1066, + "step": 334 + }, + { + "epoch": 0.14195183776932827, + "grad_norm": 2.0314226150512695, + "learning_rate": 4.9858048162230676e-05, + "loss": 0.117, + "step": 336 + }, + { + "epoch": 0.14279678918462188, + "grad_norm": 1.4193428754806519, + "learning_rate": 4.9857203210815376e-05, + "loss": 0.129, + "step": 338 + }, + { + "epoch": 0.1436417405999155, + "grad_norm": 1.639891266822815, + "learning_rate": 4.985635825940009e-05, + "loss": 0.1269, + "step": 340 + }, + { + "epoch": 0.1444866920152091, + "grad_norm": 1.1235016584396362, + "learning_rate": 4.985551330798479e-05, + "loss": 0.0722, + "step": 342 + }, + { + "epoch": 0.14533164343050275, + "grad_norm": 1.941998839378357, + "learning_rate": 4.98546683565695e-05, + "loss": 0.085, + "step": 344 + }, + { + "epoch": 0.14617659484579637, + "grad_norm": 1.3687087297439575, + "learning_rate": 4.9853823405154205e-05, + "loss": 0.0776, + "step": 346 + }, + { + "epoch": 0.14702154626108999, + "grad_norm": 1.7349680662155151, + "learning_rate": 4.985297845373891e-05, + "loss": 0.1329, + "step": 348 + }, + { + "epoch": 0.1478664976763836, + "grad_norm": 1.4035015106201172, + "learning_rate": 4.985213350232362e-05, + "loss": 0.1047, + "step": 350 + }, + { + "epoch": 0.14871144909167722, + "grad_norm": 1.2989908456802368, + "learning_rate": 4.985128855090833e-05, + "loss": 0.0821, + "step": 352 + }, + { + "epoch": 0.14955640050697086, + "grad_norm": 1.9474072456359863, + "learning_rate": 4.985044359949303e-05, + "loss": 0.1334, + "step": 354 + }, + { + "epoch": 0.15040135192226448, + "grad_norm": 1.348775863647461, + "learning_rate": 4.984959864807774e-05, + "loss": 0.0856, + "step": 356 + }, + { + "epoch": 0.1512463033375581, + "grad_norm": 1.348597764968872, + "learning_rate": 4.984875369666244e-05, + "loss": 0.093, + "step": 358 + }, + { + "epoch": 0.1520912547528517, + "grad_norm": 1.7595020532608032, + "learning_rate": 4.984790874524715e-05, + "loss": 0.1192, + "step": 360 + }, + { + "epoch": 0.15293620616814532, + "grad_norm": 1.1924686431884766, + "learning_rate": 4.9847063793831856e-05, + "loss": 0.0749, + "step": 362 + }, + { + "epoch": 0.15378115758343897, + "grad_norm": 1.7564600706100464, + "learning_rate": 4.984621884241656e-05, + "loss": 0.0952, + "step": 364 + }, + { + "epoch": 0.15462610899873258, + "grad_norm": 1.3915026187896729, + "learning_rate": 4.984537389100127e-05, + "loss": 0.1268, + "step": 366 + }, + { + "epoch": 0.1554710604140262, + "grad_norm": 1.8819456100463867, + "learning_rate": 4.984452893958598e-05, + "loss": 0.0854, + "step": 368 + }, + { + "epoch": 0.1563160118293198, + "grad_norm": 1.6254719495773315, + "learning_rate": 4.984368398817068e-05, + "loss": 0.084, + "step": 370 + }, + { + "epoch": 0.15716096324461343, + "grad_norm": 2.196483850479126, + "learning_rate": 4.984283903675539e-05, + "loss": 0.1089, + "step": 372 + }, + { + "epoch": 0.15800591465990704, + "grad_norm": 1.471126675605774, + "learning_rate": 4.984199408534009e-05, + "loss": 0.0832, + "step": 374 + }, + { + "epoch": 0.1588508660752007, + "grad_norm": 1.1505743265151978, + "learning_rate": 4.98411491339248e-05, + "loss": 0.076, + "step": 376 + }, + { + "epoch": 0.1596958174904943, + "grad_norm": 1.4914796352386475, + "learning_rate": 4.984030418250951e-05, + "loss": 0.1304, + "step": 378 + }, + { + "epoch": 0.16054076890578792, + "grad_norm": 1.9942247867584229, + "learning_rate": 4.9839459231094214e-05, + "loss": 0.1227, + "step": 380 + }, + { + "epoch": 0.16138572032108153, + "grad_norm": 1.6885210275650024, + "learning_rate": 4.983861427967892e-05, + "loss": 0.0926, + "step": 382 + }, + { + "epoch": 0.16223067173637515, + "grad_norm": 1.2002671957015991, + "learning_rate": 4.983776932826363e-05, + "loss": 0.1011, + "step": 384 + }, + { + "epoch": 0.1630756231516688, + "grad_norm": 1.6890459060668945, + "learning_rate": 4.983692437684833e-05, + "loss": 0.1243, + "step": 386 + }, + { + "epoch": 0.1639205745669624, + "grad_norm": 1.4184560775756836, + "learning_rate": 4.983607942543304e-05, + "loss": 0.0848, + "step": 388 + }, + { + "epoch": 0.16476552598225602, + "grad_norm": 2.200415849685669, + "learning_rate": 4.983523447401774e-05, + "loss": 0.0779, + "step": 390 + }, + { + "epoch": 0.16561047739754964, + "grad_norm": 1.5030500888824463, + "learning_rate": 4.983438952260245e-05, + "loss": 0.0828, + "step": 392 + }, + { + "epoch": 0.16645542881284325, + "grad_norm": 1.2970515489578247, + "learning_rate": 4.983354457118716e-05, + "loss": 0.0889, + "step": 394 + }, + { + "epoch": 0.16730038022813687, + "grad_norm": 1.576575517654419, + "learning_rate": 4.9832699619771865e-05, + "loss": 0.0942, + "step": 396 + }, + { + "epoch": 0.1681453316434305, + "grad_norm": 1.6719400882720947, + "learning_rate": 4.983185466835657e-05, + "loss": 0.1166, + "step": 398 + }, + { + "epoch": 0.16899028305872413, + "grad_norm": 1.6408090591430664, + "learning_rate": 4.983100971694128e-05, + "loss": 0.1293, + "step": 400 + }, + { + "epoch": 0.16899028305872413, + "eval_accuracy": 0.6733919641317468, + "eval_cer": 0.09762961263971347, + "eval_loss": 0.20648193359375, + "eval_runtime": 861.8594, + "eval_samples_per_second": 13.457, + "eval_steps_per_second": 0.421, + "step": 400 + }, + { + "epoch": 0.16983523447401774, + "grad_norm": 2.1278653144836426, + "learning_rate": 4.983016476552598e-05, + "loss": 0.1321, + "step": 402 + }, + { + "epoch": 0.17068018588931136, + "grad_norm": 1.8531532287597656, + "learning_rate": 4.9829319814110694e-05, + "loss": 0.1042, + "step": 404 + }, + { + "epoch": 0.17152513730460497, + "grad_norm": 1.587566614151001, + "learning_rate": 4.9828474862695394e-05, + "loss": 0.1147, + "step": 406 + }, + { + "epoch": 0.17237008871989862, + "grad_norm": 2.2086760997772217, + "learning_rate": 4.982762991128011e-05, + "loss": 0.1246, + "step": 408 + }, + { + "epoch": 0.17321504013519223, + "grad_norm": 1.2952370643615723, + "learning_rate": 4.982678495986481e-05, + "loss": 0.1073, + "step": 410 + }, + { + "epoch": 0.17405999155048585, + "grad_norm": 1.7142947912216187, + "learning_rate": 4.9825940008449516e-05, + "loss": 0.0869, + "step": 412 + }, + { + "epoch": 0.17490494296577946, + "grad_norm": 1.9402838945388794, + "learning_rate": 4.982509505703422e-05, + "loss": 0.1126, + "step": 414 + }, + { + "epoch": 0.17574989438107308, + "grad_norm": 2.2303125858306885, + "learning_rate": 4.982425010561893e-05, + "loss": 0.1515, + "step": 416 + }, + { + "epoch": 0.1765948457963667, + "grad_norm": 1.8753235340118408, + "learning_rate": 4.982340515420364e-05, + "loss": 0.1059, + "step": 418 + }, + { + "epoch": 0.17743979721166034, + "grad_norm": 1.825201392173767, + "learning_rate": 4.9822560202788344e-05, + "loss": 0.1604, + "step": 420 + }, + { + "epoch": 0.17828474862695395, + "grad_norm": 2.0175940990448, + "learning_rate": 4.9821715251373045e-05, + "loss": 0.1078, + "step": 422 + }, + { + "epoch": 0.17912970004224757, + "grad_norm": 1.2101908922195435, + "learning_rate": 4.982087029995776e-05, + "loss": 0.1017, + "step": 424 + }, + { + "epoch": 0.17997465145754118, + "grad_norm": 1.1782002449035645, + "learning_rate": 4.982002534854246e-05, + "loss": 0.0722, + "step": 426 + }, + { + "epoch": 0.1808196028728348, + "grad_norm": 1.684949517250061, + "learning_rate": 4.9819180397127167e-05, + "loss": 0.1096, + "step": 428 + }, + { + "epoch": 0.18166455428812844, + "grad_norm": 1.9654364585876465, + "learning_rate": 4.9818335445711874e-05, + "loss": 0.0896, + "step": 430 + }, + { + "epoch": 0.18250950570342206, + "grad_norm": 1.5413470268249512, + "learning_rate": 4.981749049429658e-05, + "loss": 0.0781, + "step": 432 + }, + { + "epoch": 0.18335445711871567, + "grad_norm": 2.4538686275482178, + "learning_rate": 4.981664554288129e-05, + "loss": 0.1153, + "step": 434 + }, + { + "epoch": 0.1841994085340093, + "grad_norm": 1.3073904514312744, + "learning_rate": 4.9815800591465995e-05, + "loss": 0.0847, + "step": 436 + }, + { + "epoch": 0.1850443599493029, + "grad_norm": 1.651445746421814, + "learning_rate": 4.9814955640050696e-05, + "loss": 0.1313, + "step": 438 + }, + { + "epoch": 0.18588931136459655, + "grad_norm": 1.7214906215667725, + "learning_rate": 4.981411068863541e-05, + "loss": 0.1008, + "step": 440 + }, + { + "epoch": 0.18673426277989016, + "grad_norm": 1.0988144874572754, + "learning_rate": 4.981326573722011e-05, + "loss": 0.1162, + "step": 442 + }, + { + "epoch": 0.18757921419518378, + "grad_norm": 2.5338993072509766, + "learning_rate": 4.981242078580482e-05, + "loss": 0.1522, + "step": 444 + }, + { + "epoch": 0.1884241656104774, + "grad_norm": 1.2699946165084839, + "learning_rate": 4.9811575834389525e-05, + "loss": 0.0943, + "step": 446 + }, + { + "epoch": 0.189269117025771, + "grad_norm": 2.2291219234466553, + "learning_rate": 4.981073088297423e-05, + "loss": 0.0993, + "step": 448 + }, + { + "epoch": 0.19011406844106463, + "grad_norm": 1.102462649345398, + "learning_rate": 4.980988593155894e-05, + "loss": 0.0959, + "step": 450 + }, + { + "epoch": 0.19095901985635827, + "grad_norm": 1.1297458410263062, + "learning_rate": 4.9809040980143646e-05, + "loss": 0.0748, + "step": 452 + }, + { + "epoch": 0.19180397127165189, + "grad_norm": 1.316767930984497, + "learning_rate": 4.9808196028728347e-05, + "loss": 0.0729, + "step": 454 + }, + { + "epoch": 0.1926489226869455, + "grad_norm": 1.3369216918945312, + "learning_rate": 4.980735107731306e-05, + "loss": 0.0896, + "step": 456 + }, + { + "epoch": 0.19349387410223912, + "grad_norm": 1.6894314289093018, + "learning_rate": 4.980650612589776e-05, + "loss": 0.1607, + "step": 458 + }, + { + "epoch": 0.19433882551753273, + "grad_norm": 1.727819800376892, + "learning_rate": 4.980566117448247e-05, + "loss": 0.1024, + "step": 460 + }, + { + "epoch": 0.19518377693282637, + "grad_norm": 2.435275077819824, + "learning_rate": 4.9804816223067175e-05, + "loss": 0.1779, + "step": 462 + }, + { + "epoch": 0.19602872834812, + "grad_norm": 1.8471691608428955, + "learning_rate": 4.980397127165188e-05, + "loss": 0.0943, + "step": 464 + }, + { + "epoch": 0.1968736797634136, + "grad_norm": 1.471346139907837, + "learning_rate": 4.980312632023659e-05, + "loss": 0.1148, + "step": 466 + }, + { + "epoch": 0.19771863117870722, + "grad_norm": 2.621778726577759, + "learning_rate": 4.98022813688213e-05, + "loss": 0.1145, + "step": 468 + }, + { + "epoch": 0.19856358259400084, + "grad_norm": 1.4826730489730835, + "learning_rate": 4.9801436417406e-05, + "loss": 0.1011, + "step": 470 + }, + { + "epoch": 0.19940853400929445, + "grad_norm": 1.8840138912200928, + "learning_rate": 4.980059146599071e-05, + "loss": 0.0823, + "step": 472 + }, + { + "epoch": 0.2002534854245881, + "grad_norm": 1.329613208770752, + "learning_rate": 4.979974651457541e-05, + "loss": 0.0794, + "step": 474 + }, + { + "epoch": 0.2010984368398817, + "grad_norm": 1.6798653602600098, + "learning_rate": 4.979890156316012e-05, + "loss": 0.068, + "step": 476 + }, + { + "epoch": 0.20194338825517533, + "grad_norm": 2.3119256496429443, + "learning_rate": 4.9798056611744826e-05, + "loss": 0.1738, + "step": 478 + }, + { + "epoch": 0.20278833967046894, + "grad_norm": 1.5852372646331787, + "learning_rate": 4.979721166032953e-05, + "loss": 0.1466, + "step": 480 + }, + { + "epoch": 0.20363329108576256, + "grad_norm": 1.3242956399917603, + "learning_rate": 4.979636670891424e-05, + "loss": 0.0747, + "step": 482 + }, + { + "epoch": 0.2044782425010562, + "grad_norm": 1.8334121704101562, + "learning_rate": 4.979552175749895e-05, + "loss": 0.0832, + "step": 484 + }, + { + "epoch": 0.20532319391634982, + "grad_norm": 1.5589213371276855, + "learning_rate": 4.979467680608365e-05, + "loss": 0.0939, + "step": 486 + }, + { + "epoch": 0.20616814533164343, + "grad_norm": 0.6316360831260681, + "learning_rate": 4.979383185466836e-05, + "loss": 0.0623, + "step": 488 + }, + { + "epoch": 0.20701309674693705, + "grad_norm": 1.5444834232330322, + "learning_rate": 4.979298690325306e-05, + "loss": 0.0954, + "step": 490 + }, + { + "epoch": 0.20785804816223066, + "grad_norm": 2.2183780670166016, + "learning_rate": 4.979214195183777e-05, + "loss": 0.1144, + "step": 492 + }, + { + "epoch": 0.20870299957752428, + "grad_norm": 1.4162744283676147, + "learning_rate": 4.979129700042248e-05, + "loss": 0.0713, + "step": 494 + }, + { + "epoch": 0.20954795099281792, + "grad_norm": 1.2704912424087524, + "learning_rate": 4.9790452049007184e-05, + "loss": 0.0556, + "step": 496 + }, + { + "epoch": 0.21039290240811154, + "grad_norm": 1.9707940816879272, + "learning_rate": 4.978960709759189e-05, + "loss": 0.0829, + "step": 498 + }, + { + "epoch": 0.21123785382340515, + "grad_norm": 1.1645714044570923, + "learning_rate": 4.97887621461766e-05, + "loss": 0.0997, + "step": 500 + }, + { + "epoch": 0.21123785382340515, + "eval_accuracy": 0.6679599931022591, + "eval_cer": 0.10002028988064257, + "eval_loss": 0.21607132256031036, + "eval_runtime": 889.5025, + "eval_samples_per_second": 13.039, + "eval_steps_per_second": 0.408, + "step": 500 + }, + { + "epoch": 0.21208280523869877, + "grad_norm": 1.2636017799377441, + "learning_rate": 4.97879171947613e-05, + "loss": 0.0853, + "step": 502 + }, + { + "epoch": 0.21292775665399238, + "grad_norm": 1.3952999114990234, + "learning_rate": 4.978707224334601e-05, + "loss": 0.0827, + "step": 504 + }, + { + "epoch": 0.21377270806928603, + "grad_norm": 1.4890190362930298, + "learning_rate": 4.978622729193071e-05, + "loss": 0.0793, + "step": 506 + }, + { + "epoch": 0.21461765948457964, + "grad_norm": 3.3428499698638916, + "learning_rate": 4.978538234051543e-05, + "loss": 0.0804, + "step": 508 + }, + { + "epoch": 0.21546261089987326, + "grad_norm": 1.8637378215789795, + "learning_rate": 4.978453738910013e-05, + "loss": 0.1163, + "step": 510 + }, + { + "epoch": 0.21630756231516687, + "grad_norm": 1.4218493700027466, + "learning_rate": 4.9783692437684835e-05, + "loss": 0.0926, + "step": 512 + }, + { + "epoch": 0.2171525137304605, + "grad_norm": 1.2058361768722534, + "learning_rate": 4.978284748626954e-05, + "loss": 0.0877, + "step": 514 + }, + { + "epoch": 0.21799746514575413, + "grad_norm": 1.6888997554779053, + "learning_rate": 4.978200253485425e-05, + "loss": 0.1208, + "step": 516 + }, + { + "epoch": 0.21884241656104775, + "grad_norm": 1.4230191707611084, + "learning_rate": 4.9781157583438957e-05, + "loss": 0.1062, + "step": 518 + }, + { + "epoch": 0.21968736797634136, + "grad_norm": 1.6900297403335571, + "learning_rate": 4.9780312632023664e-05, + "loss": 0.1051, + "step": 520 + }, + { + "epoch": 0.22053231939163498, + "grad_norm": 1.9866880178451538, + "learning_rate": 4.9779467680608364e-05, + "loss": 0.144, + "step": 522 + }, + { + "epoch": 0.2213772708069286, + "grad_norm": 1.5090681314468384, + "learning_rate": 4.977862272919308e-05, + "loss": 0.1688, + "step": 524 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 1.5939511060714722, + "learning_rate": 4.977777777777778e-05, + "loss": 0.1095, + "step": 526 + }, + { + "epoch": 0.22306717363751585, + "grad_norm": 1.6898002624511719, + "learning_rate": 4.9776932826362486e-05, + "loss": 0.0933, + "step": 528 + }, + { + "epoch": 0.22391212505280947, + "grad_norm": 1.6217966079711914, + "learning_rate": 4.977608787494719e-05, + "loss": 0.0906, + "step": 530 + }, + { + "epoch": 0.22475707646810308, + "grad_norm": 1.187659740447998, + "learning_rate": 4.97752429235319e-05, + "loss": 0.1091, + "step": 532 + }, + { + "epoch": 0.2256020278833967, + "grad_norm": 1.255968689918518, + "learning_rate": 4.977439797211661e-05, + "loss": 0.0659, + "step": 534 + }, + { + "epoch": 0.22644697929869032, + "grad_norm": 1.5511211156845093, + "learning_rate": 4.9773553020701315e-05, + "loss": 0.0802, + "step": 536 + }, + { + "epoch": 0.22729193071398396, + "grad_norm": 1.7793858051300049, + "learning_rate": 4.9772708069286015e-05, + "loss": 0.1129, + "step": 538 + }, + { + "epoch": 0.22813688212927757, + "grad_norm": 1.4437144994735718, + "learning_rate": 4.977186311787073e-05, + "loss": 0.1, + "step": 540 + }, + { + "epoch": 0.2289818335445712, + "grad_norm": 1.4379544258117676, + "learning_rate": 4.977101816645543e-05, + "loss": 0.0864, + "step": 542 + }, + { + "epoch": 0.2298267849598648, + "grad_norm": 0.8294060826301575, + "learning_rate": 4.9770173215040137e-05, + "loss": 0.0763, + "step": 544 + }, + { + "epoch": 0.23067173637515842, + "grad_norm": 2.189450740814209, + "learning_rate": 4.9769328263624844e-05, + "loss": 0.123, + "step": 546 + }, + { + "epoch": 0.23151668779045204, + "grad_norm": 1.3774739503860474, + "learning_rate": 4.976848331220955e-05, + "loss": 0.0839, + "step": 548 + }, + { + "epoch": 0.23236163920574568, + "grad_norm": 1.9243252277374268, + "learning_rate": 4.976763836079426e-05, + "loss": 0.1049, + "step": 550 + }, + { + "epoch": 0.2332065906210393, + "grad_norm": 1.1769413948059082, + "learning_rate": 4.9766793409378965e-05, + "loss": 0.0834, + "step": 552 + }, + { + "epoch": 0.2340515420363329, + "grad_norm": 1.6378949880599976, + "learning_rate": 4.9765948457963666e-05, + "loss": 0.1071, + "step": 554 + }, + { + "epoch": 0.23489649345162653, + "grad_norm": 1.901498556137085, + "learning_rate": 4.976510350654838e-05, + "loss": 0.1007, + "step": 556 + }, + { + "epoch": 0.23574144486692014, + "grad_norm": 1.9260848760604858, + "learning_rate": 4.976425855513308e-05, + "loss": 0.2061, + "step": 558 + }, + { + "epoch": 0.23658639628221378, + "grad_norm": 3.253631353378296, + "learning_rate": 4.976341360371779e-05, + "loss": 0.1726, + "step": 560 + }, + { + "epoch": 0.2374313476975074, + "grad_norm": 1.5378361940383911, + "learning_rate": 4.9762568652302495e-05, + "loss": 0.0962, + "step": 562 + }, + { + "epoch": 0.23827629911280102, + "grad_norm": 1.3367713689804077, + "learning_rate": 4.97617237008872e-05, + "loss": 0.1047, + "step": 564 + }, + { + "epoch": 0.23912125052809463, + "grad_norm": 1.2774410247802734, + "learning_rate": 4.976087874947191e-05, + "loss": 0.0745, + "step": 566 + }, + { + "epoch": 0.23996620194338825, + "grad_norm": 1.8468585014343262, + "learning_rate": 4.9760033798056616e-05, + "loss": 0.1207, + "step": 568 + }, + { + "epoch": 0.24081115335868186, + "grad_norm": 2.125532388687134, + "learning_rate": 4.9759188846641317e-05, + "loss": 0.1321, + "step": 570 + }, + { + "epoch": 0.2416561047739755, + "grad_norm": 1.7013835906982422, + "learning_rate": 4.975834389522603e-05, + "loss": 0.0941, + "step": 572 + }, + { + "epoch": 0.24250105618926912, + "grad_norm": 1.4416042566299438, + "learning_rate": 4.975749894381073e-05, + "loss": 0.1195, + "step": 574 + }, + { + "epoch": 0.24334600760456274, + "grad_norm": 1.7456398010253906, + "learning_rate": 4.975665399239544e-05, + "loss": 0.1035, + "step": 576 + }, + { + "epoch": 0.24419095901985635, + "grad_norm": 1.6715686321258545, + "learning_rate": 4.9755809040980145e-05, + "loss": 0.083, + "step": 578 + }, + { + "epoch": 0.24503591043514997, + "grad_norm": 1.738721251487732, + "learning_rate": 4.975496408956485e-05, + "loss": 0.0842, + "step": 580 + }, + { + "epoch": 0.2458808618504436, + "grad_norm": 1.4637415409088135, + "learning_rate": 4.975411913814956e-05, + "loss": 0.1192, + "step": 582 + }, + { + "epoch": 0.24672581326573723, + "grad_norm": 1.5448874235153198, + "learning_rate": 4.975327418673427e-05, + "loss": 0.1092, + "step": 584 + }, + { + "epoch": 0.24757076468103084, + "grad_norm": 1.877724051475525, + "learning_rate": 4.975242923531897e-05, + "loss": 0.1203, + "step": 586 + }, + { + "epoch": 0.24841571609632446, + "grad_norm": 1.81662917137146, + "learning_rate": 4.975158428390368e-05, + "loss": 0.1101, + "step": 588 + }, + { + "epoch": 0.24926066751161807, + "grad_norm": 1.8778841495513916, + "learning_rate": 4.975073933248838e-05, + "loss": 0.105, + "step": 590 + }, + { + "epoch": 0.2501056189269117, + "grad_norm": 1.7268378734588623, + "learning_rate": 4.974989438107309e-05, + "loss": 0.1164, + "step": 592 + }, + { + "epoch": 0.2509505703422053, + "grad_norm": 1.972676396369934, + "learning_rate": 4.9749049429657796e-05, + "loss": 0.1177, + "step": 594 + }, + { + "epoch": 0.2517955217574989, + "grad_norm": 1.2417385578155518, + "learning_rate": 4.97482044782425e-05, + "loss": 0.0752, + "step": 596 + }, + { + "epoch": 0.2526404731727926, + "grad_norm": 1.4219918251037598, + "learning_rate": 4.974735952682721e-05, + "loss": 0.0786, + "step": 598 + }, + { + "epoch": 0.2534854245880862, + "grad_norm": 1.9878331422805786, + "learning_rate": 4.974651457541192e-05, + "loss": 0.1277, + "step": 600 + }, + { + "epoch": 0.2534854245880862, + "eval_accuracy": 0.6739092947059838, + "eval_cer": 0.09566237638610761, + "eval_loss": 0.20857658982276917, + "eval_runtime": 850.0665, + "eval_samples_per_second": 13.644, + "eval_steps_per_second": 0.427, + "step": 600 + }, + { + "epoch": 0.2543303760033798, + "grad_norm": 2.0730292797088623, + "learning_rate": 4.974566962399662e-05, + "loss": 0.1183, + "step": 602 + }, + { + "epoch": 0.25517532741867344, + "grad_norm": 1.7861765623092651, + "learning_rate": 4.974482467258133e-05, + "loss": 0.1055, + "step": 604 + }, + { + "epoch": 0.25602027883396705, + "grad_norm": 1.1870955228805542, + "learning_rate": 4.974397972116603e-05, + "loss": 0.1028, + "step": 606 + }, + { + "epoch": 0.25686523024926067, + "grad_norm": 1.5035452842712402, + "learning_rate": 4.9743134769750747e-05, + "loss": 0.0945, + "step": 608 + }, + { + "epoch": 0.2577101816645543, + "grad_norm": 1.4320887327194214, + "learning_rate": 4.974228981833545e-05, + "loss": 0.0896, + "step": 610 + }, + { + "epoch": 0.2585551330798479, + "grad_norm": 1.3751976490020752, + "learning_rate": 4.9741444866920154e-05, + "loss": 0.0968, + "step": 612 + }, + { + "epoch": 0.2594000844951415, + "grad_norm": 1.5161865949630737, + "learning_rate": 4.974059991550486e-05, + "loss": 0.1359, + "step": 614 + }, + { + "epoch": 0.26024503591043513, + "grad_norm": 1.5288583040237427, + "learning_rate": 4.973975496408957e-05, + "loss": 0.0897, + "step": 616 + }, + { + "epoch": 0.26108998732572875, + "grad_norm": 2.264397382736206, + "learning_rate": 4.9738910012674276e-05, + "loss": 0.1312, + "step": 618 + }, + { + "epoch": 0.2619349387410224, + "grad_norm": 1.3001281023025513, + "learning_rate": 4.973806506125898e-05, + "loss": 0.0848, + "step": 620 + }, + { + "epoch": 0.26277989015631603, + "grad_norm": 2.159705638885498, + "learning_rate": 4.973722010984368e-05, + "loss": 0.0851, + "step": 622 + }, + { + "epoch": 0.26362484157160965, + "grad_norm": 1.870424747467041, + "learning_rate": 4.97363751584284e-05, + "loss": 0.0959, + "step": 624 + }, + { + "epoch": 0.26446979298690326, + "grad_norm": 1.2810477018356323, + "learning_rate": 4.97355302070131e-05, + "loss": 0.0888, + "step": 626 + }, + { + "epoch": 0.2653147444021969, + "grad_norm": 2.051835775375366, + "learning_rate": 4.9734685255597805e-05, + "loss": 0.0992, + "step": 628 + }, + { + "epoch": 0.2661596958174905, + "grad_norm": 2.29409122467041, + "learning_rate": 4.973384030418251e-05, + "loss": 0.1117, + "step": 630 + }, + { + "epoch": 0.2670046472327841, + "grad_norm": 1.814590573310852, + "learning_rate": 4.973299535276722e-05, + "loss": 0.0997, + "step": 632 + }, + { + "epoch": 0.2678495986480777, + "grad_norm": 1.616007685661316, + "learning_rate": 4.9732150401351927e-05, + "loss": 0.1106, + "step": 634 + }, + { + "epoch": 0.26869455006337134, + "grad_norm": 1.9944722652435303, + "learning_rate": 4.9731305449936634e-05, + "loss": 0.1214, + "step": 636 + }, + { + "epoch": 0.26953950147866496, + "grad_norm": 1.2416284084320068, + "learning_rate": 4.9730460498521334e-05, + "loss": 0.1131, + "step": 638 + }, + { + "epoch": 0.27038445289395857, + "grad_norm": 0.6368843913078308, + "learning_rate": 4.972961554710605e-05, + "loss": 0.043, + "step": 640 + }, + { + "epoch": 0.27122940430925224, + "grad_norm": 1.266077995300293, + "learning_rate": 4.972877059569075e-05, + "loss": 0.1076, + "step": 642 + }, + { + "epoch": 0.27207435572454586, + "grad_norm": 1.4216665029525757, + "learning_rate": 4.9727925644275456e-05, + "loss": 0.1004, + "step": 644 + }, + { + "epoch": 0.2729193071398395, + "grad_norm": 1.3999009132385254, + "learning_rate": 4.972708069286016e-05, + "loss": 0.0958, + "step": 646 + }, + { + "epoch": 0.2737642585551331, + "grad_norm": 0.8566420078277588, + "learning_rate": 4.972623574144487e-05, + "loss": 0.1033, + "step": 648 + }, + { + "epoch": 0.2746092099704267, + "grad_norm": 1.770908236503601, + "learning_rate": 4.972539079002958e-05, + "loss": 0.1356, + "step": 650 + }, + { + "epoch": 0.2754541613857203, + "grad_norm": 1.3873233795166016, + "learning_rate": 4.9724545838614285e-05, + "loss": 0.1131, + "step": 652 + }, + { + "epoch": 0.27629911280101394, + "grad_norm": 1.4224236011505127, + "learning_rate": 4.9723700887198985e-05, + "loss": 0.0794, + "step": 654 + }, + { + "epoch": 0.27714406421630755, + "grad_norm": 1.4874037504196167, + "learning_rate": 4.97228559357837e-05, + "loss": 0.0949, + "step": 656 + }, + { + "epoch": 0.27798901563160117, + "grad_norm": 1.5964959859848022, + "learning_rate": 4.97220109843684e-05, + "loss": 0.0936, + "step": 658 + }, + { + "epoch": 0.2788339670468948, + "grad_norm": 1.5574475526809692, + "learning_rate": 4.9721166032953107e-05, + "loss": 0.0992, + "step": 660 + }, + { + "epoch": 0.2796789184621884, + "grad_norm": 3.1248722076416016, + "learning_rate": 4.9720321081537814e-05, + "loss": 0.0757, + "step": 662 + }, + { + "epoch": 0.28052386987748207, + "grad_norm": 2.060107707977295, + "learning_rate": 4.971947613012252e-05, + "loss": 0.0923, + "step": 664 + }, + { + "epoch": 0.2813688212927757, + "grad_norm": 1.112054467201233, + "learning_rate": 4.971863117870723e-05, + "loss": 0.0541, + "step": 666 + }, + { + "epoch": 0.2822137727080693, + "grad_norm": 1.5512161254882812, + "learning_rate": 4.9717786227291935e-05, + "loss": 0.0921, + "step": 668 + }, + { + "epoch": 0.2830587241233629, + "grad_norm": 1.544339895248413, + "learning_rate": 4.9716941275876636e-05, + "loss": 0.0877, + "step": 670 + }, + { + "epoch": 0.28390367553865653, + "grad_norm": 1.6371303796768188, + "learning_rate": 4.971609632446135e-05, + "loss": 0.1396, + "step": 672 + }, + { + "epoch": 0.28474862695395015, + "grad_norm": 1.3973644971847534, + "learning_rate": 4.971525137304605e-05, + "loss": 0.1194, + "step": 674 + }, + { + "epoch": 0.28559357836924376, + "grad_norm": 1.4875513315200806, + "learning_rate": 4.971440642163076e-05, + "loss": 0.0841, + "step": 676 + }, + { + "epoch": 0.2864385297845374, + "grad_norm": 1.3536832332611084, + "learning_rate": 4.9713561470215465e-05, + "loss": 0.0736, + "step": 678 + }, + { + "epoch": 0.287283481199831, + "grad_norm": 1.8549797534942627, + "learning_rate": 4.971271651880017e-05, + "loss": 0.1192, + "step": 680 + }, + { + "epoch": 0.2881284326151246, + "grad_norm": 2.0601296424865723, + "learning_rate": 4.971187156738488e-05, + "loss": 0.1393, + "step": 682 + }, + { + "epoch": 0.2889733840304182, + "grad_norm": 1.577346920967102, + "learning_rate": 4.9711026615969586e-05, + "loss": 0.072, + "step": 684 + }, + { + "epoch": 0.2898183354457119, + "grad_norm": 1.3023242950439453, + "learning_rate": 4.9710181664554287e-05, + "loss": 0.0936, + "step": 686 + }, + { + "epoch": 0.2906632868610055, + "grad_norm": 1.5268125534057617, + "learning_rate": 4.9709336713139e-05, + "loss": 0.1015, + "step": 688 + }, + { + "epoch": 0.2915082382762991, + "grad_norm": 1.6719399690628052, + "learning_rate": 4.97084917617237e-05, + "loss": 0.0853, + "step": 690 + }, + { + "epoch": 0.29235318969159274, + "grad_norm": 1.622419834136963, + "learning_rate": 4.970764681030841e-05, + "loss": 0.0808, + "step": 692 + }, + { + "epoch": 0.29319814110688636, + "grad_norm": 1.5559837818145752, + "learning_rate": 4.9706801858893115e-05, + "loss": 0.1144, + "step": 694 + }, + { + "epoch": 0.29404309252217997, + "grad_norm": 1.5186535120010376, + "learning_rate": 4.970595690747782e-05, + "loss": 0.1343, + "step": 696 + }, + { + "epoch": 0.2948880439374736, + "grad_norm": 1.011915683746338, + "learning_rate": 4.970511195606253e-05, + "loss": 0.064, + "step": 698 + }, + { + "epoch": 0.2957329953527672, + "grad_norm": 1.7633938789367676, + "learning_rate": 4.970426700464724e-05, + "loss": 0.1294, + "step": 700 + }, + { + "epoch": 0.2957329953527672, + "eval_accuracy": 0.6749439558544577, + "eval_cer": 0.09162204363206507, + "eval_loss": 0.20422282814979553, + "eval_runtime": 857.9136, + "eval_samples_per_second": 13.519, + "eval_steps_per_second": 0.423, + "step": 700 + }, + { + "epoch": 0.2965779467680608, + "grad_norm": 1.6819132566452026, + "learning_rate": 4.970342205323194e-05, + "loss": 0.0934, + "step": 702 + }, + { + "epoch": 0.29742289818335443, + "grad_norm": 1.0647895336151123, + "learning_rate": 4.970257710181665e-05, + "loss": 0.1007, + "step": 704 + }, + { + "epoch": 0.2982678495986481, + "grad_norm": 2.1595728397369385, + "learning_rate": 4.970173215040135e-05, + "loss": 0.1345, + "step": 706 + }, + { + "epoch": 0.2991128010139417, + "grad_norm": 1.0478488206863403, + "learning_rate": 4.9700887198986066e-05, + "loss": 0.0608, + "step": 708 + }, + { + "epoch": 0.29995775242923534, + "grad_norm": 1.2893446683883667, + "learning_rate": 4.9700042247570766e-05, + "loss": 0.1021, + "step": 710 + }, + { + "epoch": 0.30080270384452895, + "grad_norm": 2.40004563331604, + "learning_rate": 4.969919729615547e-05, + "loss": 0.1251, + "step": 712 + }, + { + "epoch": 0.30164765525982257, + "grad_norm": 1.4895504713058472, + "learning_rate": 4.969835234474018e-05, + "loss": 0.101, + "step": 714 + }, + { + "epoch": 0.3024926066751162, + "grad_norm": 1.2066045999526978, + "learning_rate": 4.969750739332489e-05, + "loss": 0.1069, + "step": 716 + }, + { + "epoch": 0.3033375580904098, + "grad_norm": 2.0504655838012695, + "learning_rate": 4.9696662441909595e-05, + "loss": 0.1287, + "step": 718 + }, + { + "epoch": 0.3041825095057034, + "grad_norm": 2.006098508834839, + "learning_rate": 4.96958174904943e-05, + "loss": 0.1442, + "step": 720 + }, + { + "epoch": 0.30502746092099703, + "grad_norm": 1.9753419160842896, + "learning_rate": 4.9694972539079e-05, + "loss": 0.1023, + "step": 722 + }, + { + "epoch": 0.30587241233629064, + "grad_norm": 1.803053617477417, + "learning_rate": 4.9694127587663717e-05, + "loss": 0.1168, + "step": 724 + }, + { + "epoch": 0.30671736375158426, + "grad_norm": 1.8871780633926392, + "learning_rate": 4.969328263624842e-05, + "loss": 0.12, + "step": 726 + }, + { + "epoch": 0.30756231516687793, + "grad_norm": 1.7384098768234253, + "learning_rate": 4.9692437684833124e-05, + "loss": 0.1312, + "step": 728 + }, + { + "epoch": 0.30840726658217155, + "grad_norm": 1.6403692960739136, + "learning_rate": 4.969159273341783e-05, + "loss": 0.0897, + "step": 730 + }, + { + "epoch": 0.30925221799746516, + "grad_norm": 1.7307875156402588, + "learning_rate": 4.969074778200254e-05, + "loss": 0.0944, + "step": 732 + }, + { + "epoch": 0.3100971694127588, + "grad_norm": 1.8397547006607056, + "learning_rate": 4.9689902830587246e-05, + "loss": 0.0745, + "step": 734 + }, + { + "epoch": 0.3109421208280524, + "grad_norm": 1.6901930570602417, + "learning_rate": 4.968905787917195e-05, + "loss": 0.1161, + "step": 736 + }, + { + "epoch": 0.311787072243346, + "grad_norm": 1.9397594928741455, + "learning_rate": 4.9688212927756653e-05, + "loss": 0.0964, + "step": 738 + }, + { + "epoch": 0.3126320236586396, + "grad_norm": 1.4291322231292725, + "learning_rate": 4.968736797634137e-05, + "loss": 0.0987, + "step": 740 + }, + { + "epoch": 0.31347697507393324, + "grad_norm": 1.0510236024856567, + "learning_rate": 4.968652302492607e-05, + "loss": 0.0831, + "step": 742 + }, + { + "epoch": 0.31432192648922685, + "grad_norm": 1.2731682062149048, + "learning_rate": 4.9685678073510775e-05, + "loss": 0.0792, + "step": 744 + }, + { + "epoch": 0.31516687790452047, + "grad_norm": 1.3949739933013916, + "learning_rate": 4.968483312209548e-05, + "loss": 0.0904, + "step": 746 + }, + { + "epoch": 0.3160118293198141, + "grad_norm": 1.6889134645462036, + "learning_rate": 4.968398817068019e-05, + "loss": 0.0752, + "step": 748 + }, + { + "epoch": 0.31685678073510776, + "grad_norm": 1.578761339187622, + "learning_rate": 4.9683143219264897e-05, + "loss": 0.0781, + "step": 750 + }, + { + "epoch": 0.3177017321504014, + "grad_norm": 1.338944435119629, + "learning_rate": 4.9682298267849604e-05, + "loss": 0.0753, + "step": 752 + }, + { + "epoch": 0.318546683565695, + "grad_norm": 1.8360531330108643, + "learning_rate": 4.9681453316434304e-05, + "loss": 0.1355, + "step": 754 + }, + { + "epoch": 0.3193916349809886, + "grad_norm": 1.851449966430664, + "learning_rate": 4.968060836501902e-05, + "loss": 0.1004, + "step": 756 + }, + { + "epoch": 0.3202365863962822, + "grad_norm": 1.6657130718231201, + "learning_rate": 4.967976341360372e-05, + "loss": 0.1208, + "step": 758 + }, + { + "epoch": 0.32108153781157583, + "grad_norm": 1.2685651779174805, + "learning_rate": 4.9678918462188426e-05, + "loss": 0.0878, + "step": 760 + }, + { + "epoch": 0.32192648922686945, + "grad_norm": 1.2503762245178223, + "learning_rate": 4.967807351077313e-05, + "loss": 0.084, + "step": 762 + }, + { + "epoch": 0.32277144064216307, + "grad_norm": 1.9573127031326294, + "learning_rate": 4.967722855935784e-05, + "loss": 0.1035, + "step": 764 + }, + { + "epoch": 0.3236163920574567, + "grad_norm": 1.7501864433288574, + "learning_rate": 4.967638360794255e-05, + "loss": 0.1081, + "step": 766 + }, + { + "epoch": 0.3244613434727503, + "grad_norm": 1.4179309606552124, + "learning_rate": 4.9675538656527255e-05, + "loss": 0.0809, + "step": 768 + }, + { + "epoch": 0.3253062948880439, + "grad_norm": 1.814603328704834, + "learning_rate": 4.9674693705111955e-05, + "loss": 0.1207, + "step": 770 + }, + { + "epoch": 0.3261512463033376, + "grad_norm": 2.027031660079956, + "learning_rate": 4.967384875369667e-05, + "loss": 0.1055, + "step": 772 + }, + { + "epoch": 0.3269961977186312, + "grad_norm": 1.419382095336914, + "learning_rate": 4.967300380228137e-05, + "loss": 0.1301, + "step": 774 + }, + { + "epoch": 0.3278411491339248, + "grad_norm": 1.9750804901123047, + "learning_rate": 4.9672158850866077e-05, + "loss": 0.0979, + "step": 776 + }, + { + "epoch": 0.32868610054921843, + "grad_norm": 1.274034857749939, + "learning_rate": 4.9671313899450784e-05, + "loss": 0.1249, + "step": 778 + }, + { + "epoch": 0.32953105196451205, + "grad_norm": 1.4384434223175049, + "learning_rate": 4.967046894803549e-05, + "loss": 0.0958, + "step": 780 + }, + { + "epoch": 0.33037600337980566, + "grad_norm": 1.5350993871688843, + "learning_rate": 4.96696239966202e-05, + "loss": 0.104, + "step": 782 + }, + { + "epoch": 0.3312209547950993, + "grad_norm": 1.434937596321106, + "learning_rate": 4.9668779045204905e-05, + "loss": 0.1108, + "step": 784 + }, + { + "epoch": 0.3320659062103929, + "grad_norm": 1.7014148235321045, + "learning_rate": 4.9667934093789606e-05, + "loss": 0.0979, + "step": 786 + }, + { + "epoch": 0.3329108576256865, + "grad_norm": 2.0817980766296387, + "learning_rate": 4.966708914237432e-05, + "loss": 0.1755, + "step": 788 + }, + { + "epoch": 0.3337558090409801, + "grad_norm": 1.4785884618759155, + "learning_rate": 4.966624419095902e-05, + "loss": 0.1047, + "step": 790 + }, + { + "epoch": 0.33460076045627374, + "grad_norm": 1.2489787340164185, + "learning_rate": 4.966539923954373e-05, + "loss": 0.0788, + "step": 792 + }, + { + "epoch": 0.3354457118715674, + "grad_norm": 1.1976639032363892, + "learning_rate": 4.9664554288128435e-05, + "loss": 0.0901, + "step": 794 + }, + { + "epoch": 0.336290663286861, + "grad_norm": 1.2119868993759155, + "learning_rate": 4.966370933671314e-05, + "loss": 0.0854, + "step": 796 + }, + { + "epoch": 0.33713561470215464, + "grad_norm": 1.3082143068313599, + "learning_rate": 4.966286438529785e-05, + "loss": 0.0958, + "step": 798 + }, + { + "epoch": 0.33798056611744826, + "grad_norm": 1.2046393156051636, + "learning_rate": 4.9662019433882556e-05, + "loss": 0.1144, + "step": 800 + }, + { + "epoch": 0.33798056611744826, + "eval_accuracy": 0.6734781858941197, + "eval_cer": 0.09568884144781531, + "eval_loss": 0.20647239685058594, + "eval_runtime": 849.7943, + "eval_samples_per_second": 13.648, + "eval_steps_per_second": 0.427, + "step": 800 + }, + { + "epoch": 0.33882551753274187, + "grad_norm": 1.3277101516723633, + "learning_rate": 4.966117448246726e-05, + "loss": 0.1023, + "step": 802 + }, + { + "epoch": 0.3396704689480355, + "grad_norm": 2.174595832824707, + "learning_rate": 4.966032953105197e-05, + "loss": 0.1265, + "step": 804 + }, + { + "epoch": 0.3405154203633291, + "grad_norm": 1.4048463106155396, + "learning_rate": 4.965948457963667e-05, + "loss": 0.0777, + "step": 806 + }, + { + "epoch": 0.3413603717786227, + "grad_norm": 1.428268313407898, + "learning_rate": 4.9658639628221385e-05, + "loss": 0.0767, + "step": 808 + }, + { + "epoch": 0.34220532319391633, + "grad_norm": 1.6813422441482544, + "learning_rate": 4.9657794676806085e-05, + "loss": 0.088, + "step": 810 + }, + { + "epoch": 0.34305027460920995, + "grad_norm": 1.722402811050415, + "learning_rate": 4.965694972539079e-05, + "loss": 0.135, + "step": 812 + }, + { + "epoch": 0.34389522602450356, + "grad_norm": 1.913205862045288, + "learning_rate": 4.96561047739755e-05, + "loss": 0.084, + "step": 814 + }, + { + "epoch": 0.34474017743979724, + "grad_norm": 2.381237506866455, + "learning_rate": 4.965525982256021e-05, + "loss": 0.0957, + "step": 816 + }, + { + "epoch": 0.34558512885509085, + "grad_norm": 1.3818658590316772, + "learning_rate": 4.9654414871144914e-05, + "loss": 0.0889, + "step": 818 + }, + { + "epoch": 0.34643008027038447, + "grad_norm": 2.095109701156616, + "learning_rate": 4.965356991972962e-05, + "loss": 0.1397, + "step": 820 + }, + { + "epoch": 0.3472750316856781, + "grad_norm": 1.6586017608642578, + "learning_rate": 4.965272496831432e-05, + "loss": 0.105, + "step": 822 + }, + { + "epoch": 0.3481199831009717, + "grad_norm": 0.987585723400116, + "learning_rate": 4.9651880016899036e-05, + "loss": 0.0574, + "step": 824 + }, + { + "epoch": 0.3489649345162653, + "grad_norm": 1.1450375318527222, + "learning_rate": 4.9651035065483736e-05, + "loss": 0.1137, + "step": 826 + }, + { + "epoch": 0.34980988593155893, + "grad_norm": 1.1953158378601074, + "learning_rate": 4.9650190114068443e-05, + "loss": 0.0888, + "step": 828 + }, + { + "epoch": 0.35065483734685254, + "grad_norm": 1.4406365156173706, + "learning_rate": 4.964934516265315e-05, + "loss": 0.0587, + "step": 830 + }, + { + "epoch": 0.35149978876214616, + "grad_norm": 2.095817804336548, + "learning_rate": 4.964850021123786e-05, + "loss": 0.0986, + "step": 832 + }, + { + "epoch": 0.3523447401774398, + "grad_norm": 1.560104489326477, + "learning_rate": 4.9647655259822565e-05, + "loss": 0.0834, + "step": 834 + }, + { + "epoch": 0.3531896915927334, + "grad_norm": 1.1767041683197021, + "learning_rate": 4.964681030840727e-05, + "loss": 0.0685, + "step": 836 + }, + { + "epoch": 0.35403464300802706, + "grad_norm": 1.3894157409667969, + "learning_rate": 4.964596535699197e-05, + "loss": 0.075, + "step": 838 + }, + { + "epoch": 0.3548795944233207, + "grad_norm": 2.41341233253479, + "learning_rate": 4.9645120405576687e-05, + "loss": 0.1638, + "step": 840 + }, + { + "epoch": 0.3557245458386143, + "grad_norm": 1.4480561017990112, + "learning_rate": 4.964427545416139e-05, + "loss": 0.0849, + "step": 842 + }, + { + "epoch": 0.3565694972539079, + "grad_norm": 1.3235077857971191, + "learning_rate": 4.9643430502746094e-05, + "loss": 0.0826, + "step": 844 + }, + { + "epoch": 0.3574144486692015, + "grad_norm": 1.5331876277923584, + "learning_rate": 4.96425855513308e-05, + "loss": 0.1023, + "step": 846 + }, + { + "epoch": 0.35825940008449514, + "grad_norm": 1.0759912729263306, + "learning_rate": 4.964174059991551e-05, + "loss": 0.1031, + "step": 848 + }, + { + "epoch": 0.35910435149978875, + "grad_norm": 2.00351881980896, + "learning_rate": 4.9640895648500216e-05, + "loss": 0.1153, + "step": 850 + }, + { + "epoch": 0.35994930291508237, + "grad_norm": 1.0817115306854248, + "learning_rate": 4.964005069708492e-05, + "loss": 0.0852, + "step": 852 + }, + { + "epoch": 0.360794254330376, + "grad_norm": 2.143852472305298, + "learning_rate": 4.9639205745669623e-05, + "loss": 0.1136, + "step": 854 + }, + { + "epoch": 0.3616392057456696, + "grad_norm": 1.8244491815567017, + "learning_rate": 4.963836079425434e-05, + "loss": 0.0983, + "step": 856 + }, + { + "epoch": 0.36248415716096327, + "grad_norm": 1.4860317707061768, + "learning_rate": 4.963751584283904e-05, + "loss": 0.0958, + "step": 858 + }, + { + "epoch": 0.3633291085762569, + "grad_norm": 2.0593981742858887, + "learning_rate": 4.9636670891423745e-05, + "loss": 0.101, + "step": 860 + }, + { + "epoch": 0.3641740599915505, + "grad_norm": 1.366678237915039, + "learning_rate": 4.963582594000845e-05, + "loss": 0.0713, + "step": 862 + }, + { + "epoch": 0.3650190114068441, + "grad_norm": 1.5878137350082397, + "learning_rate": 4.963498098859316e-05, + "loss": 0.0652, + "step": 864 + }, + { + "epoch": 0.36586396282213773, + "grad_norm": 1.733601689338684, + "learning_rate": 4.9634136037177867e-05, + "loss": 0.0886, + "step": 866 + }, + { + "epoch": 0.36670891423743135, + "grad_norm": 1.2537791728973389, + "learning_rate": 4.9633291085762574e-05, + "loss": 0.0853, + "step": 868 + }, + { + "epoch": 0.36755386565272496, + "grad_norm": 1.5744574069976807, + "learning_rate": 4.9632446134347274e-05, + "loss": 0.0706, + "step": 870 + }, + { + "epoch": 0.3683988170680186, + "grad_norm": 2.3552136421203613, + "learning_rate": 4.963160118293199e-05, + "loss": 0.1153, + "step": 872 + }, + { + "epoch": 0.3692437684833122, + "grad_norm": 1.0820305347442627, + "learning_rate": 4.963075623151669e-05, + "loss": 0.0671, + "step": 874 + }, + { + "epoch": 0.3700887198986058, + "grad_norm": 1.6050610542297363, + "learning_rate": 4.9629911280101396e-05, + "loss": 0.0934, + "step": 876 + }, + { + "epoch": 0.3709336713138994, + "grad_norm": 1.5215171575546265, + "learning_rate": 4.96290663286861e-05, + "loss": 0.1586, + "step": 878 + }, + { + "epoch": 0.3717786227291931, + "grad_norm": 1.4043138027191162, + "learning_rate": 4.962822137727081e-05, + "loss": 0.0879, + "step": 880 + }, + { + "epoch": 0.3726235741444867, + "grad_norm": 0.8804981112480164, + "learning_rate": 4.962737642585552e-05, + "loss": 0.0489, + "step": 882 + }, + { + "epoch": 0.37346852555978033, + "grad_norm": 1.3882102966308594, + "learning_rate": 4.9626531474440225e-05, + "loss": 0.0761, + "step": 884 + }, + { + "epoch": 0.37431347697507394, + "grad_norm": 1.4841866493225098, + "learning_rate": 4.9625686523024925e-05, + "loss": 0.0844, + "step": 886 + }, + { + "epoch": 0.37515842839036756, + "grad_norm": 1.3954799175262451, + "learning_rate": 4.962484157160964e-05, + "loss": 0.0709, + "step": 888 + }, + { + "epoch": 0.3760033798056612, + "grad_norm": 1.3372573852539062, + "learning_rate": 4.962399662019434e-05, + "loss": 0.096, + "step": 890 + }, + { + "epoch": 0.3768483312209548, + "grad_norm": 1.2923667430877686, + "learning_rate": 4.962315166877905e-05, + "loss": 0.1023, + "step": 892 + }, + { + "epoch": 0.3776932826362484, + "grad_norm": 1.5196459293365479, + "learning_rate": 4.9622306717363754e-05, + "loss": 0.115, + "step": 894 + }, + { + "epoch": 0.378538234051542, + "grad_norm": 1.83379065990448, + "learning_rate": 4.962146176594846e-05, + "loss": 0.0993, + "step": 896 + }, + { + "epoch": 0.37938318546683564, + "grad_norm": 1.5020735263824463, + "learning_rate": 4.962061681453317e-05, + "loss": 0.0853, + "step": 898 + }, + { + "epoch": 0.38022813688212925, + "grad_norm": 1.7137893438339233, + "learning_rate": 4.9619771863117875e-05, + "loss": 0.1109, + "step": 900 + }, + { + "epoch": 0.38022813688212925, + "eval_accuracy": 0.6778754957751336, + "eval_cer": 0.09363338832185043, + "eval_loss": 0.20783965289592743, + "eval_runtime": 856.7083, + "eval_samples_per_second": 13.538, + "eval_steps_per_second": 0.424, + "step": 900 + }, + { + "epoch": 0.3810730882974229, + "grad_norm": 1.7347830533981323, + "learning_rate": 4.9618926911702576e-05, + "loss": 0.1199, + "step": 902 + }, + { + "epoch": 0.38191803971271654, + "grad_norm": 1.9052618741989136, + "learning_rate": 4.961808196028729e-05, + "loss": 0.1172, + "step": 904 + }, + { + "epoch": 0.38276299112801015, + "grad_norm": 1.5322483777999878, + "learning_rate": 4.961723700887199e-05, + "loss": 0.0881, + "step": 906 + }, + { + "epoch": 0.38360794254330377, + "grad_norm": 0.9071521759033203, + "learning_rate": 4.9616392057456704e-05, + "loss": 0.0793, + "step": 908 + }, + { + "epoch": 0.3844528939585974, + "grad_norm": 1.6952403783798218, + "learning_rate": 4.9615547106041405e-05, + "loss": 0.0917, + "step": 910 + }, + { + "epoch": 0.385297845373891, + "grad_norm": 1.9736824035644531, + "learning_rate": 4.961470215462611e-05, + "loss": 0.09, + "step": 912 + }, + { + "epoch": 0.3861427967891846, + "grad_norm": 1.999121904373169, + "learning_rate": 4.961385720321082e-05, + "loss": 0.1251, + "step": 914 + }, + { + "epoch": 0.38698774820447823, + "grad_norm": 1.406522512435913, + "learning_rate": 4.9613012251795526e-05, + "loss": 0.0969, + "step": 916 + }, + { + "epoch": 0.38783269961977185, + "grad_norm": 1.3701584339141846, + "learning_rate": 4.9612167300380233e-05, + "loss": 0.0932, + "step": 918 + }, + { + "epoch": 0.38867765103506546, + "grad_norm": 1.0016272068023682, + "learning_rate": 4.961132234896494e-05, + "loss": 0.0597, + "step": 920 + }, + { + "epoch": 0.3895226024503591, + "grad_norm": 1.3503810167312622, + "learning_rate": 4.961047739754964e-05, + "loss": 0.071, + "step": 922 + }, + { + "epoch": 0.39036755386565275, + "grad_norm": 1.599910855293274, + "learning_rate": 4.9609632446134355e-05, + "loss": 0.099, + "step": 924 + }, + { + "epoch": 0.39121250528094637, + "grad_norm": 1.4374080896377563, + "learning_rate": 4.9608787494719055e-05, + "loss": 0.0792, + "step": 926 + }, + { + "epoch": 0.39205745669624, + "grad_norm": 2.009080648422241, + "learning_rate": 4.960794254330376e-05, + "loss": 0.1313, + "step": 928 + }, + { + "epoch": 0.3929024081115336, + "grad_norm": 1.5988420248031616, + "learning_rate": 4.960709759188847e-05, + "loss": 0.1013, + "step": 930 + }, + { + "epoch": 0.3937473595268272, + "grad_norm": 1.9358876943588257, + "learning_rate": 4.960625264047318e-05, + "loss": 0.1015, + "step": 932 + }, + { + "epoch": 0.3945923109421208, + "grad_norm": 1.4540034532546997, + "learning_rate": 4.9605407689057884e-05, + "loss": 0.0727, + "step": 934 + }, + { + "epoch": 0.39543726235741444, + "grad_norm": 1.400652289390564, + "learning_rate": 4.960456273764259e-05, + "loss": 0.0867, + "step": 936 + }, + { + "epoch": 0.39628221377270806, + "grad_norm": 1.7808191776275635, + "learning_rate": 4.960371778622729e-05, + "loss": 0.1269, + "step": 938 + }, + { + "epoch": 0.3971271651880017, + "grad_norm": 1.5522500276565552, + "learning_rate": 4.9602872834812006e-05, + "loss": 0.1115, + "step": 940 + }, + { + "epoch": 0.3979721166032953, + "grad_norm": 1.5305380821228027, + "learning_rate": 4.9602027883396706e-05, + "loss": 0.0993, + "step": 942 + }, + { + "epoch": 0.3988170680185889, + "grad_norm": 1.0456280708312988, + "learning_rate": 4.9601182931981413e-05, + "loss": 0.0775, + "step": 944 + }, + { + "epoch": 0.3996620194338826, + "grad_norm": 1.5103769302368164, + "learning_rate": 4.960033798056612e-05, + "loss": 0.0801, + "step": 946 + }, + { + "epoch": 0.4005069708491762, + "grad_norm": 1.217235803604126, + "learning_rate": 4.959949302915083e-05, + "loss": 0.0951, + "step": 948 + }, + { + "epoch": 0.4013519222644698, + "grad_norm": 1.5151118040084839, + "learning_rate": 4.9598648077735535e-05, + "loss": 0.1016, + "step": 950 + }, + { + "epoch": 0.4021968736797634, + "grad_norm": 1.443966269493103, + "learning_rate": 4.959780312632024e-05, + "loss": 0.1283, + "step": 952 + }, + { + "epoch": 0.40304182509505704, + "grad_norm": 1.539581537246704, + "learning_rate": 4.959695817490494e-05, + "loss": 0.115, + "step": 954 + }, + { + "epoch": 0.40388677651035065, + "grad_norm": 1.4787349700927734, + "learning_rate": 4.959611322348966e-05, + "loss": 0.1022, + "step": 956 + }, + { + "epoch": 0.40473172792564427, + "grad_norm": 2.0702366828918457, + "learning_rate": 4.959526827207436e-05, + "loss": 0.1072, + "step": 958 + }, + { + "epoch": 0.4055766793409379, + "grad_norm": 1.1393963098526, + "learning_rate": 4.9594423320659064e-05, + "loss": 0.1219, + "step": 960 + }, + { + "epoch": 0.4064216307562315, + "grad_norm": 0.837650716304779, + "learning_rate": 4.959357836924377e-05, + "loss": 0.0657, + "step": 962 + }, + { + "epoch": 0.4072665821715251, + "grad_norm": 1.5785096883773804, + "learning_rate": 4.959273341782848e-05, + "loss": 0.0883, + "step": 964 + }, + { + "epoch": 0.40811153358681873, + "grad_norm": 1.83567214012146, + "learning_rate": 4.9591888466413186e-05, + "loss": 0.0854, + "step": 966 + }, + { + "epoch": 0.4089564850021124, + "grad_norm": 1.9943451881408691, + "learning_rate": 4.959104351499789e-05, + "loss": 0.133, + "step": 968 + }, + { + "epoch": 0.409801436417406, + "grad_norm": 1.8926469087600708, + "learning_rate": 4.9590198563582593e-05, + "loss": 0.1113, + "step": 970 + }, + { + "epoch": 0.41064638783269963, + "grad_norm": 1.2133026123046875, + "learning_rate": 4.958935361216731e-05, + "loss": 0.0637, + "step": 972 + }, + { + "epoch": 0.41149133924799325, + "grad_norm": 1.2940943241119385, + "learning_rate": 4.958850866075201e-05, + "loss": 0.0829, + "step": 974 + }, + { + "epoch": 0.41233629066328686, + "grad_norm": 1.3455214500427246, + "learning_rate": 4.9587663709336715e-05, + "loss": 0.0953, + "step": 976 + }, + { + "epoch": 0.4131812420785805, + "grad_norm": 1.8440299034118652, + "learning_rate": 4.958681875792142e-05, + "loss": 0.1046, + "step": 978 + }, + { + "epoch": 0.4140261934938741, + "grad_norm": 1.57143235206604, + "learning_rate": 4.958597380650613e-05, + "loss": 0.1112, + "step": 980 + }, + { + "epoch": 0.4148711449091677, + "grad_norm": 1.4068124294281006, + "learning_rate": 4.958512885509084e-05, + "loss": 0.0852, + "step": 982 + }, + { + "epoch": 0.4157160963244613, + "grad_norm": 1.538854718208313, + "learning_rate": 4.9584283903675544e-05, + "loss": 0.0878, + "step": 984 + }, + { + "epoch": 0.41656104773975494, + "grad_norm": 0.881242573261261, + "learning_rate": 4.9583438952260244e-05, + "loss": 0.0786, + "step": 986 + }, + { + "epoch": 0.41740599915504856, + "grad_norm": 1.5759536027908325, + "learning_rate": 4.958259400084496e-05, + "loss": 0.0756, + "step": 988 + }, + { + "epoch": 0.41825095057034223, + "grad_norm": 1.311062216758728, + "learning_rate": 4.958174904942966e-05, + "loss": 0.0629, + "step": 990 + }, + { + "epoch": 0.41909590198563584, + "grad_norm": 1.1126432418823242, + "learning_rate": 4.9580904098014366e-05, + "loss": 0.0562, + "step": 992 + }, + { + "epoch": 0.41994085340092946, + "grad_norm": 1.553884744644165, + "learning_rate": 4.958005914659907e-05, + "loss": 0.1244, + "step": 994 + }, + { + "epoch": 0.4207858048162231, + "grad_norm": 2.2669692039489746, + "learning_rate": 4.957921419518378e-05, + "loss": 0.098, + "step": 996 + }, + { + "epoch": 0.4216307562315167, + "grad_norm": 1.335403323173523, + "learning_rate": 4.957836924376849e-05, + "loss": 0.0727, + "step": 998 + }, + { + "epoch": 0.4224757076468103, + "grad_norm": 3.8670544624328613, + "learning_rate": 4.9577524292353195e-05, + "loss": 0.1018, + "step": 1000 + }, + { + "epoch": 0.4224757076468103, + "eval_accuracy": 0.6790826004483531, + "eval_cer": 0.09158675688312147, + "eval_loss": 0.20456762611865997, + "eval_runtime": 853.1585, + "eval_samples_per_second": 13.594, + "eval_steps_per_second": 0.425, + "step": 1000 + }, + { + "epoch": 0.4233206590621039, + "grad_norm": 1.5914243459701538, + "learning_rate": 4.9576679340937895e-05, + "loss": 0.0926, + "step": 1002 + }, + { + "epoch": 0.42416561047739754, + "grad_norm": 2.1932218074798584, + "learning_rate": 4.957583438952261e-05, + "loss": 0.1381, + "step": 1004 + }, + { + "epoch": 0.42501056189269115, + "grad_norm": 1.216361165046692, + "learning_rate": 4.957498943810731e-05, + "loss": 0.0675, + "step": 1006 + }, + { + "epoch": 0.42585551330798477, + "grad_norm": 1.8609379529953003, + "learning_rate": 4.9574144486692023e-05, + "loss": 0.0891, + "step": 1008 + }, + { + "epoch": 0.42670046472327844, + "grad_norm": 1.5109552145004272, + "learning_rate": 4.9573299535276724e-05, + "loss": 0.0534, + "step": 1010 + }, + { + "epoch": 0.42754541613857205, + "grad_norm": 2.247575521469116, + "learning_rate": 4.957245458386143e-05, + "loss": 0.1475, + "step": 1012 + }, + { + "epoch": 0.42839036755386567, + "grad_norm": 1.9921953678131104, + "learning_rate": 4.957160963244614e-05, + "loss": 0.1092, + "step": 1014 + }, + { + "epoch": 0.4292353189691593, + "grad_norm": 1.308531641960144, + "learning_rate": 4.9570764681030845e-05, + "loss": 0.048, + "step": 1016 + }, + { + "epoch": 0.4300802703844529, + "grad_norm": 1.459812045097351, + "learning_rate": 4.956991972961555e-05, + "loss": 0.0829, + "step": 1018 + }, + { + "epoch": 0.4309252217997465, + "grad_norm": 1.1062469482421875, + "learning_rate": 4.956907477820026e-05, + "loss": 0.0761, + "step": 1020 + }, + { + "epoch": 0.43177017321504013, + "grad_norm": 0.9959312081336975, + "learning_rate": 4.956822982678496e-05, + "loss": 0.0776, + "step": 1022 + }, + { + "epoch": 0.43261512463033375, + "grad_norm": 2.045827627182007, + "learning_rate": 4.9567384875369674e-05, + "loss": 0.0805, + "step": 1024 + }, + { + "epoch": 0.43346007604562736, + "grad_norm": 1.678009271621704, + "learning_rate": 4.9566539923954375e-05, + "loss": 0.129, + "step": 1026 + }, + { + "epoch": 0.434305027460921, + "grad_norm": 1.8472874164581299, + "learning_rate": 4.956569497253908e-05, + "loss": 0.0689, + "step": 1028 + }, + { + "epoch": 0.4351499788762146, + "grad_norm": 1.6969095468521118, + "learning_rate": 4.956485002112379e-05, + "loss": 0.104, + "step": 1030 + }, + { + "epoch": 0.43599493029150826, + "grad_norm": 1.5743629932403564, + "learning_rate": 4.9564005069708496e-05, + "loss": 0.0823, + "step": 1032 + }, + { + "epoch": 0.4368398817068019, + "grad_norm": 1.9706130027770996, + "learning_rate": 4.9563160118293203e-05, + "loss": 0.1057, + "step": 1034 + }, + { + "epoch": 0.4376848331220955, + "grad_norm": 1.8118597269058228, + "learning_rate": 4.956231516687791e-05, + "loss": 0.0937, + "step": 1036 + }, + { + "epoch": 0.4385297845373891, + "grad_norm": 1.5070606470108032, + "learning_rate": 4.956147021546261e-05, + "loss": 0.0813, + "step": 1038 + }, + { + "epoch": 0.4393747359526827, + "grad_norm": 2.170083999633789, + "learning_rate": 4.9560625264047325e-05, + "loss": 0.1135, + "step": 1040 + }, + { + "epoch": 0.44021968736797634, + "grad_norm": 1.5932613611221313, + "learning_rate": 4.9559780312632025e-05, + "loss": 0.1101, + "step": 1042 + }, + { + "epoch": 0.44106463878326996, + "grad_norm": 1.2873493432998657, + "learning_rate": 4.955893536121673e-05, + "loss": 0.0622, + "step": 1044 + }, + { + "epoch": 0.4419095901985636, + "grad_norm": 1.2306898832321167, + "learning_rate": 4.955809040980144e-05, + "loss": 0.0947, + "step": 1046 + }, + { + "epoch": 0.4427545416138572, + "grad_norm": 1.1455901861190796, + "learning_rate": 4.955724545838615e-05, + "loss": 0.0694, + "step": 1048 + }, + { + "epoch": 0.4435994930291508, + "grad_norm": 1.363356351852417, + "learning_rate": 4.9556400506970854e-05, + "loss": 0.0776, + "step": 1050 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 2.466238260269165, + "learning_rate": 4.955555555555556e-05, + "loss": 0.1503, + "step": 1052 + }, + { + "epoch": 0.4452893958597381, + "grad_norm": 1.633049488067627, + "learning_rate": 4.955471060414026e-05, + "loss": 0.0902, + "step": 1054 + }, + { + "epoch": 0.4461343472750317, + "grad_norm": 1.3308897018432617, + "learning_rate": 4.9553865652724976e-05, + "loss": 0.1079, + "step": 1056 + }, + { + "epoch": 0.4469792986903253, + "grad_norm": 1.2919471263885498, + "learning_rate": 4.9553020701309676e-05, + "loss": 0.0943, + "step": 1058 + }, + { + "epoch": 0.44782425010561894, + "grad_norm": 2.11273193359375, + "learning_rate": 4.9552175749894383e-05, + "loss": 0.1257, + "step": 1060 + }, + { + "epoch": 0.44866920152091255, + "grad_norm": 1.6662590503692627, + "learning_rate": 4.955133079847909e-05, + "loss": 0.1103, + "step": 1062 + }, + { + "epoch": 0.44951415293620617, + "grad_norm": 1.2448116540908813, + "learning_rate": 4.95504858470638e-05, + "loss": 0.1195, + "step": 1064 + }, + { + "epoch": 0.4503591043514998, + "grad_norm": 1.083648920059204, + "learning_rate": 4.9549640895648505e-05, + "loss": 0.0719, + "step": 1066 + }, + { + "epoch": 0.4512040557667934, + "grad_norm": 1.6844433546066284, + "learning_rate": 4.954879594423321e-05, + "loss": 0.1019, + "step": 1068 + }, + { + "epoch": 0.452049007182087, + "grad_norm": 1.9984877109527588, + "learning_rate": 4.954795099281791e-05, + "loss": 0.1353, + "step": 1070 + }, + { + "epoch": 0.45289395859738063, + "grad_norm": 1.462343692779541, + "learning_rate": 4.954710604140263e-05, + "loss": 0.1035, + "step": 1072 + }, + { + "epoch": 0.45373891001267425, + "grad_norm": 2.0531160831451416, + "learning_rate": 4.954626108998733e-05, + "loss": 0.1279, + "step": 1074 + }, + { + "epoch": 0.4545838614279679, + "grad_norm": 3.241274833679199, + "learning_rate": 4.9545416138572034e-05, + "loss": 0.0799, + "step": 1076 + }, + { + "epoch": 0.45542881284326153, + "grad_norm": 1.5598374605178833, + "learning_rate": 4.954457118715674e-05, + "loss": 0.0958, + "step": 1078 + }, + { + "epoch": 0.45627376425855515, + "grad_norm": 0.751557469367981, + "learning_rate": 4.954372623574145e-05, + "loss": 0.0542, + "step": 1080 + }, + { + "epoch": 0.45711871567384876, + "grad_norm": 1.6809083223342896, + "learning_rate": 4.9542881284326156e-05, + "loss": 0.0907, + "step": 1082 + }, + { + "epoch": 0.4579636670891424, + "grad_norm": 1.2016775608062744, + "learning_rate": 4.954203633291086e-05, + "loss": 0.0635, + "step": 1084 + }, + { + "epoch": 0.458808618504436, + "grad_norm": 1.912942886352539, + "learning_rate": 4.9541191381495563e-05, + "loss": 0.1076, + "step": 1086 + }, + { + "epoch": 0.4596535699197296, + "grad_norm": 1.3507989645004272, + "learning_rate": 4.954034643008027e-05, + "loss": 0.0906, + "step": 1088 + }, + { + "epoch": 0.4604985213350232, + "grad_norm": 1.4633934497833252, + "learning_rate": 4.953950147866498e-05, + "loss": 0.1085, + "step": 1090 + }, + { + "epoch": 0.46134347275031684, + "grad_norm": 1.7697800397872925, + "learning_rate": 4.9538656527249685e-05, + "loss": 0.1049, + "step": 1092 + }, + { + "epoch": 0.46218842416561046, + "grad_norm": 1.7392349243164062, + "learning_rate": 4.953781157583439e-05, + "loss": 0.1521, + "step": 1094 + }, + { + "epoch": 0.46303337558090407, + "grad_norm": 1.0418444871902466, + "learning_rate": 4.953696662441909e-05, + "loss": 0.0685, + "step": 1096 + }, + { + "epoch": 0.46387832699619774, + "grad_norm": 1.2857134342193604, + "learning_rate": 4.953612167300381e-05, + "loss": 0.1004, + "step": 1098 + }, + { + "epoch": 0.46472327841149136, + "grad_norm": 1.4575908184051514, + "learning_rate": 4.953527672158851e-05, + "loss": 0.0841, + "step": 1100 + }, + { + "epoch": 0.46472327841149136, + "eval_accuracy": 0.6908087601310571, + "eval_cer": 0.0895048386954489, + "eval_loss": 0.19633953273296356, + "eval_runtime": 863.5349, + "eval_samples_per_second": 13.431, + "eval_steps_per_second": 0.42, + "step": 1100 + }, + { + "epoch": 0.465568229826785, + "grad_norm": 1.182370901107788, + "learning_rate": 4.9534431770173214e-05, + "loss": 0.085, + "step": 1102 + }, + { + "epoch": 0.4664131812420786, + "grad_norm": 1.535293698310852, + "learning_rate": 4.953358681875792e-05, + "loss": 0.077, + "step": 1104 + }, + { + "epoch": 0.4672581326573722, + "grad_norm": 1.1662472486495972, + "learning_rate": 4.953274186734263e-05, + "loss": 0.0968, + "step": 1106 + }, + { + "epoch": 0.4681030840726658, + "grad_norm": 3.085517406463623, + "learning_rate": 4.9531896915927336e-05, + "loss": 0.1435, + "step": 1108 + }, + { + "epoch": 0.46894803548795944, + "grad_norm": 1.6203645467758179, + "learning_rate": 4.953105196451204e-05, + "loss": 0.0944, + "step": 1110 + }, + { + "epoch": 0.46979298690325305, + "grad_norm": 1.4693485498428345, + "learning_rate": 4.9530207013096744e-05, + "loss": 0.0955, + "step": 1112 + }, + { + "epoch": 0.47063793831854667, + "grad_norm": 1.4041072130203247, + "learning_rate": 4.952936206168146e-05, + "loss": 0.0675, + "step": 1114 + }, + { + "epoch": 0.4714828897338403, + "grad_norm": 2.4356982707977295, + "learning_rate": 4.952851711026616e-05, + "loss": 0.0835, + "step": 1116 + }, + { + "epoch": 0.4723278411491339, + "grad_norm": 1.7264307737350464, + "learning_rate": 4.952767215885087e-05, + "loss": 0.1218, + "step": 1118 + }, + { + "epoch": 0.47317279256442757, + "grad_norm": 1.329499363899231, + "learning_rate": 4.952682720743557e-05, + "loss": 0.0959, + "step": 1120 + }, + { + "epoch": 0.4740177439797212, + "grad_norm": 1.0061687231063843, + "learning_rate": 4.952598225602028e-05, + "loss": 0.052, + "step": 1122 + }, + { + "epoch": 0.4748626953950148, + "grad_norm": 1.739823818206787, + "learning_rate": 4.952513730460499e-05, + "loss": 0.0817, + "step": 1124 + }, + { + "epoch": 0.4757076468103084, + "grad_norm": 1.7504117488861084, + "learning_rate": 4.9524292353189694e-05, + "loss": 0.0709, + "step": 1126 + }, + { + "epoch": 0.47655259822560203, + "grad_norm": 2.0645530223846436, + "learning_rate": 4.9523447401774394e-05, + "loss": 0.1052, + "step": 1128 + }, + { + "epoch": 0.47739754964089565, + "grad_norm": 1.7341753244400024, + "learning_rate": 4.952260245035911e-05, + "loss": 0.0761, + "step": 1130 + }, + { + "epoch": 0.47824250105618926, + "grad_norm": 1.0064074993133545, + "learning_rate": 4.952175749894381e-05, + "loss": 0.0867, + "step": 1132 + }, + { + "epoch": 0.4790874524714829, + "grad_norm": 2.0180881023406982, + "learning_rate": 4.952091254752852e-05, + "loss": 0.0961, + "step": 1134 + }, + { + "epoch": 0.4799324038867765, + "grad_norm": 1.345908761024475, + "learning_rate": 4.952006759611322e-05, + "loss": 0.1018, + "step": 1136 + }, + { + "epoch": 0.4807773553020701, + "grad_norm": 1.6427843570709229, + "learning_rate": 4.951922264469793e-05, + "loss": 0.1179, + "step": 1138 + }, + { + "epoch": 0.4816223067173637, + "grad_norm": 1.294453501701355, + "learning_rate": 4.951837769328264e-05, + "loss": 0.0795, + "step": 1140 + }, + { + "epoch": 0.4824672581326574, + "grad_norm": 1.0479216575622559, + "learning_rate": 4.9517532741867345e-05, + "loss": 0.075, + "step": 1142 + }, + { + "epoch": 0.483312209547951, + "grad_norm": 1.1413781642913818, + "learning_rate": 4.951668779045205e-05, + "loss": 0.0705, + "step": 1144 + }, + { + "epoch": 0.4841571609632446, + "grad_norm": 1.2139525413513184, + "learning_rate": 4.951584283903676e-05, + "loss": 0.075, + "step": 1146 + }, + { + "epoch": 0.48500211237853824, + "grad_norm": 1.3668715953826904, + "learning_rate": 4.951499788762146e-05, + "loss": 0.1006, + "step": 1148 + }, + { + "epoch": 0.48584706379383186, + "grad_norm": 1.5356974601745605, + "learning_rate": 4.9514152936206173e-05, + "loss": 0.0996, + "step": 1150 + }, + { + "epoch": 0.4866920152091255, + "grad_norm": 1.1368151903152466, + "learning_rate": 4.9513307984790874e-05, + "loss": 0.1125, + "step": 1152 + }, + { + "epoch": 0.4875369666244191, + "grad_norm": 1.661220908164978, + "learning_rate": 4.951246303337558e-05, + "loss": 0.0705, + "step": 1154 + }, + { + "epoch": 0.4883819180397127, + "grad_norm": 1.7183247804641724, + "learning_rate": 4.951161808196029e-05, + "loss": 0.086, + "step": 1156 + }, + { + "epoch": 0.4892268694550063, + "grad_norm": 1.1112664937973022, + "learning_rate": 4.9510773130544995e-05, + "loss": 0.0401, + "step": 1158 + }, + { + "epoch": 0.49007182087029993, + "grad_norm": 2.460999011993408, + "learning_rate": 4.95099281791297e-05, + "loss": 0.1188, + "step": 1160 + }, + { + "epoch": 0.4909167722855936, + "grad_norm": 1.5830085277557373, + "learning_rate": 4.950908322771441e-05, + "loss": 0.0991, + "step": 1162 + }, + { + "epoch": 0.4917617237008872, + "grad_norm": 1.55359947681427, + "learning_rate": 4.950823827629911e-05, + "loss": 0.0971, + "step": 1164 + }, + { + "epoch": 0.49260667511618084, + "grad_norm": 1.919572114944458, + "learning_rate": 4.9507393324883824e-05, + "loss": 0.0943, + "step": 1166 + }, + { + "epoch": 0.49345162653147445, + "grad_norm": 1.5410270690917969, + "learning_rate": 4.9506548373468525e-05, + "loss": 0.1009, + "step": 1168 + }, + { + "epoch": 0.49429657794676807, + "grad_norm": 1.9407888650894165, + "learning_rate": 4.950570342205323e-05, + "loss": 0.1136, + "step": 1170 + }, + { + "epoch": 0.4951415293620617, + "grad_norm": 1.451253056526184, + "learning_rate": 4.950485847063794e-05, + "loss": 0.0933, + "step": 1172 + }, + { + "epoch": 0.4959864807773553, + "grad_norm": 1.2166279554367065, + "learning_rate": 4.9504013519222646e-05, + "loss": 0.0745, + "step": 1174 + }, + { + "epoch": 0.4968314321926489, + "grad_norm": 1.455151081085205, + "learning_rate": 4.9503168567807353e-05, + "loss": 0.0863, + "step": 1176 + }, + { + "epoch": 0.49767638360794253, + "grad_norm": 1.4487229585647583, + "learning_rate": 4.950232361639206e-05, + "loss": 0.1104, + "step": 1178 + }, + { + "epoch": 0.49852133502323615, + "grad_norm": 1.898914098739624, + "learning_rate": 4.950147866497676e-05, + "loss": 0.0923, + "step": 1180 + }, + { + "epoch": 0.49936628643852976, + "grad_norm": 1.0237014293670654, + "learning_rate": 4.9500633713561475e-05, + "loss": 0.0685, + "step": 1182 + }, + { + "epoch": 0.5002112378538234, + "grad_norm": 2.124537944793701, + "learning_rate": 4.9499788762146176e-05, + "loss": 0.0698, + "step": 1184 + }, + { + "epoch": 0.501056189269117, + "grad_norm": 1.1420694589614868, + "learning_rate": 4.949894381073088e-05, + "loss": 0.0652, + "step": 1186 + }, + { + "epoch": 0.5019011406844106, + "grad_norm": 1.4151241779327393, + "learning_rate": 4.949809885931559e-05, + "loss": 0.0701, + "step": 1188 + }, + { + "epoch": 0.5027460920997042, + "grad_norm": 1.3990213871002197, + "learning_rate": 4.94972539079003e-05, + "loss": 0.0737, + "step": 1190 + }, + { + "epoch": 0.5035910435149978, + "grad_norm": 1.4167402982711792, + "learning_rate": 4.9496408956485004e-05, + "loss": 0.0812, + "step": 1192 + }, + { + "epoch": 0.5044359949302915, + "grad_norm": 1.370712399482727, + "learning_rate": 4.949556400506971e-05, + "loss": 0.0744, + "step": 1194 + }, + { + "epoch": 0.5052809463455852, + "grad_norm": 1.3453139066696167, + "learning_rate": 4.949471905365441e-05, + "loss": 0.0754, + "step": 1196 + }, + { + "epoch": 0.5061258977608788, + "grad_norm": 1.5076264142990112, + "learning_rate": 4.9493874102239126e-05, + "loss": 0.1056, + "step": 1198 + }, + { + "epoch": 0.5069708491761724, + "grad_norm": 1.5745254755020142, + "learning_rate": 4.9493029150823826e-05, + "loss": 0.1156, + "step": 1200 + }, + { + "epoch": 0.5069708491761724, + "eval_accuracy": 0.6875323331608898, + "eval_cer": 0.0944185184858456, + "eval_loss": 0.20057915151119232, + "eval_runtime": 866.1613, + "eval_samples_per_second": 13.39, + "eval_steps_per_second": 0.419, + "step": 1200 + }, + { + "epoch": 0.507815800591466, + "grad_norm": 1.8199948072433472, + "learning_rate": 4.9492184199408534e-05, + "loss": 0.0937, + "step": 1202 + }, + { + "epoch": 0.5086607520067596, + "grad_norm": 1.4204751253128052, + "learning_rate": 4.949133924799324e-05, + "loss": 0.0713, + "step": 1204 + }, + { + "epoch": 0.5095057034220533, + "grad_norm": 1.4452489614486694, + "learning_rate": 4.949049429657795e-05, + "loss": 0.0746, + "step": 1206 + }, + { + "epoch": 0.5103506548373469, + "grad_norm": 1.4586948156356812, + "learning_rate": 4.9489649345162655e-05, + "loss": 0.0855, + "step": 1208 + }, + { + "epoch": 0.5111956062526405, + "grad_norm": 1.7247511148452759, + "learning_rate": 4.948880439374736e-05, + "loss": 0.116, + "step": 1210 + }, + { + "epoch": 0.5120405576679341, + "grad_norm": 1.3063023090362549, + "learning_rate": 4.948795944233206e-05, + "loss": 0.1308, + "step": 1212 + }, + { + "epoch": 0.5128855090832277, + "grad_norm": 1.9612244367599487, + "learning_rate": 4.948711449091678e-05, + "loss": 0.1132, + "step": 1214 + }, + { + "epoch": 0.5137304604985213, + "grad_norm": 1.2515413761138916, + "learning_rate": 4.948626953950148e-05, + "loss": 0.0853, + "step": 1216 + }, + { + "epoch": 0.514575411913815, + "grad_norm": 1.5823932886123657, + "learning_rate": 4.948542458808619e-05, + "loss": 0.0938, + "step": 1218 + }, + { + "epoch": 0.5154203633291086, + "grad_norm": 1.1870125532150269, + "learning_rate": 4.948457963667089e-05, + "loss": 0.0729, + "step": 1220 + }, + { + "epoch": 0.5162653147444022, + "grad_norm": 1.5410425662994385, + "learning_rate": 4.94837346852556e-05, + "loss": 0.1073, + "step": 1222 + }, + { + "epoch": 0.5171102661596958, + "grad_norm": 1.2500526905059814, + "learning_rate": 4.9482889733840306e-05, + "loss": 0.0997, + "step": 1224 + }, + { + "epoch": 0.5179552175749894, + "grad_norm": 1.1845496892929077, + "learning_rate": 4.948204478242501e-05, + "loss": 0.0758, + "step": 1226 + }, + { + "epoch": 0.518800168990283, + "grad_norm": 1.2934046983718872, + "learning_rate": 4.9481199831009714e-05, + "loss": 0.0725, + "step": 1228 + }, + { + "epoch": 0.5196451204055766, + "grad_norm": 1.3043361902236938, + "learning_rate": 4.948035487959443e-05, + "loss": 0.0793, + "step": 1230 + }, + { + "epoch": 0.5204900718208703, + "grad_norm": 1.6191684007644653, + "learning_rate": 4.947950992817913e-05, + "loss": 0.1012, + "step": 1232 + }, + { + "epoch": 0.5213350232361639, + "grad_norm": 1.4432382583618164, + "learning_rate": 4.947866497676384e-05, + "loss": 0.0988, + "step": 1234 + }, + { + "epoch": 0.5221799746514575, + "grad_norm": 2.0699760913848877, + "learning_rate": 4.947782002534854e-05, + "loss": 0.1041, + "step": 1236 + }, + { + "epoch": 0.5230249260667512, + "grad_norm": 1.1539101600646973, + "learning_rate": 4.947697507393325e-05, + "loss": 0.0571, + "step": 1238 + }, + { + "epoch": 0.5238698774820448, + "grad_norm": 1.6627494096755981, + "learning_rate": 4.947613012251796e-05, + "loss": 0.1198, + "step": 1240 + }, + { + "epoch": 0.5247148288973384, + "grad_norm": 1.9634501934051514, + "learning_rate": 4.9475285171102664e-05, + "loss": 0.0775, + "step": 1242 + }, + { + "epoch": 0.5255597803126321, + "grad_norm": 2.4786336421966553, + "learning_rate": 4.947444021968737e-05, + "loss": 0.1651, + "step": 1244 + }, + { + "epoch": 0.5264047317279257, + "grad_norm": 1.4655958414077759, + "learning_rate": 4.947359526827208e-05, + "loss": 0.1079, + "step": 1246 + }, + { + "epoch": 0.5272496831432193, + "grad_norm": 1.3418774604797363, + "learning_rate": 4.947275031685678e-05, + "loss": 0.0967, + "step": 1248 + }, + { + "epoch": 0.5280946345585129, + "grad_norm": 0.9843947887420654, + "learning_rate": 4.947190536544149e-05, + "loss": 0.0791, + "step": 1250 + }, + { + "epoch": 0.5289395859738065, + "grad_norm": 1.4920053482055664, + "learning_rate": 4.947106041402619e-05, + "loss": 0.1008, + "step": 1252 + }, + { + "epoch": 0.5297845373891001, + "grad_norm": 1.4727956056594849, + "learning_rate": 4.94702154626109e-05, + "loss": 0.0876, + "step": 1254 + }, + { + "epoch": 0.5306294888043938, + "grad_norm": 1.8977808952331543, + "learning_rate": 4.946937051119561e-05, + "loss": 0.1361, + "step": 1256 + }, + { + "epoch": 0.5314744402196874, + "grad_norm": 1.456107258796692, + "learning_rate": 4.9468525559780315e-05, + "loss": 0.0878, + "step": 1258 + }, + { + "epoch": 0.532319391634981, + "grad_norm": 1.4788284301757812, + "learning_rate": 4.946768060836502e-05, + "loss": 0.0822, + "step": 1260 + }, + { + "epoch": 0.5331643430502746, + "grad_norm": 1.1848583221435547, + "learning_rate": 4.946683565694973e-05, + "loss": 0.0886, + "step": 1262 + }, + { + "epoch": 0.5340092944655682, + "grad_norm": 0.9302415251731873, + "learning_rate": 4.946599070553443e-05, + "loss": 0.0481, + "step": 1264 + }, + { + "epoch": 0.5348542458808618, + "grad_norm": 1.38499915599823, + "learning_rate": 4.9465145754119144e-05, + "loss": 0.0893, + "step": 1266 + }, + { + "epoch": 0.5356991972961554, + "grad_norm": 1.4074949026107788, + "learning_rate": 4.9464300802703844e-05, + "loss": 0.1123, + "step": 1268 + }, + { + "epoch": 0.5365441487114491, + "grad_norm": 1.7731876373291016, + "learning_rate": 4.946345585128855e-05, + "loss": 0.1557, + "step": 1270 + }, + { + "epoch": 0.5373891001267427, + "grad_norm": 1.3012690544128418, + "learning_rate": 4.946261089987326e-05, + "loss": 0.0454, + "step": 1272 + }, + { + "epoch": 0.5382340515420363, + "grad_norm": 1.2259783744812012, + "learning_rate": 4.9461765948457966e-05, + "loss": 0.0802, + "step": 1274 + }, + { + "epoch": 0.5390790029573299, + "grad_norm": 1.516466736793518, + "learning_rate": 4.946092099704267e-05, + "loss": 0.0518, + "step": 1276 + }, + { + "epoch": 0.5399239543726235, + "grad_norm": 1.8392707109451294, + "learning_rate": 4.946007604562738e-05, + "loss": 0.1264, + "step": 1278 + }, + { + "epoch": 0.5407689057879171, + "grad_norm": 1.0790072679519653, + "learning_rate": 4.945923109421208e-05, + "loss": 0.0821, + "step": 1280 + }, + { + "epoch": 0.5416138572032109, + "grad_norm": 1.1289983987808228, + "learning_rate": 4.9458386142796794e-05, + "loss": 0.1059, + "step": 1282 + }, + { + "epoch": 0.5424588086185045, + "grad_norm": 1.5571544170379639, + "learning_rate": 4.9457541191381495e-05, + "loss": 0.0883, + "step": 1284 + }, + { + "epoch": 0.5433037600337981, + "grad_norm": 1.0887531042099, + "learning_rate": 4.94566962399662e-05, + "loss": 0.0886, + "step": 1286 + }, + { + "epoch": 0.5441487114490917, + "grad_norm": 1.6409960985183716, + "learning_rate": 4.945585128855091e-05, + "loss": 0.1336, + "step": 1288 + }, + { + "epoch": 0.5449936628643853, + "grad_norm": 1.6622674465179443, + "learning_rate": 4.9455006337135616e-05, + "loss": 0.114, + "step": 1290 + }, + { + "epoch": 0.545838614279679, + "grad_norm": 1.7212952375411987, + "learning_rate": 4.9454161385720324e-05, + "loss": 0.1141, + "step": 1292 + }, + { + "epoch": 0.5466835656949726, + "grad_norm": 1.6510391235351562, + "learning_rate": 4.945331643430503e-05, + "loss": 0.1123, + "step": 1294 + }, + { + "epoch": 0.5475285171102662, + "grad_norm": 2.1044700145721436, + "learning_rate": 4.945247148288973e-05, + "loss": 0.0899, + "step": 1296 + }, + { + "epoch": 0.5483734685255598, + "grad_norm": 1.7959705591201782, + "learning_rate": 4.9451626531474445e-05, + "loss": 0.1213, + "step": 1298 + }, + { + "epoch": 0.5492184199408534, + "grad_norm": 0.6972851753234863, + "learning_rate": 4.9450781580059146e-05, + "loss": 0.0517, + "step": 1300 + }, + { + "epoch": 0.5492184199408534, + "eval_accuracy": 0.7086566649422315, + "eval_cer": 0.08514692520091392, + "eval_loss": 0.189493328332901, + "eval_runtime": 866.8131, + "eval_samples_per_second": 13.38, + "eval_steps_per_second": 0.419, + "step": 1300 + }, + { + "epoch": 0.550063371356147, + "grad_norm": 1.3948755264282227, + "learning_rate": 4.944993662864385e-05, + "loss": 0.058, + "step": 1302 + }, + { + "epoch": 0.5509083227714406, + "grad_norm": 1.276227593421936, + "learning_rate": 4.944909167722856e-05, + "loss": 0.0629, + "step": 1304 + }, + { + "epoch": 0.5517532741867343, + "grad_norm": 1.310956597328186, + "learning_rate": 4.944824672581327e-05, + "loss": 0.1241, + "step": 1306 + }, + { + "epoch": 0.5525982256020279, + "grad_norm": 1.3557322025299072, + "learning_rate": 4.9447401774397974e-05, + "loss": 0.081, + "step": 1308 + }, + { + "epoch": 0.5534431770173215, + "grad_norm": 1.0258203744888306, + "learning_rate": 4.944655682298268e-05, + "loss": 0.0719, + "step": 1310 + }, + { + "epoch": 0.5542881284326151, + "grad_norm": 1.1905845403671265, + "learning_rate": 4.944571187156738e-05, + "loss": 0.0712, + "step": 1312 + }, + { + "epoch": 0.5551330798479087, + "grad_norm": 1.176316499710083, + "learning_rate": 4.9444866920152096e-05, + "loss": 0.0587, + "step": 1314 + }, + { + "epoch": 0.5559780312632023, + "grad_norm": 9.898603439331055, + "learning_rate": 4.9444021968736796e-05, + "loss": 0.1039, + "step": 1316 + }, + { + "epoch": 0.556822982678496, + "grad_norm": 1.3090765476226807, + "learning_rate": 4.944317701732151e-05, + "loss": 0.0823, + "step": 1318 + }, + { + "epoch": 0.5576679340937896, + "grad_norm": 1.292490839958191, + "learning_rate": 4.944233206590621e-05, + "loss": 0.0615, + "step": 1320 + }, + { + "epoch": 0.5585128855090832, + "grad_norm": 1.8449876308441162, + "learning_rate": 4.944148711449092e-05, + "loss": 0.0872, + "step": 1322 + }, + { + "epoch": 0.5593578369243768, + "grad_norm": 1.1056883335113525, + "learning_rate": 4.9440642163075625e-05, + "loss": 0.059, + "step": 1324 + }, + { + "epoch": 0.5602027883396705, + "grad_norm": 1.3612993955612183, + "learning_rate": 4.943979721166033e-05, + "loss": 0.0717, + "step": 1326 + }, + { + "epoch": 0.5610477397549641, + "grad_norm": 1.218751311302185, + "learning_rate": 4.943895226024503e-05, + "loss": 0.0785, + "step": 1328 + }, + { + "epoch": 0.5618926911702578, + "grad_norm": 1.318162441253662, + "learning_rate": 4.943810730882975e-05, + "loss": 0.0926, + "step": 1330 + }, + { + "epoch": 0.5627376425855514, + "grad_norm": 1.3321706056594849, + "learning_rate": 4.943726235741445e-05, + "loss": 0.0892, + "step": 1332 + }, + { + "epoch": 0.563582594000845, + "grad_norm": 1.7704854011535645, + "learning_rate": 4.943641740599916e-05, + "loss": 0.1265, + "step": 1334 + }, + { + "epoch": 0.5644275454161386, + "grad_norm": 1.5705130100250244, + "learning_rate": 4.943557245458386e-05, + "loss": 0.1006, + "step": 1336 + }, + { + "epoch": 0.5652724968314322, + "grad_norm": 1.3921126127243042, + "learning_rate": 4.943472750316857e-05, + "loss": 0.0755, + "step": 1338 + }, + { + "epoch": 0.5661174482467258, + "grad_norm": 1.9573496580123901, + "learning_rate": 4.9433882551753276e-05, + "loss": 0.1235, + "step": 1340 + }, + { + "epoch": 0.5669623996620194, + "grad_norm": 1.466693639755249, + "learning_rate": 4.943303760033798e-05, + "loss": 0.0701, + "step": 1342 + }, + { + "epoch": 0.5678073510773131, + "grad_norm": 0.9446965456008911, + "learning_rate": 4.943219264892269e-05, + "loss": 0.1052, + "step": 1344 + }, + { + "epoch": 0.5686523024926067, + "grad_norm": 1.8884165287017822, + "learning_rate": 4.94313476975074e-05, + "loss": 0.0726, + "step": 1346 + }, + { + "epoch": 0.5694972539079003, + "grad_norm": 2.2107183933258057, + "learning_rate": 4.94305027460921e-05, + "loss": 0.1353, + "step": 1348 + }, + { + "epoch": 0.5703422053231939, + "grad_norm": 1.5481027364730835, + "learning_rate": 4.942965779467681e-05, + "loss": 0.0856, + "step": 1350 + }, + { + "epoch": 0.5711871567384875, + "grad_norm": 1.549285650253296, + "learning_rate": 4.942881284326151e-05, + "loss": 0.1037, + "step": 1352 + }, + { + "epoch": 0.5720321081537811, + "grad_norm": 1.2403219938278198, + "learning_rate": 4.942796789184622e-05, + "loss": 0.0832, + "step": 1354 + }, + { + "epoch": 0.5728770595690748, + "grad_norm": 0.9897046089172363, + "learning_rate": 4.942712294043093e-05, + "loss": 0.0664, + "step": 1356 + }, + { + "epoch": 0.5737220109843684, + "grad_norm": 1.9358055591583252, + "learning_rate": 4.9426277989015634e-05, + "loss": 0.0824, + "step": 1358 + }, + { + "epoch": 0.574566962399662, + "grad_norm": 1.8482145071029663, + "learning_rate": 4.942543303760034e-05, + "loss": 0.1046, + "step": 1360 + }, + { + "epoch": 0.5754119138149556, + "grad_norm": 1.4269449710845947, + "learning_rate": 4.942458808618505e-05, + "loss": 0.0983, + "step": 1362 + }, + { + "epoch": 0.5762568652302492, + "grad_norm": 1.4967262744903564, + "learning_rate": 4.942374313476975e-05, + "loss": 0.0965, + "step": 1364 + }, + { + "epoch": 0.5771018166455428, + "grad_norm": 1.8186310529708862, + "learning_rate": 4.942289818335446e-05, + "loss": 0.092, + "step": 1366 + }, + { + "epoch": 0.5779467680608364, + "grad_norm": 1.330729603767395, + "learning_rate": 4.942205323193916e-05, + "loss": 0.0979, + "step": 1368 + }, + { + "epoch": 0.5787917194761302, + "grad_norm": 1.4221312999725342, + "learning_rate": 4.942120828052387e-05, + "loss": 0.0516, + "step": 1370 + }, + { + "epoch": 0.5796366708914238, + "grad_norm": 1.670209288597107, + "learning_rate": 4.942036332910858e-05, + "loss": 0.0888, + "step": 1372 + }, + { + "epoch": 0.5804816223067174, + "grad_norm": 1.2513757944107056, + "learning_rate": 4.9419518377693285e-05, + "loss": 0.106, + "step": 1374 + }, + { + "epoch": 0.581326573722011, + "grad_norm": 0.760837197303772, + "learning_rate": 4.941867342627799e-05, + "loss": 0.0722, + "step": 1376 + }, + { + "epoch": 0.5821715251373046, + "grad_norm": 1.0159239768981934, + "learning_rate": 4.94178284748627e-05, + "loss": 0.0757, + "step": 1378 + }, + { + "epoch": 0.5830164765525983, + "grad_norm": 1.9194375276565552, + "learning_rate": 4.94169835234474e-05, + "loss": 0.0796, + "step": 1380 + }, + { + "epoch": 0.5838614279678919, + "grad_norm": 1.4433456659317017, + "learning_rate": 4.9416138572032114e-05, + "loss": 0.076, + "step": 1382 + }, + { + "epoch": 0.5847063793831855, + "grad_norm": 1.4545176029205322, + "learning_rate": 4.9415293620616814e-05, + "loss": 0.0777, + "step": 1384 + }, + { + "epoch": 0.5855513307984791, + "grad_norm": 0.9777308106422424, + "learning_rate": 4.941444866920152e-05, + "loss": 0.0548, + "step": 1386 + }, + { + "epoch": 0.5863962822137727, + "grad_norm": 1.5005024671554565, + "learning_rate": 4.941360371778623e-05, + "loss": 0.1143, + "step": 1388 + }, + { + "epoch": 0.5872412336290663, + "grad_norm": 1.571602463722229, + "learning_rate": 4.9412758766370936e-05, + "loss": 0.0929, + "step": 1390 + }, + { + "epoch": 0.5880861850443599, + "grad_norm": 1.8419545888900757, + "learning_rate": 4.941191381495564e-05, + "loss": 0.1039, + "step": 1392 + }, + { + "epoch": 0.5889311364596536, + "grad_norm": 1.9891055822372437, + "learning_rate": 4.941106886354035e-05, + "loss": 0.0944, + "step": 1394 + }, + { + "epoch": 0.5897760878749472, + "grad_norm": 1.0187172889709473, + "learning_rate": 4.941022391212505e-05, + "loss": 0.1026, + "step": 1396 + }, + { + "epoch": 0.5906210392902408, + "grad_norm": 1.8273286819458008, + "learning_rate": 4.9409378960709764e-05, + "loss": 0.1627, + "step": 1398 + }, + { + "epoch": 0.5914659907055344, + "grad_norm": 1.9127320051193237, + "learning_rate": 4.9408534009294465e-05, + "loss": 0.0871, + "step": 1400 + }, + { + "epoch": 0.5914659907055344, + "eval_accuracy": 0.6934816347646146, + "eval_cer": 0.08890496396340763, + "eval_loss": 0.19768354296684265, + "eval_runtime": 860.1969, + "eval_samples_per_second": 13.483, + "eval_steps_per_second": 0.422, + "step": 1400 + }, + { + "epoch": 0.592310942120828, + "grad_norm": 1.432771921157837, + "learning_rate": 4.940768905787917e-05, + "loss": 0.12, + "step": 1402 + }, + { + "epoch": 0.5931558935361216, + "grad_norm": 1.443217158317566, + "learning_rate": 4.940684410646388e-05, + "loss": 0.0944, + "step": 1404 + }, + { + "epoch": 0.5940008449514153, + "grad_norm": 1.263504981994629, + "learning_rate": 4.9405999155048586e-05, + "loss": 0.0651, + "step": 1406 + }, + { + "epoch": 0.5948457963667089, + "grad_norm": 1.656178593635559, + "learning_rate": 4.9405154203633294e-05, + "loss": 0.1205, + "step": 1408 + }, + { + "epoch": 0.5956907477820025, + "grad_norm": 1.8223587274551392, + "learning_rate": 4.9404309252218e-05, + "loss": 0.0985, + "step": 1410 + }, + { + "epoch": 0.5965356991972962, + "grad_norm": 1.6137700080871582, + "learning_rate": 4.94034643008027e-05, + "loss": 0.1026, + "step": 1412 + }, + { + "epoch": 0.5973806506125898, + "grad_norm": 1.2118284702301025, + "learning_rate": 4.9402619349387415e-05, + "loss": 0.1008, + "step": 1414 + }, + { + "epoch": 0.5982256020278834, + "grad_norm": 1.541785717010498, + "learning_rate": 4.9401774397972116e-05, + "loss": 0.1128, + "step": 1416 + }, + { + "epoch": 0.5990705534431771, + "grad_norm": 1.5790534019470215, + "learning_rate": 4.940092944655683e-05, + "loss": 0.1197, + "step": 1418 + }, + { + "epoch": 0.5999155048584707, + "grad_norm": 2.461165189743042, + "learning_rate": 4.940008449514153e-05, + "loss": 0.1317, + "step": 1420 + }, + { + "epoch": 0.6007604562737643, + "grad_norm": 1.9079132080078125, + "learning_rate": 4.939923954372624e-05, + "loss": 0.0902, + "step": 1422 + }, + { + "epoch": 0.6016054076890579, + "grad_norm": 1.5427287817001343, + "learning_rate": 4.9398394592310944e-05, + "loss": 0.0782, + "step": 1424 + }, + { + "epoch": 0.6024503591043515, + "grad_norm": 1.3272346258163452, + "learning_rate": 4.939754964089565e-05, + "loss": 0.0631, + "step": 1426 + }, + { + "epoch": 0.6032953105196451, + "grad_norm": 1.1318906545639038, + "learning_rate": 4.939670468948035e-05, + "loss": 0.0485, + "step": 1428 + }, + { + "epoch": 0.6041402619349387, + "grad_norm": 2.0333144664764404, + "learning_rate": 4.9395859738065066e-05, + "loss": 0.1254, + "step": 1430 + }, + { + "epoch": 0.6049852133502324, + "grad_norm": 1.9606693983078003, + "learning_rate": 4.9395014786649766e-05, + "loss": 0.1414, + "step": 1432 + }, + { + "epoch": 0.605830164765526, + "grad_norm": 1.3295642137527466, + "learning_rate": 4.939416983523448e-05, + "loss": 0.0697, + "step": 1434 + }, + { + "epoch": 0.6066751161808196, + "grad_norm": 1.227980613708496, + "learning_rate": 4.939332488381918e-05, + "loss": 0.0723, + "step": 1436 + }, + { + "epoch": 0.6075200675961132, + "grad_norm": 1.926112413406372, + "learning_rate": 4.939247993240389e-05, + "loss": 0.136, + "step": 1438 + }, + { + "epoch": 0.6083650190114068, + "grad_norm": 1.1189000606536865, + "learning_rate": 4.9391634980988595e-05, + "loss": 0.0709, + "step": 1440 + }, + { + "epoch": 0.6092099704267004, + "grad_norm": 1.3692779541015625, + "learning_rate": 4.93907900295733e-05, + "loss": 0.0687, + "step": 1442 + }, + { + "epoch": 0.6100549218419941, + "grad_norm": 1.1951595544815063, + "learning_rate": 4.938994507815801e-05, + "loss": 0.0743, + "step": 1444 + }, + { + "epoch": 0.6108998732572877, + "grad_norm": 1.0123379230499268, + "learning_rate": 4.938910012674272e-05, + "loss": 0.0779, + "step": 1446 + }, + { + "epoch": 0.6117448246725813, + "grad_norm": 1.1633543968200684, + "learning_rate": 4.938825517532742e-05, + "loss": 0.0938, + "step": 1448 + }, + { + "epoch": 0.6125897760878749, + "grad_norm": 1.457838773727417, + "learning_rate": 4.938741022391213e-05, + "loss": 0.0932, + "step": 1450 + }, + { + "epoch": 0.6134347275031685, + "grad_norm": 1.9920662641525269, + "learning_rate": 4.938656527249683e-05, + "loss": 0.1435, + "step": 1452 + }, + { + "epoch": 0.6142796789184621, + "grad_norm": 1.591558814048767, + "learning_rate": 4.938572032108154e-05, + "loss": 0.0985, + "step": 1454 + }, + { + "epoch": 0.6151246303337559, + "grad_norm": 1.4729034900665283, + "learning_rate": 4.9384875369666246e-05, + "loss": 0.1336, + "step": 1456 + }, + { + "epoch": 0.6159695817490495, + "grad_norm": 1.2558763027191162, + "learning_rate": 4.938403041825095e-05, + "loss": 0.0699, + "step": 1458 + }, + { + "epoch": 0.6168145331643431, + "grad_norm": 1.9105608463287354, + "learning_rate": 4.938318546683566e-05, + "loss": 0.1193, + "step": 1460 + }, + { + "epoch": 0.6176594845796367, + "grad_norm": 1.7269306182861328, + "learning_rate": 4.938234051542037e-05, + "loss": 0.0777, + "step": 1462 + }, + { + "epoch": 0.6185044359949303, + "grad_norm": 1.195877194404602, + "learning_rate": 4.938149556400507e-05, + "loss": 0.0568, + "step": 1464 + }, + { + "epoch": 0.6193493874102239, + "grad_norm": 1.5723657608032227, + "learning_rate": 4.938065061258978e-05, + "loss": 0.1203, + "step": 1466 + }, + { + "epoch": 0.6201943388255176, + "grad_norm": 0.8827709555625916, + "learning_rate": 4.937980566117448e-05, + "loss": 0.0517, + "step": 1468 + }, + { + "epoch": 0.6210392902408112, + "grad_norm": 1.3201031684875488, + "learning_rate": 4.937896070975919e-05, + "loss": 0.0948, + "step": 1470 + }, + { + "epoch": 0.6218842416561048, + "grad_norm": 0.9939825534820557, + "learning_rate": 4.93781157583439e-05, + "loss": 0.0537, + "step": 1472 + }, + { + "epoch": 0.6227291930713984, + "grad_norm": 1.3091474771499634, + "learning_rate": 4.9377270806928604e-05, + "loss": 0.0888, + "step": 1474 + }, + { + "epoch": 0.623574144486692, + "grad_norm": 1.2489527463912964, + "learning_rate": 4.937642585551331e-05, + "loss": 0.1017, + "step": 1476 + }, + { + "epoch": 0.6244190959019856, + "grad_norm": 1.2982828617095947, + "learning_rate": 4.937558090409802e-05, + "loss": 0.0805, + "step": 1478 + }, + { + "epoch": 0.6252640473172792, + "grad_norm": 1.9336445331573486, + "learning_rate": 4.937473595268272e-05, + "loss": 0.1048, + "step": 1480 + }, + { + "epoch": 0.6261089987325729, + "grad_norm": 1.5748566389083862, + "learning_rate": 4.937389100126743e-05, + "loss": 0.0779, + "step": 1482 + }, + { + "epoch": 0.6269539501478665, + "grad_norm": 1.3378995656967163, + "learning_rate": 4.937304604985213e-05, + "loss": 0.082, + "step": 1484 + }, + { + "epoch": 0.6277989015631601, + "grad_norm": 1.0639171600341797, + "learning_rate": 4.937220109843684e-05, + "loss": 0.051, + "step": 1486 + }, + { + "epoch": 0.6286438529784537, + "grad_norm": 1.6450079679489136, + "learning_rate": 4.937135614702155e-05, + "loss": 0.1282, + "step": 1488 + }, + { + "epoch": 0.6294888043937473, + "grad_norm": 1.433846354484558, + "learning_rate": 4.9370511195606255e-05, + "loss": 0.091, + "step": 1490 + }, + { + "epoch": 0.6303337558090409, + "grad_norm": 1.7269262075424194, + "learning_rate": 4.936966624419096e-05, + "loss": 0.0804, + "step": 1492 + }, + { + "epoch": 0.6311787072243346, + "grad_norm": 1.0868664979934692, + "learning_rate": 4.936882129277567e-05, + "loss": 0.0454, + "step": 1494 + }, + { + "epoch": 0.6320236586396282, + "grad_norm": 1.883876085281372, + "learning_rate": 4.936797634136037e-05, + "loss": 0.0747, + "step": 1496 + }, + { + "epoch": 0.6328686100549218, + "grad_norm": 1.3019837141036987, + "learning_rate": 4.9367131389945084e-05, + "loss": 0.0904, + "step": 1498 + }, + { + "epoch": 0.6337135614702155, + "grad_norm": 1.3057466745376587, + "learning_rate": 4.9366286438529784e-05, + "loss": 0.0998, + "step": 1500 + }, + { + "epoch": 0.6337135614702155, + "eval_accuracy": 0.6930505259527505, + "eval_cer": 0.08929311820178727, + "eval_loss": 0.19271579384803772, + "eval_runtime": 852.7052, + "eval_samples_per_second": 13.601, + "eval_steps_per_second": 0.426, + "step": 1500 + }, + { + "epoch": 0.6345585128855091, + "grad_norm": 1.0389184951782227, + "learning_rate": 4.936544148711449e-05, + "loss": 0.063, + "step": 1502 + }, + { + "epoch": 0.6354034643008027, + "grad_norm": 1.1744555234909058, + "learning_rate": 4.93645965356992e-05, + "loss": 0.0855, + "step": 1504 + }, + { + "epoch": 0.6362484157160964, + "grad_norm": 1.6268651485443115, + "learning_rate": 4.9363751584283906e-05, + "loss": 0.0518, + "step": 1506 + }, + { + "epoch": 0.63709336713139, + "grad_norm": 1.0523124933242798, + "learning_rate": 4.936290663286861e-05, + "loss": 0.0617, + "step": 1508 + }, + { + "epoch": 0.6379383185466836, + "grad_norm": 1.024778962135315, + "learning_rate": 4.936206168145332e-05, + "loss": 0.1021, + "step": 1510 + }, + { + "epoch": 0.6387832699619772, + "grad_norm": 1.15168297290802, + "learning_rate": 4.936121673003802e-05, + "loss": 0.076, + "step": 1512 + }, + { + "epoch": 0.6396282213772708, + "grad_norm": 1.4460582733154297, + "learning_rate": 4.9360371778622734e-05, + "loss": 0.0866, + "step": 1514 + }, + { + "epoch": 0.6404731727925644, + "grad_norm": 1.1238740682601929, + "learning_rate": 4.9359526827207435e-05, + "loss": 0.0708, + "step": 1516 + }, + { + "epoch": 0.641318124207858, + "grad_norm": 1.175075650215149, + "learning_rate": 4.935868187579215e-05, + "loss": 0.0893, + "step": 1518 + }, + { + "epoch": 0.6421630756231517, + "grad_norm": 1.2177211046218872, + "learning_rate": 4.935783692437685e-05, + "loss": 0.0703, + "step": 1520 + }, + { + "epoch": 0.6430080270384453, + "grad_norm": 1.4435173273086548, + "learning_rate": 4.9356991972961556e-05, + "loss": 0.0947, + "step": 1522 + }, + { + "epoch": 0.6438529784537389, + "grad_norm": 2.4668965339660645, + "learning_rate": 4.9356147021546264e-05, + "loss": 0.0882, + "step": 1524 + }, + { + "epoch": 0.6446979298690325, + "grad_norm": 1.096189260482788, + "learning_rate": 4.935530207013097e-05, + "loss": 0.0671, + "step": 1526 + }, + { + "epoch": 0.6455428812843261, + "grad_norm": 1.430290937423706, + "learning_rate": 4.935445711871567e-05, + "loss": 0.1009, + "step": 1528 + }, + { + "epoch": 0.6463878326996197, + "grad_norm": 1.5221188068389893, + "learning_rate": 4.9353612167300385e-05, + "loss": 0.0941, + "step": 1530 + }, + { + "epoch": 0.6472327841149134, + "grad_norm": 1.0545361042022705, + "learning_rate": 4.9352767215885086e-05, + "loss": 0.0741, + "step": 1532 + }, + { + "epoch": 0.648077735530207, + "grad_norm": 1.3876293897628784, + "learning_rate": 4.93519222644698e-05, + "loss": 0.0963, + "step": 1534 + }, + { + "epoch": 0.6489226869455006, + "grad_norm": 1.425647497177124, + "learning_rate": 4.93510773130545e-05, + "loss": 0.0768, + "step": 1536 + }, + { + "epoch": 0.6497676383607942, + "grad_norm": 2.1672873497009277, + "learning_rate": 4.935023236163921e-05, + "loss": 0.0723, + "step": 1538 + }, + { + "epoch": 0.6506125897760878, + "grad_norm": 1.7907317876815796, + "learning_rate": 4.9349387410223914e-05, + "loss": 0.0776, + "step": 1540 + }, + { + "epoch": 0.6514575411913816, + "grad_norm": 1.7650822401046753, + "learning_rate": 4.934854245880862e-05, + "loss": 0.0895, + "step": 1542 + }, + { + "epoch": 0.6523024926066752, + "grad_norm": 1.2962666749954224, + "learning_rate": 4.934769750739333e-05, + "loss": 0.0661, + "step": 1544 + }, + { + "epoch": 0.6531474440219688, + "grad_norm": 1.4323803186416626, + "learning_rate": 4.9346852555978036e-05, + "loss": 0.1123, + "step": 1546 + }, + { + "epoch": 0.6539923954372624, + "grad_norm": 1.3358235359191895, + "learning_rate": 4.9346007604562736e-05, + "loss": 0.0559, + "step": 1548 + }, + { + "epoch": 0.654837346852556, + "grad_norm": 1.4420747756958008, + "learning_rate": 4.934516265314745e-05, + "loss": 0.0939, + "step": 1550 + }, + { + "epoch": 0.6556822982678496, + "grad_norm": 1.780863881111145, + "learning_rate": 4.934431770173215e-05, + "loss": 0.1005, + "step": 1552 + }, + { + "epoch": 0.6565272496831432, + "grad_norm": 1.2848925590515137, + "learning_rate": 4.934347275031686e-05, + "loss": 0.0594, + "step": 1554 + }, + { + "epoch": 0.6573722010984369, + "grad_norm": 2.379051923751831, + "learning_rate": 4.9342627798901565e-05, + "loss": 0.1367, + "step": 1556 + }, + { + "epoch": 0.6582171525137305, + "grad_norm": 1.4313839673995972, + "learning_rate": 4.934178284748627e-05, + "loss": 0.065, + "step": 1558 + }, + { + "epoch": 0.6590621039290241, + "grad_norm": 0.9839615821838379, + "learning_rate": 4.934093789607098e-05, + "loss": 0.0803, + "step": 1560 + }, + { + "epoch": 0.6599070553443177, + "grad_norm": 2.2462387084960938, + "learning_rate": 4.934009294465569e-05, + "loss": 0.1078, + "step": 1562 + }, + { + "epoch": 0.6607520067596113, + "grad_norm": 1.4986047744750977, + "learning_rate": 4.933924799324039e-05, + "loss": 0.0967, + "step": 1564 + }, + { + "epoch": 0.6615969581749049, + "grad_norm": 1.3797781467437744, + "learning_rate": 4.93384030418251e-05, + "loss": 0.0631, + "step": 1566 + }, + { + "epoch": 0.6624419095901986, + "grad_norm": 2.0769073963165283, + "learning_rate": 4.93375580904098e-05, + "loss": 0.1283, + "step": 1568 + }, + { + "epoch": 0.6632868610054922, + "grad_norm": 0.8822681307792664, + "learning_rate": 4.933671313899451e-05, + "loss": 0.0781, + "step": 1570 + }, + { + "epoch": 0.6641318124207858, + "grad_norm": 1.346742033958435, + "learning_rate": 4.9335868187579216e-05, + "loss": 0.0983, + "step": 1572 + }, + { + "epoch": 0.6649767638360794, + "grad_norm": 1.2557731866836548, + "learning_rate": 4.933502323616392e-05, + "loss": 0.0548, + "step": 1574 + }, + { + "epoch": 0.665821715251373, + "grad_norm": 1.5680410861968994, + "learning_rate": 4.933417828474863e-05, + "loss": 0.0855, + "step": 1576 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 1.2320771217346191, + "learning_rate": 4.933333333333334e-05, + "loss": 0.0845, + "step": 1578 + }, + { + "epoch": 0.6675116180819602, + "grad_norm": 1.3170132637023926, + "learning_rate": 4.933248838191804e-05, + "loss": 0.067, + "step": 1580 + }, + { + "epoch": 0.6683565694972539, + "grad_norm": 0.8890075087547302, + "learning_rate": 4.933164343050275e-05, + "loss": 0.055, + "step": 1582 + }, + { + "epoch": 0.6692015209125475, + "grad_norm": 1.6186269521713257, + "learning_rate": 4.933079847908745e-05, + "loss": 0.0718, + "step": 1584 + }, + { + "epoch": 0.6700464723278412, + "grad_norm": 1.8913078308105469, + "learning_rate": 4.932995352767216e-05, + "loss": 0.1134, + "step": 1586 + }, + { + "epoch": 0.6708914237431348, + "grad_norm": 1.2571996450424194, + "learning_rate": 4.932910857625687e-05, + "loss": 0.0947, + "step": 1588 + }, + { + "epoch": 0.6717363751584284, + "grad_norm": 1.6301320791244507, + "learning_rate": 4.9328263624841574e-05, + "loss": 0.0976, + "step": 1590 + }, + { + "epoch": 0.672581326573722, + "grad_norm": 1.32038414478302, + "learning_rate": 4.932741867342628e-05, + "loss": 0.0967, + "step": 1592 + }, + { + "epoch": 0.6734262779890157, + "grad_norm": 0.9501339197158813, + "learning_rate": 4.932657372201099e-05, + "loss": 0.0651, + "step": 1594 + }, + { + "epoch": 0.6742712294043093, + "grad_norm": 1.1755475997924805, + "learning_rate": 4.932572877059569e-05, + "loss": 0.0383, + "step": 1596 + }, + { + "epoch": 0.6751161808196029, + "grad_norm": 1.402435302734375, + "learning_rate": 4.93248838191804e-05, + "loss": 0.0969, + "step": 1598 + }, + { + "epoch": 0.6759611322348965, + "grad_norm": 1.6371697187423706, + "learning_rate": 4.93240388677651e-05, + "loss": 0.1062, + "step": 1600 + }, + { + "epoch": 0.6759611322348965, + "eval_accuracy": 0.6960682876357993, + "eval_cer": 0.08430886491350335, + "eval_loss": 0.1908927857875824, + "eval_runtime": 855.2921, + "eval_samples_per_second": 13.56, + "eval_steps_per_second": 0.424, + "step": 1600 + }, + { + "epoch": 0.6768060836501901, + "grad_norm": 1.6198582649230957, + "learning_rate": 4.932319391634981e-05, + "loss": 0.0832, + "step": 1602 + }, + { + "epoch": 0.6776510350654837, + "grad_norm": 1.237261414527893, + "learning_rate": 4.932234896493452e-05, + "loss": 0.0604, + "step": 1604 + }, + { + "epoch": 0.6784959864807774, + "grad_norm": 1.5254098176956177, + "learning_rate": 4.9321504013519225e-05, + "loss": 0.0925, + "step": 1606 + }, + { + "epoch": 0.679340937896071, + "grad_norm": 1.858588457107544, + "learning_rate": 4.932065906210393e-05, + "loss": 0.1063, + "step": 1608 + }, + { + "epoch": 0.6801858893113646, + "grad_norm": 1.6307566165924072, + "learning_rate": 4.931981411068864e-05, + "loss": 0.1097, + "step": 1610 + }, + { + "epoch": 0.6810308407266582, + "grad_norm": 1.245123267173767, + "learning_rate": 4.931896915927334e-05, + "loss": 0.0468, + "step": 1612 + }, + { + "epoch": 0.6818757921419518, + "grad_norm": 1.2134346961975098, + "learning_rate": 4.9318124207858054e-05, + "loss": 0.0527, + "step": 1614 + }, + { + "epoch": 0.6827207435572454, + "grad_norm": 1.724666953086853, + "learning_rate": 4.9317279256442754e-05, + "loss": 0.067, + "step": 1616 + }, + { + "epoch": 0.683565694972539, + "grad_norm": 1.303871750831604, + "learning_rate": 4.931643430502747e-05, + "loss": 0.089, + "step": 1618 + }, + { + "epoch": 0.6844106463878327, + "grad_norm": 1.2575643062591553, + "learning_rate": 4.931558935361217e-05, + "loss": 0.0684, + "step": 1620 + }, + { + "epoch": 0.6852555978031263, + "grad_norm": 0.8384171724319458, + "learning_rate": 4.9314744402196876e-05, + "loss": 0.0726, + "step": 1622 + }, + { + "epoch": 0.6861005492184199, + "grad_norm": 1.2416189908981323, + "learning_rate": 4.931389945078158e-05, + "loss": 0.0519, + "step": 1624 + }, + { + "epoch": 0.6869455006337135, + "grad_norm": 1.111448884010315, + "learning_rate": 4.931305449936629e-05, + "loss": 0.0773, + "step": 1626 + }, + { + "epoch": 0.6877904520490071, + "grad_norm": 1.8492799997329712, + "learning_rate": 4.931220954795099e-05, + "loss": 0.0861, + "step": 1628 + }, + { + "epoch": 0.6886354034643009, + "grad_norm": 1.129862666130066, + "learning_rate": 4.9311364596535704e-05, + "loss": 0.063, + "step": 1630 + }, + { + "epoch": 0.6894803548795945, + "grad_norm": 1.5507149696350098, + "learning_rate": 4.9310519645120405e-05, + "loss": 0.0924, + "step": 1632 + }, + { + "epoch": 0.6903253062948881, + "grad_norm": 1.3672925233840942, + "learning_rate": 4.930967469370512e-05, + "loss": 0.098, + "step": 1634 + }, + { + "epoch": 0.6911702577101817, + "grad_norm": 1.8510890007019043, + "learning_rate": 4.930882974228982e-05, + "loss": 0.0784, + "step": 1636 + }, + { + "epoch": 0.6920152091254753, + "grad_norm": 1.4934600591659546, + "learning_rate": 4.9307984790874526e-05, + "loss": 0.1049, + "step": 1638 + }, + { + "epoch": 0.6928601605407689, + "grad_norm": 1.453052282333374, + "learning_rate": 4.9307139839459234e-05, + "loss": 0.0774, + "step": 1640 + }, + { + "epoch": 0.6937051119560625, + "grad_norm": 2.2687666416168213, + "learning_rate": 4.930629488804394e-05, + "loss": 0.1204, + "step": 1642 + }, + { + "epoch": 0.6945500633713562, + "grad_norm": 1.256612777709961, + "learning_rate": 4.930544993662865e-05, + "loss": 0.0879, + "step": 1644 + }, + { + "epoch": 0.6953950147866498, + "grad_norm": 1.2979470491409302, + "learning_rate": 4.9304604985213355e-05, + "loss": 0.0789, + "step": 1646 + }, + { + "epoch": 0.6962399662019434, + "grad_norm": 1.1799136400222778, + "learning_rate": 4.9303760033798056e-05, + "loss": 0.0836, + "step": 1648 + }, + { + "epoch": 0.697084917617237, + "grad_norm": 1.8143103122711182, + "learning_rate": 4.930291508238277e-05, + "loss": 0.0853, + "step": 1650 + }, + { + "epoch": 0.6979298690325306, + "grad_norm": 1.3416088819503784, + "learning_rate": 4.930207013096747e-05, + "loss": 0.0847, + "step": 1652 + }, + { + "epoch": 0.6987748204478242, + "grad_norm": 1.4958585500717163, + "learning_rate": 4.930122517955218e-05, + "loss": 0.0858, + "step": 1654 + }, + { + "epoch": 0.6996197718631179, + "grad_norm": 1.0670164823532104, + "learning_rate": 4.9300380228136884e-05, + "loss": 0.069, + "step": 1656 + }, + { + "epoch": 0.7004647232784115, + "grad_norm": 1.2606033086776733, + "learning_rate": 4.929953527672159e-05, + "loss": 0.0869, + "step": 1658 + }, + { + "epoch": 0.7013096746937051, + "grad_norm": 1.3816112279891968, + "learning_rate": 4.92986903253063e-05, + "loss": 0.0723, + "step": 1660 + }, + { + "epoch": 0.7021546261089987, + "grad_norm": 1.619861125946045, + "learning_rate": 4.9297845373891006e-05, + "loss": 0.1112, + "step": 1662 + }, + { + "epoch": 0.7029995775242923, + "grad_norm": 1.5590943098068237, + "learning_rate": 4.9297000422475706e-05, + "loss": 0.1013, + "step": 1664 + }, + { + "epoch": 0.7038445289395859, + "grad_norm": 1.2259608507156372, + "learning_rate": 4.929615547106042e-05, + "loss": 0.0855, + "step": 1666 + }, + { + "epoch": 0.7046894803548795, + "grad_norm": 1.231600284576416, + "learning_rate": 4.929531051964512e-05, + "loss": 0.0624, + "step": 1668 + }, + { + "epoch": 0.7055344317701732, + "grad_norm": 1.3199141025543213, + "learning_rate": 4.929446556822983e-05, + "loss": 0.0742, + "step": 1670 + }, + { + "epoch": 0.7063793831854668, + "grad_norm": 1.259050726890564, + "learning_rate": 4.9293620616814535e-05, + "loss": 0.0468, + "step": 1672 + }, + { + "epoch": 0.7072243346007605, + "grad_norm": 1.5264647006988525, + "learning_rate": 4.929277566539924e-05, + "loss": 0.0859, + "step": 1674 + }, + { + "epoch": 0.7080692860160541, + "grad_norm": 1.6180810928344727, + "learning_rate": 4.929193071398395e-05, + "loss": 0.0636, + "step": 1676 + }, + { + "epoch": 0.7089142374313477, + "grad_norm": 0.8615092635154724, + "learning_rate": 4.929108576256866e-05, + "loss": 0.0533, + "step": 1678 + }, + { + "epoch": 0.7097591888466414, + "grad_norm": 1.568564534187317, + "learning_rate": 4.929024081115336e-05, + "loss": 0.0794, + "step": 1680 + }, + { + "epoch": 0.710604140261935, + "grad_norm": 1.2899255752563477, + "learning_rate": 4.928939585973807e-05, + "loss": 0.0938, + "step": 1682 + }, + { + "epoch": 0.7114490916772286, + "grad_norm": 1.375060796737671, + "learning_rate": 4.928855090832277e-05, + "loss": 0.0741, + "step": 1684 + }, + { + "epoch": 0.7122940430925222, + "grad_norm": 1.205748200416565, + "learning_rate": 4.928770595690748e-05, + "loss": 0.0844, + "step": 1686 + }, + { + "epoch": 0.7131389945078158, + "grad_norm": 1.0106422901153564, + "learning_rate": 4.9286861005492186e-05, + "loss": 0.0594, + "step": 1688 + }, + { + "epoch": 0.7139839459231094, + "grad_norm": 1.668855905532837, + "learning_rate": 4.928601605407689e-05, + "loss": 0.1166, + "step": 1690 + }, + { + "epoch": 0.714828897338403, + "grad_norm": 1.094852089881897, + "learning_rate": 4.92851711026616e-05, + "loss": 0.0687, + "step": 1692 + }, + { + "epoch": 0.7156738487536967, + "grad_norm": 1.2307512760162354, + "learning_rate": 4.928432615124631e-05, + "loss": 0.0645, + "step": 1694 + }, + { + "epoch": 0.7165188001689903, + "grad_norm": 1.497880220413208, + "learning_rate": 4.928348119983101e-05, + "loss": 0.0998, + "step": 1696 + }, + { + "epoch": 0.7173637515842839, + "grad_norm": 1.546085238456726, + "learning_rate": 4.928263624841572e-05, + "loss": 0.0825, + "step": 1698 + }, + { + "epoch": 0.7182087029995775, + "grad_norm": 1.2614907026290894, + "learning_rate": 4.928179129700042e-05, + "loss": 0.0985, + "step": 1700 + }, + { + "epoch": 0.7182087029995775, + "eval_accuracy": 0.7092602172788411, + "eval_cer": 0.08464408902846758, + "eval_loss": 0.18686163425445557, + "eval_runtime": 849.9029, + "eval_samples_per_second": 13.646, + "eval_steps_per_second": 0.427, + "step": 1700 + }, + { + "epoch": 0.7190536544148711, + "grad_norm": 1.5614362955093384, + "learning_rate": 4.928094634558513e-05, + "loss": 0.0727, + "step": 1702 + }, + { + "epoch": 0.7198986058301647, + "grad_norm": 1.0478379726409912, + "learning_rate": 4.928010139416984e-05, + "loss": 0.0629, + "step": 1704 + }, + { + "epoch": 0.7207435572454584, + "grad_norm": 1.7387198209762573, + "learning_rate": 4.9279256442754544e-05, + "loss": 0.097, + "step": 1706 + }, + { + "epoch": 0.721588508660752, + "grad_norm": 1.8205493688583374, + "learning_rate": 4.927841149133925e-05, + "loss": 0.1223, + "step": 1708 + }, + { + "epoch": 0.7224334600760456, + "grad_norm": 1.3796732425689697, + "learning_rate": 4.927756653992396e-05, + "loss": 0.0864, + "step": 1710 + }, + { + "epoch": 0.7232784114913392, + "grad_norm": 1.7345277070999146, + "learning_rate": 4.927672158850866e-05, + "loss": 0.0995, + "step": 1712 + }, + { + "epoch": 0.7241233629066328, + "grad_norm": 0.9681912660598755, + "learning_rate": 4.927587663709337e-05, + "loss": 0.05, + "step": 1714 + }, + { + "epoch": 0.7249683143219265, + "grad_norm": 1.7644003629684448, + "learning_rate": 4.927503168567807e-05, + "loss": 0.1208, + "step": 1716 + }, + { + "epoch": 0.7258132657372202, + "grad_norm": 1.760344386100769, + "learning_rate": 4.927418673426279e-05, + "loss": 0.08, + "step": 1718 + }, + { + "epoch": 0.7266582171525138, + "grad_norm": 1.5169976949691772, + "learning_rate": 4.927334178284749e-05, + "loss": 0.0945, + "step": 1720 + }, + { + "epoch": 0.7275031685678074, + "grad_norm": 1.2515987157821655, + "learning_rate": 4.9272496831432195e-05, + "loss": 0.0964, + "step": 1722 + }, + { + "epoch": 0.728348119983101, + "grad_norm": 1.1804972887039185, + "learning_rate": 4.92716518800169e-05, + "loss": 0.0945, + "step": 1724 + }, + { + "epoch": 0.7291930713983946, + "grad_norm": 1.7442156076431274, + "learning_rate": 4.927080692860161e-05, + "loss": 0.1006, + "step": 1726 + }, + { + "epoch": 0.7300380228136882, + "grad_norm": 1.4811662435531616, + "learning_rate": 4.926996197718631e-05, + "loss": 0.1162, + "step": 1728 + }, + { + "epoch": 0.7308829742289819, + "grad_norm": 1.6214262247085571, + "learning_rate": 4.9269117025771024e-05, + "loss": 0.0813, + "step": 1730 + }, + { + "epoch": 0.7317279256442755, + "grad_norm": 1.4832850694656372, + "learning_rate": 4.9268272074355724e-05, + "loss": 0.1104, + "step": 1732 + }, + { + "epoch": 0.7325728770595691, + "grad_norm": 1.1491117477416992, + "learning_rate": 4.926742712294044e-05, + "loss": 0.0714, + "step": 1734 + }, + { + "epoch": 0.7334178284748627, + "grad_norm": 1.9180891513824463, + "learning_rate": 4.926658217152514e-05, + "loss": 0.0781, + "step": 1736 + }, + { + "epoch": 0.7342627798901563, + "grad_norm": 1.390456199645996, + "learning_rate": 4.9265737220109846e-05, + "loss": 0.1352, + "step": 1738 + }, + { + "epoch": 0.7351077313054499, + "grad_norm": 1.0331450700759888, + "learning_rate": 4.926489226869455e-05, + "loss": 0.0808, + "step": 1740 + }, + { + "epoch": 0.7359526827207435, + "grad_norm": 1.2622566223144531, + "learning_rate": 4.926404731727926e-05, + "loss": 0.055, + "step": 1742 + }, + { + "epoch": 0.7367976341360372, + "grad_norm": 1.1740683317184448, + "learning_rate": 4.926320236586397e-05, + "loss": 0.0493, + "step": 1744 + }, + { + "epoch": 0.7376425855513308, + "grad_norm": 1.8595513105392456, + "learning_rate": 4.9262357414448674e-05, + "loss": 0.1211, + "step": 1746 + }, + { + "epoch": 0.7384875369666244, + "grad_norm": 1.0683369636535645, + "learning_rate": 4.9261512463033375e-05, + "loss": 0.0583, + "step": 1748 + }, + { + "epoch": 0.739332488381918, + "grad_norm": 1.4157065153121948, + "learning_rate": 4.926066751161809e-05, + "loss": 0.0554, + "step": 1750 + }, + { + "epoch": 0.7401774397972116, + "grad_norm": 1.5190364122390747, + "learning_rate": 4.925982256020279e-05, + "loss": 0.0631, + "step": 1752 + }, + { + "epoch": 0.7410223912125052, + "grad_norm": 2.315302848815918, + "learning_rate": 4.9258977608787496e-05, + "loss": 0.1047, + "step": 1754 + }, + { + "epoch": 0.7418673426277989, + "grad_norm": 0.8665867447853088, + "learning_rate": 4.9258132657372204e-05, + "loss": 0.0612, + "step": 1756 + }, + { + "epoch": 0.7427122940430925, + "grad_norm": 2.6190311908721924, + "learning_rate": 4.925728770595691e-05, + "loss": 0.1227, + "step": 1758 + }, + { + "epoch": 0.7435572454583862, + "grad_norm": 1.6784873008728027, + "learning_rate": 4.925644275454162e-05, + "loss": 0.0606, + "step": 1760 + }, + { + "epoch": 0.7444021968736798, + "grad_norm": 1.4590693712234497, + "learning_rate": 4.9255597803126325e-05, + "loss": 0.0942, + "step": 1762 + }, + { + "epoch": 0.7452471482889734, + "grad_norm": 1.7136425971984863, + "learning_rate": 4.9254752851711026e-05, + "loss": 0.1141, + "step": 1764 + }, + { + "epoch": 0.746092099704267, + "grad_norm": 0.9946573376655579, + "learning_rate": 4.925390790029574e-05, + "loss": 0.0359, + "step": 1766 + }, + { + "epoch": 0.7469370511195607, + "grad_norm": 1.1567507982254028, + "learning_rate": 4.925306294888044e-05, + "loss": 0.055, + "step": 1768 + }, + { + "epoch": 0.7477820025348543, + "grad_norm": 1.3129973411560059, + "learning_rate": 4.925221799746515e-05, + "loss": 0.0654, + "step": 1770 + }, + { + "epoch": 0.7486269539501479, + "grad_norm": 2.021648645401001, + "learning_rate": 4.9251373046049854e-05, + "loss": 0.0965, + "step": 1772 + }, + { + "epoch": 0.7494719053654415, + "grad_norm": 1.6443380117416382, + "learning_rate": 4.925052809463456e-05, + "loss": 0.1043, + "step": 1774 + }, + { + "epoch": 0.7503168567807351, + "grad_norm": 1.7316606044769287, + "learning_rate": 4.924968314321927e-05, + "loss": 0.0941, + "step": 1776 + }, + { + "epoch": 0.7511618081960287, + "grad_norm": 1.0968530178070068, + "learning_rate": 4.9248838191803976e-05, + "loss": 0.0676, + "step": 1778 + }, + { + "epoch": 0.7520067596113224, + "grad_norm": 0.9221205115318298, + "learning_rate": 4.9247993240388676e-05, + "loss": 0.0763, + "step": 1780 + }, + { + "epoch": 0.752851711026616, + "grad_norm": 0.9875777363777161, + "learning_rate": 4.924714828897339e-05, + "loss": 0.0691, + "step": 1782 + }, + { + "epoch": 0.7536966624419096, + "grad_norm": 1.3123608827590942, + "learning_rate": 4.924630333755809e-05, + "loss": 0.0727, + "step": 1784 + }, + { + "epoch": 0.7545416138572032, + "grad_norm": 1.5449692010879517, + "learning_rate": 4.92454583861428e-05, + "loss": 0.0764, + "step": 1786 + }, + { + "epoch": 0.7553865652724968, + "grad_norm": 1.8921825885772705, + "learning_rate": 4.9244613434727505e-05, + "loss": 0.1129, + "step": 1788 + }, + { + "epoch": 0.7562315166877904, + "grad_norm": 1.119535207748413, + "learning_rate": 4.924376848331221e-05, + "loss": 0.0859, + "step": 1790 + }, + { + "epoch": 0.757076468103084, + "grad_norm": 1.4250285625457764, + "learning_rate": 4.924292353189692e-05, + "loss": 0.085, + "step": 1792 + }, + { + "epoch": 0.7579214195183777, + "grad_norm": 1.9496543407440186, + "learning_rate": 4.924207858048163e-05, + "loss": 0.0963, + "step": 1794 + }, + { + "epoch": 0.7587663709336713, + "grad_norm": 1.6314023733139038, + "learning_rate": 4.924123362906633e-05, + "loss": 0.079, + "step": 1796 + }, + { + "epoch": 0.7596113223489649, + "grad_norm": 1.3706735372543335, + "learning_rate": 4.924038867765104e-05, + "loss": 0.0813, + "step": 1798 + }, + { + "epoch": 0.7604562737642585, + "grad_norm": 1.6105067729949951, + "learning_rate": 4.923954372623574e-05, + "loss": 0.0793, + "step": 1800 + }, + { + "epoch": 0.7604562737642585, + "eval_accuracy": 0.7114157613381618, + "eval_cer": 0.08386778055170832, + "eval_loss": 0.18643230199813843, + "eval_runtime": 864.6046, + "eval_samples_per_second": 13.414, + "eval_steps_per_second": 0.42, + "step": 1800 + }, + { + "epoch": 0.7613012251795521, + "grad_norm": 1.3744542598724365, + "learning_rate": 4.923869877482045e-05, + "loss": 0.1043, + "step": 1802 + }, + { + "epoch": 0.7621461765948458, + "grad_norm": 1.2851905822753906, + "learning_rate": 4.9237853823405156e-05, + "loss": 0.0903, + "step": 1804 + }, + { + "epoch": 0.7629911280101395, + "grad_norm": 1.7337363958358765, + "learning_rate": 4.923700887198986e-05, + "loss": 0.0664, + "step": 1806 + }, + { + "epoch": 0.7638360794254331, + "grad_norm": 1.3106553554534912, + "learning_rate": 4.923616392057457e-05, + "loss": 0.0589, + "step": 1808 + }, + { + "epoch": 0.7646810308407267, + "grad_norm": 1.8125030994415283, + "learning_rate": 4.923531896915928e-05, + "loss": 0.1044, + "step": 1810 + }, + { + "epoch": 0.7655259822560203, + "grad_norm": 0.759692907333374, + "learning_rate": 4.923447401774398e-05, + "loss": 0.0423, + "step": 1812 + }, + { + "epoch": 0.7663709336713139, + "grad_norm": 1.4425475597381592, + "learning_rate": 4.923362906632869e-05, + "loss": 0.0742, + "step": 1814 + }, + { + "epoch": 0.7672158850866075, + "grad_norm": 1.7777689695358276, + "learning_rate": 4.923278411491339e-05, + "loss": 0.1033, + "step": 1816 + }, + { + "epoch": 0.7680608365019012, + "grad_norm": 1.4362133741378784, + "learning_rate": 4.9231939163498106e-05, + "loss": 0.0826, + "step": 1818 + }, + { + "epoch": 0.7689057879171948, + "grad_norm": 1.4595433473587036, + "learning_rate": 4.923109421208281e-05, + "loss": 0.0929, + "step": 1820 + }, + { + "epoch": 0.7697507393324884, + "grad_norm": 1.1807221174240112, + "learning_rate": 4.9230249260667514e-05, + "loss": 0.0803, + "step": 1822 + }, + { + "epoch": 0.770595690747782, + "grad_norm": 1.3756366968154907, + "learning_rate": 4.922940430925222e-05, + "loss": 0.0933, + "step": 1824 + }, + { + "epoch": 0.7714406421630756, + "grad_norm": 1.903717041015625, + "learning_rate": 4.922855935783693e-05, + "loss": 0.1241, + "step": 1826 + }, + { + "epoch": 0.7722855935783692, + "grad_norm": 1.8950796127319336, + "learning_rate": 4.922771440642163e-05, + "loss": 0.113, + "step": 1828 + }, + { + "epoch": 0.7731305449936628, + "grad_norm": 1.7587209939956665, + "learning_rate": 4.922686945500634e-05, + "loss": 0.0748, + "step": 1830 + }, + { + "epoch": 0.7739754964089565, + "grad_norm": 1.414786458015442, + "learning_rate": 4.922602450359104e-05, + "loss": 0.0958, + "step": 1832 + }, + { + "epoch": 0.7748204478242501, + "grad_norm": 1.7570750713348389, + "learning_rate": 4.922517955217576e-05, + "loss": 0.0989, + "step": 1834 + }, + { + "epoch": 0.7756653992395437, + "grad_norm": 0.9731429815292358, + "learning_rate": 4.922433460076046e-05, + "loss": 0.0659, + "step": 1836 + }, + { + "epoch": 0.7765103506548373, + "grad_norm": 1.2279034852981567, + "learning_rate": 4.9223489649345165e-05, + "loss": 0.0802, + "step": 1838 + }, + { + "epoch": 0.7773553020701309, + "grad_norm": 1.8844351768493652, + "learning_rate": 4.922264469792987e-05, + "loss": 0.1129, + "step": 1840 + }, + { + "epoch": 0.7782002534854245, + "grad_norm": 1.544244408607483, + "learning_rate": 4.922179974651458e-05, + "loss": 0.0857, + "step": 1842 + }, + { + "epoch": 0.7790452049007182, + "grad_norm": 1.417275071144104, + "learning_rate": 4.9220954795099286e-05, + "loss": 0.0735, + "step": 1844 + }, + { + "epoch": 0.7798901563160119, + "grad_norm": 1.0909690856933594, + "learning_rate": 4.9220109843683994e-05, + "loss": 0.0635, + "step": 1846 + }, + { + "epoch": 0.7807351077313055, + "grad_norm": 1.1615931987762451, + "learning_rate": 4.9219264892268694e-05, + "loss": 0.0632, + "step": 1848 + }, + { + "epoch": 0.7815800591465991, + "grad_norm": 1.4405494928359985, + "learning_rate": 4.921841994085341e-05, + "loss": 0.0848, + "step": 1850 + }, + { + "epoch": 0.7824250105618927, + "grad_norm": 0.6920841932296753, + "learning_rate": 4.921757498943811e-05, + "loss": 0.0404, + "step": 1852 + }, + { + "epoch": 0.7832699619771863, + "grad_norm": 0.9411987662315369, + "learning_rate": 4.9216730038022816e-05, + "loss": 0.0662, + "step": 1854 + }, + { + "epoch": 0.78411491339248, + "grad_norm": 1.6385960578918457, + "learning_rate": 4.921588508660752e-05, + "loss": 0.0484, + "step": 1856 + }, + { + "epoch": 0.7849598648077736, + "grad_norm": 1.655723214149475, + "learning_rate": 4.921504013519223e-05, + "loss": 0.1107, + "step": 1858 + }, + { + "epoch": 0.7858048162230672, + "grad_norm": 1.5613646507263184, + "learning_rate": 4.921419518377694e-05, + "loss": 0.0997, + "step": 1860 + }, + { + "epoch": 0.7866497676383608, + "grad_norm": 1.729367971420288, + "learning_rate": 4.9213350232361644e-05, + "loss": 0.0823, + "step": 1862 + }, + { + "epoch": 0.7874947190536544, + "grad_norm": 1.5075145959854126, + "learning_rate": 4.9212505280946345e-05, + "loss": 0.0687, + "step": 1864 + }, + { + "epoch": 0.788339670468948, + "grad_norm": 1.4084213972091675, + "learning_rate": 4.921166032953106e-05, + "loss": 0.0671, + "step": 1866 + }, + { + "epoch": 0.7891846218842417, + "grad_norm": 1.0190798044204712, + "learning_rate": 4.921081537811576e-05, + "loss": 0.0727, + "step": 1868 + }, + { + "epoch": 0.7900295732995353, + "grad_norm": 0.8679783940315247, + "learning_rate": 4.9209970426700466e-05, + "loss": 0.0804, + "step": 1870 + }, + { + "epoch": 0.7908745247148289, + "grad_norm": 1.3874335289001465, + "learning_rate": 4.9209125475285174e-05, + "loss": 0.0753, + "step": 1872 + }, + { + "epoch": 0.7917194761301225, + "grad_norm": 1.33711576461792, + "learning_rate": 4.920828052386988e-05, + "loss": 0.0826, + "step": 1874 + }, + { + "epoch": 0.7925644275454161, + "grad_norm": 1.362648606300354, + "learning_rate": 4.920743557245459e-05, + "loss": 0.0591, + "step": 1876 + }, + { + "epoch": 0.7934093789607097, + "grad_norm": 1.0619179010391235, + "learning_rate": 4.9206590621039295e-05, + "loss": 0.0495, + "step": 1878 + }, + { + "epoch": 0.7942543303760033, + "grad_norm": 2.31254506111145, + "learning_rate": 4.9205745669623996e-05, + "loss": 0.0985, + "step": 1880 + }, + { + "epoch": 0.795099281791297, + "grad_norm": 1.3235636949539185, + "learning_rate": 4.920490071820871e-05, + "loss": 0.1215, + "step": 1882 + }, + { + "epoch": 0.7959442332065906, + "grad_norm": 1.0667047500610352, + "learning_rate": 4.920405576679341e-05, + "loss": 0.0401, + "step": 1884 + }, + { + "epoch": 0.7967891846218842, + "grad_norm": 1.7155953645706177, + "learning_rate": 4.920321081537812e-05, + "loss": 0.087, + "step": 1886 + }, + { + "epoch": 0.7976341360371778, + "grad_norm": 1.1526762247085571, + "learning_rate": 4.9202365863962824e-05, + "loss": 0.0752, + "step": 1888 + }, + { + "epoch": 0.7984790874524715, + "grad_norm": 1.039081335067749, + "learning_rate": 4.920152091254753e-05, + "loss": 0.0882, + "step": 1890 + }, + { + "epoch": 0.7993240388677652, + "grad_norm": 1.4192873239517212, + "learning_rate": 4.920067596113224e-05, + "loss": 0.0692, + "step": 1892 + }, + { + "epoch": 0.8001689902830588, + "grad_norm": 0.9547431468963623, + "learning_rate": 4.9199831009716946e-05, + "loss": 0.0972, + "step": 1894 + }, + { + "epoch": 0.8010139416983524, + "grad_norm": 1.382888674736023, + "learning_rate": 4.9198986058301646e-05, + "loss": 0.1152, + "step": 1896 + }, + { + "epoch": 0.801858893113646, + "grad_norm": 1.5938488245010376, + "learning_rate": 4.919814110688636e-05, + "loss": 0.0875, + "step": 1898 + }, + { + "epoch": 0.8027038445289396, + "grad_norm": 1.211908221244812, + "learning_rate": 4.919729615547106e-05, + "loss": 0.0808, + "step": 1900 + }, + { + "epoch": 0.8027038445289396, + "eval_accuracy": 0.7140886359717192, + "eval_cer": 0.08334730100479018, + "eval_loss": 0.18288320302963257, + "eval_runtime": 852.7471, + "eval_samples_per_second": 13.601, + "eval_steps_per_second": 0.426, + "step": 1900 + }, + { + "epoch": 0.8035487959442332, + "grad_norm": 1.273587703704834, + "learning_rate": 4.919645120405577e-05, + "loss": 0.0567, + "step": 1902 + }, + { + "epoch": 0.8043937473595268, + "grad_norm": 1.4771486520767212, + "learning_rate": 4.9195606252640475e-05, + "loss": 0.0687, + "step": 1904 + }, + { + "epoch": 0.8052386987748205, + "grad_norm": 1.6391814947128296, + "learning_rate": 4.919476130122518e-05, + "loss": 0.0936, + "step": 1906 + }, + { + "epoch": 0.8060836501901141, + "grad_norm": 1.199585199356079, + "learning_rate": 4.919391634980989e-05, + "loss": 0.0855, + "step": 1908 + }, + { + "epoch": 0.8069286016054077, + "grad_norm": 1.0377155542373657, + "learning_rate": 4.91930713983946e-05, + "loss": 0.0669, + "step": 1910 + }, + { + "epoch": 0.8077735530207013, + "grad_norm": 1.5404986143112183, + "learning_rate": 4.91922264469793e-05, + "loss": 0.1058, + "step": 1912 + }, + { + "epoch": 0.8086185044359949, + "grad_norm": 1.612430214881897, + "learning_rate": 4.919138149556401e-05, + "loss": 0.074, + "step": 1914 + }, + { + "epoch": 0.8094634558512885, + "grad_norm": 1.8857512474060059, + "learning_rate": 4.919053654414871e-05, + "loss": 0.0916, + "step": 1916 + }, + { + "epoch": 0.8103084072665822, + "grad_norm": 1.3807179927825928, + "learning_rate": 4.9189691592733426e-05, + "loss": 0.0728, + "step": 1918 + }, + { + "epoch": 0.8111533586818758, + "grad_norm": 1.6425131559371948, + "learning_rate": 4.9188846641318126e-05, + "loss": 0.1021, + "step": 1920 + }, + { + "epoch": 0.8119983100971694, + "grad_norm": 1.344103217124939, + "learning_rate": 4.918800168990283e-05, + "loss": 0.0515, + "step": 1922 + }, + { + "epoch": 0.812843261512463, + "grad_norm": 1.3962315320968628, + "learning_rate": 4.918715673848754e-05, + "loss": 0.0619, + "step": 1924 + }, + { + "epoch": 0.8136882129277566, + "grad_norm": 1.2917231321334839, + "learning_rate": 4.918631178707225e-05, + "loss": 0.0795, + "step": 1926 + }, + { + "epoch": 0.8145331643430502, + "grad_norm": 2.180860996246338, + "learning_rate": 4.918546683565695e-05, + "loss": 0.1103, + "step": 1928 + }, + { + "epoch": 0.8153781157583438, + "grad_norm": 2.4335546493530273, + "learning_rate": 4.918462188424166e-05, + "loss": 0.1115, + "step": 1930 + }, + { + "epoch": 0.8162230671736375, + "grad_norm": 1.399709939956665, + "learning_rate": 4.918377693282636e-05, + "loss": 0.0957, + "step": 1932 + }, + { + "epoch": 0.8170680185889312, + "grad_norm": 1.5264992713928223, + "learning_rate": 4.9182931981411076e-05, + "loss": 0.0728, + "step": 1934 + }, + { + "epoch": 0.8179129700042248, + "grad_norm": 1.3386390209197998, + "learning_rate": 4.918208702999578e-05, + "loss": 0.0642, + "step": 1936 + }, + { + "epoch": 0.8187579214195184, + "grad_norm": 2.438541889190674, + "learning_rate": 4.9181242078580484e-05, + "loss": 0.0949, + "step": 1938 + }, + { + "epoch": 0.819602872834812, + "grad_norm": 0.7417229413986206, + "learning_rate": 4.918039712716519e-05, + "loss": 0.029, + "step": 1940 + }, + { + "epoch": 0.8204478242501057, + "grad_norm": 1.8493363857269287, + "learning_rate": 4.91795521757499e-05, + "loss": 0.096, + "step": 1942 + }, + { + "epoch": 0.8212927756653993, + "grad_norm": 0.9756290912628174, + "learning_rate": 4.9178707224334606e-05, + "loss": 0.064, + "step": 1944 + }, + { + "epoch": 0.8221377270806929, + "grad_norm": 1.0960304737091064, + "learning_rate": 4.917786227291931e-05, + "loss": 0.0671, + "step": 1946 + }, + { + "epoch": 0.8229826784959865, + "grad_norm": 1.3161073923110962, + "learning_rate": 4.917701732150401e-05, + "loss": 0.0671, + "step": 1948 + }, + { + "epoch": 0.8238276299112801, + "grad_norm": 0.829484224319458, + "learning_rate": 4.917617237008873e-05, + "loss": 0.0633, + "step": 1950 + }, + { + "epoch": 0.8246725813265737, + "grad_norm": 1.1128253936767578, + "learning_rate": 4.917532741867343e-05, + "loss": 0.0658, + "step": 1952 + }, + { + "epoch": 0.8255175327418673, + "grad_norm": 1.2087801694869995, + "learning_rate": 4.9174482467258135e-05, + "loss": 0.0688, + "step": 1954 + }, + { + "epoch": 0.826362484157161, + "grad_norm": 1.6095219850540161, + "learning_rate": 4.917363751584284e-05, + "loss": 0.1243, + "step": 1956 + }, + { + "epoch": 0.8272074355724546, + "grad_norm": 1.4487024545669556, + "learning_rate": 4.917279256442755e-05, + "loss": 0.0848, + "step": 1958 + }, + { + "epoch": 0.8280523869877482, + "grad_norm": 1.430047869682312, + "learning_rate": 4.9171947613012256e-05, + "loss": 0.0731, + "step": 1960 + }, + { + "epoch": 0.8288973384030418, + "grad_norm": 1.377947449684143, + "learning_rate": 4.9171102661596964e-05, + "loss": 0.0692, + "step": 1962 + }, + { + "epoch": 0.8297422898183354, + "grad_norm": 1.4219231605529785, + "learning_rate": 4.9170257710181664e-05, + "loss": 0.0585, + "step": 1964 + }, + { + "epoch": 0.830587241233629, + "grad_norm": 1.647648811340332, + "learning_rate": 4.916941275876638e-05, + "loss": 0.0719, + "step": 1966 + }, + { + "epoch": 0.8314321926489227, + "grad_norm": 1.743295669555664, + "learning_rate": 4.916856780735108e-05, + "loss": 0.0811, + "step": 1968 + }, + { + "epoch": 0.8322771440642163, + "grad_norm": 1.3732768297195435, + "learning_rate": 4.9167722855935786e-05, + "loss": 0.0942, + "step": 1970 + }, + { + "epoch": 0.8331220954795099, + "grad_norm": 2.2335240840911865, + "learning_rate": 4.916687790452049e-05, + "loss": 0.1215, + "step": 1972 + }, + { + "epoch": 0.8339670468948035, + "grad_norm": 1.5069928169250488, + "learning_rate": 4.91660329531052e-05, + "loss": 0.0673, + "step": 1974 + }, + { + "epoch": 0.8348119983100971, + "grad_norm": 1.2649790048599243, + "learning_rate": 4.916518800168991e-05, + "loss": 0.0851, + "step": 1976 + }, + { + "epoch": 0.8356569497253908, + "grad_norm": 1.7199008464813232, + "learning_rate": 4.9164343050274614e-05, + "loss": 0.0936, + "step": 1978 + }, + { + "epoch": 0.8365019011406845, + "grad_norm": 0.6023948192596436, + "learning_rate": 4.9163498098859315e-05, + "loss": 0.0624, + "step": 1980 + }, + { + "epoch": 0.8373468525559781, + "grad_norm": 1.3942101001739502, + "learning_rate": 4.916265314744403e-05, + "loss": 0.0832, + "step": 1982 + }, + { + "epoch": 0.8381918039712717, + "grad_norm": 0.856122612953186, + "learning_rate": 4.916180819602873e-05, + "loss": 0.0485, + "step": 1984 + }, + { + "epoch": 0.8390367553865653, + "grad_norm": 1.3273183107376099, + "learning_rate": 4.9160963244613437e-05, + "loss": 0.0634, + "step": 1986 + }, + { + "epoch": 0.8398817068018589, + "grad_norm": 1.4347068071365356, + "learning_rate": 4.9160118293198144e-05, + "loss": 0.0599, + "step": 1988 + }, + { + "epoch": 0.8407266582171525, + "grad_norm": 1.5340876579284668, + "learning_rate": 4.915927334178285e-05, + "loss": 0.0607, + "step": 1990 + }, + { + "epoch": 0.8415716096324461, + "grad_norm": 1.6133233308792114, + "learning_rate": 4.915842839036756e-05, + "loss": 0.076, + "step": 1992 + }, + { + "epoch": 0.8424165610477398, + "grad_norm": 2.004894971847534, + "learning_rate": 4.9157583438952265e-05, + "loss": 0.1015, + "step": 1994 + }, + { + "epoch": 0.8432615124630334, + "grad_norm": 1.0740406513214111, + "learning_rate": 4.9156738487536966e-05, + "loss": 0.0576, + "step": 1996 + }, + { + "epoch": 0.844106463878327, + "grad_norm": 1.0599428415298462, + "learning_rate": 4.915589353612168e-05, + "loss": 0.0561, + "step": 1998 + }, + { + "epoch": 0.8449514152936206, + "grad_norm": 1.134386420249939, + "learning_rate": 4.915504858470638e-05, + "loss": 0.0546, + "step": 2000 + }, + { + "epoch": 0.8449514152936206, + "eval_accuracy": 0.7053802379720642, + "eval_cer": 0.0823592720343693, + "eval_loss": 0.18670396506786346, + "eval_runtime": 843.4731, + "eval_samples_per_second": 13.75, + "eval_steps_per_second": 0.43, + "step": 2000 + }, + { + "epoch": 0.8457963667089142, + "grad_norm": 1.8255170583724976, + "learning_rate": 4.915420363329109e-05, + "loss": 0.0661, + "step": 2002 + }, + { + "epoch": 0.8466413181242078, + "grad_norm": 1.5995157957077026, + "learning_rate": 4.9153358681875795e-05, + "loss": 0.0905, + "step": 2004 + }, + { + "epoch": 0.8474862695395015, + "grad_norm": 1.5706533193588257, + "learning_rate": 4.91525137304605e-05, + "loss": 0.1004, + "step": 2006 + }, + { + "epoch": 0.8483312209547951, + "grad_norm": 1.2460238933563232, + "learning_rate": 4.915166877904521e-05, + "loss": 0.0841, + "step": 2008 + }, + { + "epoch": 0.8491761723700887, + "grad_norm": 1.2849513292312622, + "learning_rate": 4.9150823827629916e-05, + "loss": 0.1101, + "step": 2010 + }, + { + "epoch": 0.8500211237853823, + "grad_norm": 1.5081242322921753, + "learning_rate": 4.9149978876214617e-05, + "loss": 0.0944, + "step": 2012 + }, + { + "epoch": 0.8508660752006759, + "grad_norm": 0.9104534387588501, + "learning_rate": 4.914913392479933e-05, + "loss": 0.0442, + "step": 2014 + }, + { + "epoch": 0.8517110266159695, + "grad_norm": 1.4931339025497437, + "learning_rate": 4.914828897338403e-05, + "loss": 0.0814, + "step": 2016 + }, + { + "epoch": 0.8525559780312632, + "grad_norm": 1.0569278001785278, + "learning_rate": 4.9147444021968745e-05, + "loss": 0.0601, + "step": 2018 + }, + { + "epoch": 0.8534009294465569, + "grad_norm": 1.3178728818893433, + "learning_rate": 4.9146599070553445e-05, + "loss": 0.0734, + "step": 2020 + }, + { + "epoch": 0.8542458808618505, + "grad_norm": 1.1889725923538208, + "learning_rate": 4.914575411913815e-05, + "loss": 0.0756, + "step": 2022 + }, + { + "epoch": 0.8550908322771441, + "grad_norm": 1.3995158672332764, + "learning_rate": 4.914490916772286e-05, + "loss": 0.0758, + "step": 2024 + }, + { + "epoch": 0.8559357836924377, + "grad_norm": 1.3304693698883057, + "learning_rate": 4.914406421630757e-05, + "loss": 0.1291, + "step": 2026 + }, + { + "epoch": 0.8567807351077313, + "grad_norm": 1.1206573247909546, + "learning_rate": 4.914321926489227e-05, + "loss": 0.0801, + "step": 2028 + }, + { + "epoch": 0.857625686523025, + "grad_norm": 1.374193787574768, + "learning_rate": 4.914237431347698e-05, + "loss": 0.0698, + "step": 2030 + }, + { + "epoch": 0.8584706379383186, + "grad_norm": 1.6789133548736572, + "learning_rate": 4.914152936206168e-05, + "loss": 0.0965, + "step": 2032 + }, + { + "epoch": 0.8593155893536122, + "grad_norm": 1.7472199201583862, + "learning_rate": 4.9140684410646396e-05, + "loss": 0.0747, + "step": 2034 + }, + { + "epoch": 0.8601605407689058, + "grad_norm": 1.2843263149261475, + "learning_rate": 4.9139839459231096e-05, + "loss": 0.13, + "step": 2036 + }, + { + "epoch": 0.8610054921841994, + "grad_norm": 1.7041819095611572, + "learning_rate": 4.91389945078158e-05, + "loss": 0.0828, + "step": 2038 + }, + { + "epoch": 0.861850443599493, + "grad_norm": 2.2484312057495117, + "learning_rate": 4.913814955640051e-05, + "loss": 0.0595, + "step": 2040 + }, + { + "epoch": 0.8626953950147866, + "grad_norm": 1.134190559387207, + "learning_rate": 4.913730460498522e-05, + "loss": 0.0963, + "step": 2042 + }, + { + "epoch": 0.8635403464300803, + "grad_norm": 1.1661795377731323, + "learning_rate": 4.9136459653569925e-05, + "loss": 0.0601, + "step": 2044 + }, + { + "epoch": 0.8643852978453739, + "grad_norm": 1.5845274925231934, + "learning_rate": 4.913561470215463e-05, + "loss": 0.0888, + "step": 2046 + }, + { + "epoch": 0.8652302492606675, + "grad_norm": 0.9580950140953064, + "learning_rate": 4.913476975073933e-05, + "loss": 0.0604, + "step": 2048 + }, + { + "epoch": 0.8660752006759611, + "grad_norm": 0.8986382484436035, + "learning_rate": 4.9133924799324047e-05, + "loss": 0.0651, + "step": 2050 + }, + { + "epoch": 0.8669201520912547, + "grad_norm": 1.660507321357727, + "learning_rate": 4.913307984790875e-05, + "loss": 0.098, + "step": 2052 + }, + { + "epoch": 0.8677651035065483, + "grad_norm": 1.333807349205017, + "learning_rate": 4.9132234896493454e-05, + "loss": 0.0846, + "step": 2054 + }, + { + "epoch": 0.868610054921842, + "grad_norm": 1.2735717296600342, + "learning_rate": 4.913138994507816e-05, + "loss": 0.0843, + "step": 2056 + }, + { + "epoch": 0.8694550063371356, + "grad_norm": 1.303557276725769, + "learning_rate": 4.913054499366287e-05, + "loss": 0.0682, + "step": 2058 + }, + { + "epoch": 0.8702999577524292, + "grad_norm": 0.9738208651542664, + "learning_rate": 4.9129700042247576e-05, + "loss": 0.0428, + "step": 2060 + }, + { + "epoch": 0.8711449091677228, + "grad_norm": 1.4468063116073608, + "learning_rate": 4.912885509083228e-05, + "loss": 0.0753, + "step": 2062 + }, + { + "epoch": 0.8719898605830165, + "grad_norm": 1.511555790901184, + "learning_rate": 4.912801013941698e-05, + "loss": 0.0601, + "step": 2064 + }, + { + "epoch": 0.8728348119983101, + "grad_norm": 1.3249778747558594, + "learning_rate": 4.91271651880017e-05, + "loss": 0.0742, + "step": 2066 + }, + { + "epoch": 0.8736797634136038, + "grad_norm": 1.4642529487609863, + "learning_rate": 4.91263202365864e-05, + "loss": 0.0613, + "step": 2068 + }, + { + "epoch": 0.8745247148288974, + "grad_norm": 1.8487999439239502, + "learning_rate": 4.9125475285171105e-05, + "loss": 0.097, + "step": 2070 + }, + { + "epoch": 0.875369666244191, + "grad_norm": 1.1964668035507202, + "learning_rate": 4.912463033375581e-05, + "loss": 0.0535, + "step": 2072 + }, + { + "epoch": 0.8762146176594846, + "grad_norm": 1.6669561862945557, + "learning_rate": 4.912378538234052e-05, + "loss": 0.0901, + "step": 2074 + }, + { + "epoch": 0.8770595690747782, + "grad_norm": 1.4297568798065186, + "learning_rate": 4.9122940430925227e-05, + "loss": 0.0731, + "step": 2076 + }, + { + "epoch": 0.8779045204900718, + "grad_norm": 1.4387907981872559, + "learning_rate": 4.9122095479509934e-05, + "loss": 0.0998, + "step": 2078 + }, + { + "epoch": 0.8787494719053655, + "grad_norm": 1.2306314706802368, + "learning_rate": 4.9121250528094634e-05, + "loss": 0.0688, + "step": 2080 + }, + { + "epoch": 0.8795944233206591, + "grad_norm": 1.894845724105835, + "learning_rate": 4.912040557667935e-05, + "loss": 0.0986, + "step": 2082 + }, + { + "epoch": 0.8804393747359527, + "grad_norm": 2.315152645111084, + "learning_rate": 4.911956062526405e-05, + "loss": 0.0945, + "step": 2084 + }, + { + "epoch": 0.8812843261512463, + "grad_norm": 2.2627370357513428, + "learning_rate": 4.9118715673848756e-05, + "loss": 0.0726, + "step": 2086 + }, + { + "epoch": 0.8821292775665399, + "grad_norm": 1.7095221281051636, + "learning_rate": 4.911787072243346e-05, + "loss": 0.1009, + "step": 2088 + }, + { + "epoch": 0.8829742289818335, + "grad_norm": 1.60263991355896, + "learning_rate": 4.911702577101817e-05, + "loss": 0.0422, + "step": 2090 + }, + { + "epoch": 0.8838191803971271, + "grad_norm": 0.9724740982055664, + "learning_rate": 4.911618081960288e-05, + "loss": 0.0434, + "step": 2092 + }, + { + "epoch": 0.8846641318124208, + "grad_norm": 1.3910263776779175, + "learning_rate": 4.9115335868187585e-05, + "loss": 0.0725, + "step": 2094 + }, + { + "epoch": 0.8855090832277144, + "grad_norm": 1.592474341392517, + "learning_rate": 4.9114490916772285e-05, + "loss": 0.0549, + "step": 2096 + }, + { + "epoch": 0.886354034643008, + "grad_norm": 0.9358974099159241, + "learning_rate": 4.9113645965357e-05, + "loss": 0.0485, + "step": 2098 + }, + { + "epoch": 0.8871989860583016, + "grad_norm": 1.6186065673828125, + "learning_rate": 4.91128010139417e-05, + "loss": 0.0803, + "step": 2100 + }, + { + "epoch": 0.8871989860583016, + "eval_accuracy": 0.7154681841696844, + "eval_cer": 0.08182997080021524, + "eval_loss": 0.18388408422470093, + "eval_runtime": 862.7282, + "eval_samples_per_second": 13.443, + "eval_steps_per_second": 0.421, + "step": 2100 + }, + { + "epoch": 0.8880439374735952, + "grad_norm": 1.2380942106246948, + "learning_rate": 4.9111956062526407e-05, + "loss": 0.0668, + "step": 2102 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 1.4649783372879028, + "learning_rate": 4.9111111111111114e-05, + "loss": 0.0652, + "step": 2104 + }, + { + "epoch": 0.8897338403041825, + "grad_norm": 1.243154525756836, + "learning_rate": 4.911026615969582e-05, + "loss": 0.0686, + "step": 2106 + }, + { + "epoch": 0.8905787917194762, + "grad_norm": 1.6419312953948975, + "learning_rate": 4.910942120828053e-05, + "loss": 0.0678, + "step": 2108 + }, + { + "epoch": 0.8914237431347698, + "grad_norm": 1.3396897315979004, + "learning_rate": 4.9108576256865235e-05, + "loss": 0.0713, + "step": 2110 + }, + { + "epoch": 0.8922686945500634, + "grad_norm": 1.093079686164856, + "learning_rate": 4.9107731305449936e-05, + "loss": 0.071, + "step": 2112 + }, + { + "epoch": 0.893113645965357, + "grad_norm": 1.6767911911010742, + "learning_rate": 4.910688635403465e-05, + "loss": 0.0732, + "step": 2114 + }, + { + "epoch": 0.8939585973806506, + "grad_norm": 1.8932477235794067, + "learning_rate": 4.910604140261935e-05, + "loss": 0.0712, + "step": 2116 + }, + { + "epoch": 0.8948035487959443, + "grad_norm": 1.5414156913757324, + "learning_rate": 4.9105196451204064e-05, + "loss": 0.0815, + "step": 2118 + }, + { + "epoch": 0.8956485002112379, + "grad_norm": 1.5718928575515747, + "learning_rate": 4.9104351499788765e-05, + "loss": 0.0805, + "step": 2120 + }, + { + "epoch": 0.8964934516265315, + "grad_norm": 1.5843585729599, + "learning_rate": 4.910350654837347e-05, + "loss": 0.0796, + "step": 2122 + }, + { + "epoch": 0.8973384030418251, + "grad_norm": 1.3187426328659058, + "learning_rate": 4.910266159695818e-05, + "loss": 0.0629, + "step": 2124 + }, + { + "epoch": 0.8981833544571187, + "grad_norm": 1.3708245754241943, + "learning_rate": 4.9101816645542886e-05, + "loss": 0.0706, + "step": 2126 + }, + { + "epoch": 0.8990283058724123, + "grad_norm": 1.081969976425171, + "learning_rate": 4.9100971694127587e-05, + "loss": 0.0749, + "step": 2128 + }, + { + "epoch": 0.899873257287706, + "grad_norm": 1.7093164920806885, + "learning_rate": 4.91001267427123e-05, + "loss": 0.1136, + "step": 2130 + }, + { + "epoch": 0.9007182087029996, + "grad_norm": 1.232445240020752, + "learning_rate": 4.9099281791297e-05, + "loss": 0.054, + "step": 2132 + }, + { + "epoch": 0.9015631601182932, + "grad_norm": 0.976844847202301, + "learning_rate": 4.9098436839881715e-05, + "loss": 0.0713, + "step": 2134 + }, + { + "epoch": 0.9024081115335868, + "grad_norm": 1.0007492303848267, + "learning_rate": 4.9097591888466415e-05, + "loss": 0.0991, + "step": 2136 + }, + { + "epoch": 0.9032530629488804, + "grad_norm": 1.6573784351348877, + "learning_rate": 4.909674693705112e-05, + "loss": 0.0958, + "step": 2138 + }, + { + "epoch": 0.904098014364174, + "grad_norm": 0.9490051865577698, + "learning_rate": 4.909590198563583e-05, + "loss": 0.0394, + "step": 2140 + }, + { + "epoch": 0.9049429657794676, + "grad_norm": 1.0081024169921875, + "learning_rate": 4.909505703422054e-05, + "loss": 0.0573, + "step": 2142 + }, + { + "epoch": 0.9057879171947613, + "grad_norm": 1.080897569656372, + "learning_rate": 4.9094212082805244e-05, + "loss": 0.0745, + "step": 2144 + }, + { + "epoch": 0.9066328686100549, + "grad_norm": 1.7212154865264893, + "learning_rate": 4.909336713138995e-05, + "loss": 0.0968, + "step": 2146 + }, + { + "epoch": 0.9074778200253485, + "grad_norm": 1.7466483116149902, + "learning_rate": 4.909252217997465e-05, + "loss": 0.101, + "step": 2148 + }, + { + "epoch": 0.9083227714406422, + "grad_norm": 1.1636947393417358, + "learning_rate": 4.9091677228559366e-05, + "loss": 0.0816, + "step": 2150 + }, + { + "epoch": 0.9091677228559358, + "grad_norm": 1.3398698568344116, + "learning_rate": 4.9090832277144066e-05, + "loss": 0.0766, + "step": 2152 + }, + { + "epoch": 0.9100126742712294, + "grad_norm": 1.0851556062698364, + "learning_rate": 4.908998732572877e-05, + "loss": 0.0523, + "step": 2154 + }, + { + "epoch": 0.9108576256865231, + "grad_norm": 1.4844582080841064, + "learning_rate": 4.908914237431348e-05, + "loss": 0.0495, + "step": 2156 + }, + { + "epoch": 0.9117025771018167, + "grad_norm": 1.3978465795516968, + "learning_rate": 4.908829742289819e-05, + "loss": 0.0908, + "step": 2158 + }, + { + "epoch": 0.9125475285171103, + "grad_norm": 1.431995153427124, + "learning_rate": 4.9087452471482895e-05, + "loss": 0.1416, + "step": 2160 + }, + { + "epoch": 0.9133924799324039, + "grad_norm": 0.8778975009918213, + "learning_rate": 4.90866075200676e-05, + "loss": 0.0601, + "step": 2162 + }, + { + "epoch": 0.9142374313476975, + "grad_norm": 1.388907551765442, + "learning_rate": 4.90857625686523e-05, + "loss": 0.0787, + "step": 2164 + }, + { + "epoch": 0.9150823827629911, + "grad_norm": 1.7531378269195557, + "learning_rate": 4.9084917617237017e-05, + "loss": 0.0747, + "step": 2166 + }, + { + "epoch": 0.9159273341782848, + "grad_norm": 1.6861788034439087, + "learning_rate": 4.908407266582172e-05, + "loss": 0.0863, + "step": 2168 + }, + { + "epoch": 0.9167722855935784, + "grad_norm": 1.4938262701034546, + "learning_rate": 4.9083227714406424e-05, + "loss": 0.0833, + "step": 2170 + }, + { + "epoch": 0.917617237008872, + "grad_norm": 1.2034574747085571, + "learning_rate": 4.908238276299113e-05, + "loss": 0.0638, + "step": 2172 + }, + { + "epoch": 0.9184621884241656, + "grad_norm": 1.2034447193145752, + "learning_rate": 4.908153781157583e-05, + "loss": 0.0532, + "step": 2174 + }, + { + "epoch": 0.9193071398394592, + "grad_norm": 1.0579421520233154, + "learning_rate": 4.9080692860160546e-05, + "loss": 0.0811, + "step": 2176 + }, + { + "epoch": 0.9201520912547528, + "grad_norm": 0.9848757386207581, + "learning_rate": 4.9079847908745246e-05, + "loss": 0.031, + "step": 2178 + }, + { + "epoch": 0.9209970426700465, + "grad_norm": 1.3882626295089722, + "learning_rate": 4.907900295732995e-05, + "loss": 0.0778, + "step": 2180 + }, + { + "epoch": 0.9218419940853401, + "grad_norm": 1.6235911846160889, + "learning_rate": 4.907815800591466e-05, + "loss": 0.0945, + "step": 2182 + }, + { + "epoch": 0.9226869455006337, + "grad_norm": 1.860224723815918, + "learning_rate": 4.907731305449937e-05, + "loss": 0.0859, + "step": 2184 + }, + { + "epoch": 0.9235318969159273, + "grad_norm": 1.091496229171753, + "learning_rate": 4.9076468103084075e-05, + "loss": 0.0622, + "step": 2186 + }, + { + "epoch": 0.9243768483312209, + "grad_norm": 1.7291980981826782, + "learning_rate": 4.907562315166878e-05, + "loss": 0.0749, + "step": 2188 + }, + { + "epoch": 0.9252217997465145, + "grad_norm": 1.0687997341156006, + "learning_rate": 4.907477820025348e-05, + "loss": 0.0507, + "step": 2190 + }, + { + "epoch": 0.9260667511618081, + "grad_norm": 0.9708640575408936, + "learning_rate": 4.9073933248838197e-05, + "loss": 0.0558, + "step": 2192 + }, + { + "epoch": 0.9269117025771019, + "grad_norm": 1.3645669221878052, + "learning_rate": 4.90730882974229e-05, + "loss": 0.0906, + "step": 2194 + }, + { + "epoch": 0.9277566539923955, + "grad_norm": 1.1811169385910034, + "learning_rate": 4.9072243346007604e-05, + "loss": 0.0693, + "step": 2196 + }, + { + "epoch": 0.9286016054076891, + "grad_norm": 1.0970830917358398, + "learning_rate": 4.907139839459231e-05, + "loss": 0.0704, + "step": 2198 + }, + { + "epoch": 0.9294465568229827, + "grad_norm": 1.5071057081222534, + "learning_rate": 4.907055344317702e-05, + "loss": 0.0932, + "step": 2200 + }, + { + "epoch": 0.9294465568229827, + "eval_accuracy": 0.7148646318330747, + "eval_cer": 0.0812124526937022, + "eval_loss": 0.1800081431865692, + "eval_runtime": 846.8242, + "eval_samples_per_second": 13.696, + "eval_steps_per_second": 0.429, + "step": 2200 + }, + { + "epoch": 0.9302915082382763, + "grad_norm": 1.3840326070785522, + "learning_rate": 4.9069708491761726e-05, + "loss": 0.0484, + "step": 2202 + }, + { + "epoch": 0.93113645965357, + "grad_norm": 1.4783692359924316, + "learning_rate": 4.906886354034643e-05, + "loss": 0.1473, + "step": 2204 + }, + { + "epoch": 0.9319814110688636, + "grad_norm": 1.3358536958694458, + "learning_rate": 4.906801858893113e-05, + "loss": 0.0753, + "step": 2206 + }, + { + "epoch": 0.9328263624841572, + "grad_norm": 0.773380696773529, + "learning_rate": 4.906717363751585e-05, + "loss": 0.0335, + "step": 2208 + }, + { + "epoch": 0.9336713138994508, + "grad_norm": 1.2706904411315918, + "learning_rate": 4.906632868610055e-05, + "loss": 0.0471, + "step": 2210 + }, + { + "epoch": 0.9345162653147444, + "grad_norm": 0.99266517162323, + "learning_rate": 4.9065483734685255e-05, + "loss": 0.067, + "step": 2212 + }, + { + "epoch": 0.935361216730038, + "grad_norm": 0.7559247612953186, + "learning_rate": 4.906463878326996e-05, + "loss": 0.0748, + "step": 2214 + }, + { + "epoch": 0.9362061681453316, + "grad_norm": 1.2688716650009155, + "learning_rate": 4.906379383185467e-05, + "loss": 0.0792, + "step": 2216 + }, + { + "epoch": 0.9370511195606253, + "grad_norm": 1.3983566761016846, + "learning_rate": 4.9062948880439377e-05, + "loss": 0.0737, + "step": 2218 + }, + { + "epoch": 0.9378960709759189, + "grad_norm": 1.3656744956970215, + "learning_rate": 4.9062103929024084e-05, + "loss": 0.0564, + "step": 2220 + }, + { + "epoch": 0.9387410223912125, + "grad_norm": 1.1833630800247192, + "learning_rate": 4.9061258977608784e-05, + "loss": 0.0567, + "step": 2222 + }, + { + "epoch": 0.9395859738065061, + "grad_norm": 1.6361331939697266, + "learning_rate": 4.90604140261935e-05, + "loss": 0.0693, + "step": 2224 + }, + { + "epoch": 0.9404309252217997, + "grad_norm": 1.5020231008529663, + "learning_rate": 4.90595690747782e-05, + "loss": 0.0655, + "step": 2226 + }, + { + "epoch": 0.9412758766370933, + "grad_norm": 1.5959280729293823, + "learning_rate": 4.9058724123362906e-05, + "loss": 0.1087, + "step": 2228 + }, + { + "epoch": 0.942120828052387, + "grad_norm": 1.0358680486679077, + "learning_rate": 4.905787917194761e-05, + "loss": 0.0493, + "step": 2230 + }, + { + "epoch": 0.9429657794676806, + "grad_norm": 1.8778531551361084, + "learning_rate": 4.905703422053232e-05, + "loss": 0.0577, + "step": 2232 + }, + { + "epoch": 0.9438107308829742, + "grad_norm": 2.4077320098876953, + "learning_rate": 4.905618926911703e-05, + "loss": 0.0651, + "step": 2234 + }, + { + "epoch": 0.9446556822982678, + "grad_norm": 1.2215629816055298, + "learning_rate": 4.9055344317701735e-05, + "loss": 0.0704, + "step": 2236 + }, + { + "epoch": 0.9455006337135615, + "grad_norm": 0.6339534521102905, + "learning_rate": 4.9054499366286435e-05, + "loss": 0.0603, + "step": 2238 + }, + { + "epoch": 0.9463455851288551, + "grad_norm": 1.2658920288085938, + "learning_rate": 4.905365441487115e-05, + "loss": 0.0615, + "step": 2240 + }, + { + "epoch": 0.9471905365441488, + "grad_norm": 0.8724871277809143, + "learning_rate": 4.905280946345585e-05, + "loss": 0.0728, + "step": 2242 + }, + { + "epoch": 0.9480354879594424, + "grad_norm": 1.5649269819259644, + "learning_rate": 4.905196451204056e-05, + "loss": 0.1156, + "step": 2244 + }, + { + "epoch": 0.948880439374736, + "grad_norm": 1.4007797241210938, + "learning_rate": 4.9051119560625264e-05, + "loss": 0.0734, + "step": 2246 + }, + { + "epoch": 0.9497253907900296, + "grad_norm": 0.7925835251808167, + "learning_rate": 4.905027460920997e-05, + "loss": 0.1002, + "step": 2248 + }, + { + "epoch": 0.9505703422053232, + "grad_norm": 0.9535337090492249, + "learning_rate": 4.904942965779468e-05, + "loss": 0.0518, + "step": 2250 + }, + { + "epoch": 0.9514152936206168, + "grad_norm": 0.983545184135437, + "learning_rate": 4.9048584706379385e-05, + "loss": 0.0521, + "step": 2252 + }, + { + "epoch": 0.9522602450359104, + "grad_norm": 1.7866032123565674, + "learning_rate": 4.904773975496409e-05, + "loss": 0.0574, + "step": 2254 + }, + { + "epoch": 0.9531051964512041, + "grad_norm": 1.5236202478408813, + "learning_rate": 4.90468948035488e-05, + "loss": 0.0679, + "step": 2256 + }, + { + "epoch": 0.9539501478664977, + "grad_norm": 0.9741945266723633, + "learning_rate": 4.90460498521335e-05, + "loss": 0.0574, + "step": 2258 + }, + { + "epoch": 0.9547950992817913, + "grad_norm": 1.6713435649871826, + "learning_rate": 4.9045204900718214e-05, + "loss": 0.0955, + "step": 2260 + }, + { + "epoch": 0.9556400506970849, + "grad_norm": 1.0833595991134644, + "learning_rate": 4.9044359949302915e-05, + "loss": 0.0803, + "step": 2262 + }, + { + "epoch": 0.9564850021123785, + "grad_norm": 1.2216488122940063, + "learning_rate": 4.904351499788762e-05, + "loss": 0.0859, + "step": 2264 + }, + { + "epoch": 0.9573299535276721, + "grad_norm": 1.2390130758285522, + "learning_rate": 4.904267004647233e-05, + "loss": 0.0506, + "step": 2266 + }, + { + "epoch": 0.9581749049429658, + "grad_norm": 1.330955982208252, + "learning_rate": 4.9041825095057036e-05, + "loss": 0.0526, + "step": 2268 + }, + { + "epoch": 0.9590198563582594, + "grad_norm": 1.5434483289718628, + "learning_rate": 4.904098014364174e-05, + "loss": 0.0539, + "step": 2270 + }, + { + "epoch": 0.959864807773553, + "grad_norm": 0.4926339387893677, + "learning_rate": 4.904013519222645e-05, + "loss": 0.051, + "step": 2272 + }, + { + "epoch": 0.9607097591888466, + "grad_norm": 1.3270699977874756, + "learning_rate": 4.903929024081115e-05, + "loss": 0.0641, + "step": 2274 + }, + { + "epoch": 0.9615547106041402, + "grad_norm": 1.2787952423095703, + "learning_rate": 4.9038445289395865e-05, + "loss": 0.0713, + "step": 2276 + }, + { + "epoch": 0.9623996620194338, + "grad_norm": 2.050325632095337, + "learning_rate": 4.9037600337980565e-05, + "loss": 0.0765, + "step": 2278 + }, + { + "epoch": 0.9632446134347274, + "grad_norm": 1.6342370510101318, + "learning_rate": 4.903675538656527e-05, + "loss": 0.1075, + "step": 2280 + }, + { + "epoch": 0.9640895648500212, + "grad_norm": 1.5692452192306519, + "learning_rate": 4.903591043514998e-05, + "loss": 0.0795, + "step": 2282 + }, + { + "epoch": 0.9649345162653148, + "grad_norm": 0.9670857787132263, + "learning_rate": 4.903506548373469e-05, + "loss": 0.0611, + "step": 2284 + }, + { + "epoch": 0.9657794676806084, + "grad_norm": 1.2690353393554688, + "learning_rate": 4.9034220532319394e-05, + "loss": 0.0779, + "step": 2286 + }, + { + "epoch": 0.966624419095902, + "grad_norm": 1.3158725500106812, + "learning_rate": 4.90333755809041e-05, + "loss": 0.0876, + "step": 2288 + }, + { + "epoch": 0.9674693705111956, + "grad_norm": 1.4484848976135254, + "learning_rate": 4.90325306294888e-05, + "loss": 0.0851, + "step": 2290 + }, + { + "epoch": 0.9683143219264893, + "grad_norm": 0.6505631804466248, + "learning_rate": 4.9031685678073516e-05, + "loss": 0.026, + "step": 2292 + }, + { + "epoch": 0.9691592733417829, + "grad_norm": 1.601660132408142, + "learning_rate": 4.9030840726658216e-05, + "loss": 0.0897, + "step": 2294 + }, + { + "epoch": 0.9700042247570765, + "grad_norm": 1.488582730293274, + "learning_rate": 4.9029995775242923e-05, + "loss": 0.1167, + "step": 2296 + }, + { + "epoch": 0.9708491761723701, + "grad_norm": 2.107656955718994, + "learning_rate": 4.902915082382763e-05, + "loss": 0.1119, + "step": 2298 + }, + { + "epoch": 0.9716941275876637, + "grad_norm": 0.8586990237236023, + "learning_rate": 4.902830587241234e-05, + "loss": 0.0527, + "step": 2300 + }, + { + "epoch": 0.9716941275876637, + "eval_accuracy": 0.7121917571995171, + "eval_cer": 0.08185643586192295, + "eval_loss": 0.1810568869113922, + "eval_runtime": 867.447, + "eval_samples_per_second": 13.37, + "eval_steps_per_second": 0.418, + "step": 2300 + }, + { + "epoch": 0.9725390790029573, + "grad_norm": 1.5183982849121094, + "learning_rate": 4.9027460920997045e-05, + "loss": 0.0819, + "step": 2302 + }, + { + "epoch": 0.973384030418251, + "grad_norm": 0.7407907247543335, + "learning_rate": 4.902661596958175e-05, + "loss": 0.0496, + "step": 2304 + }, + { + "epoch": 0.9742289818335446, + "grad_norm": 1.159289002418518, + "learning_rate": 4.902577101816645e-05, + "loss": 0.0925, + "step": 2306 + }, + { + "epoch": 0.9750739332488382, + "grad_norm": 1.0706584453582764, + "learning_rate": 4.9024926066751167e-05, + "loss": 0.0459, + "step": 2308 + }, + { + "epoch": 0.9759188846641318, + "grad_norm": 1.2856115102767944, + "learning_rate": 4.902408111533587e-05, + "loss": 0.1012, + "step": 2310 + }, + { + "epoch": 0.9767638360794254, + "grad_norm": 1.437662124633789, + "learning_rate": 4.9023236163920574e-05, + "loss": 0.0896, + "step": 2312 + }, + { + "epoch": 0.977608787494719, + "grad_norm": 1.2910943031311035, + "learning_rate": 4.902239121250528e-05, + "loss": 0.0656, + "step": 2314 + }, + { + "epoch": 0.9784537389100126, + "grad_norm": 1.636964201927185, + "learning_rate": 4.902154626108999e-05, + "loss": 0.0864, + "step": 2316 + }, + { + "epoch": 0.9792986903253063, + "grad_norm": 1.5599114894866943, + "learning_rate": 4.9020701309674696e-05, + "loss": 0.0813, + "step": 2318 + }, + { + "epoch": 0.9801436417405999, + "grad_norm": 1.6414740085601807, + "learning_rate": 4.90198563582594e-05, + "loss": 0.066, + "step": 2320 + }, + { + "epoch": 0.9809885931558935, + "grad_norm": 0.8238911032676697, + "learning_rate": 4.9019011406844103e-05, + "loss": 0.0533, + "step": 2322 + }, + { + "epoch": 0.9818335445711872, + "grad_norm": 1.3339018821716309, + "learning_rate": 4.901816645542882e-05, + "loss": 0.0765, + "step": 2324 + }, + { + "epoch": 0.9826784959864808, + "grad_norm": 1.6035016775131226, + "learning_rate": 4.901732150401352e-05, + "loss": 0.0885, + "step": 2326 + }, + { + "epoch": 0.9835234474017744, + "grad_norm": 0.8856274485588074, + "learning_rate": 4.9016476552598225e-05, + "loss": 0.0627, + "step": 2328 + }, + { + "epoch": 0.9843683988170681, + "grad_norm": 1.3794585466384888, + "learning_rate": 4.901563160118293e-05, + "loss": 0.072, + "step": 2330 + }, + { + "epoch": 0.9852133502323617, + "grad_norm": 1.3091775178909302, + "learning_rate": 4.901478664976764e-05, + "loss": 0.1265, + "step": 2332 + }, + { + "epoch": 0.9860583016476553, + "grad_norm": 1.0958503484725952, + "learning_rate": 4.9013941698352347e-05, + "loss": 0.0409, + "step": 2334 + }, + { + "epoch": 0.9869032530629489, + "grad_norm": 1.701104760169983, + "learning_rate": 4.9013096746937054e-05, + "loss": 0.0665, + "step": 2336 + }, + { + "epoch": 0.9877482044782425, + "grad_norm": 1.8106247186660767, + "learning_rate": 4.9012251795521754e-05, + "loss": 0.0842, + "step": 2338 + }, + { + "epoch": 0.9885931558935361, + "grad_norm": 1.0502506494522095, + "learning_rate": 4.901140684410647e-05, + "loss": 0.0682, + "step": 2340 + }, + { + "epoch": 0.9894381073088298, + "grad_norm": 1.2314811944961548, + "learning_rate": 4.901056189269117e-05, + "loss": 0.0712, + "step": 2342 + }, + { + "epoch": 0.9902830587241234, + "grad_norm": 1.5358272790908813, + "learning_rate": 4.900971694127588e-05, + "loss": 0.0675, + "step": 2344 + }, + { + "epoch": 0.991128010139417, + "grad_norm": 1.1753113269805908, + "learning_rate": 4.900887198986058e-05, + "loss": 0.07, + "step": 2346 + }, + { + "epoch": 0.9919729615547106, + "grad_norm": 1.636322021484375, + "learning_rate": 4.900802703844529e-05, + "loss": 0.0772, + "step": 2348 + }, + { + "epoch": 0.9928179129700042, + "grad_norm": 1.526485562324524, + "learning_rate": 4.900718208703e-05, + "loss": 0.0975, + "step": 2350 + }, + { + "epoch": 0.9936628643852978, + "grad_norm": 1.3800063133239746, + "learning_rate": 4.9006337135614705e-05, + "loss": 0.0984, + "step": 2352 + }, + { + "epoch": 0.9945078158005914, + "grad_norm": 1.46196711063385, + "learning_rate": 4.900549218419941e-05, + "loss": 0.0758, + "step": 2354 + }, + { + "epoch": 0.9953527672158851, + "grad_norm": 0.7219085693359375, + "learning_rate": 4.900464723278412e-05, + "loss": 0.0641, + "step": 2356 + }, + { + "epoch": 0.9961977186311787, + "grad_norm": 1.4330068826675415, + "learning_rate": 4.900380228136882e-05, + "loss": 0.1131, + "step": 2358 + }, + { + "epoch": 0.9970426700464723, + "grad_norm": 0.9048241972923279, + "learning_rate": 4.9002957329953533e-05, + "loss": 0.0586, + "step": 2360 + }, + { + "epoch": 0.9978876214617659, + "grad_norm": 1.220953106880188, + "learning_rate": 4.9002112378538234e-05, + "loss": 0.0737, + "step": 2362 + }, + { + "epoch": 0.9987325728770595, + "grad_norm": 1.8127233982086182, + "learning_rate": 4.900126742712294e-05, + "loss": 0.0779, + "step": 2364 + }, + { + "epoch": 0.9995775242923531, + "grad_norm": 0.9549395442008972, + "learning_rate": 4.900042247570765e-05, + "loss": 0.0507, + "step": 2366 + }, + { + "epoch": 1.0004224757076468, + "grad_norm": 1.0994274616241455, + "learning_rate": 4.8999577524292355e-05, + "loss": 0.0667, + "step": 2368 + }, + { + "epoch": 1.0012674271229405, + "grad_norm": 1.3673089742660522, + "learning_rate": 4.899873257287706e-05, + "loss": 0.0409, + "step": 2370 + }, + { + "epoch": 1.002112378538234, + "grad_norm": 1.2117745876312256, + "learning_rate": 4.899788762146177e-05, + "loss": 0.0487, + "step": 2372 + }, + { + "epoch": 1.0029573299535277, + "grad_norm": 1.5104575157165527, + "learning_rate": 4.899704267004647e-05, + "loss": 0.0739, + "step": 2374 + }, + { + "epoch": 1.0038022813688212, + "grad_norm": 1.4151710271835327, + "learning_rate": 4.8996197718631184e-05, + "loss": 0.0622, + "step": 2376 + }, + { + "epoch": 1.004647232784115, + "grad_norm": 0.9456071853637695, + "learning_rate": 4.8995352767215885e-05, + "loss": 0.0614, + "step": 2378 + }, + { + "epoch": 1.0054921841994084, + "grad_norm": 0.9996126890182495, + "learning_rate": 4.899450781580059e-05, + "loss": 0.0477, + "step": 2380 + }, + { + "epoch": 1.0063371356147022, + "grad_norm": 0.665332019329071, + "learning_rate": 4.89936628643853e-05, + "loss": 0.033, + "step": 2382 + }, + { + "epoch": 1.0071820870299957, + "grad_norm": 0.9107492566108704, + "learning_rate": 4.8992817912970006e-05, + "loss": 0.0541, + "step": 2384 + }, + { + "epoch": 1.0080270384452894, + "grad_norm": 1.0256426334381104, + "learning_rate": 4.8991972961554713e-05, + "loss": 0.0438, + "step": 2386 + }, + { + "epoch": 1.008871989860583, + "grad_norm": 1.5273648500442505, + "learning_rate": 4.899112801013942e-05, + "loss": 0.0486, + "step": 2388 + }, + { + "epoch": 1.0097169412758766, + "grad_norm": 0.8203524351119995, + "learning_rate": 4.899028305872412e-05, + "loss": 0.0556, + "step": 2390 + }, + { + "epoch": 1.0105618926911704, + "grad_norm": 1.0912214517593384, + "learning_rate": 4.8989438107308835e-05, + "loss": 0.0579, + "step": 2392 + }, + { + "epoch": 1.0114068441064639, + "grad_norm": 1.2179914712905884, + "learning_rate": 4.8988593155893535e-05, + "loss": 0.0423, + "step": 2394 + }, + { + "epoch": 1.0122517955217576, + "grad_norm": 0.6758914589881897, + "learning_rate": 4.898774820447824e-05, + "loss": 0.0311, + "step": 2396 + }, + { + "epoch": 1.013096746937051, + "grad_norm": 1.940477967262268, + "learning_rate": 4.898690325306295e-05, + "loss": 0.0979, + "step": 2398 + }, + { + "epoch": 1.0139416983523448, + "grad_norm": 1.4944899082183838, + "learning_rate": 4.898605830164766e-05, + "loss": 0.0532, + "step": 2400 + }, + { + "epoch": 1.0139416983523448, + "eval_accuracy": 0.7180548370408691, + "eval_cer": 0.07823954409520364, + "eval_loss": 0.17912158370018005, + "eval_runtime": 844.0986, + "eval_samples_per_second": 13.74, + "eval_steps_per_second": 0.43, + "step": 2400 + }, + { + "epoch": 1.0147866497676383, + "grad_norm": 1.0510189533233643, + "learning_rate": 4.8985213350232364e-05, + "loss": 0.0689, + "step": 2402 + }, + { + "epoch": 1.015631601182932, + "grad_norm": 0.9086995720863342, + "learning_rate": 4.898436839881707e-05, + "loss": 0.0534, + "step": 2404 + }, + { + "epoch": 1.0164765525982256, + "grad_norm": 1.3393218517303467, + "learning_rate": 4.898352344740177e-05, + "loss": 0.0418, + "step": 2406 + }, + { + "epoch": 1.0173215040135193, + "grad_norm": 0.9823423027992249, + "learning_rate": 4.8982678495986486e-05, + "loss": 0.0455, + "step": 2408 + }, + { + "epoch": 1.0181664554288128, + "grad_norm": 0.6783949136734009, + "learning_rate": 4.8981833544571186e-05, + "loss": 0.0495, + "step": 2410 + }, + { + "epoch": 1.0190114068441065, + "grad_norm": 1.0267900228500366, + "learning_rate": 4.8980988593155893e-05, + "loss": 0.0452, + "step": 2412 + }, + { + "epoch": 1.0198563582594, + "grad_norm": 0.7721602320671082, + "learning_rate": 4.89801436417406e-05, + "loss": 0.0577, + "step": 2414 + }, + { + "epoch": 1.0207013096746937, + "grad_norm": 1.2067053318023682, + "learning_rate": 4.897929869032531e-05, + "loss": 0.0488, + "step": 2416 + }, + { + "epoch": 1.0215462610899873, + "grad_norm": 1.3156932592391968, + "learning_rate": 4.8978453738910015e-05, + "loss": 0.0601, + "step": 2418 + }, + { + "epoch": 1.022391212505281, + "grad_norm": 1.6176562309265137, + "learning_rate": 4.897760878749472e-05, + "loss": 0.0538, + "step": 2420 + }, + { + "epoch": 1.0232361639205745, + "grad_norm": 1.0514973402023315, + "learning_rate": 4.897676383607942e-05, + "loss": 0.0583, + "step": 2422 + }, + { + "epoch": 1.0240811153358682, + "grad_norm": 1.4830445051193237, + "learning_rate": 4.8975918884664137e-05, + "loss": 0.0643, + "step": 2424 + }, + { + "epoch": 1.0249260667511617, + "grad_norm": 2.010371208190918, + "learning_rate": 4.897507393324884e-05, + "loss": 0.0437, + "step": 2426 + }, + { + "epoch": 1.0257710181664554, + "grad_norm": 0.8634136319160461, + "learning_rate": 4.8974228981833544e-05, + "loss": 0.0405, + "step": 2428 + }, + { + "epoch": 1.026615969581749, + "grad_norm": 1.5516853332519531, + "learning_rate": 4.897338403041825e-05, + "loss": 0.0441, + "step": 2430 + }, + { + "epoch": 1.0274609209970427, + "grad_norm": 1.314122200012207, + "learning_rate": 4.897253907900296e-05, + "loss": 0.0669, + "step": 2432 + }, + { + "epoch": 1.0283058724123364, + "grad_norm": 1.0754412412643433, + "learning_rate": 4.8971694127587666e-05, + "loss": 0.0333, + "step": 2434 + }, + { + "epoch": 1.02915082382763, + "grad_norm": 0.8331787586212158, + "learning_rate": 4.897084917617237e-05, + "loss": 0.066, + "step": 2436 + }, + { + "epoch": 1.0299957752429236, + "grad_norm": 1.0247435569763184, + "learning_rate": 4.8970004224757073e-05, + "loss": 0.0465, + "step": 2438 + }, + { + "epoch": 1.0308407266582171, + "grad_norm": 1.3126165866851807, + "learning_rate": 4.896915927334179e-05, + "loss": 0.0551, + "step": 2440 + }, + { + "epoch": 1.0316856780735109, + "grad_norm": 1.8739526271820068, + "learning_rate": 4.896831432192649e-05, + "loss": 0.0519, + "step": 2442 + }, + { + "epoch": 1.0325306294888044, + "grad_norm": 1.7847691774368286, + "learning_rate": 4.89674693705112e-05, + "loss": 0.057, + "step": 2444 + }, + { + "epoch": 1.033375580904098, + "grad_norm": 1.296621322631836, + "learning_rate": 4.89666244190959e-05, + "loss": 0.0829, + "step": 2446 + }, + { + "epoch": 1.0342205323193916, + "grad_norm": 1.0262562036514282, + "learning_rate": 4.896577946768061e-05, + "loss": 0.0656, + "step": 2448 + }, + { + "epoch": 1.0350654837346853, + "grad_norm": 1.4114569425582886, + "learning_rate": 4.896493451626532e-05, + "loss": 0.063, + "step": 2450 + }, + { + "epoch": 1.0359104351499788, + "grad_norm": 1.8110418319702148, + "learning_rate": 4.8964089564850024e-05, + "loss": 0.0508, + "step": 2452 + }, + { + "epoch": 1.0367553865652726, + "grad_norm": 1.9191076755523682, + "learning_rate": 4.896324461343473e-05, + "loss": 0.0761, + "step": 2454 + }, + { + "epoch": 1.037600337980566, + "grad_norm": 1.3961220979690552, + "learning_rate": 4.896239966201944e-05, + "loss": 0.061, + "step": 2456 + }, + { + "epoch": 1.0384452893958598, + "grad_norm": 0.8220006823539734, + "learning_rate": 4.896155471060414e-05, + "loss": 0.0524, + "step": 2458 + }, + { + "epoch": 1.0392902408111533, + "grad_norm": 1.0066295862197876, + "learning_rate": 4.896070975918885e-05, + "loss": 0.0468, + "step": 2460 + }, + { + "epoch": 1.040135192226447, + "grad_norm": 1.067622184753418, + "learning_rate": 4.895986480777355e-05, + "loss": 0.0476, + "step": 2462 + }, + { + "epoch": 1.0409801436417405, + "grad_norm": 2.0356531143188477, + "learning_rate": 4.895901985635826e-05, + "loss": 0.0969, + "step": 2464 + }, + { + "epoch": 1.0418250950570342, + "grad_norm": 2.8178772926330566, + "learning_rate": 4.895817490494297e-05, + "loss": 0.0532, + "step": 2466 + }, + { + "epoch": 1.0426700464723277, + "grad_norm": 2.3388311862945557, + "learning_rate": 4.8957329953527675e-05, + "loss": 0.0768, + "step": 2468 + }, + { + "epoch": 1.0435149978876215, + "grad_norm": 0.9694822430610657, + "learning_rate": 4.895648500211238e-05, + "loss": 0.0359, + "step": 2470 + }, + { + "epoch": 1.044359949302915, + "grad_norm": 1.434303879737854, + "learning_rate": 4.895564005069709e-05, + "loss": 0.0581, + "step": 2472 + }, + { + "epoch": 1.0452049007182087, + "grad_norm": 1.204118013381958, + "learning_rate": 4.895479509928179e-05, + "loss": 0.0881, + "step": 2474 + }, + { + "epoch": 1.0460498521335024, + "grad_norm": 1.4074668884277344, + "learning_rate": 4.8953950147866503e-05, + "loss": 0.0411, + "step": 2476 + }, + { + "epoch": 1.046894803548796, + "grad_norm": 1.492323637008667, + "learning_rate": 4.8953105196451204e-05, + "loss": 0.0401, + "step": 2478 + }, + { + "epoch": 1.0477397549640897, + "grad_norm": 1.0911036729812622, + "learning_rate": 4.895226024503591e-05, + "loss": 0.0441, + "step": 2480 + }, + { + "epoch": 1.0485847063793832, + "grad_norm": 1.6560875177383423, + "learning_rate": 4.895141529362062e-05, + "loss": 0.0521, + "step": 2482 + }, + { + "epoch": 1.049429657794677, + "grad_norm": 1.2704576253890991, + "learning_rate": 4.8950570342205325e-05, + "loss": 0.064, + "step": 2484 + }, + { + "epoch": 1.0502746092099704, + "grad_norm": 0.9503681063652039, + "learning_rate": 4.894972539079003e-05, + "loss": 0.0467, + "step": 2486 + }, + { + "epoch": 1.0511195606252641, + "grad_norm": 0.7836410403251648, + "learning_rate": 4.894888043937474e-05, + "loss": 0.0411, + "step": 2488 + }, + { + "epoch": 1.0519645120405576, + "grad_norm": 1.5865596532821655, + "learning_rate": 4.894803548795944e-05, + "loss": 0.0583, + "step": 2490 + }, + { + "epoch": 1.0528094634558514, + "grad_norm": 1.2086788415908813, + "learning_rate": 4.8947190536544154e-05, + "loss": 0.0408, + "step": 2492 + }, + { + "epoch": 1.0536544148711449, + "grad_norm": 0.8142328262329102, + "learning_rate": 4.8946345585128855e-05, + "loss": 0.0322, + "step": 2494 + }, + { + "epoch": 1.0544993662864386, + "grad_norm": 0.7533618211746216, + "learning_rate": 4.894550063371356e-05, + "loss": 0.0454, + "step": 2496 + }, + { + "epoch": 1.055344317701732, + "grad_norm": 0.9661786556243896, + "learning_rate": 4.894465568229827e-05, + "loss": 0.0479, + "step": 2498 + }, + { + "epoch": 1.0561892691170258, + "grad_norm": 1.2609660625457764, + "learning_rate": 4.8943810730882976e-05, + "loss": 0.0653, + "step": 2500 + }, + { + "epoch": 1.0561892691170258, + "eval_accuracy": 0.7173650629418865, + "eval_cer": 0.08063904302336865, + "eval_loss": 0.17944836616516113, + "eval_runtime": 861.1084, + "eval_samples_per_second": 13.469, + "eval_steps_per_second": 0.422, + "step": 2500 + }, + { + "epoch": 1.0570342205323193, + "grad_norm": 0.6538660526275635, + "learning_rate": 4.8942965779467683e-05, + "loss": 0.0483, + "step": 2502 + }, + { + "epoch": 1.057879171947613, + "grad_norm": 0.7036990523338318, + "learning_rate": 4.894212082805239e-05, + "loss": 0.0314, + "step": 2504 + }, + { + "epoch": 1.0587241233629066, + "grad_norm": 1.2298040390014648, + "learning_rate": 4.894127587663709e-05, + "loss": 0.0485, + "step": 2506 + }, + { + "epoch": 1.0595690747782003, + "grad_norm": 0.9623536467552185, + "learning_rate": 4.8940430925221805e-05, + "loss": 0.0521, + "step": 2508 + }, + { + "epoch": 1.0604140261934938, + "grad_norm": 1.2405459880828857, + "learning_rate": 4.8939585973806505e-05, + "loss": 0.0592, + "step": 2510 + }, + { + "epoch": 1.0612589776087875, + "grad_norm": 0.9672164916992188, + "learning_rate": 4.893874102239121e-05, + "loss": 0.0688, + "step": 2512 + }, + { + "epoch": 1.062103929024081, + "grad_norm": 1.3323535919189453, + "learning_rate": 4.893789607097592e-05, + "loss": 0.0475, + "step": 2514 + }, + { + "epoch": 1.0629488804393747, + "grad_norm": 0.9052666425704956, + "learning_rate": 4.893705111956063e-05, + "loss": 0.0275, + "step": 2516 + }, + { + "epoch": 1.0637938318546682, + "grad_norm": 0.8641655445098877, + "learning_rate": 4.8936206168145334e-05, + "loss": 0.0344, + "step": 2518 + }, + { + "epoch": 1.064638783269962, + "grad_norm": 1.5869804620742798, + "learning_rate": 4.893536121673004e-05, + "loss": 0.0674, + "step": 2520 + }, + { + "epoch": 1.0654837346852557, + "grad_norm": 1.136039137840271, + "learning_rate": 4.893451626531474e-05, + "loss": 0.0507, + "step": 2522 + }, + { + "epoch": 1.0663286861005492, + "grad_norm": 2.1299498081207275, + "learning_rate": 4.8933671313899456e-05, + "loss": 0.0856, + "step": 2524 + }, + { + "epoch": 1.067173637515843, + "grad_norm": 1.477407693862915, + "learning_rate": 4.8932826362484156e-05, + "loss": 0.0644, + "step": 2526 + }, + { + "epoch": 1.0680185889311364, + "grad_norm": 1.1373028755187988, + "learning_rate": 4.8931981411068863e-05, + "loss": 0.0342, + "step": 2528 + }, + { + "epoch": 1.0688635403464302, + "grad_norm": 1.0594611167907715, + "learning_rate": 4.893113645965357e-05, + "loss": 0.0743, + "step": 2530 + }, + { + "epoch": 1.0697084917617237, + "grad_norm": 1.1284602880477905, + "learning_rate": 4.893029150823828e-05, + "loss": 0.0621, + "step": 2532 + }, + { + "epoch": 1.0705534431770174, + "grad_norm": 1.341365933418274, + "learning_rate": 4.8929446556822985e-05, + "loss": 0.0596, + "step": 2534 + }, + { + "epoch": 1.071398394592311, + "grad_norm": 1.3680822849273682, + "learning_rate": 4.892860160540769e-05, + "loss": 0.0457, + "step": 2536 + }, + { + "epoch": 1.0722433460076046, + "grad_norm": 1.4528318643569946, + "learning_rate": 4.892775665399239e-05, + "loss": 0.0551, + "step": 2538 + }, + { + "epoch": 1.0730882974228981, + "grad_norm": 1.788142442703247, + "learning_rate": 4.892691170257711e-05, + "loss": 0.0783, + "step": 2540 + }, + { + "epoch": 1.0739332488381919, + "grad_norm": 1.1063508987426758, + "learning_rate": 4.892606675116181e-05, + "loss": 0.0448, + "step": 2542 + }, + { + "epoch": 1.0747782002534854, + "grad_norm": 1.275145173072815, + "learning_rate": 4.892522179974652e-05, + "loss": 0.0513, + "step": 2544 + }, + { + "epoch": 1.075623151668779, + "grad_norm": 1.1520663499832153, + "learning_rate": 4.892437684833122e-05, + "loss": 0.0384, + "step": 2546 + }, + { + "epoch": 1.0764681030840726, + "grad_norm": 1.0242836475372314, + "learning_rate": 4.892353189691593e-05, + "loss": 0.0444, + "step": 2548 + }, + { + "epoch": 1.0773130544993663, + "grad_norm": 1.0930004119873047, + "learning_rate": 4.8922686945500636e-05, + "loss": 0.0381, + "step": 2550 + }, + { + "epoch": 1.0781580059146598, + "grad_norm": 1.1523323059082031, + "learning_rate": 4.892184199408534e-05, + "loss": 0.0565, + "step": 2552 + }, + { + "epoch": 1.0790029573299535, + "grad_norm": 1.0468764305114746, + "learning_rate": 4.892099704267005e-05, + "loss": 0.0436, + "step": 2554 + }, + { + "epoch": 1.079847908745247, + "grad_norm": 1.1597340106964111, + "learning_rate": 4.892015209125476e-05, + "loss": 0.0477, + "step": 2556 + }, + { + "epoch": 1.0806928601605408, + "grad_norm": 1.3584051132202148, + "learning_rate": 4.891930713983946e-05, + "loss": 0.0652, + "step": 2558 + }, + { + "epoch": 1.0815378115758345, + "grad_norm": 0.9354948401451111, + "learning_rate": 4.891846218842417e-05, + "loss": 0.0463, + "step": 2560 + }, + { + "epoch": 1.082382762991128, + "grad_norm": 1.0756053924560547, + "learning_rate": 4.891761723700887e-05, + "loss": 0.0427, + "step": 2562 + }, + { + "epoch": 1.0832277144064215, + "grad_norm": 1.3494528532028198, + "learning_rate": 4.891677228559358e-05, + "loss": 0.0559, + "step": 2564 + }, + { + "epoch": 1.0840726658217152, + "grad_norm": 3.9756555557250977, + "learning_rate": 4.891592733417829e-05, + "loss": 0.0671, + "step": 2566 + }, + { + "epoch": 1.084917617237009, + "grad_norm": 1.4587855339050293, + "learning_rate": 4.8915082382762994e-05, + "loss": 0.042, + "step": 2568 + }, + { + "epoch": 1.0857625686523025, + "grad_norm": 1.1794617176055908, + "learning_rate": 4.89142374313477e-05, + "loss": 0.0722, + "step": 2570 + }, + { + "epoch": 1.0866075200675962, + "grad_norm": 1.3769999742507935, + "learning_rate": 4.891339247993241e-05, + "loss": 0.0846, + "step": 2572 + }, + { + "epoch": 1.0874524714828897, + "grad_norm": 0.7593065500259399, + "learning_rate": 4.891254752851711e-05, + "loss": 0.0436, + "step": 2574 + }, + { + "epoch": 1.0882974228981834, + "grad_norm": 1.1376982927322388, + "learning_rate": 4.891170257710182e-05, + "loss": 0.048, + "step": 2576 + }, + { + "epoch": 1.089142374313477, + "grad_norm": 0.9180907011032104, + "learning_rate": 4.891085762568652e-05, + "loss": 0.0582, + "step": 2578 + }, + { + "epoch": 1.0899873257287707, + "grad_norm": 1.5679020881652832, + "learning_rate": 4.891001267427123e-05, + "loss": 0.0565, + "step": 2580 + }, + { + "epoch": 1.0908322771440642, + "grad_norm": 1.0620578527450562, + "learning_rate": 4.890916772285594e-05, + "loss": 0.0823, + "step": 2582 + }, + { + "epoch": 1.091677228559358, + "grad_norm": 0.9624361395835876, + "learning_rate": 4.8908322771440645e-05, + "loss": 0.0332, + "step": 2584 + }, + { + "epoch": 1.0925221799746514, + "grad_norm": 1.1361215114593506, + "learning_rate": 4.890747782002535e-05, + "loss": 0.0579, + "step": 2586 + }, + { + "epoch": 1.0933671313899451, + "grad_norm": 1.0327388048171997, + "learning_rate": 4.890663286861006e-05, + "loss": 0.0408, + "step": 2588 + }, + { + "epoch": 1.0942120828052386, + "grad_norm": 1.140649676322937, + "learning_rate": 4.890578791719476e-05, + "loss": 0.0414, + "step": 2590 + }, + { + "epoch": 1.0950570342205324, + "grad_norm": 1.0419838428497314, + "learning_rate": 4.8904942965779473e-05, + "loss": 0.0517, + "step": 2592 + }, + { + "epoch": 1.0959019856358259, + "grad_norm": 0.7673367261886597, + "learning_rate": 4.8904098014364174e-05, + "loss": 0.0279, + "step": 2594 + }, + { + "epoch": 1.0967469370511196, + "grad_norm": 1.4603475332260132, + "learning_rate": 4.890325306294888e-05, + "loss": 0.0661, + "step": 2596 + }, + { + "epoch": 1.097591888466413, + "grad_norm": 0.6877373456954956, + "learning_rate": 4.890240811153359e-05, + "loss": 0.0309, + "step": 2598 + }, + { + "epoch": 1.0984368398817068, + "grad_norm": 0.6919780969619751, + "learning_rate": 4.8901563160118295e-05, + "loss": 0.045, + "step": 2600 + }, + { + "epoch": 1.0984368398817068, + "eval_accuracy": 0.7184859458527332, + "eval_cer": 0.07980098273595808, + "eval_loss": 0.1835366040468216, + "eval_runtime": 857.7507, + "eval_samples_per_second": 13.521, + "eval_steps_per_second": 0.423, + "step": 2600 + }, + { + "epoch": 1.0992817912970003, + "grad_norm": 1.1517709493637085, + "learning_rate": 4.8900718208703e-05, + "loss": 0.0561, + "step": 2602 + }, + { + "epoch": 1.100126742712294, + "grad_norm": 1.0358161926269531, + "learning_rate": 4.889987325728771e-05, + "loss": 0.046, + "step": 2604 + }, + { + "epoch": 1.1009716941275878, + "grad_norm": 1.4217817783355713, + "learning_rate": 4.889902830587241e-05, + "loss": 0.073, + "step": 2606 + }, + { + "epoch": 1.1018166455428813, + "grad_norm": 1.2074991464614868, + "learning_rate": 4.8898183354457124e-05, + "loss": 0.0871, + "step": 2608 + }, + { + "epoch": 1.102661596958175, + "grad_norm": 2.3143765926361084, + "learning_rate": 4.8897338403041825e-05, + "loss": 0.0497, + "step": 2610 + }, + { + "epoch": 1.1035065483734685, + "grad_norm": 1.1069965362548828, + "learning_rate": 4.889649345162653e-05, + "loss": 0.0511, + "step": 2612 + }, + { + "epoch": 1.1043514997887622, + "grad_norm": 0.9317350387573242, + "learning_rate": 4.889564850021124e-05, + "loss": 0.0506, + "step": 2614 + }, + { + "epoch": 1.1051964512040557, + "grad_norm": 0.9004279971122742, + "learning_rate": 4.8894803548795946e-05, + "loss": 0.0338, + "step": 2616 + }, + { + "epoch": 1.1060414026193495, + "grad_norm": 2.0052106380462646, + "learning_rate": 4.8893958597380653e-05, + "loss": 0.0945, + "step": 2618 + }, + { + "epoch": 1.106886354034643, + "grad_norm": 1.1178474426269531, + "learning_rate": 4.889311364596536e-05, + "loss": 0.0474, + "step": 2620 + }, + { + "epoch": 1.1077313054499367, + "grad_norm": 0.9016062617301941, + "learning_rate": 4.889226869455006e-05, + "loss": 0.0391, + "step": 2622 + }, + { + "epoch": 1.1085762568652302, + "grad_norm": 1.6335241794586182, + "learning_rate": 4.8891423743134775e-05, + "loss": 0.0487, + "step": 2624 + }, + { + "epoch": 1.109421208280524, + "grad_norm": 1.228885293006897, + "learning_rate": 4.8890578791719475e-05, + "loss": 0.0529, + "step": 2626 + }, + { + "epoch": 1.1102661596958174, + "grad_norm": 0.7745740413665771, + "learning_rate": 4.888973384030418e-05, + "loss": 0.0231, + "step": 2628 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 1.8813176155090332, + "learning_rate": 4.888888888888889e-05, + "loss": 0.054, + "step": 2630 + }, + { + "epoch": 1.1119560625264047, + "grad_norm": 1.0765502452850342, + "learning_rate": 4.88880439374736e-05, + "loss": 0.0518, + "step": 2632 + }, + { + "epoch": 1.1128010139416984, + "grad_norm": 1.6047254800796509, + "learning_rate": 4.8887198986058304e-05, + "loss": 0.0632, + "step": 2634 + }, + { + "epoch": 1.113645965356992, + "grad_norm": 0.8298768997192383, + "learning_rate": 4.888635403464301e-05, + "loss": 0.031, + "step": 2636 + }, + { + "epoch": 1.1144909167722856, + "grad_norm": 0.9682705402374268, + "learning_rate": 4.888550908322771e-05, + "loss": 0.0421, + "step": 2638 + }, + { + "epoch": 1.1153358681875791, + "grad_norm": 0.9020190834999084, + "learning_rate": 4.8884664131812426e-05, + "loss": 0.0587, + "step": 2640 + }, + { + "epoch": 1.1161808196028729, + "grad_norm": 1.5844128131866455, + "learning_rate": 4.8883819180397126e-05, + "loss": 0.0633, + "step": 2642 + }, + { + "epoch": 1.1170257710181664, + "grad_norm": 1.0159263610839844, + "learning_rate": 4.888297422898184e-05, + "loss": 0.0635, + "step": 2644 + }, + { + "epoch": 1.11787072243346, + "grad_norm": 1.2040449380874634, + "learning_rate": 4.888212927756654e-05, + "loss": 0.0546, + "step": 2646 + }, + { + "epoch": 1.1187156738487536, + "grad_norm": 1.333282232284546, + "learning_rate": 4.888128432615125e-05, + "loss": 0.0415, + "step": 2648 + }, + { + "epoch": 1.1195606252640473, + "grad_norm": 0.9611084461212158, + "learning_rate": 4.8880439374735955e-05, + "loss": 0.0467, + "step": 2650 + }, + { + "epoch": 1.120405576679341, + "grad_norm": 1.6842930316925049, + "learning_rate": 4.887959442332066e-05, + "loss": 0.0573, + "step": 2652 + }, + { + "epoch": 1.1212505280946345, + "grad_norm": 1.4707005023956299, + "learning_rate": 4.887874947190537e-05, + "loss": 0.0691, + "step": 2654 + }, + { + "epoch": 1.1220954795099283, + "grad_norm": 1.7009764909744263, + "learning_rate": 4.887790452049008e-05, + "loss": 0.0526, + "step": 2656 + }, + { + "epoch": 1.1229404309252218, + "grad_norm": 0.4141417145729065, + "learning_rate": 4.887705956907478e-05, + "loss": 0.0406, + "step": 2658 + }, + { + "epoch": 1.1237853823405155, + "grad_norm": 1.2767736911773682, + "learning_rate": 4.887621461765949e-05, + "loss": 0.0599, + "step": 2660 + }, + { + "epoch": 1.124630333755809, + "grad_norm": 1.0069574117660522, + "learning_rate": 4.887536966624419e-05, + "loss": 0.0733, + "step": 2662 + }, + { + "epoch": 1.1254752851711027, + "grad_norm": 1.3213729858398438, + "learning_rate": 4.88745247148289e-05, + "loss": 0.0641, + "step": 2664 + }, + { + "epoch": 1.1263202365863962, + "grad_norm": 0.9671632051467896, + "learning_rate": 4.8873679763413606e-05, + "loss": 0.065, + "step": 2666 + }, + { + "epoch": 1.12716518800169, + "grad_norm": 1.066148042678833, + "learning_rate": 4.887283481199831e-05, + "loss": 0.0689, + "step": 2668 + }, + { + "epoch": 1.1280101394169835, + "grad_norm": 1.6266218423843384, + "learning_rate": 4.887198986058302e-05, + "loss": 0.0444, + "step": 2670 + }, + { + "epoch": 1.1288550908322772, + "grad_norm": 0.7963671088218689, + "learning_rate": 4.887114490916773e-05, + "loss": 0.0423, + "step": 2672 + }, + { + "epoch": 1.1297000422475707, + "grad_norm": 5.003674507141113, + "learning_rate": 4.887029995775243e-05, + "loss": 0.0406, + "step": 2674 + }, + { + "epoch": 1.1305449936628644, + "grad_norm": 1.4064865112304688, + "learning_rate": 4.886945500633714e-05, + "loss": 0.0567, + "step": 2676 + }, + { + "epoch": 1.131389945078158, + "grad_norm": 1.2468315362930298, + "learning_rate": 4.886861005492184e-05, + "loss": 0.0517, + "step": 2678 + }, + { + "epoch": 1.1322348964934517, + "grad_norm": 1.068264365196228, + "learning_rate": 4.886776510350655e-05, + "loss": 0.0442, + "step": 2680 + }, + { + "epoch": 1.1330798479087452, + "grad_norm": 1.4150062799453735, + "learning_rate": 4.886692015209126e-05, + "loss": 0.0397, + "step": 2682 + }, + { + "epoch": 1.133924799324039, + "grad_norm": 1.4678819179534912, + "learning_rate": 4.8866075200675964e-05, + "loss": 0.061, + "step": 2684 + }, + { + "epoch": 1.1347697507393324, + "grad_norm": 1.3416491746902466, + "learning_rate": 4.886523024926067e-05, + "loss": 0.0644, + "step": 2686 + }, + { + "epoch": 1.1356147021546261, + "grad_norm": 1.5595964193344116, + "learning_rate": 4.886438529784538e-05, + "loss": 0.051, + "step": 2688 + }, + { + "epoch": 1.1364596535699198, + "grad_norm": 0.7982243895530701, + "learning_rate": 4.886354034643008e-05, + "loss": 0.056, + "step": 2690 + }, + { + "epoch": 1.1373046049852134, + "grad_norm": 1.517142415046692, + "learning_rate": 4.886269539501479e-05, + "loss": 0.0659, + "step": 2692 + }, + { + "epoch": 1.1381495564005069, + "grad_norm": 0.7119234800338745, + "learning_rate": 4.886185044359949e-05, + "loss": 0.0539, + "step": 2694 + }, + { + "epoch": 1.1389945078158006, + "grad_norm": 1.0239163637161255, + "learning_rate": 4.88610054921842e-05, + "loss": 0.0475, + "step": 2696 + }, + { + "epoch": 1.1398394592310943, + "grad_norm": 1.3597328662872314, + "learning_rate": 4.886016054076891e-05, + "loss": 0.0515, + "step": 2698 + }, + { + "epoch": 1.1406844106463878, + "grad_norm": 0.914358377456665, + "learning_rate": 4.8859315589353615e-05, + "loss": 0.0496, + "step": 2700 + }, + { + "epoch": 1.1406844106463878, + "eval_accuracy": 0.7282290050008622, + "eval_cer": 0.07597237047557716, + "eval_loss": 0.1734546571969986, + "eval_runtime": 866.2492, + "eval_samples_per_second": 13.389, + "eval_steps_per_second": 0.419, + "step": 2700 + }, + { + "epoch": 1.1415293620616815, + "grad_norm": 1.1831682920455933, + "learning_rate": 4.885847063793832e-05, + "loss": 0.0741, + "step": 2702 + }, + { + "epoch": 1.142374313476975, + "grad_norm": 0.8696686029434204, + "learning_rate": 4.885762568652303e-05, + "loss": 0.0446, + "step": 2704 + }, + { + "epoch": 1.1432192648922688, + "grad_norm": 0.9323683977127075, + "learning_rate": 4.885678073510773e-05, + "loss": 0.0605, + "step": 2706 + }, + { + "epoch": 1.1440642163075623, + "grad_norm": 1.124110460281372, + "learning_rate": 4.8855935783692443e-05, + "loss": 0.036, + "step": 2708 + }, + { + "epoch": 1.144909167722856, + "grad_norm": 1.1443071365356445, + "learning_rate": 4.8855090832277144e-05, + "loss": 0.035, + "step": 2710 + }, + { + "epoch": 1.1457541191381495, + "grad_norm": 0.8278305530548096, + "learning_rate": 4.885424588086185e-05, + "loss": 0.0369, + "step": 2712 + }, + { + "epoch": 1.1465990705534432, + "grad_norm": 1.1934250593185425, + "learning_rate": 4.885340092944656e-05, + "loss": 0.0539, + "step": 2714 + }, + { + "epoch": 1.1474440219687367, + "grad_norm": 1.858482003211975, + "learning_rate": 4.8852555978031265e-05, + "loss": 0.0523, + "step": 2716 + }, + { + "epoch": 1.1482889733840305, + "grad_norm": 1.45504891872406, + "learning_rate": 4.885171102661597e-05, + "loss": 0.0532, + "step": 2718 + }, + { + "epoch": 1.149133924799324, + "grad_norm": 1.0459725856781006, + "learning_rate": 4.885086607520068e-05, + "loss": 0.0487, + "step": 2720 + }, + { + "epoch": 1.1499788762146177, + "grad_norm": 2.038820743560791, + "learning_rate": 4.885002112378538e-05, + "loss": 0.0513, + "step": 2722 + }, + { + "epoch": 1.1508238276299112, + "grad_norm": 0.9835671186447144, + "learning_rate": 4.8849176172370094e-05, + "loss": 0.0536, + "step": 2724 + }, + { + "epoch": 1.151668779045205, + "grad_norm": 1.1258163452148438, + "learning_rate": 4.8848331220954795e-05, + "loss": 0.0583, + "step": 2726 + }, + { + "epoch": 1.1525137304604984, + "grad_norm": 0.7174440026283264, + "learning_rate": 4.88474862695395e-05, + "loss": 0.0283, + "step": 2728 + }, + { + "epoch": 1.1533586818757922, + "grad_norm": 1.195196509361267, + "learning_rate": 4.884664131812421e-05, + "loss": 0.0518, + "step": 2730 + }, + { + "epoch": 1.1542036332910857, + "grad_norm": 0.956576943397522, + "learning_rate": 4.8845796366708916e-05, + "loss": 0.0562, + "step": 2732 + }, + { + "epoch": 1.1550485847063794, + "grad_norm": 1.3628367185592651, + "learning_rate": 4.8844951415293623e-05, + "loss": 0.0548, + "step": 2734 + }, + { + "epoch": 1.1558935361216731, + "grad_norm": 0.6316198110580444, + "learning_rate": 4.884410646387833e-05, + "loss": 0.0293, + "step": 2736 + }, + { + "epoch": 1.1567384875369666, + "grad_norm": 0.6486905217170715, + "learning_rate": 4.884326151246303e-05, + "loss": 0.043, + "step": 2738 + }, + { + "epoch": 1.1575834389522601, + "grad_norm": 1.5592443943023682, + "learning_rate": 4.8842416561047745e-05, + "loss": 0.0589, + "step": 2740 + }, + { + "epoch": 1.1584283903675539, + "grad_norm": 1.3737517595291138, + "learning_rate": 4.8841571609632446e-05, + "loss": 0.0638, + "step": 2742 + }, + { + "epoch": 1.1592733417828476, + "grad_norm": 0.8782877922058105, + "learning_rate": 4.884072665821716e-05, + "loss": 0.0582, + "step": 2744 + }, + { + "epoch": 1.160118293198141, + "grad_norm": 0.960125207901001, + "learning_rate": 4.883988170680186e-05, + "loss": 0.0655, + "step": 2746 + }, + { + "epoch": 1.1609632446134348, + "grad_norm": 1.4179805517196655, + "learning_rate": 4.883903675538657e-05, + "loss": 0.0614, + "step": 2748 + }, + { + "epoch": 1.1618081960287283, + "grad_norm": 0.8707264065742493, + "learning_rate": 4.8838191803971274e-05, + "loss": 0.0385, + "step": 2750 + }, + { + "epoch": 1.162653147444022, + "grad_norm": 1.0426108837127686, + "learning_rate": 4.883734685255598e-05, + "loss": 0.07, + "step": 2752 + }, + { + "epoch": 1.1634980988593155, + "grad_norm": 1.4466257095336914, + "learning_rate": 4.883650190114069e-05, + "loss": 0.0584, + "step": 2754 + }, + { + "epoch": 1.1643430502746093, + "grad_norm": 1.018989086151123, + "learning_rate": 4.8835656949725396e-05, + "loss": 0.0623, + "step": 2756 + }, + { + "epoch": 1.1651880016899028, + "grad_norm": 1.6744930744171143, + "learning_rate": 4.8834811998310096e-05, + "loss": 0.0807, + "step": 2758 + }, + { + "epoch": 1.1660329531051965, + "grad_norm": 1.2810227870941162, + "learning_rate": 4.883396704689481e-05, + "loss": 0.0462, + "step": 2760 + }, + { + "epoch": 1.16687790452049, + "grad_norm": 0.7232649326324463, + "learning_rate": 4.883312209547951e-05, + "loss": 0.0473, + "step": 2762 + }, + { + "epoch": 1.1677228559357837, + "grad_norm": 1.4324157238006592, + "learning_rate": 4.883227714406422e-05, + "loss": 0.0736, + "step": 2764 + }, + { + "epoch": 1.1685678073510772, + "grad_norm": 0.6245022416114807, + "learning_rate": 4.8831432192648925e-05, + "loss": 0.0281, + "step": 2766 + }, + { + "epoch": 1.169412758766371, + "grad_norm": 1.720564842224121, + "learning_rate": 4.883058724123363e-05, + "loss": 0.0831, + "step": 2768 + }, + { + "epoch": 1.1702577101816645, + "grad_norm": 1.7815865278244019, + "learning_rate": 4.882974228981834e-05, + "loss": 0.0448, + "step": 2770 + }, + { + "epoch": 1.1711026615969582, + "grad_norm": 1.1956509351730347, + "learning_rate": 4.882889733840305e-05, + "loss": 0.0696, + "step": 2772 + }, + { + "epoch": 1.1719476130122517, + "grad_norm": 0.9493592381477356, + "learning_rate": 4.882805238698775e-05, + "loss": 0.0671, + "step": 2774 + }, + { + "epoch": 1.1727925644275454, + "grad_norm": 0.7172707319259644, + "learning_rate": 4.882720743557246e-05, + "loss": 0.0495, + "step": 2776 + }, + { + "epoch": 1.173637515842839, + "grad_norm": 1.3545876741409302, + "learning_rate": 4.882636248415716e-05, + "loss": 0.0609, + "step": 2778 + }, + { + "epoch": 1.1744824672581327, + "grad_norm": 0.9758241772651672, + "learning_rate": 4.882551753274187e-05, + "loss": 0.0454, + "step": 2780 + }, + { + "epoch": 1.1753274186734264, + "grad_norm": 1.241162657737732, + "learning_rate": 4.8824672581326576e-05, + "loss": 0.0447, + "step": 2782 + }, + { + "epoch": 1.1761723700887199, + "grad_norm": 1.0415271520614624, + "learning_rate": 4.882382762991128e-05, + "loss": 0.0389, + "step": 2784 + }, + { + "epoch": 1.1770173215040136, + "grad_norm": 1.916858196258545, + "learning_rate": 4.882298267849599e-05, + "loss": 0.0658, + "step": 2786 + }, + { + "epoch": 1.1778622729193071, + "grad_norm": 1.2773157358169556, + "learning_rate": 4.88221377270807e-05, + "loss": 0.0654, + "step": 2788 + }, + { + "epoch": 1.1787072243346008, + "grad_norm": 1.5616599321365356, + "learning_rate": 4.88212927756654e-05, + "loss": 0.0643, + "step": 2790 + }, + { + "epoch": 1.1795521757498943, + "grad_norm": 1.3588258028030396, + "learning_rate": 4.882044782425011e-05, + "loss": 0.0546, + "step": 2792 + }, + { + "epoch": 1.180397127165188, + "grad_norm": 0.8794555068016052, + "learning_rate": 4.881960287283481e-05, + "loss": 0.0537, + "step": 2794 + }, + { + "epoch": 1.1812420785804816, + "grad_norm": 1.2843623161315918, + "learning_rate": 4.881875792141952e-05, + "loss": 0.0349, + "step": 2796 + }, + { + "epoch": 1.1820870299957753, + "grad_norm": 1.0971851348876953, + "learning_rate": 4.881791297000423e-05, + "loss": 0.0529, + "step": 2798 + }, + { + "epoch": 1.1829319814110688, + "grad_norm": 1.3400644063949585, + "learning_rate": 4.8817068018588934e-05, + "loss": 0.0555, + "step": 2800 + }, + { + "epoch": 1.1829319814110688, + "eval_accuracy": 0.7257285738920504, + "eval_cer": 0.07709272475453656, + "eval_loss": 0.1766710728406906, + "eval_runtime": 853.1559, + "eval_samples_per_second": 13.594, + "eval_steps_per_second": 0.425, + "step": 2800 + }, + { + "epoch": 1.1837769328263625, + "grad_norm": 0.753764271736145, + "learning_rate": 4.881622306717364e-05, + "loss": 0.0612, + "step": 2802 + }, + { + "epoch": 1.184621884241656, + "grad_norm": 3.2383697032928467, + "learning_rate": 4.881537811575835e-05, + "loss": 0.0604, + "step": 2804 + }, + { + "epoch": 1.1854668356569498, + "grad_norm": 0.9122393727302551, + "learning_rate": 4.881453316434305e-05, + "loss": 0.0318, + "step": 2806 + }, + { + "epoch": 1.1863117870722433, + "grad_norm": 1.2476575374603271, + "learning_rate": 4.881368821292776e-05, + "loss": 0.0407, + "step": 2808 + }, + { + "epoch": 1.187156738487537, + "grad_norm": 1.0261362791061401, + "learning_rate": 4.881284326151246e-05, + "loss": 0.0529, + "step": 2810 + }, + { + "epoch": 1.1880016899028305, + "grad_norm": 1.278996467590332, + "learning_rate": 4.881199831009717e-05, + "loss": 0.0606, + "step": 2812 + }, + { + "epoch": 1.1888466413181242, + "grad_norm": 1.2511422634124756, + "learning_rate": 4.881115335868188e-05, + "loss": 0.0555, + "step": 2814 + }, + { + "epoch": 1.1896915927334177, + "grad_norm": 0.9766137599945068, + "learning_rate": 4.8810308407266585e-05, + "loss": 0.0435, + "step": 2816 + }, + { + "epoch": 1.1905365441487115, + "grad_norm": 1.5147414207458496, + "learning_rate": 4.880946345585129e-05, + "loss": 0.0418, + "step": 2818 + }, + { + "epoch": 1.1913814955640052, + "grad_norm": 1.3996652364730835, + "learning_rate": 4.8808618504436e-05, + "loss": 0.0914, + "step": 2820 + }, + { + "epoch": 1.1922264469792987, + "grad_norm": 1.1199573278427124, + "learning_rate": 4.88077735530207e-05, + "loss": 0.0549, + "step": 2822 + }, + { + "epoch": 1.1930713983945922, + "grad_norm": 0.946550726890564, + "learning_rate": 4.8806928601605414e-05, + "loss": 0.0356, + "step": 2824 + }, + { + "epoch": 1.193916349809886, + "grad_norm": 0.9064375162124634, + "learning_rate": 4.8806083650190114e-05, + "loss": 0.0669, + "step": 2826 + }, + { + "epoch": 1.1947613012251797, + "grad_norm": 1.4223860502243042, + "learning_rate": 4.880523869877482e-05, + "loss": 0.0581, + "step": 2828 + }, + { + "epoch": 1.1956062526404732, + "grad_norm": 1.123694658279419, + "learning_rate": 4.880439374735953e-05, + "loss": 0.0437, + "step": 2830 + }, + { + "epoch": 1.1964512040557669, + "grad_norm": 1.1511688232421875, + "learning_rate": 4.8803548795944236e-05, + "loss": 0.056, + "step": 2832 + }, + { + "epoch": 1.1972961554710604, + "grad_norm": 1.4350991249084473, + "learning_rate": 4.880270384452894e-05, + "loss": 0.044, + "step": 2834 + }, + { + "epoch": 1.1981411068863541, + "grad_norm": 1.211211919784546, + "learning_rate": 4.880185889311365e-05, + "loss": 0.0378, + "step": 2836 + }, + { + "epoch": 1.1989860583016476, + "grad_norm": 1.3658467531204224, + "learning_rate": 4.880101394169835e-05, + "loss": 0.0848, + "step": 2838 + }, + { + "epoch": 1.1998310097169413, + "grad_norm": 1.6708577871322632, + "learning_rate": 4.8800168990283064e-05, + "loss": 0.0461, + "step": 2840 + }, + { + "epoch": 1.2006759611322348, + "grad_norm": 1.1294584274291992, + "learning_rate": 4.8799324038867765e-05, + "loss": 0.0485, + "step": 2842 + }, + { + "epoch": 1.2015209125475286, + "grad_norm": 1.5034394264221191, + "learning_rate": 4.879847908745248e-05, + "loss": 0.0611, + "step": 2844 + }, + { + "epoch": 1.202365863962822, + "grad_norm": 1.4616371393203735, + "learning_rate": 4.879763413603718e-05, + "loss": 0.0576, + "step": 2846 + }, + { + "epoch": 1.2032108153781158, + "grad_norm": 1.160904049873352, + "learning_rate": 4.8796789184621886e-05, + "loss": 0.0682, + "step": 2848 + }, + { + "epoch": 1.2040557667934093, + "grad_norm": 0.753709614276886, + "learning_rate": 4.8795944233206594e-05, + "loss": 0.0333, + "step": 2850 + }, + { + "epoch": 1.204900718208703, + "grad_norm": 1.6820114850997925, + "learning_rate": 4.87950992817913e-05, + "loss": 0.0587, + "step": 2852 + }, + { + "epoch": 1.2057456696239965, + "grad_norm": 1.2073854207992554, + "learning_rate": 4.879425433037601e-05, + "loss": 0.0678, + "step": 2854 + }, + { + "epoch": 1.2065906210392903, + "grad_norm": 2.104133129119873, + "learning_rate": 4.8793409378960715e-05, + "loss": 0.0868, + "step": 2856 + }, + { + "epoch": 1.2074355724545838, + "grad_norm": 1.4604604244232178, + "learning_rate": 4.8792564427545416e-05, + "loss": 0.0587, + "step": 2858 + }, + { + "epoch": 1.2082805238698775, + "grad_norm": 1.0882490873336792, + "learning_rate": 4.879171947613013e-05, + "loss": 0.0493, + "step": 2860 + }, + { + "epoch": 1.209125475285171, + "grad_norm": 1.402060866355896, + "learning_rate": 4.879087452471483e-05, + "loss": 0.0895, + "step": 2862 + }, + { + "epoch": 1.2099704267004647, + "grad_norm": 1.4716256856918335, + "learning_rate": 4.879002957329954e-05, + "loss": 0.0625, + "step": 2864 + }, + { + "epoch": 1.2108153781157585, + "grad_norm": 1.0345948934555054, + "learning_rate": 4.8789184621884244e-05, + "loss": 0.0621, + "step": 2866 + }, + { + "epoch": 1.211660329531052, + "grad_norm": 1.3120019435882568, + "learning_rate": 4.878833967046895e-05, + "loss": 0.0606, + "step": 2868 + }, + { + "epoch": 1.2125052809463455, + "grad_norm": 1.2341971397399902, + "learning_rate": 4.878749471905366e-05, + "loss": 0.0595, + "step": 2870 + }, + { + "epoch": 1.2133502323616392, + "grad_norm": 1.1355195045471191, + "learning_rate": 4.8786649767638366e-05, + "loss": 0.0392, + "step": 2872 + }, + { + "epoch": 1.214195183776933, + "grad_norm": 1.4073996543884277, + "learning_rate": 4.8785804816223066e-05, + "loss": 0.048, + "step": 2874 + }, + { + "epoch": 1.2150401351922264, + "grad_norm": 0.8575471043586731, + "learning_rate": 4.878495986480778e-05, + "loss": 0.0436, + "step": 2876 + }, + { + "epoch": 1.2158850866075201, + "grad_norm": 1.1738005876541138, + "learning_rate": 4.878411491339248e-05, + "loss": 0.0434, + "step": 2878 + }, + { + "epoch": 1.2167300380228137, + "grad_norm": 1.412050724029541, + "learning_rate": 4.878326996197719e-05, + "loss": 0.0475, + "step": 2880 + }, + { + "epoch": 1.2175749894381074, + "grad_norm": 1.4315383434295654, + "learning_rate": 4.8782425010561895e-05, + "loss": 0.063, + "step": 2882 + }, + { + "epoch": 1.2184199408534009, + "grad_norm": 1.2847777605056763, + "learning_rate": 4.87815800591466e-05, + "loss": 0.0495, + "step": 2884 + }, + { + "epoch": 1.2192648922686946, + "grad_norm": 1.1097843647003174, + "learning_rate": 4.878073510773131e-05, + "loss": 0.0836, + "step": 2886 + }, + { + "epoch": 1.2201098436839881, + "grad_norm": 1.2086609601974487, + "learning_rate": 4.877989015631602e-05, + "loss": 0.069, + "step": 2888 + }, + { + "epoch": 1.2209547950992818, + "grad_norm": 1.2778347730636597, + "learning_rate": 4.877904520490072e-05, + "loss": 0.0595, + "step": 2890 + }, + { + "epoch": 1.2217997465145753, + "grad_norm": 1.0457631349563599, + "learning_rate": 4.877820025348543e-05, + "loss": 0.0322, + "step": 2892 + }, + { + "epoch": 1.222644697929869, + "grad_norm": 1.6547378301620483, + "learning_rate": 4.877735530207013e-05, + "loss": 0.0622, + "step": 2894 + }, + { + "epoch": 1.2234896493451626, + "grad_norm": 1.1878632307052612, + "learning_rate": 4.877651035065484e-05, + "loss": 0.0549, + "step": 2896 + }, + { + "epoch": 1.2243346007604563, + "grad_norm": 1.374286413192749, + "learning_rate": 4.8775665399239546e-05, + "loss": 0.0628, + "step": 2898 + }, + { + "epoch": 1.2251795521757498, + "grad_norm": 0.7572351098060608, + "learning_rate": 4.877482044782425e-05, + "loss": 0.0645, + "step": 2900 + }, + { + "epoch": 1.2251795521757498, + "eval_accuracy": 0.7215899292981549, + "eval_cer": 0.07695157775876214, + "eval_loss": 0.17909274995326996, + "eval_runtime": 861.6493, + "eval_samples_per_second": 13.46, + "eval_steps_per_second": 0.421, + "step": 2900 + }, + { + "epoch": 1.2260245035910435, + "grad_norm": 1.633081316947937, + "learning_rate": 4.877397549640896e-05, + "loss": 0.0579, + "step": 2902 + }, + { + "epoch": 1.226869455006337, + "grad_norm": 1.0226025581359863, + "learning_rate": 4.877313054499367e-05, + "loss": 0.0822, + "step": 2904 + }, + { + "epoch": 1.2277144064216308, + "grad_norm": 1.0272724628448486, + "learning_rate": 4.877228559357837e-05, + "loss": 0.0706, + "step": 2906 + }, + { + "epoch": 1.2285593578369243, + "grad_norm": 0.925186812877655, + "learning_rate": 4.877144064216308e-05, + "loss": 0.0578, + "step": 2908 + }, + { + "epoch": 1.229404309252218, + "grad_norm": 0.5941814184188843, + "learning_rate": 4.877059569074778e-05, + "loss": 0.0352, + "step": 2910 + }, + { + "epoch": 1.2302492606675117, + "grad_norm": 1.436691164970398, + "learning_rate": 4.876975073933249e-05, + "loss": 0.0637, + "step": 2912 + }, + { + "epoch": 1.2310942120828052, + "grad_norm": 0.9653565883636475, + "learning_rate": 4.87689057879172e-05, + "loss": 0.0374, + "step": 2914 + }, + { + "epoch": 1.231939163498099, + "grad_norm": 0.8369848132133484, + "learning_rate": 4.8768060836501904e-05, + "loss": 0.0431, + "step": 2916 + }, + { + "epoch": 1.2327841149133925, + "grad_norm": 1.2851033210754395, + "learning_rate": 4.876721588508661e-05, + "loss": 0.0618, + "step": 2918 + }, + { + "epoch": 1.2336290663286862, + "grad_norm": 1.213028073310852, + "learning_rate": 4.876637093367132e-05, + "loss": 0.0585, + "step": 2920 + }, + { + "epoch": 1.2344740177439797, + "grad_norm": 1.9548083543777466, + "learning_rate": 4.876552598225602e-05, + "loss": 0.0659, + "step": 2922 + }, + { + "epoch": 1.2353189691592734, + "grad_norm": 1.0520684719085693, + "learning_rate": 4.876468103084073e-05, + "loss": 0.0374, + "step": 2924 + }, + { + "epoch": 1.236163920574567, + "grad_norm": 0.8738290667533875, + "learning_rate": 4.876383607942543e-05, + "loss": 0.0727, + "step": 2926 + }, + { + "epoch": 1.2370088719898606, + "grad_norm": 1.4663364887237549, + "learning_rate": 4.876299112801014e-05, + "loss": 0.0471, + "step": 2928 + }, + { + "epoch": 1.2378538234051542, + "grad_norm": 1.1130000352859497, + "learning_rate": 4.876214617659485e-05, + "loss": 0.053, + "step": 2930 + }, + { + "epoch": 1.2386987748204479, + "grad_norm": 0.9483621120452881, + "learning_rate": 4.8761301225179555e-05, + "loss": 0.058, + "step": 2932 + }, + { + "epoch": 1.2395437262357414, + "grad_norm": 0.7923450469970703, + "learning_rate": 4.876045627376426e-05, + "loss": 0.0384, + "step": 2934 + }, + { + "epoch": 1.240388677651035, + "grad_norm": 1.8122419118881226, + "learning_rate": 4.875961132234897e-05, + "loss": 0.076, + "step": 2936 + }, + { + "epoch": 1.2412336290663286, + "grad_norm": 1.1781667470932007, + "learning_rate": 4.875876637093367e-05, + "loss": 0.053, + "step": 2938 + }, + { + "epoch": 1.2420785804816223, + "grad_norm": 1.2569353580474854, + "learning_rate": 4.8757921419518384e-05, + "loss": 0.0514, + "step": 2940 + }, + { + "epoch": 1.2429235318969158, + "grad_norm": 1.32847261428833, + "learning_rate": 4.8757076468103084e-05, + "loss": 0.0648, + "step": 2942 + }, + { + "epoch": 1.2437684833122096, + "grad_norm": 0.7314063310623169, + "learning_rate": 4.87562315166878e-05, + "loss": 0.0582, + "step": 2944 + }, + { + "epoch": 1.244613434727503, + "grad_norm": 1.6381207704544067, + "learning_rate": 4.87553865652725e-05, + "loss": 0.0582, + "step": 2946 + }, + { + "epoch": 1.2454583861427968, + "grad_norm": 1.02126145362854, + "learning_rate": 4.8754541613857206e-05, + "loss": 0.0705, + "step": 2948 + }, + { + "epoch": 1.2463033375580905, + "grad_norm": 1.4831101894378662, + "learning_rate": 4.875369666244191e-05, + "loss": 0.0784, + "step": 2950 + }, + { + "epoch": 1.247148288973384, + "grad_norm": 1.5562660694122314, + "learning_rate": 4.875285171102662e-05, + "loss": 0.064, + "step": 2952 + }, + { + "epoch": 1.2479932403886775, + "grad_norm": 0.9851226210594177, + "learning_rate": 4.875200675961133e-05, + "loss": 0.0513, + "step": 2954 + }, + { + "epoch": 1.2488381918039713, + "grad_norm": 1.7277909517288208, + "learning_rate": 4.8751161808196034e-05, + "loss": 0.0535, + "step": 2956 + }, + { + "epoch": 1.249683143219265, + "grad_norm": 1.9152166843414307, + "learning_rate": 4.8750316856780735e-05, + "loss": 0.0508, + "step": 2958 + }, + { + "epoch": 1.2505280946345585, + "grad_norm": 0.7767104506492615, + "learning_rate": 4.874947190536545e-05, + "loss": 0.0317, + "step": 2960 + }, + { + "epoch": 1.251373046049852, + "grad_norm": 1.0719561576843262, + "learning_rate": 4.874862695395015e-05, + "loss": 0.0338, + "step": 2962 + }, + { + "epoch": 1.2522179974651457, + "grad_norm": 0.9941785931587219, + "learning_rate": 4.8747782002534856e-05, + "loss": 0.0297, + "step": 2964 + }, + { + "epoch": 1.2530629488804395, + "grad_norm": 1.413793683052063, + "learning_rate": 4.8746937051119564e-05, + "loss": 0.0414, + "step": 2966 + }, + { + "epoch": 1.253907900295733, + "grad_norm": 0.8977073431015015, + "learning_rate": 4.874609209970427e-05, + "loss": 0.0697, + "step": 2968 + }, + { + "epoch": 1.2547528517110267, + "grad_norm": 1.1440623998641968, + "learning_rate": 4.874524714828898e-05, + "loss": 0.0727, + "step": 2970 + }, + { + "epoch": 1.2555978031263202, + "grad_norm": 0.648844301700592, + "learning_rate": 4.8744402196873685e-05, + "loss": 0.0411, + "step": 2972 + }, + { + "epoch": 1.256442754541614, + "grad_norm": 1.1983152627944946, + "learning_rate": 4.8743557245458386e-05, + "loss": 0.0414, + "step": 2974 + }, + { + "epoch": 1.2572877059569074, + "grad_norm": 1.0279157161712646, + "learning_rate": 4.87427122940431e-05, + "loss": 0.0489, + "step": 2976 + }, + { + "epoch": 1.2581326573722011, + "grad_norm": 1.3486169576644897, + "learning_rate": 4.87418673426278e-05, + "loss": 0.0751, + "step": 2978 + }, + { + "epoch": 1.2589776087874947, + "grad_norm": 0.9830628037452698, + "learning_rate": 4.874102239121251e-05, + "loss": 0.0492, + "step": 2980 + }, + { + "epoch": 1.2598225602027884, + "grad_norm": 1.5126888751983643, + "learning_rate": 4.8740177439797214e-05, + "loss": 0.0404, + "step": 2982 + }, + { + "epoch": 1.2606675116180819, + "grad_norm": 0.9845932126045227, + "learning_rate": 4.873933248838192e-05, + "loss": 0.077, + "step": 2984 + }, + { + "epoch": 1.2615124630333756, + "grad_norm": 0.8719518780708313, + "learning_rate": 4.873848753696663e-05, + "loss": 0.0355, + "step": 2986 + }, + { + "epoch": 1.2623574144486693, + "grad_norm": 1.3625831604003906, + "learning_rate": 4.8737642585551336e-05, + "loss": 0.0571, + "step": 2988 + }, + { + "epoch": 1.2632023658639628, + "grad_norm": 1.1835618019104004, + "learning_rate": 4.8736797634136036e-05, + "loss": 0.0423, + "step": 2990 + }, + { + "epoch": 1.2640473172792563, + "grad_norm": 1.0713012218475342, + "learning_rate": 4.873595268272075e-05, + "loss": 0.0571, + "step": 2992 + }, + { + "epoch": 1.26489226869455, + "grad_norm": 1.7218633890151978, + "learning_rate": 4.873510773130545e-05, + "loss": 0.0462, + "step": 2994 + }, + { + "epoch": 1.2657372201098438, + "grad_norm": 1.8238509893417358, + "learning_rate": 4.873426277989016e-05, + "loss": 0.0559, + "step": 2996 + }, + { + "epoch": 1.2665821715251373, + "grad_norm": 1.0371171236038208, + "learning_rate": 4.8733417828474865e-05, + "loss": 0.0469, + "step": 2998 + }, + { + "epoch": 1.2674271229404308, + "grad_norm": 1.3527776002883911, + "learning_rate": 4.873257287705957e-05, + "loss": 0.0402, + "step": 3000 + }, + { + "epoch": 1.2674271229404308, + "eval_accuracy": 0.7070184514571478, + "eval_cer": 0.0812742045043535, + "eval_loss": 0.18232469260692596, + "eval_runtime": 842.602, + "eval_samples_per_second": 13.765, + "eval_steps_per_second": 0.431, + "step": 3000 + }, + { + "epoch": 1.2682720743557245, + "grad_norm": 0.8487452268600464, + "learning_rate": 4.873172792564428e-05, + "loss": 0.0171, + "step": 3002 + }, + { + "epoch": 1.2691170257710183, + "grad_norm": 1.096564531326294, + "learning_rate": 4.873088297422899e-05, + "loss": 0.0425, + "step": 3004 + }, + { + "epoch": 1.2699619771863118, + "grad_norm": 1.2362899780273438, + "learning_rate": 4.873003802281369e-05, + "loss": 0.0334, + "step": 3006 + }, + { + "epoch": 1.2708069286016055, + "grad_norm": 1.4478521347045898, + "learning_rate": 4.87291930713984e-05, + "loss": 0.0666, + "step": 3008 + }, + { + "epoch": 1.271651880016899, + "grad_norm": 0.9289385080337524, + "learning_rate": 4.87283481199831e-05, + "loss": 0.0588, + "step": 3010 + }, + { + "epoch": 1.2724968314321927, + "grad_norm": 0.5512641668319702, + "learning_rate": 4.872750316856781e-05, + "loss": 0.0457, + "step": 3012 + }, + { + "epoch": 1.2733417828474862, + "grad_norm": 1.2245798110961914, + "learning_rate": 4.8726658217152516e-05, + "loss": 0.06, + "step": 3014 + }, + { + "epoch": 1.27418673426278, + "grad_norm": 1.5422766208648682, + "learning_rate": 4.872581326573722e-05, + "loss": 0.0587, + "step": 3016 + }, + { + "epoch": 1.2750316856780735, + "grad_norm": 1.526551604270935, + "learning_rate": 4.872496831432193e-05, + "loss": 0.0808, + "step": 3018 + }, + { + "epoch": 1.2758766370933672, + "grad_norm": 1.5560036897659302, + "learning_rate": 4.872412336290664e-05, + "loss": 0.0531, + "step": 3020 + }, + { + "epoch": 1.2767215885086607, + "grad_norm": 1.0509698390960693, + "learning_rate": 4.872327841149134e-05, + "loss": 0.0342, + "step": 3022 + }, + { + "epoch": 1.2775665399239544, + "grad_norm": 1.421242356300354, + "learning_rate": 4.872243346007605e-05, + "loss": 0.0536, + "step": 3024 + }, + { + "epoch": 1.278411491339248, + "grad_norm": 0.5734511613845825, + "learning_rate": 4.872158850866075e-05, + "loss": 0.0293, + "step": 3026 + }, + { + "epoch": 1.2792564427545416, + "grad_norm": 1.554356336593628, + "learning_rate": 4.872074355724546e-05, + "loss": 0.1014, + "step": 3028 + }, + { + "epoch": 1.2801013941698351, + "grad_norm": 0.9434201717376709, + "learning_rate": 4.871989860583017e-05, + "loss": 0.0332, + "step": 3030 + }, + { + "epoch": 1.2809463455851289, + "grad_norm": 1.089858055114746, + "learning_rate": 4.8719053654414874e-05, + "loss": 0.0408, + "step": 3032 + }, + { + "epoch": 1.2817912970004226, + "grad_norm": 1.2021937370300293, + "learning_rate": 4.871820870299958e-05, + "loss": 0.0588, + "step": 3034 + }, + { + "epoch": 1.282636248415716, + "grad_norm": 0.9895578622817993, + "learning_rate": 4.871736375158429e-05, + "loss": 0.0568, + "step": 3036 + }, + { + "epoch": 1.2834811998310096, + "grad_norm": 1.1666065454483032, + "learning_rate": 4.871651880016899e-05, + "loss": 0.0351, + "step": 3038 + }, + { + "epoch": 1.2843261512463033, + "grad_norm": 0.9048338532447815, + "learning_rate": 4.87156738487537e-05, + "loss": 0.0455, + "step": 3040 + }, + { + "epoch": 1.285171102661597, + "grad_norm": 1.392256259918213, + "learning_rate": 4.87148288973384e-05, + "loss": 0.0706, + "step": 3042 + }, + { + "epoch": 1.2860160540768906, + "grad_norm": 1.8354949951171875, + "learning_rate": 4.871398394592312e-05, + "loss": 0.0558, + "step": 3044 + }, + { + "epoch": 1.286861005492184, + "grad_norm": 1.7575472593307495, + "learning_rate": 4.871313899450782e-05, + "loss": 0.0918, + "step": 3046 + }, + { + "epoch": 1.2877059569074778, + "grad_norm": 1.5376323461532593, + "learning_rate": 4.8712294043092525e-05, + "loss": 0.0524, + "step": 3048 + }, + { + "epoch": 1.2885509083227715, + "grad_norm": 1.8223731517791748, + "learning_rate": 4.871144909167723e-05, + "loss": 0.0825, + "step": 3050 + }, + { + "epoch": 1.289395859738065, + "grad_norm": 0.7369791269302368, + "learning_rate": 4.871060414026194e-05, + "loss": 0.0392, + "step": 3052 + }, + { + "epoch": 1.2902408111533588, + "grad_norm": 1.4611024856567383, + "learning_rate": 4.8709759188846646e-05, + "loss": 0.0358, + "step": 3054 + }, + { + "epoch": 1.2910857625686523, + "grad_norm": 0.8670369386672974, + "learning_rate": 4.8708914237431354e-05, + "loss": 0.0487, + "step": 3056 + }, + { + "epoch": 1.291930713983946, + "grad_norm": 1.0213311910629272, + "learning_rate": 4.8708069286016054e-05, + "loss": 0.0559, + "step": 3058 + }, + { + "epoch": 1.2927756653992395, + "grad_norm": 1.72591233253479, + "learning_rate": 4.870722433460077e-05, + "loss": 0.0665, + "step": 3060 + }, + { + "epoch": 1.2936206168145332, + "grad_norm": 0.8167611360549927, + "learning_rate": 4.870637938318547e-05, + "loss": 0.0359, + "step": 3062 + }, + { + "epoch": 1.2944655682298267, + "grad_norm": 0.7950469255447388, + "learning_rate": 4.8705534431770176e-05, + "loss": 0.0254, + "step": 3064 + }, + { + "epoch": 1.2953105196451205, + "grad_norm": 0.8578920364379883, + "learning_rate": 4.870468948035488e-05, + "loss": 0.037, + "step": 3066 + }, + { + "epoch": 1.296155471060414, + "grad_norm": 1.4960883855819702, + "learning_rate": 4.870384452893959e-05, + "loss": 0.0682, + "step": 3068 + }, + { + "epoch": 1.2970004224757077, + "grad_norm": 1.0831998586654663, + "learning_rate": 4.87029995775243e-05, + "loss": 0.0592, + "step": 3070 + }, + { + "epoch": 1.2978453738910012, + "grad_norm": 0.7494056820869446, + "learning_rate": 4.8702154626109004e-05, + "loss": 0.0346, + "step": 3072 + }, + { + "epoch": 1.298690325306295, + "grad_norm": 1.0414024591445923, + "learning_rate": 4.8701309674693705e-05, + "loss": 0.0519, + "step": 3074 + }, + { + "epoch": 1.2995352767215884, + "grad_norm": 1.9897801876068115, + "learning_rate": 4.870046472327842e-05, + "loss": 0.0363, + "step": 3076 + }, + { + "epoch": 1.3003802281368821, + "grad_norm": 1.645512580871582, + "learning_rate": 4.869961977186312e-05, + "loss": 0.0679, + "step": 3078 + }, + { + "epoch": 1.3012251795521759, + "grad_norm": 0.9590452909469604, + "learning_rate": 4.8698774820447826e-05, + "loss": 0.0389, + "step": 3080 + }, + { + "epoch": 1.3020701309674694, + "grad_norm": 1.0363378524780273, + "learning_rate": 4.8697929869032534e-05, + "loss": 0.0662, + "step": 3082 + }, + { + "epoch": 1.3029150823827629, + "grad_norm": 0.8959032297134399, + "learning_rate": 4.869708491761724e-05, + "loss": 0.0451, + "step": 3084 + }, + { + "epoch": 1.3037600337980566, + "grad_norm": 1.110154628753662, + "learning_rate": 4.869623996620195e-05, + "loss": 0.0425, + "step": 3086 + }, + { + "epoch": 1.3046049852133503, + "grad_norm": 1.2252897024154663, + "learning_rate": 4.8695395014786655e-05, + "loss": 0.0492, + "step": 3088 + }, + { + "epoch": 1.3054499366286438, + "grad_norm": 0.8930359482765198, + "learning_rate": 4.8694550063371356e-05, + "loss": 0.0376, + "step": 3090 + }, + { + "epoch": 1.3062948880439373, + "grad_norm": 1.2637935876846313, + "learning_rate": 4.869370511195607e-05, + "loss": 0.0387, + "step": 3092 + }, + { + "epoch": 1.307139839459231, + "grad_norm": 1.6533139944076538, + "learning_rate": 4.869286016054077e-05, + "loss": 0.068, + "step": 3094 + }, + { + "epoch": 1.3079847908745248, + "grad_norm": 1.4910434484481812, + "learning_rate": 4.869201520912548e-05, + "loss": 0.0341, + "step": 3096 + }, + { + "epoch": 1.3088297422898183, + "grad_norm": 1.8592034578323364, + "learning_rate": 4.8691170257710184e-05, + "loss": 0.0765, + "step": 3098 + }, + { + "epoch": 1.309674693705112, + "grad_norm": 1.5364049673080444, + "learning_rate": 4.869032530629489e-05, + "loss": 0.0805, + "step": 3100 + }, + { + "epoch": 1.309674693705112, + "eval_accuracy": 0.7233143645456113, + "eval_cer": 0.07973040923807087, + "eval_loss": 0.183185413479805, + "eval_runtime": 858.546, + "eval_samples_per_second": 13.509, + "eval_steps_per_second": 0.423, + "step": 3100 + }, + { + "epoch": 1.3105196451204055, + "grad_norm": 0.9121136665344238, + "learning_rate": 4.86894803548796e-05, + "loss": 0.0553, + "step": 3102 + }, + { + "epoch": 1.3113645965356993, + "grad_norm": 0.8206782341003418, + "learning_rate": 4.8688635403464306e-05, + "loss": 0.0856, + "step": 3104 + }, + { + "epoch": 1.3122095479509928, + "grad_norm": 1.5402354001998901, + "learning_rate": 4.8687790452049006e-05, + "loss": 0.0776, + "step": 3106 + }, + { + "epoch": 1.3130544993662865, + "grad_norm": 1.8080055713653564, + "learning_rate": 4.868694550063372e-05, + "loss": 0.0838, + "step": 3108 + }, + { + "epoch": 1.31389945078158, + "grad_norm": 1.5559817552566528, + "learning_rate": 4.868610054921842e-05, + "loss": 0.0727, + "step": 3110 + }, + { + "epoch": 1.3147444021968737, + "grad_norm": 1.1048095226287842, + "learning_rate": 4.868525559780313e-05, + "loss": 0.042, + "step": 3112 + }, + { + "epoch": 1.3155893536121672, + "grad_norm": 1.1088711023330688, + "learning_rate": 4.8684410646387835e-05, + "loss": 0.0363, + "step": 3114 + }, + { + "epoch": 1.316434305027461, + "grad_norm": 0.7765578031539917, + "learning_rate": 4.868356569497254e-05, + "loss": 0.0496, + "step": 3116 + }, + { + "epoch": 1.3172792564427547, + "grad_norm": 1.4490267038345337, + "learning_rate": 4.868272074355725e-05, + "loss": 0.0701, + "step": 3118 + }, + { + "epoch": 1.3181242078580482, + "grad_norm": 1.2103753089904785, + "learning_rate": 4.868187579214196e-05, + "loss": 0.0698, + "step": 3120 + }, + { + "epoch": 1.3189691592733417, + "grad_norm": 0.9798583984375, + "learning_rate": 4.868103084072666e-05, + "loss": 0.0581, + "step": 3122 + }, + { + "epoch": 1.3198141106886354, + "grad_norm": 1.6193875074386597, + "learning_rate": 4.868018588931137e-05, + "loss": 0.0594, + "step": 3124 + }, + { + "epoch": 1.3206590621039291, + "grad_norm": 1.3513240814208984, + "learning_rate": 4.867934093789607e-05, + "loss": 0.0716, + "step": 3126 + }, + { + "epoch": 1.3215040135192226, + "grad_norm": 2.1795995235443115, + "learning_rate": 4.867849598648078e-05, + "loss": 0.0645, + "step": 3128 + }, + { + "epoch": 1.3223489649345161, + "grad_norm": 1.0579677820205688, + "learning_rate": 4.8677651035065486e-05, + "loss": 0.0576, + "step": 3130 + }, + { + "epoch": 1.3231939163498099, + "grad_norm": 1.3420937061309814, + "learning_rate": 4.867680608365019e-05, + "loss": 0.0651, + "step": 3132 + }, + { + "epoch": 1.3240388677651036, + "grad_norm": 1.300595760345459, + "learning_rate": 4.86759611322349e-05, + "loss": 0.0684, + "step": 3134 + }, + { + "epoch": 1.324883819180397, + "grad_norm": 1.1893776655197144, + "learning_rate": 4.867511618081961e-05, + "loss": 0.0447, + "step": 3136 + }, + { + "epoch": 1.3257287705956908, + "grad_norm": 0.800032913684845, + "learning_rate": 4.867427122940431e-05, + "loss": 0.0312, + "step": 3138 + }, + { + "epoch": 1.3265737220109843, + "grad_norm": 0.8352811932563782, + "learning_rate": 4.867342627798902e-05, + "loss": 0.0304, + "step": 3140 + }, + { + "epoch": 1.327418673426278, + "grad_norm": 1.032954454421997, + "learning_rate": 4.867258132657372e-05, + "loss": 0.05, + "step": 3142 + }, + { + "epoch": 1.3282636248415716, + "grad_norm": 1.1263511180877686, + "learning_rate": 4.8671736375158436e-05, + "loss": 0.0373, + "step": 3144 + }, + { + "epoch": 1.3291085762568653, + "grad_norm": 0.6103000044822693, + "learning_rate": 4.867089142374314e-05, + "loss": 0.0483, + "step": 3146 + }, + { + "epoch": 1.3299535276721588, + "grad_norm": 0.8234397768974304, + "learning_rate": 4.8670046472327844e-05, + "loss": 0.0406, + "step": 3148 + }, + { + "epoch": 1.3307984790874525, + "grad_norm": 1.4005287885665894, + "learning_rate": 4.866920152091255e-05, + "loss": 0.0543, + "step": 3150 + }, + { + "epoch": 1.331643430502746, + "grad_norm": 0.7845334410667419, + "learning_rate": 4.866835656949726e-05, + "loss": 0.0483, + "step": 3152 + }, + { + "epoch": 1.3324883819180398, + "grad_norm": 1.4690190553665161, + "learning_rate": 4.8667511618081966e-05, + "loss": 0.031, + "step": 3154 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 1.3612943887710571, + "learning_rate": 4.866666666666667e-05, + "loss": 0.0605, + "step": 3156 + }, + { + "epoch": 1.334178284748627, + "grad_norm": 1.120639681816101, + "learning_rate": 4.866582171525137e-05, + "loss": 0.0481, + "step": 3158 + }, + { + "epoch": 1.3350232361639205, + "grad_norm": 1.0624009370803833, + "learning_rate": 4.866497676383609e-05, + "loss": 0.0894, + "step": 3160 + }, + { + "epoch": 1.3358681875792142, + "grad_norm": 0.8806793689727783, + "learning_rate": 4.866413181242079e-05, + "loss": 0.0623, + "step": 3162 + }, + { + "epoch": 1.336713138994508, + "grad_norm": 1.1015374660491943, + "learning_rate": 4.8663286861005495e-05, + "loss": 0.0596, + "step": 3164 + }, + { + "epoch": 1.3375580904098014, + "grad_norm": 1.0393707752227783, + "learning_rate": 4.86624419095902e-05, + "loss": 0.0635, + "step": 3166 + }, + { + "epoch": 1.338403041825095, + "grad_norm": 1.5667744874954224, + "learning_rate": 4.866159695817491e-05, + "loss": 0.0555, + "step": 3168 + }, + { + "epoch": 1.3392479932403887, + "grad_norm": 0.5672454237937927, + "learning_rate": 4.8660752006759616e-05, + "loss": 0.0206, + "step": 3170 + }, + { + "epoch": 1.3400929446556824, + "grad_norm": 1.0931203365325928, + "learning_rate": 4.8659907055344324e-05, + "loss": 0.0463, + "step": 3172 + }, + { + "epoch": 1.340937896070976, + "grad_norm": 1.1247515678405762, + "learning_rate": 4.8659062103929024e-05, + "loss": 0.0466, + "step": 3174 + }, + { + "epoch": 1.3417828474862694, + "grad_norm": 1.6329139471054077, + "learning_rate": 4.865821715251374e-05, + "loss": 0.0686, + "step": 3176 + }, + { + "epoch": 1.3426277989015631, + "grad_norm": 0.6330051422119141, + "learning_rate": 4.865737220109844e-05, + "loss": 0.0517, + "step": 3178 + }, + { + "epoch": 1.3434727503168569, + "grad_norm": 1.0388518571853638, + "learning_rate": 4.8656527249683146e-05, + "loss": 0.0455, + "step": 3180 + }, + { + "epoch": 1.3443177017321504, + "grad_norm": 1.2583259344100952, + "learning_rate": 4.865568229826785e-05, + "loss": 0.0837, + "step": 3182 + }, + { + "epoch": 1.345162653147444, + "grad_norm": 1.0448977947235107, + "learning_rate": 4.865483734685256e-05, + "loss": 0.0481, + "step": 3184 + }, + { + "epoch": 1.3460076045627376, + "grad_norm": 1.3680537939071655, + "learning_rate": 4.865399239543727e-05, + "loss": 0.0615, + "step": 3186 + }, + { + "epoch": 1.3468525559780313, + "grad_norm": 1.131136417388916, + "learning_rate": 4.8653147444021974e-05, + "loss": 0.0402, + "step": 3188 + }, + { + "epoch": 1.3476975073933248, + "grad_norm": 1.0717076063156128, + "learning_rate": 4.8652302492606675e-05, + "loss": 0.0569, + "step": 3190 + }, + { + "epoch": 1.3485424588086186, + "grad_norm": 0.7371670603752136, + "learning_rate": 4.865145754119139e-05, + "loss": 0.0498, + "step": 3192 + }, + { + "epoch": 1.349387410223912, + "grad_norm": 1.0945547819137573, + "learning_rate": 4.865061258977609e-05, + "loss": 0.0543, + "step": 3194 + }, + { + "epoch": 1.3502323616392058, + "grad_norm": 1.342444658279419, + "learning_rate": 4.8649767638360796e-05, + "loss": 0.0763, + "step": 3196 + }, + { + "epoch": 1.3510773130544993, + "grad_norm": 0.6870439648628235, + "learning_rate": 4.8648922686945504e-05, + "loss": 0.0454, + "step": 3198 + }, + { + "epoch": 1.351922264469793, + "grad_norm": 1.172759771347046, + "learning_rate": 4.864807773553021e-05, + "loss": 0.055, + "step": 3200 + }, + { + "epoch": 1.351922264469793, + "eval_accuracy": 0.7313329884462838, + "eval_cer": 0.07825718746967544, + "eval_loss": 0.1792062371969223, + "eval_runtime": 861.651, + "eval_samples_per_second": 13.46, + "eval_steps_per_second": 0.421, + "step": 3200 + }, + { + "epoch": 1.3527672158850865, + "grad_norm": 1.0497750043869019, + "learning_rate": 4.864723278411492e-05, + "loss": 0.0348, + "step": 3202 + }, + { + "epoch": 1.3536121673003803, + "grad_norm": 0.7206516265869141, + "learning_rate": 4.8646387832699625e-05, + "loss": 0.0368, + "step": 3204 + }, + { + "epoch": 1.3544571187156738, + "grad_norm": 1.5473790168762207, + "learning_rate": 4.8645542881284326e-05, + "loss": 0.053, + "step": 3206 + }, + { + "epoch": 1.3553020701309675, + "grad_norm": 1.5096811056137085, + "learning_rate": 4.864469792986904e-05, + "loss": 0.0786, + "step": 3208 + }, + { + "epoch": 1.3561470215462612, + "grad_norm": 1.3632938861846924, + "learning_rate": 4.864385297845374e-05, + "loss": 0.08, + "step": 3210 + }, + { + "epoch": 1.3569919729615547, + "grad_norm": 1.0166387557983398, + "learning_rate": 4.864300802703845e-05, + "loss": 0.0559, + "step": 3212 + }, + { + "epoch": 1.3578369243768482, + "grad_norm": 1.3810648918151855, + "learning_rate": 4.8642163075623154e-05, + "loss": 0.0533, + "step": 3214 + }, + { + "epoch": 1.358681875792142, + "grad_norm": 1.6063209772109985, + "learning_rate": 4.864131812420786e-05, + "loss": 0.0802, + "step": 3216 + }, + { + "epoch": 1.3595268272074357, + "grad_norm": 0.754176676273346, + "learning_rate": 4.864047317279257e-05, + "loss": 0.056, + "step": 3218 + }, + { + "epoch": 1.3603717786227292, + "grad_norm": 0.9545078873634338, + "learning_rate": 4.8639628221377276e-05, + "loss": 0.0346, + "step": 3220 + }, + { + "epoch": 1.3612167300380227, + "grad_norm": 1.2821192741394043, + "learning_rate": 4.8638783269961976e-05, + "loss": 0.0522, + "step": 3222 + }, + { + "epoch": 1.3620616814533164, + "grad_norm": 1.6496917009353638, + "learning_rate": 4.863793831854669e-05, + "loss": 0.0552, + "step": 3224 + }, + { + "epoch": 1.3629066328686101, + "grad_norm": 0.9524357318878174, + "learning_rate": 4.863709336713139e-05, + "loss": 0.0443, + "step": 3226 + }, + { + "epoch": 1.3637515842839036, + "grad_norm": 2.012040138244629, + "learning_rate": 4.86362484157161e-05, + "loss": 0.106, + "step": 3228 + }, + { + "epoch": 1.3645965356991974, + "grad_norm": 1.7522271871566772, + "learning_rate": 4.8635403464300805e-05, + "loss": 0.0511, + "step": 3230 + }, + { + "epoch": 1.3654414871144909, + "grad_norm": 1.4558488130569458, + "learning_rate": 4.863455851288551e-05, + "loss": 0.0682, + "step": 3232 + }, + { + "epoch": 1.3662864385297846, + "grad_norm": 1.263836145401001, + "learning_rate": 4.863371356147022e-05, + "loss": 0.0678, + "step": 3234 + }, + { + "epoch": 1.367131389945078, + "grad_norm": 1.5037480592727661, + "learning_rate": 4.863286861005493e-05, + "loss": 0.0764, + "step": 3236 + }, + { + "epoch": 1.3679763413603718, + "grad_norm": 0.8802792429924011, + "learning_rate": 4.863202365863963e-05, + "loss": 0.0575, + "step": 3238 + }, + { + "epoch": 1.3688212927756653, + "grad_norm": 1.4338186979293823, + "learning_rate": 4.863117870722434e-05, + "loss": 0.0469, + "step": 3240 + }, + { + "epoch": 1.369666244190959, + "grad_norm": 1.3018105030059814, + "learning_rate": 4.863033375580904e-05, + "loss": 0.0542, + "step": 3242 + }, + { + "epoch": 1.3705111956062526, + "grad_norm": 0.8074570894241333, + "learning_rate": 4.8629488804393756e-05, + "loss": 0.0392, + "step": 3244 + }, + { + "epoch": 1.3713561470215463, + "grad_norm": 1.513663411140442, + "learning_rate": 4.8628643852978456e-05, + "loss": 0.0563, + "step": 3246 + }, + { + "epoch": 1.37220109843684, + "grad_norm": 1.381496787071228, + "learning_rate": 4.862779890156316e-05, + "loss": 0.0526, + "step": 3248 + }, + { + "epoch": 1.3730460498521335, + "grad_norm": 1.0638294219970703, + "learning_rate": 4.862695395014787e-05, + "loss": 0.0497, + "step": 3250 + }, + { + "epoch": 1.373891001267427, + "grad_norm": 1.7604972124099731, + "learning_rate": 4.862610899873258e-05, + "loss": 0.0656, + "step": 3252 + }, + { + "epoch": 1.3747359526827208, + "grad_norm": 0.8120830059051514, + "learning_rate": 4.8625264047317285e-05, + "loss": 0.033, + "step": 3254 + }, + { + "epoch": 1.3755809040980145, + "grad_norm": 0.7674900889396667, + "learning_rate": 4.862441909590199e-05, + "loss": 0.0427, + "step": 3256 + }, + { + "epoch": 1.376425855513308, + "grad_norm": 0.8798816800117493, + "learning_rate": 4.862357414448669e-05, + "loss": 0.0474, + "step": 3258 + }, + { + "epoch": 1.3772708069286015, + "grad_norm": 0.8720738887786865, + "learning_rate": 4.86227291930714e-05, + "loss": 0.053, + "step": 3260 + }, + { + "epoch": 1.3781157583438952, + "grad_norm": 1.4881258010864258, + "learning_rate": 4.862188424165611e-05, + "loss": 0.0565, + "step": 3262 + }, + { + "epoch": 1.378960709759189, + "grad_norm": 1.2955182790756226, + "learning_rate": 4.8621039290240814e-05, + "loss": 0.0409, + "step": 3264 + }, + { + "epoch": 1.3798056611744824, + "grad_norm": 0.8371695876121521, + "learning_rate": 4.862019433882552e-05, + "loss": 0.0453, + "step": 3266 + }, + { + "epoch": 1.380650612589776, + "grad_norm": 0.7356275916099548, + "learning_rate": 4.861934938741022e-05, + "loss": 0.0385, + "step": 3268 + }, + { + "epoch": 1.3814955640050697, + "grad_norm": 0.7049744725227356, + "learning_rate": 4.8618504435994936e-05, + "loss": 0.0311, + "step": 3270 + }, + { + "epoch": 1.3823405154203634, + "grad_norm": 1.2064480781555176, + "learning_rate": 4.8617659484579636e-05, + "loss": 0.0689, + "step": 3272 + }, + { + "epoch": 1.383185466835657, + "grad_norm": 1.173024296760559, + "learning_rate": 4.861681453316434e-05, + "loss": 0.0644, + "step": 3274 + }, + { + "epoch": 1.3840304182509506, + "grad_norm": 1.1009023189544678, + "learning_rate": 4.861596958174905e-05, + "loss": 0.0343, + "step": 3276 + }, + { + "epoch": 1.3848753696662441, + "grad_norm": 1.1866307258605957, + "learning_rate": 4.861512463033376e-05, + "loss": 0.0398, + "step": 3278 + }, + { + "epoch": 1.3857203210815379, + "grad_norm": 0.8218021988868713, + "learning_rate": 4.8614279678918465e-05, + "loss": 0.0483, + "step": 3280 + }, + { + "epoch": 1.3865652724968314, + "grad_norm": 1.1046007871627808, + "learning_rate": 4.861343472750317e-05, + "loss": 0.0676, + "step": 3282 + }, + { + "epoch": 1.387410223912125, + "grad_norm": 0.6497601866722107, + "learning_rate": 4.861258977608787e-05, + "loss": 0.0293, + "step": 3284 + }, + { + "epoch": 1.3882551753274186, + "grad_norm": 1.103437066078186, + "learning_rate": 4.8611744824672586e-05, + "loss": 0.0379, + "step": 3286 + }, + { + "epoch": 1.3891001267427123, + "grad_norm": 0.9892705678939819, + "learning_rate": 4.861089987325729e-05, + "loss": 0.0557, + "step": 3288 + }, + { + "epoch": 1.3899450781580058, + "grad_norm": 0.8333448767662048, + "learning_rate": 4.8610054921841994e-05, + "loss": 0.0275, + "step": 3290 + }, + { + "epoch": 1.3907900295732996, + "grad_norm": 1.19562828540802, + "learning_rate": 4.86092099704267e-05, + "loss": 0.0497, + "step": 3292 + }, + { + "epoch": 1.3916349809885933, + "grad_norm": 1.448594093322754, + "learning_rate": 4.860836501901141e-05, + "loss": 0.0694, + "step": 3294 + }, + { + "epoch": 1.3924799324038868, + "grad_norm": 1.5646843910217285, + "learning_rate": 4.8607520067596116e-05, + "loss": 0.0426, + "step": 3296 + }, + { + "epoch": 1.3933248838191803, + "grad_norm": 1.2052522897720337, + "learning_rate": 4.860667511618082e-05, + "loss": 0.0654, + "step": 3298 + }, + { + "epoch": 1.394169835234474, + "grad_norm": 0.7969152331352234, + "learning_rate": 4.860583016476552e-05, + "loss": 0.0585, + "step": 3300 + }, + { + "epoch": 1.394169835234474, + "eval_accuracy": 0.74098982583204, + "eval_cer": 0.07193203772153461, + "eval_loss": 0.17303836345672607, + "eval_runtime": 852.7734, + "eval_samples_per_second": 13.6, + "eval_steps_per_second": 0.426, + "step": 3300 + }, + { + "epoch": 1.3950147866497677, + "grad_norm": 1.1091243028640747, + "learning_rate": 4.860498521335024e-05, + "loss": 0.0598, + "step": 3302 + }, + { + "epoch": 1.3958597380650613, + "grad_norm": 1.6562272310256958, + "learning_rate": 4.860414026193494e-05, + "loss": 0.0402, + "step": 3304 + }, + { + "epoch": 1.3967046894803548, + "grad_norm": 0.8330667018890381, + "learning_rate": 4.8603295310519645e-05, + "loss": 0.0563, + "step": 3306 + }, + { + "epoch": 1.3975496408956485, + "grad_norm": 1.4728366136550903, + "learning_rate": 4.860245035910435e-05, + "loss": 0.0635, + "step": 3308 + }, + { + "epoch": 1.3983945923109422, + "grad_norm": 1.3313997983932495, + "learning_rate": 4.860160540768906e-05, + "loss": 0.0547, + "step": 3310 + }, + { + "epoch": 1.3992395437262357, + "grad_norm": 1.9861879348754883, + "learning_rate": 4.8600760456273766e-05, + "loss": 0.0567, + "step": 3312 + }, + { + "epoch": 1.4000844951415294, + "grad_norm": 0.7557132244110107, + "learning_rate": 4.8599915504858474e-05, + "loss": 0.0478, + "step": 3314 + }, + { + "epoch": 1.400929446556823, + "grad_norm": 0.5707297325134277, + "learning_rate": 4.8599070553443174e-05, + "loss": 0.0377, + "step": 3316 + }, + { + "epoch": 1.4017743979721167, + "grad_norm": 1.3128173351287842, + "learning_rate": 4.859822560202789e-05, + "loss": 0.0579, + "step": 3318 + }, + { + "epoch": 1.4026193493874102, + "grad_norm": 1.7858682870864868, + "learning_rate": 4.859738065061259e-05, + "loss": 0.0718, + "step": 3320 + }, + { + "epoch": 1.403464300802704, + "grad_norm": 1.2796276807785034, + "learning_rate": 4.8596535699197296e-05, + "loss": 0.0408, + "step": 3322 + }, + { + "epoch": 1.4043092522179974, + "grad_norm": 0.8803476095199585, + "learning_rate": 4.8595690747782e-05, + "loss": 0.0522, + "step": 3324 + }, + { + "epoch": 1.4051542036332911, + "grad_norm": 0.9975403547286987, + "learning_rate": 4.859484579636671e-05, + "loss": 0.0305, + "step": 3326 + }, + { + "epoch": 1.4059991550485846, + "grad_norm": 0.4852333962917328, + "learning_rate": 4.859400084495142e-05, + "loss": 0.021, + "step": 3328 + }, + { + "epoch": 1.4068441064638784, + "grad_norm": 1.1154181957244873, + "learning_rate": 4.8593155893536124e-05, + "loss": 0.0351, + "step": 3330 + }, + { + "epoch": 1.4076890578791719, + "grad_norm": 0.9479047656059265, + "learning_rate": 4.8592310942120825e-05, + "loss": 0.0219, + "step": 3332 + }, + { + "epoch": 1.4085340092944656, + "grad_norm": 0.5760020017623901, + "learning_rate": 4.859146599070554e-05, + "loss": 0.0527, + "step": 3334 + }, + { + "epoch": 1.409378960709759, + "grad_norm": 1.5132248401641846, + "learning_rate": 4.859062103929024e-05, + "loss": 0.05, + "step": 3336 + }, + { + "epoch": 1.4102239121250528, + "grad_norm": 1.0323017835617065, + "learning_rate": 4.8589776087874946e-05, + "loss": 0.044, + "step": 3338 + }, + { + "epoch": 1.4110688635403466, + "grad_norm": 1.1576844453811646, + "learning_rate": 4.8588931136459654e-05, + "loss": 0.0751, + "step": 3340 + }, + { + "epoch": 1.41191381495564, + "grad_norm": 1.1319127082824707, + "learning_rate": 4.858808618504436e-05, + "loss": 0.0598, + "step": 3342 + }, + { + "epoch": 1.4127587663709336, + "grad_norm": 1.139543056488037, + "learning_rate": 4.858724123362907e-05, + "loss": 0.0542, + "step": 3344 + }, + { + "epoch": 1.4136037177862273, + "grad_norm": 1.0183779001235962, + "learning_rate": 4.8586396282213775e-05, + "loss": 0.0385, + "step": 3346 + }, + { + "epoch": 1.414448669201521, + "grad_norm": 0.6774150729179382, + "learning_rate": 4.8585551330798476e-05, + "loss": 0.0225, + "step": 3348 + }, + { + "epoch": 1.4152936206168145, + "grad_norm": 1.525310754776001, + "learning_rate": 4.858470637938319e-05, + "loss": 0.071, + "step": 3350 + }, + { + "epoch": 1.416138572032108, + "grad_norm": 1.1040525436401367, + "learning_rate": 4.858386142796789e-05, + "loss": 0.0322, + "step": 3352 + }, + { + "epoch": 1.4169835234474017, + "grad_norm": 1.2062792778015137, + "learning_rate": 4.8583016476552604e-05, + "loss": 0.0679, + "step": 3354 + }, + { + "epoch": 1.4178284748626955, + "grad_norm": 1.0307128429412842, + "learning_rate": 4.8582171525137304e-05, + "loss": 0.0512, + "step": 3356 + }, + { + "epoch": 1.418673426277989, + "grad_norm": 1.3051033020019531, + "learning_rate": 4.858132657372201e-05, + "loss": 0.045, + "step": 3358 + }, + { + "epoch": 1.4195183776932827, + "grad_norm": 0.960086464881897, + "learning_rate": 4.858048162230672e-05, + "loss": 0.065, + "step": 3360 + }, + { + "epoch": 1.4203633291085762, + "grad_norm": 1.8996869325637817, + "learning_rate": 4.8579636670891426e-05, + "loss": 0.0925, + "step": 3362 + }, + { + "epoch": 1.42120828052387, + "grad_norm": 1.6697046756744385, + "learning_rate": 4.857879171947613e-05, + "loss": 0.0473, + "step": 3364 + }, + { + "epoch": 1.4220532319391634, + "grad_norm": 1.2368358373641968, + "learning_rate": 4.857794676806084e-05, + "loss": 0.0495, + "step": 3366 + }, + { + "epoch": 1.4228981833544572, + "grad_norm": 1.3032385110855103, + "learning_rate": 4.857710181664554e-05, + "loss": 0.042, + "step": 3368 + }, + { + "epoch": 1.4237431347697507, + "grad_norm": 1.1829075813293457, + "learning_rate": 4.8576256865230255e-05, + "loss": 0.0909, + "step": 3370 + }, + { + "epoch": 1.4245880861850444, + "grad_norm": 1.2332669496536255, + "learning_rate": 4.8575411913814955e-05, + "loss": 0.0592, + "step": 3372 + }, + { + "epoch": 1.425433037600338, + "grad_norm": 1.6120294332504272, + "learning_rate": 4.857456696239966e-05, + "loss": 0.0711, + "step": 3374 + }, + { + "epoch": 1.4262779890156316, + "grad_norm": 1.1956902742385864, + "learning_rate": 4.857372201098437e-05, + "loss": 0.04, + "step": 3376 + }, + { + "epoch": 1.4271229404309254, + "grad_norm": 1.5087355375289917, + "learning_rate": 4.857287705956908e-05, + "loss": 0.0488, + "step": 3378 + }, + { + "epoch": 1.4279678918462189, + "grad_norm": 1.6760591268539429, + "learning_rate": 4.8572032108153784e-05, + "loss": 0.0851, + "step": 3380 + }, + { + "epoch": 1.4288128432615124, + "grad_norm": 0.8411288857460022, + "learning_rate": 4.857118715673849e-05, + "loss": 0.023, + "step": 3382 + }, + { + "epoch": 1.429657794676806, + "grad_norm": 1.0701520442962646, + "learning_rate": 4.857034220532319e-05, + "loss": 0.0479, + "step": 3384 + }, + { + "epoch": 1.4305027460920998, + "grad_norm": 0.9319422841072083, + "learning_rate": 4.8569497253907906e-05, + "loss": 0.0686, + "step": 3386 + }, + { + "epoch": 1.4313476975073933, + "grad_norm": 0.8737354278564453, + "learning_rate": 4.8568652302492606e-05, + "loss": 0.0662, + "step": 3388 + }, + { + "epoch": 1.4321926489226868, + "grad_norm": 0.5453359484672546, + "learning_rate": 4.856780735107731e-05, + "loss": 0.0324, + "step": 3390 + }, + { + "epoch": 1.4330376003379806, + "grad_norm": 0.6740847229957581, + "learning_rate": 4.856696239966202e-05, + "loss": 0.0392, + "step": 3392 + }, + { + "epoch": 1.4338825517532743, + "grad_norm": 1.1385046243667603, + "learning_rate": 4.856611744824673e-05, + "loss": 0.0392, + "step": 3394 + }, + { + "epoch": 1.4347275031685678, + "grad_norm": 0.7803374528884888, + "learning_rate": 4.8565272496831435e-05, + "loss": 0.0372, + "step": 3396 + }, + { + "epoch": 1.4355724545838613, + "grad_norm": 0.8832899928092957, + "learning_rate": 4.856442754541614e-05, + "loss": 0.0406, + "step": 3398 + }, + { + "epoch": 1.436417405999155, + "grad_norm": 1.3527898788452148, + "learning_rate": 4.856358259400084e-05, + "loss": 0.048, + "step": 3400 + }, + { + "epoch": 1.436417405999155, + "eval_accuracy": 0.7294361096740818, + "eval_cer": 0.07786903323129582, + "eval_loss": 0.1757935732603073, + "eval_runtime": 862.8655, + "eval_samples_per_second": 13.441, + "eval_steps_per_second": 0.421, + "step": 3400 + }, + { + "epoch": 1.4372623574144487, + "grad_norm": 0.9552874565124512, + "learning_rate": 4.8562737642585556e-05, + "loss": 0.0644, + "step": 3402 + }, + { + "epoch": 1.4381073088297422, + "grad_norm": 1.3589330911636353, + "learning_rate": 4.856189269117026e-05, + "loss": 0.0489, + "step": 3404 + }, + { + "epoch": 1.438952260245036, + "grad_norm": 0.9378911852836609, + "learning_rate": 4.8561047739754964e-05, + "loss": 0.0302, + "step": 3406 + }, + { + "epoch": 1.4397972116603295, + "grad_norm": 1.4078298807144165, + "learning_rate": 4.856020278833967e-05, + "loss": 0.0621, + "step": 3408 + }, + { + "epoch": 1.4406421630756232, + "grad_norm": 0.6866748929023743, + "learning_rate": 4.855935783692438e-05, + "loss": 0.0289, + "step": 3410 + }, + { + "epoch": 1.4414871144909167, + "grad_norm": 1.534067988395691, + "learning_rate": 4.8558512885509086e-05, + "loss": 0.0658, + "step": 3412 + }, + { + "epoch": 1.4423320659062104, + "grad_norm": 1.2581876516342163, + "learning_rate": 4.855766793409379e-05, + "loss": 0.0338, + "step": 3414 + }, + { + "epoch": 1.443177017321504, + "grad_norm": 0.8533525466918945, + "learning_rate": 4.855682298267849e-05, + "loss": 0.0269, + "step": 3416 + }, + { + "epoch": 1.4440219687367977, + "grad_norm": 1.2948267459869385, + "learning_rate": 4.855597803126321e-05, + "loss": 0.0476, + "step": 3418 + }, + { + "epoch": 1.4448669201520912, + "grad_norm": 1.1799198389053345, + "learning_rate": 4.855513307984791e-05, + "loss": 0.0315, + "step": 3420 + }, + { + "epoch": 1.445711871567385, + "grad_norm": 1.5496565103530884, + "learning_rate": 4.8554288128432615e-05, + "loss": 0.0595, + "step": 3422 + }, + { + "epoch": 1.4465568229826786, + "grad_norm": 1.3309407234191895, + "learning_rate": 4.855344317701732e-05, + "loss": 0.0335, + "step": 3424 + }, + { + "epoch": 1.4474017743979721, + "grad_norm": 0.614343523979187, + "learning_rate": 4.855259822560203e-05, + "loss": 0.0232, + "step": 3426 + }, + { + "epoch": 1.4482467258132656, + "grad_norm": 1.4628639221191406, + "learning_rate": 4.8551753274186736e-05, + "loss": 0.0552, + "step": 3428 + }, + { + "epoch": 1.4490916772285594, + "grad_norm": 1.2969770431518555, + "learning_rate": 4.8550908322771444e-05, + "loss": 0.058, + "step": 3430 + }, + { + "epoch": 1.449936628643853, + "grad_norm": 1.3628487586975098, + "learning_rate": 4.8550063371356144e-05, + "loss": 0.0558, + "step": 3432 + }, + { + "epoch": 1.4507815800591466, + "grad_norm": 1.4558742046356201, + "learning_rate": 4.854921841994086e-05, + "loss": 0.0619, + "step": 3434 + }, + { + "epoch": 1.45162653147444, + "grad_norm": 1.390631079673767, + "learning_rate": 4.854837346852556e-05, + "loss": 0.0686, + "step": 3436 + }, + { + "epoch": 1.4524714828897338, + "grad_norm": 1.0281144380569458, + "learning_rate": 4.8547528517110266e-05, + "loss": 0.039, + "step": 3438 + }, + { + "epoch": 1.4533164343050275, + "grad_norm": 1.603379726409912, + "learning_rate": 4.854668356569497e-05, + "loss": 0.0877, + "step": 3440 + }, + { + "epoch": 1.454161385720321, + "grad_norm": 0.7130106091499329, + "learning_rate": 4.854583861427968e-05, + "loss": 0.0424, + "step": 3442 + }, + { + "epoch": 1.4550063371356148, + "grad_norm": 1.1394532918930054, + "learning_rate": 4.854499366286439e-05, + "loss": 0.0464, + "step": 3444 + }, + { + "epoch": 1.4558512885509083, + "grad_norm": 1.2217916250228882, + "learning_rate": 4.8544148711449094e-05, + "loss": 0.0582, + "step": 3446 + }, + { + "epoch": 1.456696239966202, + "grad_norm": 1.2456247806549072, + "learning_rate": 4.8543303760033795e-05, + "loss": 0.0477, + "step": 3448 + }, + { + "epoch": 1.4575411913814955, + "grad_norm": 0.7696735858917236, + "learning_rate": 4.854245880861851e-05, + "loss": 0.0342, + "step": 3450 + }, + { + "epoch": 1.4583861427967892, + "grad_norm": 1.327634334564209, + "learning_rate": 4.854161385720321e-05, + "loss": 0.0875, + "step": 3452 + }, + { + "epoch": 1.4592310942120827, + "grad_norm": 1.5110286474227905, + "learning_rate": 4.854076890578792e-05, + "loss": 0.057, + "step": 3454 + }, + { + "epoch": 1.4600760456273765, + "grad_norm": 1.0184332132339478, + "learning_rate": 4.8539923954372624e-05, + "loss": 0.0526, + "step": 3456 + }, + { + "epoch": 1.46092099704267, + "grad_norm": 1.3125816583633423, + "learning_rate": 4.853907900295733e-05, + "loss": 0.0311, + "step": 3458 + }, + { + "epoch": 1.4617659484579637, + "grad_norm": 1.00360906124115, + "learning_rate": 4.853823405154204e-05, + "loss": 0.0511, + "step": 3460 + }, + { + "epoch": 1.4626108998732572, + "grad_norm": 0.9407820701599121, + "learning_rate": 4.8537389100126745e-05, + "loss": 0.041, + "step": 3462 + }, + { + "epoch": 1.463455851288551, + "grad_norm": 1.2548154592514038, + "learning_rate": 4.853654414871145e-05, + "loss": 0.0588, + "step": 3464 + }, + { + "epoch": 1.4643008027038444, + "grad_norm": 1.1931437253952026, + "learning_rate": 4.853569919729616e-05, + "loss": 0.0372, + "step": 3466 + }, + { + "epoch": 1.4651457541191382, + "grad_norm": 1.3026175498962402, + "learning_rate": 4.853485424588086e-05, + "loss": 0.0482, + "step": 3468 + }, + { + "epoch": 1.465990705534432, + "grad_norm": 1.1066519021987915, + "learning_rate": 4.8534009294465574e-05, + "loss": 0.0604, + "step": 3470 + }, + { + "epoch": 1.4668356569497254, + "grad_norm": 1.1285970211029053, + "learning_rate": 4.8533164343050274e-05, + "loss": 0.0729, + "step": 3472 + }, + { + "epoch": 1.467680608365019, + "grad_norm": 1.0288052558898926, + "learning_rate": 4.853231939163498e-05, + "loss": 0.0488, + "step": 3474 + }, + { + "epoch": 1.4685255597803126, + "grad_norm": 1.7681716680526733, + "learning_rate": 4.853147444021969e-05, + "loss": 0.0572, + "step": 3476 + }, + { + "epoch": 1.4693705111956064, + "grad_norm": 1.4394867420196533, + "learning_rate": 4.8530629488804396e-05, + "loss": 0.0669, + "step": 3478 + }, + { + "epoch": 1.4702154626108999, + "grad_norm": 0.686235249042511, + "learning_rate": 4.85297845373891e-05, + "loss": 0.0246, + "step": 3480 + }, + { + "epoch": 1.4710604140261934, + "grad_norm": 2.047348976135254, + "learning_rate": 4.852893958597381e-05, + "loss": 0.0798, + "step": 3482 + }, + { + "epoch": 1.471905365441487, + "grad_norm": 1.1849197149276733, + "learning_rate": 4.852809463455851e-05, + "loss": 0.0487, + "step": 3484 + }, + { + "epoch": 1.4727503168567808, + "grad_norm": 0.973675012588501, + "learning_rate": 4.8527249683143225e-05, + "loss": 0.0483, + "step": 3486 + }, + { + "epoch": 1.4735952682720743, + "grad_norm": 1.173438549041748, + "learning_rate": 4.8526404731727925e-05, + "loss": 0.0497, + "step": 3488 + }, + { + "epoch": 1.474440219687368, + "grad_norm": 0.9411981701850891, + "learning_rate": 4.852555978031263e-05, + "loss": 0.0599, + "step": 3490 + }, + { + "epoch": 1.4752851711026616, + "grad_norm": 0.963691771030426, + "learning_rate": 4.852471482889734e-05, + "loss": 0.0372, + "step": 3492 + }, + { + "epoch": 1.4761301225179553, + "grad_norm": 1.085005283355713, + "learning_rate": 4.852386987748205e-05, + "loss": 0.0612, + "step": 3494 + }, + { + "epoch": 1.4769750739332488, + "grad_norm": 1.5302870273590088, + "learning_rate": 4.8523024926066754e-05, + "loss": 0.0519, + "step": 3496 + }, + { + "epoch": 1.4778200253485425, + "grad_norm": 1.8927478790283203, + "learning_rate": 4.852217997465146e-05, + "loss": 0.0782, + "step": 3498 + }, + { + "epoch": 1.478664976763836, + "grad_norm": 1.595861554145813, + "learning_rate": 4.852133502323616e-05, + "loss": 0.0766, + "step": 3500 + }, + { + "epoch": 1.478664976763836, + "eval_accuracy": 0.7130539748232454, + "eval_cer": 0.08314440219836446, + "eval_loss": 0.18499918282032013, + "eval_runtime": 847.7155, + "eval_samples_per_second": 13.681, + "eval_steps_per_second": 0.428, + "step": 3500 + }, + { + "epoch": 1.4795099281791297, + "grad_norm": 2.2448182106018066, + "learning_rate": 4.8520490071820876e-05, + "loss": 0.0531, + "step": 3502 + }, + { + "epoch": 1.4803548795944232, + "grad_norm": 1.2173892259597778, + "learning_rate": 4.8519645120405576e-05, + "loss": 0.0576, + "step": 3504 + }, + { + "epoch": 1.481199831009717, + "grad_norm": 0.8814436793327332, + "learning_rate": 4.851880016899028e-05, + "loss": 0.0401, + "step": 3506 + }, + { + "epoch": 1.4820447824250107, + "grad_norm": 1.2326481342315674, + "learning_rate": 4.851795521757499e-05, + "loss": 0.0289, + "step": 3508 + }, + { + "epoch": 1.4828897338403042, + "grad_norm": 1.1329704523086548, + "learning_rate": 4.85171102661597e-05, + "loss": 0.0597, + "step": 3510 + }, + { + "epoch": 1.4837346852555977, + "grad_norm": 1.6266759634017944, + "learning_rate": 4.8516265314744405e-05, + "loss": 0.0549, + "step": 3512 + }, + { + "epoch": 1.4845796366708914, + "grad_norm": 0.7869406342506409, + "learning_rate": 4.851542036332911e-05, + "loss": 0.0411, + "step": 3514 + }, + { + "epoch": 1.4854245880861852, + "grad_norm": 1.4602829217910767, + "learning_rate": 4.851457541191381e-05, + "loss": 0.0635, + "step": 3516 + }, + { + "epoch": 1.4862695395014787, + "grad_norm": 1.53984534740448, + "learning_rate": 4.8513730460498526e-05, + "loss": 0.062, + "step": 3518 + }, + { + "epoch": 1.4871144909167722, + "grad_norm": 1.5901530981063843, + "learning_rate": 4.851288550908323e-05, + "loss": 0.0504, + "step": 3520 + }, + { + "epoch": 1.487959442332066, + "grad_norm": 1.6808077096939087, + "learning_rate": 4.8512040557667934e-05, + "loss": 0.0728, + "step": 3522 + }, + { + "epoch": 1.4888043937473596, + "grad_norm": 1.1791267395019531, + "learning_rate": 4.851119560625264e-05, + "loss": 0.0568, + "step": 3524 + }, + { + "epoch": 1.4896493451626531, + "grad_norm": 0.7604468464851379, + "learning_rate": 4.851035065483735e-05, + "loss": 0.0348, + "step": 3526 + }, + { + "epoch": 1.4904942965779466, + "grad_norm": 0.5152177214622498, + "learning_rate": 4.8509505703422056e-05, + "loss": 0.0358, + "step": 3528 + }, + { + "epoch": 1.4913392479932404, + "grad_norm": 1.253456473350525, + "learning_rate": 4.850866075200676e-05, + "loss": 0.0637, + "step": 3530 + }, + { + "epoch": 1.492184199408534, + "grad_norm": 1.5630178451538086, + "learning_rate": 4.850781580059146e-05, + "loss": 0.0478, + "step": 3532 + }, + { + "epoch": 1.4930291508238276, + "grad_norm": 0.7693706154823303, + "learning_rate": 4.850697084917618e-05, + "loss": 0.0295, + "step": 3534 + }, + { + "epoch": 1.4938741022391213, + "grad_norm": 1.4552922248840332, + "learning_rate": 4.850612589776088e-05, + "loss": 0.0498, + "step": 3536 + }, + { + "epoch": 1.4947190536544148, + "grad_norm": 1.1829502582550049, + "learning_rate": 4.8505280946345585e-05, + "loss": 0.036, + "step": 3538 + }, + { + "epoch": 1.4955640050697085, + "grad_norm": 1.4774078130722046, + "learning_rate": 4.850443599493029e-05, + "loss": 0.0378, + "step": 3540 + }, + { + "epoch": 1.496408956485002, + "grad_norm": 1.580325961112976, + "learning_rate": 4.8503591043515e-05, + "loss": 0.0629, + "step": 3542 + }, + { + "epoch": 1.4972539079002958, + "grad_norm": 0.8769638538360596, + "learning_rate": 4.8502746092099707e-05, + "loss": 0.0229, + "step": 3544 + }, + { + "epoch": 1.4980988593155893, + "grad_norm": 0.45959189534187317, + "learning_rate": 4.8501901140684414e-05, + "loss": 0.035, + "step": 3546 + }, + { + "epoch": 1.498943810730883, + "grad_norm": 1.3305035829544067, + "learning_rate": 4.8501056189269114e-05, + "loss": 0.0617, + "step": 3548 + }, + { + "epoch": 1.4997887621461765, + "grad_norm": 0.7923805713653564, + "learning_rate": 4.850021123785383e-05, + "loss": 0.0263, + "step": 3550 + }, + { + "epoch": 1.5006337135614702, + "grad_norm": 1.4536417722702026, + "learning_rate": 4.849936628643853e-05, + "loss": 0.0513, + "step": 3552 + }, + { + "epoch": 1.501478664976764, + "grad_norm": 1.329981803894043, + "learning_rate": 4.849852133502324e-05, + "loss": 0.0615, + "step": 3554 + }, + { + "epoch": 1.5023236163920575, + "grad_norm": 0.9799890518188477, + "learning_rate": 4.849767638360794e-05, + "loss": 0.0483, + "step": 3556 + }, + { + "epoch": 1.503168567807351, + "grad_norm": 0.7838258743286133, + "learning_rate": 4.849683143219265e-05, + "loss": 0.0378, + "step": 3558 + }, + { + "epoch": 1.5040135192226447, + "grad_norm": 1.4174835681915283, + "learning_rate": 4.849598648077736e-05, + "loss": 0.057, + "step": 3560 + }, + { + "epoch": 1.5048584706379384, + "grad_norm": 1.955910325050354, + "learning_rate": 4.8495141529362065e-05, + "loss": 0.0573, + "step": 3562 + }, + { + "epoch": 1.505703422053232, + "grad_norm": 1.1473673582077026, + "learning_rate": 4.849429657794677e-05, + "loss": 0.0664, + "step": 3564 + }, + { + "epoch": 1.5065483734685254, + "grad_norm": 1.1155494451522827, + "learning_rate": 4.849345162653148e-05, + "loss": 0.0519, + "step": 3566 + }, + { + "epoch": 1.5073933248838192, + "grad_norm": 1.3899874687194824, + "learning_rate": 4.849260667511618e-05, + "loss": 0.0513, + "step": 3568 + }, + { + "epoch": 1.508238276299113, + "grad_norm": 1.4755295515060425, + "learning_rate": 4.849176172370089e-05, + "loss": 0.0808, + "step": 3570 + }, + { + "epoch": 1.5090832277144064, + "grad_norm": 2.025038242340088, + "learning_rate": 4.8490916772285594e-05, + "loss": 0.07, + "step": 3572 + }, + { + "epoch": 1.5099281791297, + "grad_norm": 1.5562915802001953, + "learning_rate": 4.84900718208703e-05, + "loss": 0.0648, + "step": 3574 + }, + { + "epoch": 1.5107731305449936, + "grad_norm": 1.214245319366455, + "learning_rate": 4.848922686945501e-05, + "loss": 0.031, + "step": 3576 + }, + { + "epoch": 1.5116180819602874, + "grad_norm": 1.378103256225586, + "learning_rate": 4.8488381918039715e-05, + "loss": 0.0728, + "step": 3578 + }, + { + "epoch": 1.5124630333755809, + "grad_norm": 1.2112326622009277, + "learning_rate": 4.848753696662442e-05, + "loss": 0.0583, + "step": 3580 + }, + { + "epoch": 1.5133079847908744, + "grad_norm": 0.9792726039886475, + "learning_rate": 4.848669201520913e-05, + "loss": 0.0547, + "step": 3582 + }, + { + "epoch": 1.5141529362061683, + "grad_norm": 0.9429163336753845, + "learning_rate": 4.848584706379383e-05, + "loss": 0.0405, + "step": 3584 + }, + { + "epoch": 1.5149978876214618, + "grad_norm": 1.9372706413269043, + "learning_rate": 4.8485002112378544e-05, + "loss": 0.0825, + "step": 3586 + }, + { + "epoch": 1.5158428390367553, + "grad_norm": 0.8863945007324219, + "learning_rate": 4.8484157160963245e-05, + "loss": 0.0645, + "step": 3588 + }, + { + "epoch": 1.516687790452049, + "grad_norm": 0.756432831287384, + "learning_rate": 4.848331220954795e-05, + "loss": 0.0399, + "step": 3590 + }, + { + "epoch": 1.5175327418673428, + "grad_norm": 0.6821669936180115, + "learning_rate": 4.848246725813266e-05, + "loss": 0.0479, + "step": 3592 + }, + { + "epoch": 1.5183776932826363, + "grad_norm": 0.9661495089530945, + "learning_rate": 4.8481622306717366e-05, + "loss": 0.0469, + "step": 3594 + }, + { + "epoch": 1.5192226446979298, + "grad_norm": 0.7377157807350159, + "learning_rate": 4.848077735530207e-05, + "loss": 0.0441, + "step": 3596 + }, + { + "epoch": 1.5200675961132235, + "grad_norm": 1.2411776781082153, + "learning_rate": 4.847993240388678e-05, + "loss": 0.0629, + "step": 3598 + }, + { + "epoch": 1.5209125475285172, + "grad_norm": 0.49103307723999023, + "learning_rate": 4.847908745247148e-05, + "loss": 0.0325, + "step": 3600 + }, + { + "epoch": 1.5209125475285172, + "eval_accuracy": 0.7358165200896706, + "eval_cer": 0.07696039944599804, + "eval_loss": 0.17656931281089783, + "eval_runtime": 856.6087, + "eval_samples_per_second": 13.539, + "eval_steps_per_second": 0.424, + "step": 3600 + }, + { + "epoch": 1.5217574989438107, + "grad_norm": 1.3446134328842163, + "learning_rate": 4.8478242501056195e-05, + "loss": 0.0377, + "step": 3602 + }, + { + "epoch": 1.5226024503591042, + "grad_norm": 1.5772300958633423, + "learning_rate": 4.8477397549640895e-05, + "loss": 0.0764, + "step": 3604 + }, + { + "epoch": 1.523447401774398, + "grad_norm": 1.2345671653747559, + "learning_rate": 4.84765525982256e-05, + "loss": 0.0713, + "step": 3606 + }, + { + "epoch": 1.5242923531896917, + "grad_norm": 0.7605233192443848, + "learning_rate": 4.847570764681031e-05, + "loss": 0.0466, + "step": 3608 + }, + { + "epoch": 1.5251373046049852, + "grad_norm": 1.6644865274429321, + "learning_rate": 4.847486269539502e-05, + "loss": 0.0703, + "step": 3610 + }, + { + "epoch": 1.5259822560202787, + "grad_norm": 1.4693437814712524, + "learning_rate": 4.8474017743979724e-05, + "loss": 0.041, + "step": 3612 + }, + { + "epoch": 1.5268272074355724, + "grad_norm": 0.45182543992996216, + "learning_rate": 4.847317279256443e-05, + "loss": 0.0364, + "step": 3614 + }, + { + "epoch": 1.5276721588508662, + "grad_norm": 0.8842322826385498, + "learning_rate": 4.847232784114913e-05, + "loss": 0.0299, + "step": 3616 + }, + { + "epoch": 1.5285171102661597, + "grad_norm": 1.3502392768859863, + "learning_rate": 4.8471482889733846e-05, + "loss": 0.0492, + "step": 3618 + }, + { + "epoch": 1.5293620616814532, + "grad_norm": 0.7896035313606262, + "learning_rate": 4.8470637938318546e-05, + "loss": 0.0429, + "step": 3620 + }, + { + "epoch": 1.530207013096747, + "grad_norm": 0.8707165122032166, + "learning_rate": 4.846979298690325e-05, + "loss": 0.0199, + "step": 3622 + }, + { + "epoch": 1.5310519645120406, + "grad_norm": 1.013792634010315, + "learning_rate": 4.846894803548796e-05, + "loss": 0.0599, + "step": 3624 + }, + { + "epoch": 1.5318969159273341, + "grad_norm": 1.2326784133911133, + "learning_rate": 4.846810308407267e-05, + "loss": 0.05, + "step": 3626 + }, + { + "epoch": 1.5327418673426279, + "grad_norm": 1.147342562675476, + "learning_rate": 4.8467258132657375e-05, + "loss": 0.0525, + "step": 3628 + }, + { + "epoch": 1.5335868187579216, + "grad_norm": 1.6748418807983398, + "learning_rate": 4.846641318124208e-05, + "loss": 0.0703, + "step": 3630 + }, + { + "epoch": 1.534431770173215, + "grad_norm": 0.6268454790115356, + "learning_rate": 4.846556822982678e-05, + "loss": 0.0372, + "step": 3632 + }, + { + "epoch": 1.5352767215885086, + "grad_norm": 0.7244894504547119, + "learning_rate": 4.8464723278411497e-05, + "loss": 0.0441, + "step": 3634 + }, + { + "epoch": 1.5361216730038023, + "grad_norm": 1.510006070137024, + "learning_rate": 4.84638783269962e-05, + "loss": 0.0542, + "step": 3636 + }, + { + "epoch": 1.536966624419096, + "grad_norm": 1.2709475755691528, + "learning_rate": 4.8463033375580904e-05, + "loss": 0.0383, + "step": 3638 + }, + { + "epoch": 1.5378115758343895, + "grad_norm": 1.0092389583587646, + "learning_rate": 4.846218842416561e-05, + "loss": 0.0452, + "step": 3640 + }, + { + "epoch": 1.538656527249683, + "grad_norm": 1.1898479461669922, + "learning_rate": 4.846134347275032e-05, + "loss": 0.0454, + "step": 3642 + }, + { + "epoch": 1.5395014786649768, + "grad_norm": 1.168468952178955, + "learning_rate": 4.8460498521335026e-05, + "loss": 0.0367, + "step": 3644 + }, + { + "epoch": 1.5403464300802705, + "grad_norm": 0.954703152179718, + "learning_rate": 4.845965356991973e-05, + "loss": 0.0181, + "step": 3646 + }, + { + "epoch": 1.541191381495564, + "grad_norm": 1.3118579387664795, + "learning_rate": 4.845880861850443e-05, + "loss": 0.0656, + "step": 3648 + }, + { + "epoch": 1.5420363329108575, + "grad_norm": 0.525249719619751, + "learning_rate": 4.845796366708915e-05, + "loss": 0.0246, + "step": 3650 + }, + { + "epoch": 1.5428812843261512, + "grad_norm": 1.0019251108169556, + "learning_rate": 4.845711871567385e-05, + "loss": 0.0469, + "step": 3652 + }, + { + "epoch": 1.543726235741445, + "grad_norm": 1.4187920093536377, + "learning_rate": 4.845627376425856e-05, + "loss": 0.0472, + "step": 3654 + }, + { + "epoch": 1.5445711871567385, + "grad_norm": 1.3827303647994995, + "learning_rate": 4.845542881284326e-05, + "loss": 0.0635, + "step": 3656 + }, + { + "epoch": 1.545416138572032, + "grad_norm": 1.0358022451400757, + "learning_rate": 4.845458386142797e-05, + "loss": 0.0414, + "step": 3658 + }, + { + "epoch": 1.5462610899873257, + "grad_norm": 1.110398530960083, + "learning_rate": 4.8453738910012677e-05, + "loss": 0.036, + "step": 3660 + }, + { + "epoch": 1.5471060414026194, + "grad_norm": 0.48275184631347656, + "learning_rate": 4.8452893958597384e-05, + "loss": 0.0356, + "step": 3662 + }, + { + "epoch": 1.547950992817913, + "grad_norm": 0.5242049694061279, + "learning_rate": 4.845204900718209e-05, + "loss": 0.0235, + "step": 3664 + }, + { + "epoch": 1.5487959442332064, + "grad_norm": 1.1783398389816284, + "learning_rate": 4.84512040557668e-05, + "loss": 0.0572, + "step": 3666 + }, + { + "epoch": 1.5496408956485002, + "grad_norm": 1.400747299194336, + "learning_rate": 4.84503591043515e-05, + "loss": 0.0575, + "step": 3668 + }, + { + "epoch": 1.5504858470637939, + "grad_norm": 1.717510461807251, + "learning_rate": 4.844951415293621e-05, + "loss": 0.0668, + "step": 3670 + }, + { + "epoch": 1.5513307984790874, + "grad_norm": 1.1266288757324219, + "learning_rate": 4.844866920152091e-05, + "loss": 0.0462, + "step": 3672 + }, + { + "epoch": 1.5521757498943811, + "grad_norm": 0.7779021263122559, + "learning_rate": 4.844782425010562e-05, + "loss": 0.046, + "step": 3674 + }, + { + "epoch": 1.5530207013096748, + "grad_norm": 1.299617886543274, + "learning_rate": 4.844697929869033e-05, + "loss": 0.0495, + "step": 3676 + }, + { + "epoch": 1.5538656527249683, + "grad_norm": 0.6191155910491943, + "learning_rate": 4.8446134347275035e-05, + "loss": 0.0419, + "step": 3678 + }, + { + "epoch": 1.5547106041402619, + "grad_norm": 1.5401053428649902, + "learning_rate": 4.844528939585974e-05, + "loss": 0.0492, + "step": 3680 + }, + { + "epoch": 1.5555555555555556, + "grad_norm": 1.5327844619750977, + "learning_rate": 4.844444444444445e-05, + "loss": 0.0692, + "step": 3682 + }, + { + "epoch": 1.5564005069708493, + "grad_norm": 0.7170135378837585, + "learning_rate": 4.844359949302915e-05, + "loss": 0.0327, + "step": 3684 + }, + { + "epoch": 1.5572454583861428, + "grad_norm": 1.2383910417556763, + "learning_rate": 4.844275454161386e-05, + "loss": 0.0454, + "step": 3686 + }, + { + "epoch": 1.5580904098014363, + "grad_norm": 1.0864990949630737, + "learning_rate": 4.8441909590198564e-05, + "loss": 0.0492, + "step": 3688 + }, + { + "epoch": 1.55893536121673, + "grad_norm": 1.1290335655212402, + "learning_rate": 4.844106463878327e-05, + "loss": 0.0443, + "step": 3690 + }, + { + "epoch": 1.5597803126320238, + "grad_norm": 1.9262590408325195, + "learning_rate": 4.844021968736798e-05, + "loss": 0.06, + "step": 3692 + }, + { + "epoch": 1.5606252640473173, + "grad_norm": 1.2128050327301025, + "learning_rate": 4.8439374735952685e-05, + "loss": 0.0707, + "step": 3694 + }, + { + "epoch": 1.5614702154626108, + "grad_norm": 0.8806398510932922, + "learning_rate": 4.843852978453739e-05, + "loss": 0.0451, + "step": 3696 + }, + { + "epoch": 1.5623151668779045, + "grad_norm": 1.6122541427612305, + "learning_rate": 4.84376848331221e-05, + "loss": 0.0485, + "step": 3698 + }, + { + "epoch": 1.5631601182931982, + "grad_norm": 1.153076171875, + "learning_rate": 4.84368398817068e-05, + "loss": 0.0568, + "step": 3700 + }, + { + "epoch": 1.5631601182931982, + "eval_accuracy": 0.7448698051388171, + "eval_cer": 0.07185264253641152, + "eval_loss": 0.16689425706863403, + "eval_runtime": 859.966, + "eval_samples_per_second": 13.487, + "eval_steps_per_second": 0.422, + "step": 3700 + }, + { + "epoch": 1.5640050697084917, + "grad_norm": 1.530321478843689, + "learning_rate": 4.8435994930291514e-05, + "loss": 0.0513, + "step": 3702 + }, + { + "epoch": 1.5648500211237852, + "grad_norm": 0.5067934989929199, + "learning_rate": 4.8435149978876215e-05, + "loss": 0.0333, + "step": 3704 + }, + { + "epoch": 1.565694972539079, + "grad_norm": 0.6863754987716675, + "learning_rate": 4.843430502746092e-05, + "loss": 0.0331, + "step": 3706 + }, + { + "epoch": 1.5665399239543727, + "grad_norm": 0.6762244701385498, + "learning_rate": 4.843346007604563e-05, + "loss": 0.0281, + "step": 3708 + }, + { + "epoch": 1.5673848753696662, + "grad_norm": 0.8880215287208557, + "learning_rate": 4.8432615124630336e-05, + "loss": 0.029, + "step": 3710 + }, + { + "epoch": 1.5682298267849597, + "grad_norm": 1.9256477355957031, + "learning_rate": 4.843177017321504e-05, + "loss": 0.0542, + "step": 3712 + }, + { + "epoch": 1.5690747782002537, + "grad_norm": 1.291383147239685, + "learning_rate": 4.843092522179975e-05, + "loss": 0.0429, + "step": 3714 + }, + { + "epoch": 1.5699197296155472, + "grad_norm": 0.8845472931861877, + "learning_rate": 4.843008027038445e-05, + "loss": 0.0295, + "step": 3716 + }, + { + "epoch": 1.5707646810308407, + "grad_norm": 1.2028437852859497, + "learning_rate": 4.8429235318969165e-05, + "loss": 0.0485, + "step": 3718 + }, + { + "epoch": 1.5716096324461344, + "grad_norm": 1.249176025390625, + "learning_rate": 4.8428390367553865e-05, + "loss": 0.0261, + "step": 3720 + }, + { + "epoch": 1.5724545838614281, + "grad_norm": 1.1442838907241821, + "learning_rate": 4.842754541613857e-05, + "loss": 0.0597, + "step": 3722 + }, + { + "epoch": 1.5732995352767216, + "grad_norm": 1.2954820394515991, + "learning_rate": 4.842670046472328e-05, + "loss": 0.047, + "step": 3724 + }, + { + "epoch": 1.5741444866920151, + "grad_norm": 1.1261757612228394, + "learning_rate": 4.842585551330799e-05, + "loss": 0.0375, + "step": 3726 + }, + { + "epoch": 1.5749894381073088, + "grad_norm": 0.8863781094551086, + "learning_rate": 4.8425010561892694e-05, + "loss": 0.0332, + "step": 3728 + }, + { + "epoch": 1.5758343895226026, + "grad_norm": 0.660490095615387, + "learning_rate": 4.84241656104774e-05, + "loss": 0.0293, + "step": 3730 + }, + { + "epoch": 1.576679340937896, + "grad_norm": 0.3680121600627899, + "learning_rate": 4.84233206590621e-05, + "loss": 0.0454, + "step": 3732 + }, + { + "epoch": 1.5775242923531896, + "grad_norm": 1.0620887279510498, + "learning_rate": 4.8422475707646816e-05, + "loss": 0.0517, + "step": 3734 + }, + { + "epoch": 1.5783692437684833, + "grad_norm": 0.967285692691803, + "learning_rate": 4.8421630756231516e-05, + "loss": 0.0304, + "step": 3736 + }, + { + "epoch": 1.579214195183777, + "grad_norm": 1.02311372756958, + "learning_rate": 4.842078580481622e-05, + "loss": 0.0482, + "step": 3738 + }, + { + "epoch": 1.5800591465990705, + "grad_norm": 1.0686031579971313, + "learning_rate": 4.841994085340093e-05, + "loss": 0.0298, + "step": 3740 + }, + { + "epoch": 1.580904098014364, + "grad_norm": 1.1448440551757812, + "learning_rate": 4.841909590198564e-05, + "loss": 0.0596, + "step": 3742 + }, + { + "epoch": 1.5817490494296578, + "grad_norm": 0.899435818195343, + "learning_rate": 4.8418250950570345e-05, + "loss": 0.0393, + "step": 3744 + }, + { + "epoch": 1.5825940008449515, + "grad_norm": 0.6797332167625427, + "learning_rate": 4.841740599915505e-05, + "loss": 0.0286, + "step": 3746 + }, + { + "epoch": 1.583438952260245, + "grad_norm": 1.0120704174041748, + "learning_rate": 4.841656104773975e-05, + "loss": 0.0597, + "step": 3748 + }, + { + "epoch": 1.5842839036755385, + "grad_norm": 1.1370409727096558, + "learning_rate": 4.8415716096324467e-05, + "loss": 0.0551, + "step": 3750 + }, + { + "epoch": 1.5851288550908322, + "grad_norm": 1.4799180030822754, + "learning_rate": 4.841487114490917e-05, + "loss": 0.03, + "step": 3752 + }, + { + "epoch": 1.585973806506126, + "grad_norm": 1.0635912418365479, + "learning_rate": 4.841402619349388e-05, + "loss": 0.0513, + "step": 3754 + }, + { + "epoch": 1.5868187579214195, + "grad_norm": 0.6243870854377747, + "learning_rate": 4.841318124207858e-05, + "loss": 0.0308, + "step": 3756 + }, + { + "epoch": 1.5876637093367132, + "grad_norm": 1.172440528869629, + "learning_rate": 4.841233629066329e-05, + "loss": 0.0428, + "step": 3758 + }, + { + "epoch": 1.588508660752007, + "grad_norm": 1.209961175918579, + "learning_rate": 4.8411491339247996e-05, + "loss": 0.0458, + "step": 3760 + }, + { + "epoch": 1.5893536121673004, + "grad_norm": 1.1312841176986694, + "learning_rate": 4.84106463878327e-05, + "loss": 0.054, + "step": 3762 + }, + { + "epoch": 1.590198563582594, + "grad_norm": 0.6913443207740784, + "learning_rate": 4.840980143641741e-05, + "loss": 0.0403, + "step": 3764 + }, + { + "epoch": 1.5910435149978877, + "grad_norm": 1.1605889797210693, + "learning_rate": 4.840895648500212e-05, + "loss": 0.0479, + "step": 3766 + }, + { + "epoch": 1.5918884664131814, + "grad_norm": 0.7447473406791687, + "learning_rate": 4.840811153358682e-05, + "loss": 0.0268, + "step": 3768 + }, + { + "epoch": 1.5927334178284749, + "grad_norm": 0.9783474206924438, + "learning_rate": 4.840726658217153e-05, + "loss": 0.0465, + "step": 3770 + }, + { + "epoch": 1.5935783692437684, + "grad_norm": 1.1488279104232788, + "learning_rate": 4.840642163075623e-05, + "loss": 0.0517, + "step": 3772 + }, + { + "epoch": 1.5944233206590621, + "grad_norm": 0.8444589376449585, + "learning_rate": 4.840557667934094e-05, + "loss": 0.0339, + "step": 3774 + }, + { + "epoch": 1.5952682720743558, + "grad_norm": 1.1645581722259521, + "learning_rate": 4.8404731727925647e-05, + "loss": 0.041, + "step": 3776 + }, + { + "epoch": 1.5961132234896493, + "grad_norm": 1.6231311559677124, + "learning_rate": 4.8403886776510354e-05, + "loss": 0.054, + "step": 3778 + }, + { + "epoch": 1.5969581749049429, + "grad_norm": 0.9748203754425049, + "learning_rate": 4.840304182509506e-05, + "loss": 0.0428, + "step": 3780 + }, + { + "epoch": 1.5978031263202366, + "grad_norm": 1.2018306255340576, + "learning_rate": 4.840219687367977e-05, + "loss": 0.0527, + "step": 3782 + }, + { + "epoch": 1.5986480777355303, + "grad_norm": 1.094373345375061, + "learning_rate": 4.840135192226447e-05, + "loss": 0.0628, + "step": 3784 + }, + { + "epoch": 1.5994930291508238, + "grad_norm": 0.814469575881958, + "learning_rate": 4.840050697084918e-05, + "loss": 0.0224, + "step": 3786 + }, + { + "epoch": 1.6003379805661173, + "grad_norm": 0.8022437691688538, + "learning_rate": 4.839966201943388e-05, + "loss": 0.0482, + "step": 3788 + }, + { + "epoch": 1.601182931981411, + "grad_norm": 1.218186855316162, + "learning_rate": 4.839881706801859e-05, + "loss": 0.0675, + "step": 3790 + }, + { + "epoch": 1.6020278833967048, + "grad_norm": 1.626511573791504, + "learning_rate": 4.83979721166033e-05, + "loss": 0.0712, + "step": 3792 + }, + { + "epoch": 1.6028728348119983, + "grad_norm": 1.63881516456604, + "learning_rate": 4.8397127165188005e-05, + "loss": 0.0836, + "step": 3794 + }, + { + "epoch": 1.6037177862272918, + "grad_norm": 1.0243483781814575, + "learning_rate": 4.839628221377271e-05, + "loss": 0.0255, + "step": 3796 + }, + { + "epoch": 1.6045627376425855, + "grad_norm": 1.4431846141815186, + "learning_rate": 4.839543726235742e-05, + "loss": 0.0573, + "step": 3798 + }, + { + "epoch": 1.6054076890578792, + "grad_norm": 1.132359266281128, + "learning_rate": 4.839459231094212e-05, + "loss": 0.0403, + "step": 3800 + }, + { + "epoch": 1.6054076890578792, + "eval_accuracy": 0.7347818589411967, + "eval_cer": 0.075275457183941, + "eval_loss": 0.17756511270999908, + "eval_runtime": 841.6089, + "eval_samples_per_second": 13.781, + "eval_steps_per_second": 0.431, + "step": 3800 + }, + { + "epoch": 1.6062526404731727, + "grad_norm": 1.0357937812805176, + "learning_rate": 4.839374735952683e-05, + "loss": 0.0542, + "step": 3802 + }, + { + "epoch": 1.6070975918884665, + "grad_norm": 0.7196958661079407, + "learning_rate": 4.8392902408111534e-05, + "loss": 0.0331, + "step": 3804 + }, + { + "epoch": 1.6079425433037602, + "grad_norm": 0.6513099670410156, + "learning_rate": 4.839205745669624e-05, + "loss": 0.0424, + "step": 3806 + }, + { + "epoch": 1.6087874947190537, + "grad_norm": 1.5637171268463135, + "learning_rate": 4.839121250528095e-05, + "loss": 0.0593, + "step": 3808 + }, + { + "epoch": 1.6096324461343472, + "grad_norm": 0.7417770028114319, + "learning_rate": 4.8390367553865655e-05, + "loss": 0.0378, + "step": 3810 + }, + { + "epoch": 1.610477397549641, + "grad_norm": 0.97124183177948, + "learning_rate": 4.838952260245036e-05, + "loss": 0.0405, + "step": 3812 + }, + { + "epoch": 1.6113223489649346, + "grad_norm": 1.1514919996261597, + "learning_rate": 4.838867765103507e-05, + "loss": 0.0416, + "step": 3814 + }, + { + "epoch": 1.6121673003802282, + "grad_norm": 1.0334582328796387, + "learning_rate": 4.838783269961977e-05, + "loss": 0.0359, + "step": 3816 + }, + { + "epoch": 1.6130122517955217, + "grad_norm": 0.6415423154830933, + "learning_rate": 4.8386987748204484e-05, + "loss": 0.0269, + "step": 3818 + }, + { + "epoch": 1.6138572032108154, + "grad_norm": 0.9257076382637024, + "learning_rate": 4.8386142796789185e-05, + "loss": 0.0433, + "step": 3820 + }, + { + "epoch": 1.614702154626109, + "grad_norm": 1.0483789443969727, + "learning_rate": 4.838529784537389e-05, + "loss": 0.0402, + "step": 3822 + }, + { + "epoch": 1.6155471060414026, + "grad_norm": 1.224219799041748, + "learning_rate": 4.83844528939586e-05, + "loss": 0.0515, + "step": 3824 + }, + { + "epoch": 1.6163920574566961, + "grad_norm": 2.190443992614746, + "learning_rate": 4.8383607942543306e-05, + "loss": 0.0708, + "step": 3826 + }, + { + "epoch": 1.6172370088719898, + "grad_norm": 0.5145219564437866, + "learning_rate": 4.838276299112801e-05, + "loss": 0.0281, + "step": 3828 + }, + { + "epoch": 1.6180819602872836, + "grad_norm": 0.6071678400039673, + "learning_rate": 4.838191803971272e-05, + "loss": 0.0281, + "step": 3830 + }, + { + "epoch": 1.618926911702577, + "grad_norm": 1.6799170970916748, + "learning_rate": 4.838107308829742e-05, + "loss": 0.0694, + "step": 3832 + }, + { + "epoch": 1.6197718631178706, + "grad_norm": 1.0800015926361084, + "learning_rate": 4.8380228136882135e-05, + "loss": 0.0589, + "step": 3834 + }, + { + "epoch": 1.6206168145331643, + "grad_norm": 0.9297267198562622, + "learning_rate": 4.8379383185466835e-05, + "loss": 0.0276, + "step": 3836 + }, + { + "epoch": 1.621461765948458, + "grad_norm": 0.6874012351036072, + "learning_rate": 4.837853823405154e-05, + "loss": 0.0321, + "step": 3838 + }, + { + "epoch": 1.6223067173637515, + "grad_norm": 0.8664073944091797, + "learning_rate": 4.837769328263625e-05, + "loss": 0.0322, + "step": 3840 + }, + { + "epoch": 1.623151668779045, + "grad_norm": 1.3340883255004883, + "learning_rate": 4.837684833122096e-05, + "loss": 0.0442, + "step": 3842 + }, + { + "epoch": 1.623996620194339, + "grad_norm": 1.2647576332092285, + "learning_rate": 4.8376003379805664e-05, + "loss": 0.0795, + "step": 3844 + }, + { + "epoch": 1.6248415716096325, + "grad_norm": 1.0526044368743896, + "learning_rate": 4.837515842839037e-05, + "loss": 0.0379, + "step": 3846 + }, + { + "epoch": 1.625686523024926, + "grad_norm": 0.8544779419898987, + "learning_rate": 4.837431347697507e-05, + "loss": 0.0446, + "step": 3848 + }, + { + "epoch": 1.6265314744402197, + "grad_norm": 1.369625449180603, + "learning_rate": 4.8373468525559786e-05, + "loss": 0.0624, + "step": 3850 + }, + { + "epoch": 1.6273764258555135, + "grad_norm": 0.6502795219421387, + "learning_rate": 4.8372623574144486e-05, + "loss": 0.0625, + "step": 3852 + }, + { + "epoch": 1.628221377270807, + "grad_norm": 1.5579420328140259, + "learning_rate": 4.83717786227292e-05, + "loss": 0.0634, + "step": 3854 + }, + { + "epoch": 1.6290663286861005, + "grad_norm": 1.384822130203247, + "learning_rate": 4.83709336713139e-05, + "loss": 0.0428, + "step": 3856 + }, + { + "epoch": 1.6299112801013942, + "grad_norm": 0.95893394947052, + "learning_rate": 4.837008871989861e-05, + "loss": 0.0487, + "step": 3858 + }, + { + "epoch": 1.630756231516688, + "grad_norm": 1.7926594018936157, + "learning_rate": 4.8369243768483315e-05, + "loss": 0.056, + "step": 3860 + }, + { + "epoch": 1.6316011829319814, + "grad_norm": 2.259760856628418, + "learning_rate": 4.836839881706802e-05, + "loss": 0.0356, + "step": 3862 + }, + { + "epoch": 1.632446134347275, + "grad_norm": 0.7830055356025696, + "learning_rate": 4.836755386565273e-05, + "loss": 0.0371, + "step": 3864 + }, + { + "epoch": 1.6332910857625687, + "grad_norm": 0.8463422060012817, + "learning_rate": 4.8366708914237437e-05, + "loss": 0.0532, + "step": 3866 + }, + { + "epoch": 1.6341360371778624, + "grad_norm": 1.0259829759597778, + "learning_rate": 4.836586396282214e-05, + "loss": 0.0461, + "step": 3868 + }, + { + "epoch": 1.6349809885931559, + "grad_norm": 0.7621318697929382, + "learning_rate": 4.836501901140685e-05, + "loss": 0.0515, + "step": 3870 + }, + { + "epoch": 1.6358259400084494, + "grad_norm": 1.094347596168518, + "learning_rate": 4.836417405999155e-05, + "loss": 0.0504, + "step": 3872 + }, + { + "epoch": 1.6366708914237431, + "grad_norm": 1.4083515405654907, + "learning_rate": 4.836332910857626e-05, + "loss": 0.0464, + "step": 3874 + }, + { + "epoch": 1.6375158428390368, + "grad_norm": 1.221833348274231, + "learning_rate": 4.8362484157160966e-05, + "loss": 0.0393, + "step": 3876 + }, + { + "epoch": 1.6383607942543303, + "grad_norm": 1.5514699220657349, + "learning_rate": 4.836163920574567e-05, + "loss": 0.0591, + "step": 3878 + }, + { + "epoch": 1.6392057456696238, + "grad_norm": 1.4877634048461914, + "learning_rate": 4.836079425433038e-05, + "loss": 0.0546, + "step": 3880 + }, + { + "epoch": 1.6400506970849176, + "grad_norm": 1.098713994026184, + "learning_rate": 4.835994930291509e-05, + "loss": 0.0385, + "step": 3882 + }, + { + "epoch": 1.6408956485002113, + "grad_norm": 1.0514299869537354, + "learning_rate": 4.835910435149979e-05, + "loss": 0.0556, + "step": 3884 + }, + { + "epoch": 1.6417405999155048, + "grad_norm": 0.7681018710136414, + "learning_rate": 4.83582594000845e-05, + "loss": 0.0355, + "step": 3886 + }, + { + "epoch": 1.6425855513307985, + "grad_norm": 1.322800636291504, + "learning_rate": 4.83574144486692e-05, + "loss": 0.0453, + "step": 3888 + }, + { + "epoch": 1.6434305027460923, + "grad_norm": 1.2197022438049316, + "learning_rate": 4.835656949725391e-05, + "loss": 0.0542, + "step": 3890 + }, + { + "epoch": 1.6442754541613858, + "grad_norm": 1.9387176036834717, + "learning_rate": 4.8355724545838617e-05, + "loss": 0.06, + "step": 3892 + }, + { + "epoch": 1.6451204055766793, + "grad_norm": 0.9585774540901184, + "learning_rate": 4.8354879594423324e-05, + "loss": 0.0847, + "step": 3894 + }, + { + "epoch": 1.645965356991973, + "grad_norm": 0.7485412359237671, + "learning_rate": 4.835403464300803e-05, + "loss": 0.0278, + "step": 3896 + }, + { + "epoch": 1.6468103084072667, + "grad_norm": 0.7363729476928711, + "learning_rate": 4.835318969159274e-05, + "loss": 0.0548, + "step": 3898 + }, + { + "epoch": 1.6476552598225602, + "grad_norm": 2.1429085731506348, + "learning_rate": 4.835234474017744e-05, + "loss": 0.044, + "step": 3900 + }, + { + "epoch": 1.6476552598225602, + "eval_accuracy": 0.7290050008622176, + "eval_cer": 0.07787785491853172, + "eval_loss": 0.17509505152702332, + "eval_runtime": 862.0291, + "eval_samples_per_second": 13.454, + "eval_steps_per_second": 0.421, + "step": 3900 + }, + { + "epoch": 1.6485002112378537, + "grad_norm": 1.3191771507263184, + "learning_rate": 4.835149978876215e-05, + "loss": 0.05, + "step": 3902 + }, + { + "epoch": 1.6493451626531475, + "grad_norm": 1.1920831203460693, + "learning_rate": 4.835065483734685e-05, + "loss": 0.045, + "step": 3904 + }, + { + "epoch": 1.6501901140684412, + "grad_norm": 0.7942538261413574, + "learning_rate": 4.834980988593156e-05, + "loss": 0.0335, + "step": 3906 + }, + { + "epoch": 1.6510350654837347, + "grad_norm": 1.081873893737793, + "learning_rate": 4.834896493451627e-05, + "loss": 0.0959, + "step": 3908 + }, + { + "epoch": 1.6518800168990282, + "grad_norm": 0.9521547555923462, + "learning_rate": 4.8348119983100975e-05, + "loss": 0.0277, + "step": 3910 + }, + { + "epoch": 1.652724968314322, + "grad_norm": 1.2026457786560059, + "learning_rate": 4.834727503168568e-05, + "loss": 0.0523, + "step": 3912 + }, + { + "epoch": 1.6535699197296156, + "grad_norm": 0.7098683714866638, + "learning_rate": 4.834643008027039e-05, + "loss": 0.0256, + "step": 3914 + }, + { + "epoch": 1.6544148711449091, + "grad_norm": 1.0528465509414673, + "learning_rate": 4.834558512885509e-05, + "loss": 0.064, + "step": 3916 + }, + { + "epoch": 1.6552598225602027, + "grad_norm": 1.4617663621902466, + "learning_rate": 4.8344740177439803e-05, + "loss": 0.0489, + "step": 3918 + }, + { + "epoch": 1.6561047739754964, + "grad_norm": 1.6092969179153442, + "learning_rate": 4.8343895226024504e-05, + "loss": 0.0505, + "step": 3920 + }, + { + "epoch": 1.65694972539079, + "grad_norm": 1.4073854684829712, + "learning_rate": 4.834305027460921e-05, + "loss": 0.0411, + "step": 3922 + }, + { + "epoch": 1.6577946768060836, + "grad_norm": 1.2704623937606812, + "learning_rate": 4.834220532319392e-05, + "loss": 0.0713, + "step": 3924 + }, + { + "epoch": 1.6586396282213771, + "grad_norm": 1.0156190395355225, + "learning_rate": 4.8341360371778625e-05, + "loss": 0.0335, + "step": 3926 + }, + { + "epoch": 1.6594845796366708, + "grad_norm": 1.200533151626587, + "learning_rate": 4.834051542036333e-05, + "loss": 0.0465, + "step": 3928 + }, + { + "epoch": 1.6603295310519646, + "grad_norm": 0.7413454651832581, + "learning_rate": 4.833967046894804e-05, + "loss": 0.044, + "step": 3930 + }, + { + "epoch": 1.661174482467258, + "grad_norm": 1.4523422718048096, + "learning_rate": 4.833882551753274e-05, + "loss": 0.0612, + "step": 3932 + }, + { + "epoch": 1.6620194338825518, + "grad_norm": 1.259691596031189, + "learning_rate": 4.8337980566117454e-05, + "loss": 0.061, + "step": 3934 + }, + { + "epoch": 1.6628643852978455, + "grad_norm": 1.7233320474624634, + "learning_rate": 4.8337135614702155e-05, + "loss": 0.0653, + "step": 3936 + }, + { + "epoch": 1.663709336713139, + "grad_norm": 1.2285038232803345, + "learning_rate": 4.833629066328686e-05, + "loss": 0.0703, + "step": 3938 + }, + { + "epoch": 1.6645542881284325, + "grad_norm": 1.288918375968933, + "learning_rate": 4.833544571187157e-05, + "loss": 0.0618, + "step": 3940 + }, + { + "epoch": 1.6653992395437263, + "grad_norm": 0.7824044823646545, + "learning_rate": 4.8334600760456276e-05, + "loss": 0.048, + "step": 3942 + }, + { + "epoch": 1.66624419095902, + "grad_norm": 1.211120843887329, + "learning_rate": 4.8333755809040983e-05, + "loss": 0.0669, + "step": 3944 + }, + { + "epoch": 1.6670891423743135, + "grad_norm": 2.0919220447540283, + "learning_rate": 4.833291085762569e-05, + "loss": 0.0803, + "step": 3946 + }, + { + "epoch": 1.667934093789607, + "grad_norm": 0.8903474807739258, + "learning_rate": 4.833206590621039e-05, + "loss": 0.063, + "step": 3948 + }, + { + "epoch": 1.6687790452049007, + "grad_norm": 1.258203148841858, + "learning_rate": 4.8331220954795105e-05, + "loss": 0.0331, + "step": 3950 + }, + { + "epoch": 1.6696239966201945, + "grad_norm": 1.2711974382400513, + "learning_rate": 4.8330376003379805e-05, + "loss": 0.0414, + "step": 3952 + }, + { + "epoch": 1.670468948035488, + "grad_norm": 0.8380125164985657, + "learning_rate": 4.832953105196452e-05, + "loss": 0.0445, + "step": 3954 + }, + { + "epoch": 1.6713138994507815, + "grad_norm": 1.392994999885559, + "learning_rate": 4.832868610054922e-05, + "loss": 0.0605, + "step": 3956 + }, + { + "epoch": 1.6721588508660752, + "grad_norm": 1.1180285215377808, + "learning_rate": 4.832784114913393e-05, + "loss": 0.0493, + "step": 3958 + }, + { + "epoch": 1.673003802281369, + "grad_norm": 1.3766900300979614, + "learning_rate": 4.8326996197718634e-05, + "loss": 0.057, + "step": 3960 + }, + { + "epoch": 1.6738487536966624, + "grad_norm": 1.6987857818603516, + "learning_rate": 4.832615124630334e-05, + "loss": 0.0476, + "step": 3962 + }, + { + "epoch": 1.674693705111956, + "grad_norm": 1.4040523767471313, + "learning_rate": 4.832530629488805e-05, + "loss": 0.0397, + "step": 3964 + }, + { + "epoch": 1.6755386565272496, + "grad_norm": 0.45623424649238586, + "learning_rate": 4.8324461343472756e-05, + "loss": 0.0455, + "step": 3966 + }, + { + "epoch": 1.6763836079425434, + "grad_norm": 0.9928030967712402, + "learning_rate": 4.8323616392057456e-05, + "loss": 0.0448, + "step": 3968 + }, + { + "epoch": 1.6772285593578369, + "grad_norm": 0.9238046407699585, + "learning_rate": 4.832277144064217e-05, + "loss": 0.0295, + "step": 3970 + }, + { + "epoch": 1.6780735107731304, + "grad_norm": 0.2931238114833832, + "learning_rate": 4.832192648922687e-05, + "loss": 0.0247, + "step": 3972 + }, + { + "epoch": 1.6789184621884243, + "grad_norm": 0.8757814168930054, + "learning_rate": 4.832108153781158e-05, + "loss": 0.0358, + "step": 3974 + }, + { + "epoch": 1.6797634136037178, + "grad_norm": 0.8881898522377014, + "learning_rate": 4.8320236586396285e-05, + "loss": 0.0331, + "step": 3976 + }, + { + "epoch": 1.6806083650190113, + "grad_norm": 0.7834998965263367, + "learning_rate": 4.831939163498099e-05, + "loss": 0.0407, + "step": 3978 + }, + { + "epoch": 1.681453316434305, + "grad_norm": 1.1790554523468018, + "learning_rate": 4.83185466835657e-05, + "loss": 0.0513, + "step": 3980 + }, + { + "epoch": 1.6822982678495988, + "grad_norm": 0.5270421504974365, + "learning_rate": 4.8317701732150407e-05, + "loss": 0.0395, + "step": 3982 + }, + { + "epoch": 1.6831432192648923, + "grad_norm": 1.120709776878357, + "learning_rate": 4.831685678073511e-05, + "loss": 0.0394, + "step": 3984 + }, + { + "epoch": 1.6839881706801858, + "grad_norm": 0.8393281102180481, + "learning_rate": 4.831601182931982e-05, + "loss": 0.0342, + "step": 3986 + }, + { + "epoch": 1.6848331220954795, + "grad_norm": 0.6471826434135437, + "learning_rate": 4.831516687790452e-05, + "loss": 0.0257, + "step": 3988 + }, + { + "epoch": 1.6856780735107733, + "grad_norm": 1.7182902097702026, + "learning_rate": 4.831432192648923e-05, + "loss": 0.0506, + "step": 3990 + }, + { + "epoch": 1.6865230249260668, + "grad_norm": 0.7699361443519592, + "learning_rate": 4.8313476975073936e-05, + "loss": 0.0408, + "step": 3992 + }, + { + "epoch": 1.6873679763413603, + "grad_norm": 0.8868207335472107, + "learning_rate": 4.831263202365864e-05, + "loss": 0.0359, + "step": 3994 + }, + { + "epoch": 1.688212927756654, + "grad_norm": 1.3517881631851196, + "learning_rate": 4.831178707224335e-05, + "loss": 0.0439, + "step": 3996 + }, + { + "epoch": 1.6890578791719477, + "grad_norm": 1.8134877681732178, + "learning_rate": 4.831094212082806e-05, + "loss": 0.0508, + "step": 3998 + }, + { + "epoch": 1.6899028305872412, + "grad_norm": 0.8728620409965515, + "learning_rate": 4.831009716941276e-05, + "loss": 0.0638, + "step": 4000 + }, + { + "epoch": 1.6899028305872412, + "eval_accuracy": 0.7381445076737368, + "eval_cer": 0.07271716788552979, + "eval_loss": 0.17253856360912323, + "eval_runtime": 854.712, + "eval_samples_per_second": 13.569, + "eval_steps_per_second": 0.425, + "step": 4000 + } + ], + "logging_steps": 2, + "max_steps": 118350, + "num_input_tokens_seen": 0, + "num_train_epochs": 50, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.6614257405400023e+19, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}