{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 100, "global_step": 10605, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14144271570014144, "eval_loss": 3.7426819801330566, "eval_runtime": 154.1912, "eval_samples_per_second": 36.682, "eval_steps_per_second": 4.585, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.2828854314002829, "eval_loss": 2.9179046154022217, "eval_runtime": 149.8513, "eval_samples_per_second": 37.744, "eval_steps_per_second": 4.718, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.4243281471004243, "eval_loss": 2.8035507202148438, "eval_runtime": 151.9434, "eval_samples_per_second": 37.224, "eval_steps_per_second": 4.653, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.5657708628005658, "eval_loss": 1.2195814847946167, "eval_runtime": 152.8075, "eval_samples_per_second": 37.014, "eval_steps_per_second": 4.627, "eval_wer": 0.8933627681528051, "step": 400 }, { "epoch": 0.7072135785007072, "grad_norm": 2.533352851867676, "learning_rate": 0.00029699999999999996, "loss": 3.574, "step": 500 }, { "epoch": 0.7072135785007072, "eval_loss": 0.9860211610794067, "eval_runtime": 151.7482, "eval_samples_per_second": 37.272, "eval_steps_per_second": 4.659, "eval_wer": 0.7276159395946313, "step": 500 }, { "epoch": 0.8486562942008486, "eval_loss": 0.8391533493995667, "eval_runtime": 153.0126, "eval_samples_per_second": 36.964, "eval_steps_per_second": 4.621, "eval_wer": 0.6504295244037049, "step": 600 }, { "epoch": 0.9900990099009901, "eval_loss": 0.7803803086280823, "eval_runtime": 152.7414, "eval_samples_per_second": 37.03, "eval_steps_per_second": 4.629, "eval_wer": 0.6029178864519844, "step": 700 }, { "epoch": 1.1315417256011315, "eval_loss": 0.6122242212295532, "eval_runtime": 153.2031, "eval_samples_per_second": 36.918, "eval_steps_per_second": 4.615, "eval_wer": 0.4909056006142919, "step": 800 }, { "epoch": 1.272984441301273, "eval_loss": 0.5901117920875549, "eval_runtime": 152.5961, "eval_samples_per_second": 37.065, "eval_steps_per_second": 4.633, "eval_wer": 0.48828206235702515, "step": 900 }, { "epoch": 1.4144271570014144, "grad_norm": 0.9860969185829163, "learning_rate": 0.0002853043047996041, "loss": 0.811, "step": 1000 }, { "epoch": 1.4144271570014144, "eval_loss": 0.5500049591064453, "eval_runtime": 153.3918, "eval_samples_per_second": 36.873, "eval_steps_per_second": 4.609, "eval_wer": 0.45078466189950567, "step": 1000 }, { "epoch": 1.5558698727015559, "eval_loss": 0.5231846570968628, "eval_runtime": 151.7862, "eval_samples_per_second": 37.263, "eval_steps_per_second": 4.658, "eval_wer": 0.41418310377373585, "step": 1100 }, { "epoch": 1.6973125884016973, "eval_loss": 0.518621027469635, "eval_runtime": 152.9089, "eval_samples_per_second": 36.989, "eval_steps_per_second": 4.624, "eval_wer": 0.4064724608468909, "step": 1200 }, { "epoch": 1.8387553041018387, "eval_loss": 0.49534252285957336, "eval_runtime": 153.7928, "eval_samples_per_second": 36.777, "eval_steps_per_second": 4.597, "eval_wer": 0.3929388427636736, "step": 1300 }, { "epoch": 1.9801980198019802, "eval_loss": 0.48800522089004517, "eval_runtime": 152.9541, "eval_samples_per_second": 36.978, "eval_steps_per_second": 4.622, "eval_wer": 0.39279486810321385, "step": 1400 }, { "epoch": 2.1216407355021216, "grad_norm": 0.8855465650558472, "learning_rate": 0.00027054923305294405, "loss": 0.6459, "step": 1500 }, { "epoch": 2.1216407355021216, "eval_loss": 0.46446114778518677, "eval_runtime": 152.609, "eval_samples_per_second": 37.062, "eval_steps_per_second": 4.633, "eval_wer": 0.3691990209723089, "step": 1500 }, { "epoch": 2.263083451202263, "eval_loss": 0.4666256904602051, "eval_runtime": 153.127, "eval_samples_per_second": 36.937, "eval_steps_per_second": 4.617, "eval_wer": 0.3585928876517733, "step": 1600 }, { "epoch": 2.4045261669024045, "eval_loss": 0.45017901062965393, "eval_runtime": 154.5149, "eval_samples_per_second": 36.605, "eval_steps_per_second": 4.576, "eval_wer": 0.35934475532306315, "step": 1700 }, { "epoch": 2.545968882602546, "eval_loss": 0.45280084013938904, "eval_runtime": 152.9066, "eval_samples_per_second": 36.99, "eval_steps_per_second": 4.624, "eval_wer": 0.3637759754283246, "step": 1800 }, { "epoch": 2.6874115983026874, "eval_loss": 0.46647289395332336, "eval_runtime": 153.103, "eval_samples_per_second": 36.942, "eval_steps_per_second": 4.618, "eval_wer": 0.392618899073763, "step": 1900 }, { "epoch": 2.828854314002829, "grad_norm": 0.6227492690086365, "learning_rate": 0.00025579416130628403, "loss": 0.5306, "step": 2000 }, { "epoch": 2.828854314002829, "eval_loss": 0.4328605532646179, "eval_runtime": 153.5492, "eval_samples_per_second": 36.835, "eval_steps_per_second": 4.604, "eval_wer": 0.3505143094815312, "step": 2000 }, { "epoch": 2.9702970297029703, "eval_loss": 0.4245360791683197, "eval_runtime": 153.9467, "eval_samples_per_second": 36.74, "eval_steps_per_second": 4.592, "eval_wer": 0.3373806210107021, "step": 2100 }, { "epoch": 3.1117397454031117, "eval_loss": 0.4376748204231262, "eval_runtime": 152.9335, "eval_samples_per_second": 36.983, "eval_steps_per_second": 4.623, "eval_wer": 0.3340372094511366, "step": 2200 }, { "epoch": 3.253182461103253, "eval_loss": 0.4271674156188965, "eval_runtime": 153.1738, "eval_samples_per_second": 36.925, "eval_steps_per_second": 4.616, "eval_wer": 0.33373326294572153, "step": 2300 }, { "epoch": 3.3946251768033946, "eval_loss": 0.43350949883461, "eval_runtime": 153.2683, "eval_samples_per_second": 36.903, "eval_steps_per_second": 4.613, "eval_wer": 0.332597462846539, "step": 2400 }, { "epoch": 3.536067892503536, "grad_norm": 0.6211841106414795, "learning_rate": 0.0002410094012864918, "loss": 0.4628, "step": 2500 }, { "epoch": 3.536067892503536, "eval_loss": 0.42679545283317566, "eval_runtime": 154.4777, "eval_samples_per_second": 36.614, "eval_steps_per_second": 4.577, "eval_wer": 0.3274783638079698, "step": 2500 }, { "epoch": 3.6775106082036775, "eval_loss": 0.4502430558204651, "eval_runtime": 154.6864, "eval_samples_per_second": 36.564, "eval_steps_per_second": 4.571, "eval_wer": 0.34091599878421397, "step": 2600 }, { "epoch": 3.818953323903819, "eval_loss": 0.6344878673553467, "eval_runtime": 153.1131, "eval_samples_per_second": 36.94, "eval_steps_per_second": 4.618, "eval_wer": 0.43904272847978754, "step": 2700 }, { "epoch": 3.9603960396039604, "eval_loss": 1.0202795267105103, "eval_runtime": 153.5324, "eval_samples_per_second": 36.839, "eval_steps_per_second": 4.605, "eval_wer": 0.6403193038025308, "step": 2800 }, { "epoch": 4.101838755304102, "eval_loss": 1.2207801342010498, "eval_runtime": 154.3697, "eval_samples_per_second": 36.639, "eval_steps_per_second": 4.58, "eval_wer": 0.7921805762185855, "step": 2900 }, { "epoch": 4.243281471004243, "grad_norm": 2.4671809673309326, "learning_rate": 0.00022619495299356753, "loss": 0.8685, "step": 3000 }, { "epoch": 4.243281471004243, "eval_loss": 1.101838231086731, "eval_runtime": 155.2474, "eval_samples_per_second": 36.432, "eval_steps_per_second": 4.554, "eval_wer": 0.7387019884500328, "step": 3000 }, { "epoch": 4.384724186704385, "eval_loss": 1.24972665309906, "eval_runtime": 154.4564, "eval_samples_per_second": 36.619, "eval_steps_per_second": 4.577, "eval_wer": 0.8061941098366687, "step": 3100 }, { "epoch": 4.526166902404526, "eval_loss": 1.6164859533309937, "eval_runtime": 153.9159, "eval_samples_per_second": 36.747, "eval_steps_per_second": 4.593, "eval_wer": 0.9616227543952265, "step": 3200 }, { "epoch": 4.667609618104668, "eval_loss": 1.4655081033706665, "eval_runtime": 153.645, "eval_samples_per_second": 36.812, "eval_steps_per_second": 4.602, "eval_wer": 0.9216617875253955, "step": 3300 }, { "epoch": 4.809052333804809, "eval_loss": 1.0288450717926025, "eval_runtime": 152.6496, "eval_samples_per_second": 37.052, "eval_steps_per_second": 4.632, "eval_wer": 0.7464606229303643, "step": 3400 }, { "epoch": 4.9504950495049505, "grad_norm": 2.009023666381836, "learning_rate": 0.00021143988124690746, "loss": 1.3918, "step": 3500 }, { "epoch": 4.9504950495049505, "eval_loss": 0.9067263603210449, "eval_runtime": 156.1428, "eval_samples_per_second": 36.223, "eval_steps_per_second": 4.528, "eval_wer": 0.5948393082817424, "step": 3500 }, { "epoch": 5.091937765205092, "eval_loss": 0.9486163258552551, "eval_runtime": 154.3538, "eval_samples_per_second": 36.643, "eval_steps_per_second": 4.58, "eval_wer": 0.6352801906864392, "step": 3600 }, { "epoch": 5.233380480905233, "eval_loss": 0.8674383163452148, "eval_runtime": 155.8543, "eval_samples_per_second": 36.29, "eval_steps_per_second": 4.536, "eval_wer": 0.5428324614867783, "step": 3700 }, { "epoch": 5.374823196605375, "eval_loss": 0.9402504563331604, "eval_runtime": 154.1915, "eval_samples_per_second": 36.682, "eval_steps_per_second": 4.585, "eval_wer": 0.5792900449520885, "step": 3800 }, { "epoch": 5.516265912305516, "eval_loss": 0.948098361492157, "eval_runtime": 159.4305, "eval_samples_per_second": 35.476, "eval_steps_per_second": 4.435, "eval_wer": 0.5763625601894067, "step": 3900 }, { "epoch": 5.657708628005658, "grad_norm": 1.2370288372039795, "learning_rate": 0.0001966848095002474, "loss": 1.0402, "step": 4000 }, { "epoch": 5.657708628005658, "eval_loss": 1.0175639390945435, "eval_runtime": 154.2179, "eval_samples_per_second": 36.675, "eval_steps_per_second": 4.584, "eval_wer": 0.8256946777367183, "step": 4000 }, { "epoch": 5.799151343705799, "eval_loss": 0.9857003092765808, "eval_runtime": 154.4099, "eval_samples_per_second": 36.63, "eval_steps_per_second": 4.579, "eval_wer": 0.6342883652477164, "step": 4100 }, { "epoch": 5.9405940594059405, "eval_loss": 1.3289028406143188, "eval_runtime": 155.7394, "eval_samples_per_second": 36.317, "eval_steps_per_second": 4.54, "eval_wer": 0.9014093519540561, "step": 4200 }, { "epoch": 6.082036775106082, "eval_loss": 2.0890820026397705, "eval_runtime": 154.2749, "eval_samples_per_second": 36.662, "eval_steps_per_second": 4.583, "eval_wer": 0.7125305946153477, "step": 4300 }, { "epoch": 6.223479490806223, "eval_loss": 1.256324291229248, "eval_runtime": 154.5375, "eval_samples_per_second": 36.6, "eval_steps_per_second": 4.575, "eval_wer": 0.7696085488953944, "step": 4400 }, { "epoch": 6.364922206506365, "grad_norm": 0.9906980395317078, "learning_rate": 0.0001819000494804552, "loss": 1.2886, "step": 4500 }, { "epoch": 6.364922206506365, "eval_loss": 1.1441457271575928, "eval_runtime": 155.0449, "eval_samples_per_second": 36.48, "eval_steps_per_second": 4.56, "eval_wer": 0.692726080209883, "step": 4500 }, { "epoch": 6.506364922206506, "eval_loss": 1.0626095533370972, "eval_runtime": 155.5445, "eval_samples_per_second": 36.363, "eval_steps_per_second": 4.545, "eval_wer": 0.6573083137367823, "step": 4600 }, { "epoch": 6.647807637906648, "eval_loss": 0.9997339248657227, "eval_runtime": 155.8362, "eval_samples_per_second": 36.295, "eval_steps_per_second": 4.537, "eval_wer": 0.6422869574954808, "step": 4700 }, { "epoch": 6.789250353606789, "eval_loss": 0.9813728928565979, "eval_runtime": 155.2999, "eval_samples_per_second": 36.42, "eval_steps_per_second": 4.552, "eval_wer": 0.6380317064196701, "step": 4800 }, { "epoch": 6.930693069306931, "eval_loss": 1.0955251455307007, "eval_runtime": 153.746, "eval_samples_per_second": 36.788, "eval_steps_per_second": 4.598, "eval_wer": 0.7651133400521508, "step": 4900 }, { "epoch": 7.072135785007072, "grad_norm": 1.6002442836761475, "learning_rate": 0.00016708560118753091, "loss": 1.0984, "step": 5000 }, { "epoch": 7.072135785007072, "eval_loss": 0.9212619066238403, "eval_runtime": 155.7654, "eval_samples_per_second": 36.311, "eval_steps_per_second": 4.539, "eval_wer": 0.5882964598230711, "step": 5000 }, { "epoch": 7.2135785007072135, "eval_loss": 0.8884870409965515, "eval_runtime": 153.6804, "eval_samples_per_second": 36.804, "eval_steps_per_second": 4.6, "eval_wer": 0.5932715842011806, "step": 5100 }, { "epoch": 7.355021216407355, "eval_loss": 0.900116503238678, "eval_runtime": 155.054, "eval_samples_per_second": 36.478, "eval_steps_per_second": 4.56, "eval_wer": 0.5898641839036329, "step": 5200 }, { "epoch": 7.496463932107496, "eval_loss": 0.8783684372901917, "eval_runtime": 156.2294, "eval_samples_per_second": 36.203, "eval_steps_per_second": 4.525, "eval_wer": 0.5858968821487418, "step": 5300 }, { "epoch": 7.637906647807638, "eval_loss": 0.9072028398513794, "eval_runtime": 154.0671, "eval_samples_per_second": 36.711, "eval_steps_per_second": 4.589, "eval_wer": 0.5897522036121643, "step": 5400 }, { "epoch": 7.779349363507779, "grad_norm": 1.3490877151489258, "learning_rate": 0.00015230084116773872, "loss": 0.9659, "step": 5500 }, { "epoch": 7.779349363507779, "eval_loss": 0.8811922669410706, "eval_runtime": 153.8934, "eval_samples_per_second": 36.753, "eval_steps_per_second": 4.594, "eval_wer": 0.5841051974852426, "step": 5500 }, { "epoch": 7.920792079207921, "eval_loss": 0.891165018081665, "eval_runtime": 153.7325, "eval_samples_per_second": 36.791, "eval_steps_per_second": 4.599, "eval_wer": 0.5855129497208491, "step": 5600 }, { "epoch": 8.062234794908063, "eval_loss": 0.8815582990646362, "eval_runtime": 154.6799, "eval_samples_per_second": 36.566, "eval_steps_per_second": 4.571, "eval_wer": 0.5807137943721905, "step": 5700 }, { "epoch": 8.203677510608204, "eval_loss": 0.891440749168396, "eval_runtime": 155.4972, "eval_samples_per_second": 36.374, "eval_steps_per_second": 4.547, "eval_wer": 0.5803138647598023, "step": 5800 }, { "epoch": 8.345120226308346, "eval_loss": 0.8956438899040222, "eval_runtime": 154.6865, "eval_samples_per_second": 36.564, "eval_steps_per_second": 4.571, "eval_wer": 0.5810337380621011, "step": 5900 }, { "epoch": 8.486562942008486, "grad_norm": 1.257432460784912, "learning_rate": 0.00013754576942107867, "loss": 0.9679, "step": 6000 }, { "epoch": 8.486562942008486, "eval_loss": 0.9162164330482483, "eval_runtime": 155.1949, "eval_samples_per_second": 36.444, "eval_steps_per_second": 4.556, "eval_wer": 0.5780262673769416, "step": 6000 }, { "epoch": 8.628005657708629, "eval_loss": 0.9409377574920654, "eval_runtime": 154.6732, "eval_samples_per_second": 36.567, "eval_steps_per_second": 4.571, "eval_wer": 0.5810177408776055, "step": 6100 }, { "epoch": 8.76944837340877, "eval_loss": 0.9370973706245422, "eval_runtime": 155.5326, "eval_samples_per_second": 36.365, "eval_steps_per_second": 4.546, "eval_wer": 0.5780742589304283, "step": 6200 }, { "epoch": 8.910891089108912, "eval_loss": 0.941677987575531, "eval_runtime": 155.4822, "eval_samples_per_second": 36.377, "eval_steps_per_second": 4.547, "eval_wer": 0.5790020956311689, "step": 6300 }, { "epoch": 9.052333804809052, "eval_loss": 0.9663541913032532, "eval_runtime": 155.844, "eval_samples_per_second": 36.293, "eval_steps_per_second": 4.537, "eval_wer": 0.5783782054358433, "step": 6400 }, { "epoch": 9.193776520509195, "grad_norm": 1.5867938995361328, "learning_rate": 0.00012273132112815437, "loss": 1.0241, "step": 6500 }, { "epoch": 9.193776520509195, "eval_loss": 0.9720383286476135, "eval_runtime": 155.604, "eval_samples_per_second": 36.349, "eval_steps_per_second": 4.544, "eval_wer": 0.5775143574730848, "step": 6500 }, { "epoch": 9.335219236209335, "eval_loss": 0.9840742349624634, "eval_runtime": 155.8175, "eval_samples_per_second": 36.299, "eval_steps_per_second": 4.537, "eval_wer": 0.5783942026203388, "step": 6600 }, { "epoch": 9.476661951909477, "eval_loss": 0.9574136137962341, "eval_runtime": 155.2609, "eval_samples_per_second": 36.429, "eval_steps_per_second": 4.554, "eval_wer": 0.5886803922509638, "step": 6700 }, { "epoch": 9.618104667609618, "eval_loss": 1.0725222826004028, "eval_runtime": 154.2708, "eval_samples_per_second": 36.663, "eval_steps_per_second": 4.583, "eval_wer": 0.606837196653389, "step": 6800 }, { "epoch": 9.75954738330976, "eval_loss": 1.0362112522125244, "eval_runtime": 155.3381, "eval_samples_per_second": 36.411, "eval_steps_per_second": 4.551, "eval_wer": 0.5999584073203116, "step": 6900 }, { "epoch": 9.900990099009901, "grad_norm": 0.6058325171470642, "learning_rate": 0.00010794656110836219, "loss": 1.0797, "step": 7000 }, { "epoch": 9.900990099009901, "eval_loss": 1.0116764307022095, "eval_runtime": 155.5442, "eval_samples_per_second": 36.363, "eval_steps_per_second": 4.545, "eval_wer": 0.5914319079841948, "step": 7000 }, { "epoch": 10.042432814710043, "eval_loss": 0.9563263058662415, "eval_runtime": 155.5047, "eval_samples_per_second": 36.372, "eval_steps_per_second": 4.546, "eval_wer": 0.6058293740301707, "step": 7100 }, { "epoch": 10.183875530410184, "eval_loss": 0.9663692116737366, "eval_runtime": 155.3578, "eval_samples_per_second": 36.406, "eval_steps_per_second": 4.551, "eval_wer": 0.5978307817824063, "step": 7200 }, { "epoch": 10.325318246110326, "eval_loss": 1.0209406614303589, "eval_runtime": 155.4685, "eval_samples_per_second": 36.38, "eval_steps_per_second": 4.548, "eval_wer": 0.6022140103341812, "step": 7300 }, { "epoch": 10.466760961810467, "eval_loss": 0.9848981499671936, "eval_runtime": 156.2008, "eval_samples_per_second": 36.21, "eval_steps_per_second": 4.526, "eval_wer": 0.5974788437235047, "step": 7400 }, { "epoch": 10.608203677510609, "grad_norm": 0.0, "learning_rate": 9.319148936170212e-05, "loss": 1.0701, "step": 7500 }, { "epoch": 10.608203677510609, "eval_loss": 0.9718888401985168, "eval_runtime": 156.5494, "eval_samples_per_second": 36.129, "eval_steps_per_second": 4.516, "eval_wer": 0.6057013965542064, "step": 7500 }, { "epoch": 10.74964639321075, "eval_loss": 0.966968834400177, "eval_runtime": 155.045, "eval_samples_per_second": 36.48, "eval_steps_per_second": 4.56, "eval_wer": 0.6122602421973733, "step": 7600 }, { "epoch": 10.891089108910892, "eval_loss": 0.9668737053871155, "eval_runtime": 156.2212, "eval_samples_per_second": 36.205, "eval_steps_per_second": 4.526, "eval_wer": 0.6124682055958152, "step": 7700 }, { "epoch": 11.032531824611032, "eval_loss": 0.9668737053871155, "eval_runtime": 156.7045, "eval_samples_per_second": 36.093, "eval_steps_per_second": 4.512, "eval_wer": 0.6124682055958152, "step": 7800 }, { "epoch": 11.173974540311175, "eval_loss": 0.9668737053871155, "eval_runtime": 156.3156, "eval_samples_per_second": 36.183, "eval_steps_per_second": 4.523, "eval_wer": 0.6124682055958152, "step": 7900 }, { "epoch": 11.315417256011315, "grad_norm": 0.0, "learning_rate": 7.843641761504205e-05, "loss": 1.0518, "step": 8000 }, { "epoch": 11.315417256011315, "eval_loss": 0.9668737053871155, "eval_runtime": 156.4393, "eval_samples_per_second": 36.155, "eval_steps_per_second": 4.519, "eval_wer": 0.6124682055958152, "step": 8000 }, { "epoch": 11.456859971711458, "eval_loss": 0.9668737053871155, "eval_runtime": 155.12, "eval_samples_per_second": 36.462, "eval_steps_per_second": 4.558, "eval_wer": 0.6124682055958152, "step": 8100 }, { "epoch": 11.598302687411598, "eval_loss": 0.9668737053871155, "eval_runtime": 155.4065, "eval_samples_per_second": 36.395, "eval_steps_per_second": 4.549, "eval_wer": 0.6124682055958152, "step": 8200 }, { "epoch": 11.73974540311174, "eval_loss": 0.9668737053871155, "eval_runtime": 154.2056, "eval_samples_per_second": 36.678, "eval_steps_per_second": 4.585, "eval_wer": 0.6124682055958152, "step": 8300 }, { "epoch": 11.881188118811881, "eval_loss": 0.9668737053871155, "eval_runtime": 155.9204, "eval_samples_per_second": 36.275, "eval_steps_per_second": 4.534, "eval_wer": 0.6124682055958152, "step": 8400 }, { "epoch": 12.022630834512023, "grad_norm": 0.0, "learning_rate": 6.362196932211776e-05, "loss": 1.0594, "step": 8500 }, { "epoch": 12.022630834512023, "eval_loss": 0.9668737053871155, "eval_runtime": 154.4966, "eval_samples_per_second": 36.609, "eval_steps_per_second": 4.576, "eval_wer": 0.6124682055958152, "step": 8500 }, { "epoch": 12.164073550212164, "eval_loss": 0.9668737053871155, "eval_runtime": 154.0012, "eval_samples_per_second": 36.727, "eval_steps_per_second": 4.591, "eval_wer": 0.6124682055958152, "step": 8600 }, { "epoch": 12.305516265912306, "eval_loss": 0.9668737053871155, "eval_runtime": 155.297, "eval_samples_per_second": 36.421, "eval_steps_per_second": 4.553, "eval_wer": 0.6124682055958152, "step": 8700 }, { "epoch": 12.446958981612447, "eval_loss": 0.9668737053871155, "eval_runtime": 156.7077, "eval_samples_per_second": 36.093, "eval_steps_per_second": 4.512, "eval_wer": 0.6124682055958152, "step": 8800 }, { "epoch": 12.58840169731259, "eval_loss": 0.9668737053871155, "eval_runtime": 155.8279, "eval_samples_per_second": 36.296, "eval_steps_per_second": 4.537, "eval_wer": 0.6124682055958152, "step": 8900 }, { "epoch": 12.72984441301273, "grad_norm": 0.0, "learning_rate": 4.8866897575457694e-05, "loss": 1.0584, "step": 9000 }, { "epoch": 12.72984441301273, "eval_loss": 0.9668737053871155, "eval_runtime": 155.3599, "eval_samples_per_second": 36.406, "eval_steps_per_second": 4.551, "eval_wer": 0.6124682055958152, "step": 9000 }, { "epoch": 12.871287128712872, "eval_loss": 0.9668737053871155, "eval_runtime": 155.3076, "eval_samples_per_second": 36.418, "eval_steps_per_second": 4.552, "eval_wer": 0.6124682055958152, "step": 9100 }, { "epoch": 13.012729844413013, "eval_loss": 0.9668737053871155, "eval_runtime": 155.448, "eval_samples_per_second": 36.385, "eval_steps_per_second": 4.548, "eval_wer": 0.6124682055958152, "step": 9200 }, { "epoch": 13.154172560113155, "eval_loss": 0.9668737053871155, "eval_runtime": 155.6022, "eval_samples_per_second": 36.349, "eval_steps_per_second": 4.544, "eval_wer": 0.6124682055958152, "step": 9300 }, { "epoch": 13.295615275813295, "eval_loss": 0.9668737053871155, "eval_runtime": 154.7567, "eval_samples_per_second": 36.548, "eval_steps_per_second": 4.568, "eval_wer": 0.6124682055958152, "step": 9400 }, { "epoch": 13.437057991513438, "grad_norm": 0.0, "learning_rate": 3.408213755566551e-05, "loss": 1.0556, "step": 9500 }, { "epoch": 13.437057991513438, "eval_loss": 0.9668737053871155, "eval_runtime": 154.3187, "eval_samples_per_second": 36.651, "eval_steps_per_second": 4.581, "eval_wer": 0.6124682055958152, "step": 9500 }, { "epoch": 13.578500707213578, "eval_loss": 0.9668737053871155, "eval_runtime": 155.5249, "eval_samples_per_second": 36.367, "eval_steps_per_second": 4.546, "eval_wer": 0.6124682055958152, "step": 9600 }, { "epoch": 13.71994342291372, "eval_loss": 0.9668737053871155, "eval_runtime": 155.212, "eval_samples_per_second": 36.44, "eval_steps_per_second": 4.555, "eval_wer": 0.6124682055958152, "step": 9700 }, { "epoch": 13.861386138613861, "eval_loss": 0.9668737053871155, "eval_runtime": 154.6321, "eval_samples_per_second": 36.577, "eval_steps_per_second": 4.572, "eval_wer": 0.6124682055958152, "step": 9800 }, { "epoch": 14.002828854314004, "eval_loss": 0.9668737053871155, "eval_runtime": 155.2988, "eval_samples_per_second": 36.42, "eval_steps_per_second": 4.553, "eval_wer": 0.6124682055958152, "step": 9900 }, { "epoch": 14.144271570014144, "grad_norm": 0.0, "learning_rate": 1.9297377535873327e-05, "loss": 1.0511, "step": 10000 }, { "epoch": 14.144271570014144, "eval_loss": 0.9668737053871155, "eval_runtime": 155.9222, "eval_samples_per_second": 36.274, "eval_steps_per_second": 4.534, "eval_wer": 0.6124682055958152, "step": 10000 }, { "epoch": 14.285714285714286, "eval_loss": 0.9668737053871155, "eval_runtime": 156.6698, "eval_samples_per_second": 36.101, "eval_steps_per_second": 4.513, "eval_wer": 0.6124682055958152, "step": 10100 }, { "epoch": 14.427157001414427, "eval_loss": 0.9668737053871155, "eval_runtime": 155.8474, "eval_samples_per_second": 36.292, "eval_steps_per_second": 4.536, "eval_wer": 0.6124682055958152, "step": 10200 }, { "epoch": 14.56859971711457, "eval_loss": 0.9668737053871155, "eval_runtime": 154.9914, "eval_samples_per_second": 36.492, "eval_steps_per_second": 4.562, "eval_wer": 0.6124682055958152, "step": 10300 }, { "epoch": 14.71004243281471, "eval_loss": 0.9668737053871155, "eval_runtime": 155.7638, "eval_samples_per_second": 36.311, "eval_steps_per_second": 4.539, "eval_wer": 0.6124682055958152, "step": 10400 }, { "epoch": 14.851485148514852, "grad_norm": 0.0, "learning_rate": 4.512617516081148e-06, "loss": 1.0585, "step": 10500 }, { "epoch": 14.851485148514852, "eval_loss": 0.9668737053871155, "eval_runtime": 154.954, "eval_samples_per_second": 36.501, "eval_steps_per_second": 4.563, "eval_wer": 0.6124682055958152, "step": 10500 }, { "epoch": 14.992927864214993, "eval_loss": 0.9668737053871155, "eval_runtime": 154.7299, "eval_samples_per_second": 36.554, "eval_steps_per_second": 4.569, "eval_wer": 0.6124682055958152, "step": 10600 }, { "epoch": 15.0, "step": 10605, "total_flos": 4.036432448909298e+19, "train_loss": 1.1022147617942597, "train_runtime": 28048.236, "train_samples_per_second": 12.098, "train_steps_per_second": 0.378 } ], "logging_steps": 500, "max_steps": 10605, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 400, "total_flos": 4.036432448909298e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }