|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 100, |
|
"global_step": 10605, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14144271570014144, |
|
"eval_loss": 3.7426819801330566, |
|
"eval_runtime": 154.1912, |
|
"eval_samples_per_second": 36.682, |
|
"eval_steps_per_second": 4.585, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2828854314002829, |
|
"eval_loss": 2.9179046154022217, |
|
"eval_runtime": 149.8513, |
|
"eval_samples_per_second": 37.744, |
|
"eval_steps_per_second": 4.718, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4243281471004243, |
|
"eval_loss": 2.8035507202148438, |
|
"eval_runtime": 151.9434, |
|
"eval_samples_per_second": 37.224, |
|
"eval_steps_per_second": 4.653, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5657708628005658, |
|
"eval_loss": 1.2195814847946167, |
|
"eval_runtime": 152.8075, |
|
"eval_samples_per_second": 37.014, |
|
"eval_steps_per_second": 4.627, |
|
"eval_wer": 0.8933627681528051, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"grad_norm": 2.533352851867676, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 3.574, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7072135785007072, |
|
"eval_loss": 0.9860211610794067, |
|
"eval_runtime": 151.7482, |
|
"eval_samples_per_second": 37.272, |
|
"eval_steps_per_second": 4.659, |
|
"eval_wer": 0.7276159395946313, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8486562942008486, |
|
"eval_loss": 0.8391533493995667, |
|
"eval_runtime": 153.0126, |
|
"eval_samples_per_second": 36.964, |
|
"eval_steps_per_second": 4.621, |
|
"eval_wer": 0.6504295244037049, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9900990099009901, |
|
"eval_loss": 0.7803803086280823, |
|
"eval_runtime": 152.7414, |
|
"eval_samples_per_second": 37.03, |
|
"eval_steps_per_second": 4.629, |
|
"eval_wer": 0.6029178864519844, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1315417256011315, |
|
"eval_loss": 0.6122242212295532, |
|
"eval_runtime": 153.2031, |
|
"eval_samples_per_second": 36.918, |
|
"eval_steps_per_second": 4.615, |
|
"eval_wer": 0.4909056006142919, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.272984441301273, |
|
"eval_loss": 0.5901117920875549, |
|
"eval_runtime": 152.5961, |
|
"eval_samples_per_second": 37.065, |
|
"eval_steps_per_second": 4.633, |
|
"eval_wer": 0.48828206235702515, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"grad_norm": 0.9860969185829163, |
|
"learning_rate": 0.0002853043047996041, |
|
"loss": 0.811, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4144271570014144, |
|
"eval_loss": 0.5500049591064453, |
|
"eval_runtime": 153.3918, |
|
"eval_samples_per_second": 36.873, |
|
"eval_steps_per_second": 4.609, |
|
"eval_wer": 0.45078466189950567, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5558698727015559, |
|
"eval_loss": 0.5231846570968628, |
|
"eval_runtime": 151.7862, |
|
"eval_samples_per_second": 37.263, |
|
"eval_steps_per_second": 4.658, |
|
"eval_wer": 0.41418310377373585, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6973125884016973, |
|
"eval_loss": 0.518621027469635, |
|
"eval_runtime": 152.9089, |
|
"eval_samples_per_second": 36.989, |
|
"eval_steps_per_second": 4.624, |
|
"eval_wer": 0.4064724608468909, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8387553041018387, |
|
"eval_loss": 0.49534252285957336, |
|
"eval_runtime": 153.7928, |
|
"eval_samples_per_second": 36.777, |
|
"eval_steps_per_second": 4.597, |
|
"eval_wer": 0.3929388427636736, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.9801980198019802, |
|
"eval_loss": 0.48800522089004517, |
|
"eval_runtime": 152.9541, |
|
"eval_samples_per_second": 36.978, |
|
"eval_steps_per_second": 4.622, |
|
"eval_wer": 0.39279486810321385, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"grad_norm": 0.8855465650558472, |
|
"learning_rate": 0.00027054923305294405, |
|
"loss": 0.6459, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1216407355021216, |
|
"eval_loss": 0.46446114778518677, |
|
"eval_runtime": 152.609, |
|
"eval_samples_per_second": 37.062, |
|
"eval_steps_per_second": 4.633, |
|
"eval_wer": 0.3691990209723089, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.263083451202263, |
|
"eval_loss": 0.4666256904602051, |
|
"eval_runtime": 153.127, |
|
"eval_samples_per_second": 36.937, |
|
"eval_steps_per_second": 4.617, |
|
"eval_wer": 0.3585928876517733, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.4045261669024045, |
|
"eval_loss": 0.45017901062965393, |
|
"eval_runtime": 154.5149, |
|
"eval_samples_per_second": 36.605, |
|
"eval_steps_per_second": 4.576, |
|
"eval_wer": 0.35934475532306315, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.545968882602546, |
|
"eval_loss": 0.45280084013938904, |
|
"eval_runtime": 152.9066, |
|
"eval_samples_per_second": 36.99, |
|
"eval_steps_per_second": 4.624, |
|
"eval_wer": 0.3637759754283246, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6874115983026874, |
|
"eval_loss": 0.46647289395332336, |
|
"eval_runtime": 153.103, |
|
"eval_samples_per_second": 36.942, |
|
"eval_steps_per_second": 4.618, |
|
"eval_wer": 0.392618899073763, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"grad_norm": 0.6227492690086365, |
|
"learning_rate": 0.00025579416130628403, |
|
"loss": 0.5306, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.828854314002829, |
|
"eval_loss": 0.4328605532646179, |
|
"eval_runtime": 153.5492, |
|
"eval_samples_per_second": 36.835, |
|
"eval_steps_per_second": 4.604, |
|
"eval_wer": 0.3505143094815312, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.9702970297029703, |
|
"eval_loss": 0.4245360791683197, |
|
"eval_runtime": 153.9467, |
|
"eval_samples_per_second": 36.74, |
|
"eval_steps_per_second": 4.592, |
|
"eval_wer": 0.3373806210107021, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.1117397454031117, |
|
"eval_loss": 0.4376748204231262, |
|
"eval_runtime": 152.9335, |
|
"eval_samples_per_second": 36.983, |
|
"eval_steps_per_second": 4.623, |
|
"eval_wer": 0.3340372094511366, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.253182461103253, |
|
"eval_loss": 0.4271674156188965, |
|
"eval_runtime": 153.1738, |
|
"eval_samples_per_second": 36.925, |
|
"eval_steps_per_second": 4.616, |
|
"eval_wer": 0.33373326294572153, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.3946251768033946, |
|
"eval_loss": 0.43350949883461, |
|
"eval_runtime": 153.2683, |
|
"eval_samples_per_second": 36.903, |
|
"eval_steps_per_second": 4.613, |
|
"eval_wer": 0.332597462846539, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"grad_norm": 0.6211841106414795, |
|
"learning_rate": 0.0002410094012864918, |
|
"loss": 0.4628, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.536067892503536, |
|
"eval_loss": 0.42679545283317566, |
|
"eval_runtime": 154.4777, |
|
"eval_samples_per_second": 36.614, |
|
"eval_steps_per_second": 4.577, |
|
"eval_wer": 0.3274783638079698, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.6775106082036775, |
|
"eval_loss": 0.4502430558204651, |
|
"eval_runtime": 154.6864, |
|
"eval_samples_per_second": 36.564, |
|
"eval_steps_per_second": 4.571, |
|
"eval_wer": 0.34091599878421397, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.818953323903819, |
|
"eval_loss": 0.6344878673553467, |
|
"eval_runtime": 153.1131, |
|
"eval_samples_per_second": 36.94, |
|
"eval_steps_per_second": 4.618, |
|
"eval_wer": 0.43904272847978754, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.9603960396039604, |
|
"eval_loss": 1.0202795267105103, |
|
"eval_runtime": 153.5324, |
|
"eval_samples_per_second": 36.839, |
|
"eval_steps_per_second": 4.605, |
|
"eval_wer": 0.6403193038025308, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.101838755304102, |
|
"eval_loss": 1.2207801342010498, |
|
"eval_runtime": 154.3697, |
|
"eval_samples_per_second": 36.639, |
|
"eval_steps_per_second": 4.58, |
|
"eval_wer": 0.7921805762185855, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.243281471004243, |
|
"grad_norm": 2.4671809673309326, |
|
"learning_rate": 0.00022619495299356753, |
|
"loss": 0.8685, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.243281471004243, |
|
"eval_loss": 1.101838231086731, |
|
"eval_runtime": 155.2474, |
|
"eval_samples_per_second": 36.432, |
|
"eval_steps_per_second": 4.554, |
|
"eval_wer": 0.7387019884500328, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.384724186704385, |
|
"eval_loss": 1.24972665309906, |
|
"eval_runtime": 154.4564, |
|
"eval_samples_per_second": 36.619, |
|
"eval_steps_per_second": 4.577, |
|
"eval_wer": 0.8061941098366687, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.526166902404526, |
|
"eval_loss": 1.6164859533309937, |
|
"eval_runtime": 153.9159, |
|
"eval_samples_per_second": 36.747, |
|
"eval_steps_per_second": 4.593, |
|
"eval_wer": 0.9616227543952265, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.667609618104668, |
|
"eval_loss": 1.4655081033706665, |
|
"eval_runtime": 153.645, |
|
"eval_samples_per_second": 36.812, |
|
"eval_steps_per_second": 4.602, |
|
"eval_wer": 0.9216617875253955, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.809052333804809, |
|
"eval_loss": 1.0288450717926025, |
|
"eval_runtime": 152.6496, |
|
"eval_samples_per_second": 37.052, |
|
"eval_steps_per_second": 4.632, |
|
"eval_wer": 0.7464606229303643, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.9504950495049505, |
|
"grad_norm": 2.009023666381836, |
|
"learning_rate": 0.00021143988124690746, |
|
"loss": 1.3918, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.9504950495049505, |
|
"eval_loss": 0.9067263603210449, |
|
"eval_runtime": 156.1428, |
|
"eval_samples_per_second": 36.223, |
|
"eval_steps_per_second": 4.528, |
|
"eval_wer": 0.5948393082817424, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.091937765205092, |
|
"eval_loss": 0.9486163258552551, |
|
"eval_runtime": 154.3538, |
|
"eval_samples_per_second": 36.643, |
|
"eval_steps_per_second": 4.58, |
|
"eval_wer": 0.6352801906864392, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.233380480905233, |
|
"eval_loss": 0.8674383163452148, |
|
"eval_runtime": 155.8543, |
|
"eval_samples_per_second": 36.29, |
|
"eval_steps_per_second": 4.536, |
|
"eval_wer": 0.5428324614867783, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.374823196605375, |
|
"eval_loss": 0.9402504563331604, |
|
"eval_runtime": 154.1915, |
|
"eval_samples_per_second": 36.682, |
|
"eval_steps_per_second": 4.585, |
|
"eval_wer": 0.5792900449520885, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.516265912305516, |
|
"eval_loss": 0.948098361492157, |
|
"eval_runtime": 159.4305, |
|
"eval_samples_per_second": 35.476, |
|
"eval_steps_per_second": 4.435, |
|
"eval_wer": 0.5763625601894067, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.657708628005658, |
|
"grad_norm": 1.2370288372039795, |
|
"learning_rate": 0.0001966848095002474, |
|
"loss": 1.0402, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.657708628005658, |
|
"eval_loss": 1.0175639390945435, |
|
"eval_runtime": 154.2179, |
|
"eval_samples_per_second": 36.675, |
|
"eval_steps_per_second": 4.584, |
|
"eval_wer": 0.8256946777367183, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.799151343705799, |
|
"eval_loss": 0.9857003092765808, |
|
"eval_runtime": 154.4099, |
|
"eval_samples_per_second": 36.63, |
|
"eval_steps_per_second": 4.579, |
|
"eval_wer": 0.6342883652477164, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.9405940594059405, |
|
"eval_loss": 1.3289028406143188, |
|
"eval_runtime": 155.7394, |
|
"eval_samples_per_second": 36.317, |
|
"eval_steps_per_second": 4.54, |
|
"eval_wer": 0.9014093519540561, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.082036775106082, |
|
"eval_loss": 2.0890820026397705, |
|
"eval_runtime": 154.2749, |
|
"eval_samples_per_second": 36.662, |
|
"eval_steps_per_second": 4.583, |
|
"eval_wer": 0.7125305946153477, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 6.223479490806223, |
|
"eval_loss": 1.256324291229248, |
|
"eval_runtime": 154.5375, |
|
"eval_samples_per_second": 36.6, |
|
"eval_steps_per_second": 4.575, |
|
"eval_wer": 0.7696085488953944, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.364922206506365, |
|
"grad_norm": 0.9906980395317078, |
|
"learning_rate": 0.0001819000494804552, |
|
"loss": 1.2886, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.364922206506365, |
|
"eval_loss": 1.1441457271575928, |
|
"eval_runtime": 155.0449, |
|
"eval_samples_per_second": 36.48, |
|
"eval_steps_per_second": 4.56, |
|
"eval_wer": 0.692726080209883, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.506364922206506, |
|
"eval_loss": 1.0626095533370972, |
|
"eval_runtime": 155.5445, |
|
"eval_samples_per_second": 36.363, |
|
"eval_steps_per_second": 4.545, |
|
"eval_wer": 0.6573083137367823, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.647807637906648, |
|
"eval_loss": 0.9997339248657227, |
|
"eval_runtime": 155.8362, |
|
"eval_samples_per_second": 36.295, |
|
"eval_steps_per_second": 4.537, |
|
"eval_wer": 0.6422869574954808, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 6.789250353606789, |
|
"eval_loss": 0.9813728928565979, |
|
"eval_runtime": 155.2999, |
|
"eval_samples_per_second": 36.42, |
|
"eval_steps_per_second": 4.552, |
|
"eval_wer": 0.6380317064196701, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.930693069306931, |
|
"eval_loss": 1.0955251455307007, |
|
"eval_runtime": 153.746, |
|
"eval_samples_per_second": 36.788, |
|
"eval_steps_per_second": 4.598, |
|
"eval_wer": 0.7651133400521508, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 7.072135785007072, |
|
"grad_norm": 1.6002442836761475, |
|
"learning_rate": 0.00016708560118753091, |
|
"loss": 1.0984, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.072135785007072, |
|
"eval_loss": 0.9212619066238403, |
|
"eval_runtime": 155.7654, |
|
"eval_samples_per_second": 36.311, |
|
"eval_steps_per_second": 4.539, |
|
"eval_wer": 0.5882964598230711, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.2135785007072135, |
|
"eval_loss": 0.8884870409965515, |
|
"eval_runtime": 153.6804, |
|
"eval_samples_per_second": 36.804, |
|
"eval_steps_per_second": 4.6, |
|
"eval_wer": 0.5932715842011806, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 7.355021216407355, |
|
"eval_loss": 0.900116503238678, |
|
"eval_runtime": 155.054, |
|
"eval_samples_per_second": 36.478, |
|
"eval_steps_per_second": 4.56, |
|
"eval_wer": 0.5898641839036329, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.496463932107496, |
|
"eval_loss": 0.8783684372901917, |
|
"eval_runtime": 156.2294, |
|
"eval_samples_per_second": 36.203, |
|
"eval_steps_per_second": 4.525, |
|
"eval_wer": 0.5858968821487418, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 7.637906647807638, |
|
"eval_loss": 0.9072028398513794, |
|
"eval_runtime": 154.0671, |
|
"eval_samples_per_second": 36.711, |
|
"eval_steps_per_second": 4.589, |
|
"eval_wer": 0.5897522036121643, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 7.779349363507779, |
|
"grad_norm": 1.3490877151489258, |
|
"learning_rate": 0.00015230084116773872, |
|
"loss": 0.9659, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.779349363507779, |
|
"eval_loss": 0.8811922669410706, |
|
"eval_runtime": 153.8934, |
|
"eval_samples_per_second": 36.753, |
|
"eval_steps_per_second": 4.594, |
|
"eval_wer": 0.5841051974852426, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.920792079207921, |
|
"eval_loss": 0.891165018081665, |
|
"eval_runtime": 153.7325, |
|
"eval_samples_per_second": 36.791, |
|
"eval_steps_per_second": 4.599, |
|
"eval_wer": 0.5855129497208491, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 8.062234794908063, |
|
"eval_loss": 0.8815582990646362, |
|
"eval_runtime": 154.6799, |
|
"eval_samples_per_second": 36.566, |
|
"eval_steps_per_second": 4.571, |
|
"eval_wer": 0.5807137943721905, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 8.203677510608204, |
|
"eval_loss": 0.891440749168396, |
|
"eval_runtime": 155.4972, |
|
"eval_samples_per_second": 36.374, |
|
"eval_steps_per_second": 4.547, |
|
"eval_wer": 0.5803138647598023, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 8.345120226308346, |
|
"eval_loss": 0.8956438899040222, |
|
"eval_runtime": 154.6865, |
|
"eval_samples_per_second": 36.564, |
|
"eval_steps_per_second": 4.571, |
|
"eval_wer": 0.5810337380621011, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 8.486562942008486, |
|
"grad_norm": 1.257432460784912, |
|
"learning_rate": 0.00013754576942107867, |
|
"loss": 0.9679, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.486562942008486, |
|
"eval_loss": 0.9162164330482483, |
|
"eval_runtime": 155.1949, |
|
"eval_samples_per_second": 36.444, |
|
"eval_steps_per_second": 4.556, |
|
"eval_wer": 0.5780262673769416, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.628005657708629, |
|
"eval_loss": 0.9409377574920654, |
|
"eval_runtime": 154.6732, |
|
"eval_samples_per_second": 36.567, |
|
"eval_steps_per_second": 4.571, |
|
"eval_wer": 0.5810177408776055, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 8.76944837340877, |
|
"eval_loss": 0.9370973706245422, |
|
"eval_runtime": 155.5326, |
|
"eval_samples_per_second": 36.365, |
|
"eval_steps_per_second": 4.546, |
|
"eval_wer": 0.5780742589304283, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 8.910891089108912, |
|
"eval_loss": 0.941677987575531, |
|
"eval_runtime": 155.4822, |
|
"eval_samples_per_second": 36.377, |
|
"eval_steps_per_second": 4.547, |
|
"eval_wer": 0.5790020956311689, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 9.052333804809052, |
|
"eval_loss": 0.9663541913032532, |
|
"eval_runtime": 155.844, |
|
"eval_samples_per_second": 36.293, |
|
"eval_steps_per_second": 4.537, |
|
"eval_wer": 0.5783782054358433, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 9.193776520509195, |
|
"grad_norm": 1.5867938995361328, |
|
"learning_rate": 0.00012273132112815437, |
|
"loss": 1.0241, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.193776520509195, |
|
"eval_loss": 0.9720383286476135, |
|
"eval_runtime": 155.604, |
|
"eval_samples_per_second": 36.349, |
|
"eval_steps_per_second": 4.544, |
|
"eval_wer": 0.5775143574730848, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.335219236209335, |
|
"eval_loss": 0.9840742349624634, |
|
"eval_runtime": 155.8175, |
|
"eval_samples_per_second": 36.299, |
|
"eval_steps_per_second": 4.537, |
|
"eval_wer": 0.5783942026203388, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 9.476661951909477, |
|
"eval_loss": 0.9574136137962341, |
|
"eval_runtime": 155.2609, |
|
"eval_samples_per_second": 36.429, |
|
"eval_steps_per_second": 4.554, |
|
"eval_wer": 0.5886803922509638, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 9.618104667609618, |
|
"eval_loss": 1.0725222826004028, |
|
"eval_runtime": 154.2708, |
|
"eval_samples_per_second": 36.663, |
|
"eval_steps_per_second": 4.583, |
|
"eval_wer": 0.606837196653389, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 9.75954738330976, |
|
"eval_loss": 1.0362112522125244, |
|
"eval_runtime": 155.3381, |
|
"eval_samples_per_second": 36.411, |
|
"eval_steps_per_second": 4.551, |
|
"eval_wer": 0.5999584073203116, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 9.900990099009901, |
|
"grad_norm": 0.6058325171470642, |
|
"learning_rate": 0.00010794656110836219, |
|
"loss": 1.0797, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.900990099009901, |
|
"eval_loss": 1.0116764307022095, |
|
"eval_runtime": 155.5442, |
|
"eval_samples_per_second": 36.363, |
|
"eval_steps_per_second": 4.545, |
|
"eval_wer": 0.5914319079841948, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 10.042432814710043, |
|
"eval_loss": 0.9563263058662415, |
|
"eval_runtime": 155.5047, |
|
"eval_samples_per_second": 36.372, |
|
"eval_steps_per_second": 4.546, |
|
"eval_wer": 0.6058293740301707, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 10.183875530410184, |
|
"eval_loss": 0.9663692116737366, |
|
"eval_runtime": 155.3578, |
|
"eval_samples_per_second": 36.406, |
|
"eval_steps_per_second": 4.551, |
|
"eval_wer": 0.5978307817824063, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 10.325318246110326, |
|
"eval_loss": 1.0209406614303589, |
|
"eval_runtime": 155.4685, |
|
"eval_samples_per_second": 36.38, |
|
"eval_steps_per_second": 4.548, |
|
"eval_wer": 0.6022140103341812, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 10.466760961810467, |
|
"eval_loss": 0.9848981499671936, |
|
"eval_runtime": 156.2008, |
|
"eval_samples_per_second": 36.21, |
|
"eval_steps_per_second": 4.526, |
|
"eval_wer": 0.5974788437235047, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 10.608203677510609, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.319148936170212e-05, |
|
"loss": 1.0701, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 10.608203677510609, |
|
"eval_loss": 0.9718888401985168, |
|
"eval_runtime": 156.5494, |
|
"eval_samples_per_second": 36.129, |
|
"eval_steps_per_second": 4.516, |
|
"eval_wer": 0.6057013965542064, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 10.74964639321075, |
|
"eval_loss": 0.966968834400177, |
|
"eval_runtime": 155.045, |
|
"eval_samples_per_second": 36.48, |
|
"eval_steps_per_second": 4.56, |
|
"eval_wer": 0.6122602421973733, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 10.891089108910892, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 156.2212, |
|
"eval_samples_per_second": 36.205, |
|
"eval_steps_per_second": 4.526, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 11.032531824611032, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 156.7045, |
|
"eval_samples_per_second": 36.093, |
|
"eval_steps_per_second": 4.512, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 11.173974540311175, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 156.3156, |
|
"eval_samples_per_second": 36.183, |
|
"eval_steps_per_second": 4.523, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 11.315417256011315, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.843641761504205e-05, |
|
"loss": 1.0518, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 11.315417256011315, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 156.4393, |
|
"eval_samples_per_second": 36.155, |
|
"eval_steps_per_second": 4.519, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 11.456859971711458, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.12, |
|
"eval_samples_per_second": 36.462, |
|
"eval_steps_per_second": 4.558, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 11.598302687411598, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.4065, |
|
"eval_samples_per_second": 36.395, |
|
"eval_steps_per_second": 4.549, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 11.73974540311174, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 154.2056, |
|
"eval_samples_per_second": 36.678, |
|
"eval_steps_per_second": 4.585, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 11.881188118811881, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.9204, |
|
"eval_samples_per_second": 36.275, |
|
"eval_steps_per_second": 4.534, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 12.022630834512023, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.362196932211776e-05, |
|
"loss": 1.0594, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 12.022630834512023, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 154.4966, |
|
"eval_samples_per_second": 36.609, |
|
"eval_steps_per_second": 4.576, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 12.164073550212164, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 154.0012, |
|
"eval_samples_per_second": 36.727, |
|
"eval_steps_per_second": 4.591, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 12.305516265912306, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.297, |
|
"eval_samples_per_second": 36.421, |
|
"eval_steps_per_second": 4.553, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 12.446958981612447, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 156.7077, |
|
"eval_samples_per_second": 36.093, |
|
"eval_steps_per_second": 4.512, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 12.58840169731259, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.8279, |
|
"eval_samples_per_second": 36.296, |
|
"eval_steps_per_second": 4.537, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 12.72984441301273, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.8866897575457694e-05, |
|
"loss": 1.0584, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 12.72984441301273, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.3599, |
|
"eval_samples_per_second": 36.406, |
|
"eval_steps_per_second": 4.551, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 12.871287128712872, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.3076, |
|
"eval_samples_per_second": 36.418, |
|
"eval_steps_per_second": 4.552, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 13.012729844413013, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.448, |
|
"eval_samples_per_second": 36.385, |
|
"eval_steps_per_second": 4.548, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 13.154172560113155, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.6022, |
|
"eval_samples_per_second": 36.349, |
|
"eval_steps_per_second": 4.544, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 13.295615275813295, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 154.7567, |
|
"eval_samples_per_second": 36.548, |
|
"eval_steps_per_second": 4.568, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 13.437057991513438, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.408213755566551e-05, |
|
"loss": 1.0556, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 13.437057991513438, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 154.3187, |
|
"eval_samples_per_second": 36.651, |
|
"eval_steps_per_second": 4.581, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 13.578500707213578, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.5249, |
|
"eval_samples_per_second": 36.367, |
|
"eval_steps_per_second": 4.546, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 13.71994342291372, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.212, |
|
"eval_samples_per_second": 36.44, |
|
"eval_steps_per_second": 4.555, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 13.861386138613861, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 154.6321, |
|
"eval_samples_per_second": 36.577, |
|
"eval_steps_per_second": 4.572, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 14.002828854314004, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.2988, |
|
"eval_samples_per_second": 36.42, |
|
"eval_steps_per_second": 4.553, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 14.144271570014144, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9297377535873327e-05, |
|
"loss": 1.0511, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 14.144271570014144, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.9222, |
|
"eval_samples_per_second": 36.274, |
|
"eval_steps_per_second": 4.534, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 156.6698, |
|
"eval_samples_per_second": 36.101, |
|
"eval_steps_per_second": 4.513, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 14.427157001414427, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.8474, |
|
"eval_samples_per_second": 36.292, |
|
"eval_steps_per_second": 4.536, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 14.56859971711457, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 154.9914, |
|
"eval_samples_per_second": 36.492, |
|
"eval_steps_per_second": 4.562, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 14.71004243281471, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 155.7638, |
|
"eval_samples_per_second": 36.311, |
|
"eval_steps_per_second": 4.539, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 14.851485148514852, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.512617516081148e-06, |
|
"loss": 1.0585, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 14.851485148514852, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 154.954, |
|
"eval_samples_per_second": 36.501, |
|
"eval_steps_per_second": 4.563, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 14.992927864214993, |
|
"eval_loss": 0.9668737053871155, |
|
"eval_runtime": 154.7299, |
|
"eval_samples_per_second": 36.554, |
|
"eval_steps_per_second": 4.569, |
|
"eval_wer": 0.6124682055958152, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 10605, |
|
"total_flos": 4.036432448909298e+19, |
|
"train_loss": 1.1022147617942597, |
|
"train_runtime": 28048.236, |
|
"train_samples_per_second": 12.098, |
|
"train_steps_per_second": 0.378 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10605, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 400, |
|
"total_flos": 4.036432448909298e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|