{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 758, "global_step": 11370, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6596306068601583, "grad_norm": 1.0755512714385986, "learning_rate": 0.0002982, "loss": 3.7968, "step": 500 }, { "epoch": 1.0, "eval_loss": 0.2848176658153534, "eval_runtime": 177.1073, "eval_samples_per_second": 64.565, "eval_steps_per_second": 4.037, "eval_wer": 0.5295409253093712, "step": 758 }, { "epoch": 1.3192612137203166, "grad_norm": 0.398578405380249, "learning_rate": 0.00028628334866605336, "loss": 0.3537, "step": 1000 }, { "epoch": 1.978891820580475, "grad_norm": 0.6923481822013855, "learning_rate": 0.00027248390064397424, "loss": 0.2547, "step": 1500 }, { "epoch": 2.0, "eval_loss": 0.19083459675312042, "eval_runtime": 177.247, "eval_samples_per_second": 64.514, "eval_steps_per_second": 4.034, "eval_wer": 0.42224847551880096, "step": 1516 }, { "epoch": 2.638522427440633, "grad_norm": 0.2965054214000702, "learning_rate": 0.0002586844526218951, "loss": 0.1929, "step": 2000 }, { "epoch": 3.0, "eval_loss": 0.1753465086221695, "eval_runtime": 183.1101, "eval_samples_per_second": 62.449, "eval_steps_per_second": 3.905, "eval_wer": 0.39995704006014393, "step": 2274 }, { "epoch": 3.2981530343007917, "grad_norm": 0.22291332483291626, "learning_rate": 0.000244885004599816, "loss": 0.172, "step": 2500 }, { "epoch": 3.9577836411609497, "grad_norm": 0.21266356110572815, "learning_rate": 0.00023108555657773688, "loss": 0.1532, "step": 3000 }, { "epoch": 4.0, "eval_loss": 0.15584248304367065, "eval_runtime": 179.8085, "eval_samples_per_second": 63.595, "eval_steps_per_second": 3.976, "eval_wer": 0.37095908065728705, "step": 3032 }, { "epoch": 4.617414248021108, "grad_norm": 0.3851657509803772, "learning_rate": 0.00021728610855565776, "loss": 0.1297, "step": 3500 }, { "epoch": 5.0, "eval_loss": 0.15116363763809204, "eval_runtime": 178.6057, "eval_samples_per_second": 64.024, "eval_steps_per_second": 4.003, "eval_wer": 0.35356030501557295, "step": 3790 }, { "epoch": 5.277044854881266, "grad_norm": 0.6844151616096497, "learning_rate": 0.00020348666053357864, "loss": 0.1236, "step": 4000 }, { "epoch": 5.936675461741425, "grad_norm": 0.2345355600118637, "learning_rate": 0.00018968721251149952, "loss": 0.1167, "step": 4500 }, { "epoch": 6.0, "eval_loss": 0.15742355585098267, "eval_runtime": 183.8285, "eval_samples_per_second": 62.205, "eval_steps_per_second": 3.889, "eval_wer": 0.35143617465602217, "step": 4548 }, { "epoch": 6.596306068601583, "grad_norm": 0.18163040280342102, "learning_rate": 0.00017588776448942042, "loss": 0.101, "step": 5000 }, { "epoch": 7.0, "eval_loss": 0.1482517421245575, "eval_runtime": 181.3176, "eval_samples_per_second": 63.066, "eval_steps_per_second": 3.943, "eval_wer": 0.3374383942529147, "step": 5306 }, { "epoch": 7.255936675461742, "grad_norm": 0.3131236135959625, "learning_rate": 0.0001620883164673413, "loss": 0.0945, "step": 5500 }, { "epoch": 7.915567282321899, "grad_norm": 0.5193817615509033, "learning_rate": 0.00014828886844526218, "loss": 0.0859, "step": 6000 }, { "epoch": 8.0, "eval_loss": 0.14896264672279358, "eval_runtime": 180.9851, "eval_samples_per_second": 63.182, "eval_steps_per_second": 3.951, "eval_wer": 0.32986073819496653, "step": 6064 }, { "epoch": 8.575197889182059, "grad_norm": 0.21172136068344116, "learning_rate": 0.00013448942042318306, "loss": 0.0791, "step": 6500 }, { "epoch": 9.0, "eval_loss": 0.15233299136161804, "eval_runtime": 179.7038, "eval_samples_per_second": 63.632, "eval_steps_per_second": 3.979, "eval_wer": 0.32503967827778374, "step": 6822 }, { "epoch": 9.234828496042216, "grad_norm": 0.20351053774356842, "learning_rate": 0.0001207175712971481, "loss": 0.0745, "step": 7000 }, { "epoch": 9.894459102902374, "grad_norm": 0.1984056681394577, "learning_rate": 0.00010694572217111315, "loss": 0.0702, "step": 7500 }, { "epoch": 10.0, "eval_loss": 0.16084744036197662, "eval_runtime": 179.8551, "eval_samples_per_second": 63.579, "eval_steps_per_second": 3.975, "eval_wer": 0.3191923531307056, "step": 7580 }, { "epoch": 10.554089709762533, "grad_norm": 0.9130335450172424, "learning_rate": 9.314627414903402e-05, "loss": 0.0629, "step": 8000 }, { "epoch": 11.0, "eval_loss": 0.16640283167362213, "eval_runtime": 180.2008, "eval_samples_per_second": 63.457, "eval_steps_per_second": 3.968, "eval_wer": 0.31460995954605664, "step": 8338 }, { "epoch": 11.213720316622691, "grad_norm": 0.24980618059635162, "learning_rate": 7.934682612695491e-05, "loss": 0.0587, "step": 8500 }, { "epoch": 11.87335092348285, "grad_norm": 0.2797747850418091, "learning_rate": 6.55473781048758e-05, "loss": 0.0559, "step": 9000 }, { "epoch": 12.0, "eval_loss": 0.16405943036079407, "eval_runtime": 182.307, "eval_samples_per_second": 62.724, "eval_steps_per_second": 3.922, "eval_wer": 0.3103020322438215, "step": 9096 }, { "epoch": 12.532981530343008, "grad_norm": 0.15323172509670258, "learning_rate": 5.1775528978840844e-05, "loss": 0.0527, "step": 9500 }, { "epoch": 13.0, "eval_loss": 0.16645778715610504, "eval_runtime": 180.7018, "eval_samples_per_second": 63.281, "eval_steps_per_second": 3.957, "eval_wer": 0.3063043711738804, "step": 9854 }, { "epoch": 13.192612137203167, "grad_norm": 0.7859821319580078, "learning_rate": 3.797608095676173e-05, "loss": 0.0491, "step": 10000 }, { "epoch": 13.852242744063325, "grad_norm": 0.2534639537334442, "learning_rate": 2.4176632934682608e-05, "loss": 0.0468, "step": 10500 }, { "epoch": 14.0, "eval_loss": 0.16911591589450836, "eval_runtime": 181.5239, "eval_samples_per_second": 62.994, "eval_steps_per_second": 3.939, "eval_wer": 0.30112531175789686, "step": 10612 }, { "epoch": 14.511873350923484, "grad_norm": 0.21820344030857086, "learning_rate": 1.0377184912603496e-05, "loss": 0.0443, "step": 11000 }, { "epoch": 15.0, "eval_loss": 0.17480023205280304, "eval_runtime": 182.6284, "eval_samples_per_second": 62.613, "eval_steps_per_second": 3.915, "eval_wer": 0.2998007136123343, "step": 11370 }, { "epoch": 15.0, "step": 11370, "total_flos": 1.156593069425741e+20, "train_loss": 0.27266296556149017, "train_runtime": 21246.5656, "train_samples_per_second": 34.234, "train_steps_per_second": 0.535 } ], "logging_steps": 500, "max_steps": 11370, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 758, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.156593069425741e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }