{ "best_metric": null, "best_model_checkpoint": null, "epoch": 48.87983706720978, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.07, "grad_norm": 3.8863396644592285, "learning_rate": 3.936e-05, "loss": 3.9012, "step": 500 }, { "epoch": 4.07, "eval_loss": 0.4798698425292969, "eval_runtime": 164.8654, "eval_samples_per_second": 19.234, "eval_steps_per_second": 2.408, "eval_wer": 0.46691878014085975, "step": 500 }, { "epoch": 8.15, "grad_norm": 1.7655788660049438, "learning_rate": 7.936000000000001e-05, "loss": 0.7087, "step": 1000 }, { "epoch": 8.15, "eval_loss": 0.331632524728775, "eval_runtime": 165.1487, "eval_samples_per_second": 19.201, "eval_steps_per_second": 2.404, "eval_wer": 0.3308121985914027, "step": 1000 }, { "epoch": 12.22, "grad_norm": 2.021256446838379, "learning_rate": 7.228235294117648e-05, "loss": 0.3293, "step": 1500 }, { "epoch": 12.22, "eval_loss": 0.29065316915512085, "eval_runtime": 168.1265, "eval_samples_per_second": 18.861, "eval_steps_per_second": 2.361, "eval_wer": 0.290982895604205, "step": 1500 }, { "epoch": 16.29, "grad_norm": 1.2028392553329468, "learning_rate": 6.443921568627452e-05, "loss": 0.203, "step": 2000 }, { "epoch": 16.29, "eval_loss": 0.29165971279144287, "eval_runtime": 168.0318, "eval_samples_per_second": 18.871, "eval_steps_per_second": 2.363, "eval_wer": 0.2812337369461888, "step": 2000 }, { "epoch": 20.37, "grad_norm": 0.8678894639015198, "learning_rate": 5.6596078431372547e-05, "loss": 0.1526, "step": 2500 }, { "epoch": 20.37, "eval_loss": 0.2595488429069519, "eval_runtime": 168.4253, "eval_samples_per_second": 18.827, "eval_steps_per_second": 2.357, "eval_wer": 0.26881310064878744, "step": 2500 }, { "epoch": 24.44, "grad_norm": 1.7510905265808105, "learning_rate": 4.875294117647059e-05, "loss": 0.1189, "step": 3000 }, { "epoch": 24.44, "eval_loss": 0.2896680533885956, "eval_runtime": 167.8147, "eval_samples_per_second": 18.896, "eval_steps_per_second": 2.366, "eval_wer": 0.2652395656246747, "step": 3000 }, { "epoch": 28.51, "grad_norm": 1.1846829652786255, "learning_rate": 4.090980392156863e-05, "loss": 0.0988, "step": 3500 }, { "epoch": 28.51, "eval_loss": 0.30395135283470154, "eval_runtime": 168.5254, "eval_samples_per_second": 18.816, "eval_steps_per_second": 2.356, "eval_wer": 0.2631925892516393, "step": 3500 }, { "epoch": 32.59, "grad_norm": 1.2768597602844238, "learning_rate": 3.3066666666666666e-05, "loss": 0.0767, "step": 4000 }, { "epoch": 32.59, "eval_loss": 0.2856810986995697, "eval_runtime": 170.6385, "eval_samples_per_second": 18.583, "eval_steps_per_second": 2.327, "eval_wer": 0.25576796308503624, "step": 4000 }, { "epoch": 36.66, "grad_norm": 0.8570753931999207, "learning_rate": 2.5223529411764707e-05, "loss": 0.0653, "step": 4500 }, { "epoch": 36.66, "eval_loss": 0.30118483304977417, "eval_runtime": 171.6141, "eval_samples_per_second": 18.478, "eval_steps_per_second": 2.313, "eval_wer": 0.24785761371127224, "step": 4500 }, { "epoch": 40.73, "grad_norm": 0.5461006164550781, "learning_rate": 1.7380392156862745e-05, "loss": 0.0585, "step": 5000 }, { "epoch": 40.73, "eval_loss": 0.28294435143470764, "eval_runtime": 169.3236, "eval_samples_per_second": 18.727, "eval_steps_per_second": 2.345, "eval_wer": 0.24251465843250183, "step": 5000 }, { "epoch": 44.81, "grad_norm": 0.47187602519989014, "learning_rate": 9.537254901960786e-06, "loss": 0.0467, "step": 5500 }, { "epoch": 44.81, "eval_loss": 0.28480401635169983, "eval_runtime": 169.9442, "eval_samples_per_second": 18.659, "eval_steps_per_second": 2.336, "eval_wer": 0.23873295631960587, "step": 5500 }, { "epoch": 48.88, "grad_norm": 0.4703833758831024, "learning_rate": 1.6941176470588237e-06, "loss": 0.0416, "step": 6000 }, { "epoch": 48.88, "eval_loss": 0.27989715337753296, "eval_runtime": 170.4594, "eval_samples_per_second": 18.603, "eval_steps_per_second": 2.329, "eval_wer": 0.23436144745515733, "step": 6000 } ], "logging_steps": 500, "max_steps": 6100, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 9.05423649471038e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }