{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.669540229885057, "global_step": 3250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "learning_rate": 4.94e-05, "loss": 7.315, "step": 250 }, { "epoch": 0.36, "eval_cer": 1.0, "eval_loss": 3.364396810531616, "eval_runtime": 411.3291, "eval_samples_per_second": 18.839, "eval_steps_per_second": 2.356, "eval_wer": 1.0, "step": 250 }, { "epoch": 0.72, "learning_rate": 9.94e-05, "loss": 3.2126, "step": 500 }, { "epoch": 0.72, "eval_cer": 1.0, "eval_loss": 3.178248405456543, "eval_runtime": 409.0228, "eval_samples_per_second": 18.945, "eval_steps_per_second": 2.369, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.08, "learning_rate": 9.992892086330936e-05, "loss": 3.1414, "step": 750 }, { "epoch": 1.08, "eval_cer": 1.0, "eval_loss": 3.1066977977752686, "eval_runtime": 414.126, "eval_samples_per_second": 18.712, "eval_steps_per_second": 2.34, "eval_wer": 1.0, "step": 750 }, { "epoch": 1.44, "learning_rate": 9.985697841726619e-05, "loss": 1.9013, "step": 1000 }, { "epoch": 1.44, "eval_cer": 0.12787848954973915, "eval_loss": 0.5506780743598938, "eval_runtime": 412.7461, "eval_samples_per_second": 18.774, "eval_steps_per_second": 2.348, "eval_wer": 0.5514430132072395, "step": 1000 }, { "epoch": 1.8, "learning_rate": 9.978503597122302e-05, "loss": 0.5632, "step": 1250 }, { "epoch": 1.8, "eval_cer": 0.06747688721413134, "eval_loss": 0.28180211782455444, "eval_runtime": 412.6671, "eval_samples_per_second": 18.778, "eval_steps_per_second": 2.348, "eval_wer": 0.30760729749805243, "step": 1250 }, { "epoch": 2.16, "learning_rate": 9.971309352517985e-05, "loss": 0.388, "step": 1500 }, { "epoch": 2.16, "eval_cer": 0.053711059057835714, "eval_loss": 0.2308780699968338, "eval_runtime": 524.2327, "eval_samples_per_second": 14.782, "eval_steps_per_second": 1.848, "eval_wer": 0.24573799300686633, "step": 1500 }, { "epoch": 2.51, "learning_rate": 9.96411510791367e-05, "loss": 0.3175, "step": 1750 }, { "epoch": 2.51, "eval_cer": 0.0491677133901393, "eval_loss": 0.2099951058626175, "eval_runtime": 529.8067, "eval_samples_per_second": 14.626, "eval_steps_per_second": 1.829, "eval_wer": 0.22468612424588294, "step": 1750 }, { "epoch": 2.87, "learning_rate": 9.956920863309353e-05, "loss": 0.294, "step": 2000 }, { "epoch": 2.87, "eval_cer": 0.046197281913749985, "eval_loss": 0.19443732500076294, "eval_runtime": 533.3896, "eval_samples_per_second": 14.528, "eval_steps_per_second": 1.817, "eval_wer": 0.21001141366378606, "step": 2000 }, { "epoch": 3.23, "learning_rate": 9.949726618705036e-05, "loss": 0.2543, "step": 2250 }, { "epoch": 3.23, "eval_cer": 0.04413495377442826, "eval_loss": 0.19428253173828125, "eval_runtime": 534.1775, "eval_samples_per_second": 14.506, "eval_steps_per_second": 1.814, "eval_wer": 0.19475696142906318, "step": 2250 }, { "epoch": 3.59, "learning_rate": 9.94253237410072e-05, "loss": 0.2493, "step": 2500 }, { "epoch": 3.59, "eval_cer": 0.04232723404736848, "eval_loss": 0.1892683357000351, "eval_runtime": 528.5117, "eval_samples_per_second": 14.662, "eval_steps_per_second": 1.833, "eval_wer": 0.18792688008406255, "step": 2500 }, { "epoch": 3.95, "learning_rate": 9.935338129496404e-05, "loss": 0.2333, "step": 2750 }, { "epoch": 3.95, "eval_cer": 0.04126919464530219, "eval_loss": 0.1779049038887024, "eval_runtime": 417.6945, "eval_samples_per_second": 18.552, "eval_steps_per_second": 2.32, "eval_wer": 0.18323459608312045, "step": 2750 }, { "epoch": 4.31, "learning_rate": 9.928143884892087e-05, "loss": 0.2131, "step": 3000 }, { "epoch": 4.31, "eval_cer": 0.040952348621153996, "eval_loss": 0.18109484016895294, "eval_runtime": 409.5311, "eval_samples_per_second": 18.922, "eval_steps_per_second": 2.366, "eval_wer": 0.17970179538743047, "step": 3000 }, { "epoch": 4.67, "learning_rate": 9.92094964028777e-05, "loss": 0.1992, "step": 3250 }, { "epoch": 4.67, "eval_cer": 0.03845435719862851, "eval_loss": 0.17126505076885223, "eval_runtime": 422.692, "eval_samples_per_second": 18.332, "eval_steps_per_second": 2.292, "eval_wer": 0.169665018026342, "step": 3250 } ], "max_steps": 348000, "num_train_epochs": 500, "total_flos": 1.643409928882224e+19, "trial_name": null, "trial_params": null }