{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.953846153846154, "global_step": 6800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.76, "learning_rate": 0.00023999999999999998, "loss": 6.331, "step": 400 }, { "epoch": 1.76, "eval_loss": 3.29716157913208, "eval_runtime": 392.7938, "eval_samples_per_second": 8.236, "eval_wer": 0.9908326967150497, "step": 400 }, { "epoch": 3.52, "learning_rate": 0.00028573692551505547, "loss": 3.3384, "step": 800 }, { "epoch": 3.52, "eval_loss": 3.2769880294799805, "eval_runtime": 403.0436, "eval_samples_per_second": 8.026, "eval_wer": 0.9908326967150497, "step": 800 }, { "epoch": 5.29, "learning_rate": 0.00026671949286846274, "loss": 2.8767, "step": 1200 }, { "epoch": 5.29, "eval_loss": 0.987671434879303, "eval_runtime": 414.5553, "eval_samples_per_second": 7.804, "eval_wer": 0.9542438985163444, "step": 1200 }, { "epoch": 7.05, "learning_rate": 0.00024770206022187, "loss": 0.7362, "step": 1600 }, { "epoch": 7.05, "eval_loss": 0.5294700860977173, "eval_runtime": 412.2788, "eval_samples_per_second": 7.847, "eval_wer": 0.7132805275220135, "step": 1600 }, { "epoch": 8.81, "learning_rate": 0.0002286846275752773, "loss": 0.4225, "step": 2000 }, { "epoch": 8.81, "eval_loss": 0.47939780354499817, "eval_runtime": 412.9001, "eval_samples_per_second": 7.835, "eval_wer": 0.6393791966547384, "step": 2000 }, { "epoch": 10.57, "learning_rate": 0.0002096671949286846, "loss": 0.2997, "step": 2400 }, { "epoch": 10.57, "eval_loss": 0.47680211067199707, "eval_runtime": 406.5095, "eval_samples_per_second": 7.958, "eval_wer": 0.5926983233484782, "step": 2400 }, { "epoch": 12.33, "learning_rate": 0.00019064976228209192, "loss": 0.2349, "step": 2800 }, { "epoch": 12.33, "eval_loss": 0.46566906571388245, "eval_runtime": 410.6338, "eval_samples_per_second": 7.878, "eval_wer": 0.5762936753648827, "step": 2800 }, { "epoch": 14.1, "learning_rate": 0.00017163232963549917, "loss": 0.1947, "step": 3200 }, { "epoch": 14.1, "eval_loss": 0.5099472999572754, "eval_runtime": 412.6367, "eval_samples_per_second": 7.84, "eval_wer": 0.5637087370833501, "step": 3200 }, { "epoch": 15.86, "learning_rate": 0.00015261489698890646, "loss": 0.1652, "step": 3600 }, { "epoch": 15.86, "eval_loss": 0.4866821765899658, "eval_runtime": 413.7594, "eval_samples_per_second": 7.819, "eval_wer": 0.5551445458566202, "step": 3600 }, { "epoch": 17.62, "learning_rate": 0.0001335974643423138, "loss": 0.1397, "step": 4000 }, { "epoch": 17.62, "eval_loss": 0.510926365852356, "eval_runtime": 416.443, "eval_samples_per_second": 7.768, "eval_wer": 0.5460576575127659, "step": 4000 }, { "epoch": 19.38, "learning_rate": 0.00011458003169572107, "loss": 0.1251, "step": 4400 }, { "epoch": 19.38, "eval_loss": 0.5401586294174194, "eval_runtime": 410.6668, "eval_samples_per_second": 7.877, "eval_wer": 0.5362470346990471, "step": 4400 }, { "epoch": 21.15, "learning_rate": 9.556259904912835e-05, "loss": 0.1105, "step": 4800 }, { "epoch": 21.15, "eval_loss": 0.5472865104675293, "eval_runtime": 415.7439, "eval_samples_per_second": 7.781, "eval_wer": 0.530416951469583, "step": 4800 }, { "epoch": 22.91, "learning_rate": 7.654516640253565e-05, "loss": 0.0932, "step": 5200 }, { "epoch": 22.91, "eval_loss": 0.5360305309295654, "eval_runtime": 420.1088, "eval_samples_per_second": 7.7, "eval_wer": 0.5289694825298541, "step": 5200 }, { "epoch": 24.67, "learning_rate": 5.752773375594294e-05, "loss": 0.0893, "step": 5600 }, { "epoch": 24.67, "eval_loss": 0.5389866232872009, "eval_runtime": 429.4038, "eval_samples_per_second": 7.534, "eval_wer": 0.5169072413654457, "step": 5600 }, { "epoch": 26.43, "learning_rate": 3.851030110935023e-05, "loss": 0.0863, "step": 6000 }, { "epoch": 26.43, "eval_loss": 0.5444660186767578, "eval_runtime": 423.7501, "eval_samples_per_second": 7.634, "eval_wer": 0.5146958304853042, "step": 6000 }, { "epoch": 28.19, "learning_rate": 1.9492868462757525e-05, "loss": 0.0778, "step": 6400 }, { "epoch": 28.19, "eval_loss": 0.5570007562637329, "eval_runtime": 420.4258, "eval_samples_per_second": 7.695, "eval_wer": 0.5113988179003659, "step": 6400 }, { "epoch": 29.95, "learning_rate": 4.754358161648177e-07, "loss": 0.0732, "step": 6800 }, { "epoch": 29.95, "eval_loss": 0.557680606842041, "eval_runtime": 427.3219, "eval_samples_per_second": 7.57, "eval_wer": 0.5118411000763942, "step": 6800 } ], "max_steps": 6810, "num_train_epochs": 30, "total_flos": 3.375783462381318e+19, "trial_name": null, "trial_params": null }