{ "best_metric": 33.92029657089898, "best_model_checkpoint": "/cosmos/home/sp-operator/ai/training/models/huggingface/scripts/../breeze-listen-dsw-small-ml/checkpoint-700", "epoch": 8.068, "eval_steps": 100, "global_step": 700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.898977360288234e-06, "loss": 1.5105, "step": 25 }, { "epoch": 0.05, "learning_rate": 6.160712527409633e-06, "loss": 1.0831, "step": 50 }, { "epoch": 0.07, "learning_rate": 6.85912902234906e-06, "loss": 0.7569, "step": 75 }, { "epoch": 1.02, "learning_rate": 7.344547104469332e-06, "loss": 0.4577, "step": 100 }, { "epoch": 1.02, "eval_loss": 0.482421875, "eval_runtime": 4089.3252, "eval_samples_per_second": 0.162, "eval_steps_per_second": 0.02, "eval_wer": 58.38739573679332, "step": 100 }, { "epoch": 1.05, "learning_rate": 7.716963756434345e-06, "loss": 0.364, "step": 125 }, { "epoch": 1.07, "learning_rate": 8.019180844200955e-06, "loss": 0.2875, "step": 150 }, { "epoch": 2.02, "learning_rate": 8.27351214279797e-06, "loss": 0.2145, "step": 175 }, { "epoch": 2.04, "learning_rate": 8.49307723936858e-06, "loss": 0.1781, "step": 200 }, { "epoch": 2.04, "eval_loss": 0.306640625, "eval_runtime": 4087.5604, "eval_samples_per_second": 0.162, "eval_steps_per_second": 0.02, "eval_wer": 41.06580166821131, "step": 200 }, { "epoch": 2.07, "learning_rate": 8.686247975778677e-06, "loss": 0.1593, "step": 225 }, { "epoch": 3.01, "learning_rate": 8.858694625217149e-06, "loss": 0.1224, "step": 250 }, { "epoch": 3.04, "learning_rate": 9.014436199608479e-06, "loss": 0.1031, "step": 275 }, { "epoch": 3.06, "learning_rate": 9.156425255148058e-06, "loss": 0.0935, "step": 300 }, { "epoch": 3.06, "eval_loss": 0.290283203125, "eval_runtime": 4061.9776, "eval_samples_per_second": 0.163, "eval_steps_per_second": 0.02, "eval_wer": 35.644114921223355, "step": 300 }, { "epoch": 4.01, "learning_rate": 9.28689473531776e-06, "loss": 0.0784, "step": 325 }, { "epoch": 4.03, "learning_rate": 9.407574351377137e-06, "loss": 0.0624, "step": 350 }, { "epoch": 4.06, "learning_rate": 9.519831289296397e-06, "loss": 0.0557, "step": 375 }, { "epoch": 5.0, "learning_rate": 9.624764935335318e-06, "loss": 0.057, "step": 400 }, { "epoch": 5.0, "eval_loss": 0.328857421875, "eval_runtime": 4014.2783, "eval_samples_per_second": 0.165, "eval_steps_per_second": 0.021, "eval_wer": 36.61723818350324, "step": 400 }, { "epoch": 5.03, "learning_rate": 9.723272550712454e-06, "loss": 0.0364, "step": 425 }, { "epoch": 5.05, "learning_rate": 9.816095971633122e-06, "loss": 0.0388, "step": 450 }, { "epoch": 6.0, "learning_rate": 9.90385555539545e-06, "loss": 0.0392, "step": 475 }, { "epoch": 6.03, "learning_rate": 9.987075336738768e-06, "loss": 0.0285, "step": 500 }, { "epoch": 6.03, "eval_loss": 0.342529296875, "eval_runtime": 4051.7069, "eval_samples_per_second": 0.164, "eval_steps_per_second": 0.02, "eval_wer": 35.31974050046339, "step": 500 }, { "epoch": 6.05, "learning_rate": 9.600000000000001e-06, "loss": 0.0279, "step": 525 }, { "epoch": 6.08, "learning_rate": 9.100000000000001e-06, "loss": 0.0287, "step": 550 }, { "epoch": 7.02, "learning_rate": 8.6e-06, "loss": 0.0199, "step": 575 }, { "epoch": 7.05, "learning_rate": 8.1e-06, "loss": 0.0203, "step": 600 }, { "epoch": 7.05, "eval_loss": 0.36474609375, "eval_runtime": 4154.7446, "eval_samples_per_second": 0.16, "eval_steps_per_second": 0.02, "eval_wer": 34.38368860055607, "step": 600 }, { "epoch": 7.07, "learning_rate": 7.600000000000001e-06, "loss": 0.0184, "step": 625 }, { "epoch": 8.02, "learning_rate": 7.100000000000001e-06, "loss": 0.0117, "step": 650 }, { "epoch": 8.04, "learning_rate": 6.600000000000001e-06, "loss": 0.0098, "step": 675 }, { "epoch": 8.07, "learning_rate": 6.1e-06, "loss": 0.0103, "step": 700 }, { "epoch": 8.07, "eval_loss": 0.382568359375, "eval_runtime": 4054.0209, "eval_samples_per_second": 0.164, "eval_steps_per_second": 0.02, "eval_wer": 33.92029657089898, "step": 700 }, { "epoch": 8.07, "step": 700, "total_flos": 3.213687048481276e+18, "train_loss": 0.0, "train_runtime": 19.7929, "train_samples_per_second": 808.369, "train_steps_per_second": 50.523 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "total_flos": 3.213687048481276e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }