{ "best_metric": 17.70632072867789, "best_model_checkpoint": "/cosmos/home/sp-operator/ai/training/models/huggingface/scripts/../breeze-dsw-small-id/checkpoint-200", "epoch": 6.064, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.898977360288234e-06, "loss": 1.0704, "step": 25 }, { "epoch": 0.05, "learning_rate": 6.160712527409633e-06, "loss": 0.5908, "step": 50 }, { "epoch": 0.07, "learning_rate": 6.85912902234906e-06, "loss": 0.4936, "step": 75 }, { "epoch": 0.1, "learning_rate": 7.344547104469332e-06, "loss": 0.4199, "step": 100 }, { "epoch": 0.1, "eval_loss": 0.349365234375, "eval_runtime": 6608.5627, "eval_samples_per_second": 0.551, "eval_steps_per_second": 0.035, "eval_wer": 19.15999631980863, "step": 100 }, { "epoch": 0.12, "learning_rate": 7.716963756434345e-06, "loss": 0.3139, "step": 125 }, { "epoch": 0.15, "learning_rate": 8.019180844200955e-06, "loss": 0.2397, "step": 150 }, { "epoch": 1.02, "learning_rate": 8.27351214279797e-06, "loss": 0.2943, "step": 175 }, { "epoch": 1.04, "learning_rate": 8.49307723936858e-06, "loss": 0.282, "step": 200 }, { "epoch": 1.04, "eval_loss": 0.31591796875, "eval_runtime": 6606.4412, "eval_samples_per_second": 0.551, "eval_steps_per_second": 0.035, "eval_wer": 17.70632072867789, "step": 200 }, { "epoch": 1.07, "learning_rate": 8.686247975778677e-06, "loss": 0.3122, "step": 225 }, { "epoch": 1.09, "learning_rate": 8.858694625217149e-06, "loss": 0.2207, "step": 250 }, { "epoch": 1.12, "learning_rate": 9.014436199608479e-06, "loss": 0.1808, "step": 275 }, { "epoch": 1.14, "learning_rate": 9.156425255148058e-06, "loss": 0.1241, "step": 300 }, { "epoch": 1.14, "eval_loss": 0.3291015625, "eval_runtime": 6608.4009, "eval_samples_per_second": 0.551, "eval_steps_per_second": 0.035, "eval_wer": 18.598767135891066, "step": 300 }, { "epoch": 2.01, "learning_rate": 9.28689473531776e-06, "loss": 0.1342, "step": 325 }, { "epoch": 2.04, "learning_rate": 9.407574351377137e-06, "loss": 0.1435, "step": 350 }, { "epoch": 2.06, "learning_rate": 9.519831289296397e-06, "loss": 0.1539, "step": 375 }, { "epoch": 2.09, "learning_rate": 9.624764935335318e-06, "loss": 0.1232, "step": 400 }, { "epoch": 2.09, "eval_loss": 0.327880859375, "eval_runtime": 6854.1492, "eval_samples_per_second": 0.531, "eval_steps_per_second": 0.033, "eval_wer": 18.888582206274727, "step": 400 }, { "epoch": 2.11, "learning_rate": 9.723272550712454e-06, "loss": 0.1001, "step": 425 }, { "epoch": 2.14, "learning_rate": 9.816095971633122e-06, "loss": 0.066, "step": 450 }, { "epoch": 3.01, "learning_rate": 9.90385555539545e-06, "loss": 0.0587, "step": 475 }, { "epoch": 3.03, "learning_rate": 9.987075336738768e-06, "loss": 0.064, "step": 500 }, { "epoch": 3.03, "eval_loss": 0.3388671875, "eval_runtime": 6629.0714, "eval_samples_per_second": 0.549, "eval_steps_per_second": 0.034, "eval_wer": 17.91333149323765, "step": 500 }, { "epoch": 3.06, "learning_rate": 9.600000000000001e-06, "loss": 0.0723, "step": 525 }, { "epoch": 3.08, "learning_rate": 9.100000000000001e-06, "loss": 0.0575, "step": 550 }, { "epoch": 3.11, "learning_rate": 8.6e-06, "loss": 0.0444, "step": 575 }, { "epoch": 3.13, "learning_rate": 8.1e-06, "loss": 0.0305, "step": 600 }, { "epoch": 3.13, "eval_loss": 0.355712890625, "eval_runtime": 6595.7112, "eval_samples_per_second": 0.552, "eval_steps_per_second": 0.035, "eval_wer": 18.47916091636765, "step": 600 }, { "epoch": 4.0, "learning_rate": 7.600000000000001e-06, "loss": 0.0245, "step": 625 }, { "epoch": 4.03, "learning_rate": 7.100000000000001e-06, "loss": 0.0281, "step": 650 }, { "epoch": 4.05, "learning_rate": 6.600000000000001e-06, "loss": 0.0298, "step": 675 }, { "epoch": 4.08, "learning_rate": 6.1e-06, "loss": 0.0282, "step": 700 }, { "epoch": 4.08, "eval_loss": 0.362548828125, "eval_runtime": 6513.7808, "eval_samples_per_second": 0.559, "eval_steps_per_second": 0.035, "eval_wer": 18.055938908823258, "step": 700 }, { "epoch": 4.1, "learning_rate": 5.600000000000001e-06, "loss": 0.0215, "step": 725 }, { "epoch": 4.13, "learning_rate": 5.1e-06, "loss": 0.0138, "step": 750 }, { "epoch": 4.15, "learning_rate": 4.600000000000001e-06, "loss": 0.011, "step": 775 }, { "epoch": 5.02, "learning_rate": 4.1e-06, "loss": 0.0117, "step": 800 }, { "epoch": 5.02, "eval_loss": 0.369873046875, "eval_runtime": 6724.4574, "eval_samples_per_second": 0.542, "eval_steps_per_second": 0.034, "eval_wer": 18.29055110865765, "step": 800 }, { "epoch": 5.04, "learning_rate": 3.6000000000000003e-06, "loss": 0.013, "step": 825 }, { "epoch": 5.07, "learning_rate": 3.1000000000000004e-06, "loss": 0.0142, "step": 850 }, { "epoch": 5.09, "learning_rate": 2.6e-06, "loss": 0.0094, "step": 875 }, { "epoch": 5.12, "learning_rate": 2.1000000000000002e-06, "loss": 0.0079, "step": 900 }, { "epoch": 5.12, "eval_loss": 0.37939453125, "eval_runtime": 6640.5093, "eval_samples_per_second": 0.548, "eval_steps_per_second": 0.034, "eval_wer": 18.359554696844235, "step": 900 }, { "epoch": 5.14, "learning_rate": 1.6000000000000001e-06, "loss": 0.0064, "step": 925 }, { "epoch": 6.01, "learning_rate": 1.1e-06, "loss": 0.0061, "step": 950 }, { "epoch": 6.04, "learning_rate": 6.000000000000001e-07, "loss": 0.0073, "step": 975 }, { "epoch": 6.06, "learning_rate": 1.0000000000000001e-07, "loss": 0.0081, "step": 1000 }, { "epoch": 6.06, "eval_loss": 0.382568359375, "eval_runtime": 6601.76, "eval_samples_per_second": 0.552, "eval_steps_per_second": 0.035, "eval_wer": 18.29055110865765, "step": 1000 }, { "epoch": 6.06, "step": 1000, "total_flos": 9.194908120353079e+18, "train_loss": 0.14578299713134765, "train_runtime": 145825.9025, "train_samples_per_second": 0.219, "train_steps_per_second": 0.007 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "total_flos": 9.194908120353079e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }