{ "best_metric": 45.243352654338025, "best_model_checkpoint": "/cosmos/home/sp-operator/ai/training/models/huggingface/scripts/../breeze-dsw-tiny-id/checkpoint-500", "epoch": 3.032, "eval_steps": 100, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.898977360288234e-06, "loss": 1.7847, "step": 25 }, { "epoch": 0.05, "learning_rate": 6.160712527409633e-06, "loss": 1.3566, "step": 50 }, { "epoch": 0.07, "learning_rate": 6.85912902234906e-06, "loss": 1.1407, "step": 75 }, { "epoch": 0.1, "learning_rate": 7.344547104469332e-06, "loss": 0.99, "step": 100 }, { "epoch": 0.1, "eval_loss": 0.8486328125, "eval_runtime": 1294.216, "eval_samples_per_second": 2.814, "eval_steps_per_second": 0.176, "eval_wer": 54.17241696568221, "step": 100 }, { "epoch": 0.12, "learning_rate": 7.716963756434345e-06, "loss": 0.765, "step": 125 }, { "epoch": 0.15, "learning_rate": 8.019180844200955e-06, "loss": 0.5849, "step": 150 }, { "epoch": 1.02, "learning_rate": 8.27351214279797e-06, "loss": 0.7834, "step": 175 }, { "epoch": 1.04, "learning_rate": 8.49307723936858e-06, "loss": 0.7896, "step": 200 }, { "epoch": 1.04, "eval_loss": 0.7578125, "eval_runtime": 1161.7981, "eval_samples_per_second": 3.135, "eval_steps_per_second": 0.196, "eval_wer": 48.339313644309506, "step": 200 }, { "epoch": 1.07, "learning_rate": 8.686247975778677e-06, "loss": 0.8413, "step": 225 }, { "epoch": 1.09, "learning_rate": 8.858694625217149e-06, "loss": 0.68, "step": 250 }, { "epoch": 1.12, "learning_rate": 9.014436199608479e-06, "loss": 0.5851, "step": 275 }, { "epoch": 1.14, "learning_rate": 9.156425255148058e-06, "loss": 0.4164, "step": 300 }, { "epoch": 1.14, "eval_loss": 0.73876953125, "eval_runtime": 1191.6636, "eval_samples_per_second": 3.056, "eval_steps_per_second": 0.191, "eval_wer": 49.25936148679732, "step": 300 }, { "epoch": 2.01, "learning_rate": 9.28689473531776e-06, "loss": 0.5102, "step": 325 }, { "epoch": 2.04, "learning_rate": 9.407574351377137e-06, "loss": 0.602, "step": 350 }, { "epoch": 2.06, "learning_rate": 9.519831289296397e-06, "loss": 0.6399, "step": 375 }, { "epoch": 2.09, "learning_rate": 9.624764935335318e-06, "loss": 0.5456, "step": 400 }, { "epoch": 2.09, "eval_loss": 0.7177734375, "eval_runtime": 1169.8926, "eval_samples_per_second": 3.113, "eval_steps_per_second": 0.195, "eval_wer": 46.126598583126324, "step": 400 }, { "epoch": 2.11, "learning_rate": 9.723272550712454e-06, "loss": 0.48, "step": 425 }, { "epoch": 2.14, "learning_rate": 9.816095971633122e-06, "loss": 0.3325, "step": 450 }, { "epoch": 3.01, "learning_rate": 9.90385555539545e-06, "loss": 0.3389, "step": 475 }, { "epoch": 3.03, "learning_rate": 9.987075336738768e-06, "loss": 0.476, "step": 500 }, { "epoch": 3.03, "eval_loss": 0.7109375, "eval_runtime": 1126.9534, "eval_samples_per_second": 3.232, "eval_steps_per_second": 0.202, "eval_wer": 45.243352654338025, "step": 500 }, { "epoch": 3.03, "step": 500, "total_flos": 3.922034448169697e+17, "train_loss": 0.0, "train_runtime": 66.1004, "train_samples_per_second": 968.225, "train_steps_per_second": 30.257 } ], "logging_steps": 25, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "total_flos": 3.922034448169697e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }