{ "best_metric": 28.50294181738941, "best_model_checkpoint": "/cosmos/home/sp-operator/ai/training/models/huggingface/scripts/../breeze-listen-dsw-base-hi/checkpoint-800", "epoch": 8.08, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.973833272194737e-06, "loss": 1.392, "step": 25 }, { "epoch": 0.05, "learning_rate": 6.195318418690893e-06, "loss": 0.8763, "step": 50 }, { "epoch": 0.07, "learning_rate": 6.881634451095711e-06, "loss": 0.6506, "step": 75 }, { "epoch": 0.1, "learning_rate": 7.361221988663844e-06, "loss": 0.553, "step": 100 }, { "epoch": 0.1, "eval_loss": 0.64453125, "eval_runtime": 3247.321, "eval_samples_per_second": 0.867, "eval_steps_per_second": 0.054, "eval_wer": 39.498801481804314, "step": 100 }, { "epoch": 1.01, "learning_rate": 7.730207550743121e-06, "loss": 0.5096, "step": 125 }, { "epoch": 1.03, "learning_rate": 8.03016458599496e-06, "loss": 0.4697, "step": 150 }, { "epoch": 1.06, "learning_rate": 8.282894746203441e-06, "loss": 0.4251, "step": 175 }, { "epoch": 1.08, "learning_rate": 8.501266121799902e-06, "loss": 0.3683, "step": 200 }, { "epoch": 1.08, "eval_loss": 0.5341796875, "eval_runtime": 3136.1352, "eval_samples_per_second": 0.898, "eval_steps_per_second": 0.056, "eval_wer": 33.0660274569623, "step": 200 }, { "epoch": 1.11, "learning_rate": 8.693512601774437e-06, "loss": 0.3335, "step": 225 }, { "epoch": 2.02, "learning_rate": 8.865222471593567e-06, "loss": 0.348, "step": 250 }, { "epoch": 2.04, "learning_rate": 9.020362953730323e-06, "loss": 0.3151, "step": 275 }, { "epoch": 2.07, "learning_rate": 9.161852281961698e-06, "loss": 0.2855, "step": 300 }, { "epoch": 2.07, "eval_loss": 0.498291015625, "eval_runtime": 3150.996, "eval_samples_per_second": 0.894, "eval_steps_per_second": 0.056, "eval_wer": 31.42514709086947, "step": 300 }, { "epoch": 2.1, "learning_rate": 9.29189975311636e-06, "loss": 0.2602, "step": 325 }, { "epoch": 3.0, "learning_rate": 9.412218256259678e-06, "loss": 0.2482, "step": 350 }, { "epoch": 3.03, "learning_rate": 9.524162683365145e-06, "loss": 0.2578, "step": 375 }, { "epoch": 3.06, "learning_rate": 9.62882322733502e-06, "loss": 0.2233, "step": 400 }, { "epoch": 3.06, "eval_loss": 0.48681640625, "eval_runtime": 3166.0648, "eval_samples_per_second": 0.889, "eval_steps_per_second": 0.056, "eval_wer": 30.154717803443017, "step": 400 }, { "epoch": 3.08, "learning_rate": 9.727090137141168e-06, "loss": 0.2072, "step": 425 }, { "epoch": 3.1, "learning_rate": 9.819699807237934e-06, "loss": 0.1884, "step": 450 }, { "epoch": 4.01, "learning_rate": 9.907268307310855e-06, "loss": 0.1854, "step": 475 }, { "epoch": 4.04, "learning_rate": 9.990316248055788e-06, "loss": 0.1832, "step": 500 }, { "epoch": 4.04, "eval_loss": 0.478271484375, "eval_runtime": 3160.7276, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.056, "eval_wer": 28.954020483765525, "step": 500 }, { "epoch": 4.07, "learning_rate": 9.58e-06, "loss": 0.1714, "step": 525 }, { "epoch": 4.09, "learning_rate": 9.080000000000001e-06, "loss": 0.1424, "step": 550 }, { "epoch": 4.12, "learning_rate": 8.580000000000001e-06, "loss": 0.133, "step": 575 }, { "epoch": 5.03, "learning_rate": 8.08e-06, "loss": 0.1431, "step": 600 }, { "epoch": 5.03, "eval_loss": 0.490234375, "eval_runtime": 3169.5843, "eval_samples_per_second": 0.888, "eval_steps_per_second": 0.056, "eval_wer": 29.182828502941817, "step": 600 }, { "epoch": 5.05, "learning_rate": 7.58e-06, "loss": 0.1226, "step": 625 }, { "epoch": 5.08, "learning_rate": 7.08e-06, "loss": 0.1122, "step": 650 }, { "epoch": 5.1, "learning_rate": 6.5800000000000005e-06, "loss": 0.1021, "step": 675 }, { "epoch": 6.01, "learning_rate": 6.08e-06, "loss": 0.0972, "step": 700 }, { "epoch": 6.01, "eval_loss": 0.5048828125, "eval_runtime": 3143.5242, "eval_samples_per_second": 0.896, "eval_steps_per_second": 0.056, "eval_wer": 28.63804750490303, "step": 700 }, { "epoch": 6.04, "learning_rate": 5.580000000000001e-06, "loss": 0.0955, "step": 725 }, { "epoch": 6.06, "learning_rate": 5.0800000000000005e-06, "loss": 0.0906, "step": 750 }, { "epoch": 6.08, "learning_rate": 4.58e-06, "loss": 0.0807, "step": 775 }, { "epoch": 6.11, "learning_rate": 4.08e-06, "loss": 0.0715, "step": 800 }, { "epoch": 6.11, "eval_loss": 0.5205078125, "eval_runtime": 3154.678, "eval_samples_per_second": 0.893, "eval_steps_per_second": 0.056, "eval_wer": 28.50294181738941, "step": 800 }, { "epoch": 7.02, "learning_rate": 3.58e-06, "loss": 0.0744, "step": 825 }, { "epoch": 7.04, "learning_rate": 3.08e-06, "loss": 0.0692, "step": 850 }, { "epoch": 7.07, "learning_rate": 2.5800000000000003e-06, "loss": 0.0661, "step": 875 }, { "epoch": 7.09, "learning_rate": 2.08e-06, "loss": 0.0579, "step": 900 }, { "epoch": 7.09, "eval_loss": 0.53662109375, "eval_runtime": 3161.7785, "eval_samples_per_second": 0.891, "eval_steps_per_second": 0.056, "eval_wer": 28.94748311178906, "step": 900 }, { "epoch": 8.01, "learning_rate": 1.5800000000000001e-06, "loss": 0.0563, "step": 925 }, { "epoch": 8.03, "learning_rate": 1.08e-06, "loss": 0.0588, "step": 950 }, { "epoch": 8.05, "learning_rate": 5.800000000000001e-07, "loss": 0.0541, "step": 975 }, { "epoch": 8.08, "learning_rate": 8e-08, "loss": 0.0519, "step": 1000 }, { "epoch": 8.08, "eval_loss": 0.5380859375, "eval_runtime": 3154.4366, "eval_samples_per_second": 0.893, "eval_steps_per_second": 0.056, "eval_wer": 28.7949444323382, "step": 1000 }, { "epoch": 8.08, "step": 1000, "total_flos": 2.0641045545295544e+18, "train_loss": 0.2532890625, "train_runtime": 58625.9301, "train_samples_per_second": 0.546, "train_steps_per_second": 0.017 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "total_flos": 2.0641045545295544e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }