{ "best_metric": 17.77988614800759, "best_model_checkpoint": "./checkpoint-400", "epoch": 16.056, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.0999999999999997e-07, "loss": 1.3438, "step": 25 }, { "epoch": 0.05, "learning_rate": 4.5e-07, "loss": 0.8978, "step": 50 }, { "epoch": 1.02, "learning_rate": 7e-07, "loss": 0.4779, "step": 75 }, { "epoch": 1.04, "learning_rate": 9.499999999999999e-07, "loss": 0.3079, "step": 100 }, { "epoch": 2.01, "learning_rate": 9.766666666666667e-07, "loss": 0.2482, "step": 125 }, { "epoch": 2.03, "learning_rate": 9.488888888888888e-07, "loss": 0.2535, "step": 150 }, { "epoch": 2.06, "learning_rate": 9.211111111111111e-07, "loss": 0.1647, "step": 175 }, { "epoch": 3.02, "learning_rate": 8.933333333333333e-07, "loss": 0.2124, "step": 200 }, { "epoch": 3.02, "eval_loss": 0.24853515625, "eval_runtime": 632.8323, "eval_samples_per_second": 3.505, "eval_steps_per_second": 0.22, "eval_wer": 18.40607210626186, "step": 200 }, { "epoch": 3.05, "learning_rate": 8.655555555555555e-07, "loss": 0.1383, "step": 225 }, { "epoch": 4.01, "learning_rate": 8.366666666666667e-07, "loss": 0.153, "step": 250 }, { "epoch": 4.04, "learning_rate": 8.088888888888888e-07, "loss": 0.1306, "step": 275 }, { "epoch": 5.0, "learning_rate": 7.81111111111111e-07, "loss": 0.1085, "step": 300 }, { "epoch": 5.03, "learning_rate": 7.533333333333332e-07, "loss": 0.1213, "step": 325 }, { "epoch": 5.05, "learning_rate": 7.255555555555555e-07, "loss": 0.0798, "step": 350 }, { "epoch": 6.02, "learning_rate": 6.966666666666666e-07, "loss": 0.1017, "step": 375 }, { "epoch": 6.05, "learning_rate": 6.688888888888889e-07, "loss": 0.0704, "step": 400 }, { "epoch": 6.05, "eval_loss": 0.263427734375, "eval_runtime": 620.9514, "eval_samples_per_second": 3.572, "eval_steps_per_second": 0.224, "eval_wer": 17.77988614800759, "step": 400 }, { "epoch": 7.01, "learning_rate": 6.411111111111111e-07, "loss": 0.0748, "step": 425 }, { "epoch": 7.04, "learning_rate": 6.133333333333332e-07, "loss": 0.0669, "step": 450 }, { "epoch": 8.0, "learning_rate": 5.844444444444444e-07, "loss": 0.0546, "step": 475 }, { "epoch": 8.03, "learning_rate": 5.566666666666666e-07, "loss": 0.0626, "step": 500 }, { "epoch": 8.05, "learning_rate": 5.288888888888888e-07, "loss": 0.0431, "step": 525 }, { "epoch": 9.02, "learning_rate": 5.011111111111111e-07, "loss": 0.0516, "step": 550 }, { "epoch": 9.04, "learning_rate": 4.733333333333333e-07, "loss": 0.0386, "step": 575 }, { "epoch": 10.01, "learning_rate": 4.444444444444444e-07, "loss": 0.0379, "step": 600 }, { "epoch": 10.01, "eval_loss": 0.310302734375, "eval_runtime": 621.7077, "eval_samples_per_second": 3.568, "eval_steps_per_second": 0.224, "eval_wer": 17.817836812144215, "step": 600 }, { "epoch": 10.04, "learning_rate": 4.1666666666666667e-07, "loss": 0.0367, "step": 625 }, { "epoch": 11.0, "learning_rate": 3.888888888888889e-07, "loss": 0.0271, "step": 650 }, { "epoch": 11.03, "learning_rate": 3.6111111111111107e-07, "loss": 0.0334, "step": 675 }, { "epoch": 11.05, "learning_rate": 3.333333333333333e-07, "loss": 0.0247, "step": 700 }, { "epoch": 12.02, "learning_rate": 3.0444444444444445e-07, "loss": 0.0277, "step": 725 }, { "epoch": 12.04, "learning_rate": 2.766666666666667e-07, "loss": 0.0229, "step": 750 }, { "epoch": 13.01, "learning_rate": 2.4888888888888886e-07, "loss": 0.0222, "step": 775 }, { "epoch": 13.03, "learning_rate": 2.2111111111111111e-07, "loss": 0.0228, "step": 800 }, { "epoch": 13.03, "eval_loss": 0.35546875, "eval_runtime": 628.2239, "eval_samples_per_second": 3.531, "eval_steps_per_second": 0.221, "eval_wer": 18.40607210626186, "step": 800 }, { "epoch": 13.06, "learning_rate": 1.9333333333333332e-07, "loss": 0.0168, "step": 825 }, { "epoch": 14.02, "learning_rate": 1.6444444444444444e-07, "loss": 0.0222, "step": 850 }, { "epoch": 14.05, "learning_rate": 1.3666666666666665e-07, "loss": 0.0166, "step": 875 }, { "epoch": 15.02, "learning_rate": 1.0888888888888888e-07, "loss": 0.0189, "step": 900 }, { "epoch": 15.04, "learning_rate": 8.11111111111111e-08, "loss": 0.0168, "step": 925 }, { "epoch": 16.01, "learning_rate": 5.2222222222222224e-08, "loss": 0.0158, "step": 950 }, { "epoch": 16.03, "learning_rate": 2.4444444444444444e-08, "loss": 0.0185, "step": 975 }, { "epoch": 16.06, "learning_rate": 0.0, "loss": 0.0139, "step": 1000 }, { "epoch": 16.06, "eval_loss": 0.373291015625, "eval_runtime": 625.4198, "eval_samples_per_second": 3.546, "eval_steps_per_second": 0.222, "eval_wer": 18.453510436432637, "step": 1000 }, { "epoch": 16.06, "step": 1000, "total_flos": 1.3683412531969996e+20, "train_loss": 0.13993059158325194, "train_runtime": 17951.1904, "train_samples_per_second": 3.565, "train_steps_per_second": 0.056 } ], "max_steps": 1000, "num_train_epochs": 9223372036854775807, "total_flos": 1.3683412531969996e+20, "trial_name": null, "trial_params": null }