{ "best_metric": 56.651029055690074, "best_model_checkpoint": "./checkpoint-600", "epoch": 22.22222222222222, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.7, "learning_rate": 1.9200000000000003e-06, "loss": 2.1183, "step": 100 }, { "epoch": 3.7, "eval_loss": 1.3170489072799683, "eval_runtime": 435.5063, "eval_samples_per_second": 1.176, "eval_steps_per_second": 0.147, "eval_wer": 76.95217917675545, "step": 100 }, { "epoch": 7.41, "learning_rate": 3.920000000000001e-06, "loss": 0.8565, "step": 200 }, { "epoch": 7.41, "eval_loss": 0.9366902709007263, "eval_runtime": 473.9252, "eval_samples_per_second": 1.08, "eval_steps_per_second": 0.135, "eval_wer": 61.99303874092009, "step": 200 }, { "epoch": 7.78, "learning_rate": 4.12e-06, "loss": 0.5824, "step": 210 }, { "epoch": 8.15, "learning_rate": 4.32e-06, "loss": 0.5195, "step": 220 }, { "epoch": 8.52, "learning_rate": 4.520000000000001e-06, "loss": 0.4472, "step": 230 }, { "epoch": 8.89, "learning_rate": 4.7200000000000005e-06, "loss": 0.4615, "step": 240 }, { "epoch": 9.26, "learning_rate": 4.92e-06, "loss": 0.4068, "step": 250 }, { "epoch": 9.63, "learning_rate": 5.12e-06, "loss": 0.3408, "step": 260 }, { "epoch": 10.0, "learning_rate": 5.320000000000001e-06, "loss": 0.3718, "step": 270 }, { "epoch": 10.37, "learning_rate": 5.5200000000000005e-06, "loss": 0.2749, "step": 280 }, { "epoch": 10.74, "learning_rate": 5.72e-06, "loss": 0.2625, "step": 290 }, { "epoch": 11.11, "learning_rate": 5.92e-06, "loss": 0.2246, "step": 300 }, { "epoch": 11.11, "eval_loss": 0.9642460942268372, "eval_runtime": 439.7231, "eval_samples_per_second": 1.164, "eval_steps_per_second": 0.146, "eval_wer": 58.830205811138015, "step": 300 }, { "epoch": 11.48, "learning_rate": 6.120000000000001e-06, "loss": 0.1991, "step": 310 }, { "epoch": 11.85, "learning_rate": 6.3200000000000005e-06, "loss": 0.1798, "step": 320 }, { "epoch": 12.22, "learning_rate": 6.520000000000001e-06, "loss": 0.1426, "step": 330 }, { "epoch": 12.59, "learning_rate": 6.720000000000001e-06, "loss": 0.125, "step": 340 }, { "epoch": 12.96, "learning_rate": 6.92e-06, "loss": 0.1177, "step": 350 }, { "epoch": 13.33, "learning_rate": 7.1200000000000004e-06, "loss": 0.0874, "step": 360 }, { "epoch": 13.7, "learning_rate": 7.32e-06, "loss": 0.0852, "step": 370 }, { "epoch": 14.07, "learning_rate": 7.520000000000001e-06, "loss": 0.0666, "step": 380 }, { "epoch": 14.44, "learning_rate": 7.72e-06, "loss": 0.0531, "step": 390 }, { "epoch": 14.81, "learning_rate": 7.92e-06, "loss": 0.054, "step": 400 }, { "epoch": 14.81, "eval_loss": 1.0876343250274658, "eval_runtime": 428.9567, "eval_samples_per_second": 1.194, "eval_steps_per_second": 0.149, "eval_wer": 57.99031476997578, "step": 400 }, { "epoch": 15.19, "learning_rate": 8.120000000000002e-06, "loss": 0.0465, "step": 410 }, { "epoch": 15.56, "learning_rate": 8.32e-06, "loss": 0.0392, "step": 420 }, { "epoch": 15.93, "learning_rate": 8.52e-06, "loss": 0.0354, "step": 430 }, { "epoch": 16.3, "learning_rate": 8.720000000000001e-06, "loss": 0.0296, "step": 440 }, { "epoch": 16.67, "learning_rate": 8.920000000000001e-06, "loss": 0.0255, "step": 450 }, { "epoch": 17.04, "learning_rate": 9.12e-06, "loss": 0.0274, "step": 460 }, { "epoch": 17.41, "learning_rate": 9.32e-06, "loss": 0.0216, "step": 470 }, { "epoch": 17.78, "learning_rate": 9.52e-06, "loss": 0.022, "step": 480 }, { "epoch": 18.15, "learning_rate": 9.72e-06, "loss": 0.0219, "step": 490 }, { "epoch": 18.52, "learning_rate": 9.920000000000002e-06, "loss": 0.0159, "step": 500 }, { "epoch": 18.52, "eval_loss": 1.1797882318496704, "eval_runtime": 450.9533, "eval_samples_per_second": 1.135, "eval_steps_per_second": 0.142, "eval_wer": 57.87681598062954, "step": 500 }, { "epoch": 18.89, "learning_rate": 9.4e-06, "loss": 0.019, "step": 510 }, { "epoch": 19.26, "learning_rate": 8.400000000000001e-06, "loss": 0.021, "step": 520 }, { "epoch": 19.63, "learning_rate": 7.4e-06, "loss": 0.0139, "step": 530 }, { "epoch": 20.0, "learning_rate": 6.4000000000000006e-06, "loss": 0.013, "step": 540 }, { "epoch": 20.37, "learning_rate": 5.400000000000001e-06, "loss": 0.007, "step": 550 }, { "epoch": 20.74, "learning_rate": 4.4e-06, "loss": 0.0082, "step": 560 }, { "epoch": 21.11, "learning_rate": 3.4000000000000005e-06, "loss": 0.009, "step": 570 }, { "epoch": 21.48, "learning_rate": 2.4000000000000003e-06, "loss": 0.0045, "step": 580 }, { "epoch": 21.85, "learning_rate": 1.4000000000000001e-06, "loss": 0.0049, "step": 590 }, { "epoch": 22.22, "learning_rate": 4.0000000000000003e-07, "loss": 0.0045, "step": 600 }, { "epoch": 22.22, "eval_loss": 1.2309296131134033, "eval_runtime": 449.2432, "eval_samples_per_second": 1.14, "eval_steps_per_second": 0.142, "eval_wer": 56.651029055690074, "step": 600 }, { "epoch": 22.22, "step": 600, "total_flos": 2.76407096426496e+18, "train_loss": 0.001749273296445608, "train_runtime": 576.5843, "train_samples_per_second": 16.65, "train_steps_per_second": 1.041 } ], "max_steps": 600, "num_train_epochs": 23, "total_flos": 2.76407096426496e+18, "trial_name": null, "trial_params": null }