{ "best_metric": 1.0742337703704834, "best_model_checkpoint": "./checkpoint-200", "epoch": 71.42857142857143, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.43, "learning_rate": 1.4e-07, "loss": 3.2633, "step": 10 }, { "epoch": 2.86, "learning_rate": 3.4000000000000003e-07, "loss": 3.235, "step": 20 }, { "epoch": 4.29, "learning_rate": 5.4e-07, "loss": 3.164, "step": 30 }, { "epoch": 5.71, "learning_rate": 7.4e-07, "loss": 2.9792, "step": 40 }, { "epoch": 7.14, "learning_rate": 9.400000000000001e-07, "loss": 2.7533, "step": 50 }, { "epoch": 8.57, "learning_rate": 1.14e-06, "loss": 2.5502, "step": 60 }, { "epoch": 10.0, "learning_rate": 1.34e-06, "loss": 2.3845, "step": 70 }, { "epoch": 11.43, "learning_rate": 1.54e-06, "loss": 2.161, "step": 80 }, { "epoch": 12.86, "learning_rate": 1.74e-06, "loss": 1.9855, "step": 90 }, { "epoch": 14.29, "learning_rate": 1.94e-06, "loss": 1.8225, "step": 100 }, { "epoch": 14.29, "eval_loss": 1.760784387588501, "eval_runtime": 425.4331, "eval_samples_per_second": 1.203, "eval_steps_per_second": 0.075, "eval_wer": 105.31930992736078, "step": 100 }, { "epoch": 15.71, "learning_rate": 2.1400000000000003e-06, "loss": 1.6373, "step": 110 }, { "epoch": 17.14, "learning_rate": 2.3400000000000005e-06, "loss": 1.4756, "step": 120 }, { "epoch": 18.57, "learning_rate": 2.5400000000000002e-06, "loss": 1.3534, "step": 130 }, { "epoch": 20.0, "learning_rate": 2.7400000000000004e-06, "loss": 1.2259, "step": 140 }, { "epoch": 21.43, "learning_rate": 2.9400000000000002e-06, "loss": 1.1304, "step": 150 }, { "epoch": 22.86, "learning_rate": 3.1400000000000004e-06, "loss": 1.0195, "step": 160 }, { "epoch": 24.29, "learning_rate": 3.3400000000000006e-06, "loss": 0.9428, "step": 170 }, { "epoch": 25.71, "learning_rate": 3.54e-06, "loss": 0.8721, "step": 180 }, { "epoch": 27.14, "learning_rate": 3.74e-06, "loss": 0.7904, "step": 190 }, { "epoch": 28.57, "learning_rate": 3.94e-06, "loss": 0.7281, "step": 200 }, { "epoch": 28.57, "eval_loss": 1.0742337703704834, "eval_runtime": 174.027, "eval_samples_per_second": 2.942, "eval_steps_per_second": 0.184, "eval_wer": 69.61259079903148, "step": 200 }, { "epoch": 30.0, "learning_rate": 4.14e-06, "loss": 0.6704, "step": 210 }, { "epoch": 31.43, "learning_rate": 4.34e-06, "loss": 0.6118, "step": 220 }, { "epoch": 32.86, "learning_rate": 4.540000000000001e-06, "loss": 0.5494, "step": 230 }, { "epoch": 34.29, "learning_rate": 4.74e-06, "loss": 0.5024, "step": 240 }, { "epoch": 35.71, "learning_rate": 4.94e-06, "loss": 0.4511, "step": 250 }, { "epoch": 37.14, "learning_rate": 5.140000000000001e-06, "loss": 0.4026, "step": 260 }, { "epoch": 38.57, "learning_rate": 5.3400000000000005e-06, "loss": 0.3507, "step": 270 }, { "epoch": 40.0, "learning_rate": 5.540000000000001e-06, "loss": 0.3108, "step": 280 }, { "epoch": 41.43, "learning_rate": 5.74e-06, "loss": 0.2687, "step": 290 }, { "epoch": 42.86, "learning_rate": 5.94e-06, "loss": 0.2329, "step": 300 }, { "epoch": 42.86, "eval_loss": 1.1192402839660645, "eval_runtime": 175.174, "eval_samples_per_second": 2.923, "eval_steps_per_second": 0.183, "eval_wer": 67.02481840193705, "step": 300 }, { "epoch": 44.29, "learning_rate": 6.1400000000000005e-06, "loss": 0.1932, "step": 310 }, { "epoch": 45.71, "learning_rate": 6.34e-06, "loss": 0.1642, "step": 320 }, { "epoch": 47.14, "learning_rate": 6.540000000000001e-06, "loss": 0.134, "step": 330 }, { "epoch": 48.57, "learning_rate": 6.740000000000001e-06, "loss": 0.1047, "step": 340 }, { "epoch": 50.0, "learning_rate": 6.9400000000000005e-06, "loss": 0.0869, "step": 350 }, { "epoch": 51.43, "learning_rate": 7.14e-06, "loss": 0.0648, "step": 360 }, { "epoch": 52.86, "learning_rate": 7.340000000000001e-06, "loss": 0.0517, "step": 370 }, { "epoch": 54.29, "learning_rate": 7.540000000000001e-06, "loss": 0.0391, "step": 380 }, { "epoch": 55.71, "learning_rate": 7.74e-06, "loss": 0.0311, "step": 390 }, { "epoch": 57.14, "learning_rate": 7.94e-06, "loss": 0.0247, "step": 400 }, { "epoch": 57.14, "eval_loss": 1.3494515419006348, "eval_runtime": 166.7411, "eval_samples_per_second": 3.071, "eval_steps_per_second": 0.192, "eval_wer": 66.37409200968523, "step": 400 }, { "epoch": 58.57, "learning_rate": 8.14e-06, "loss": 0.0195, "step": 410 }, { "epoch": 60.0, "learning_rate": 8.34e-06, "loss": 0.0157, "step": 420 }, { "epoch": 61.43, "learning_rate": 8.540000000000001e-06, "loss": 0.0129, "step": 430 }, { "epoch": 62.86, "learning_rate": 8.740000000000001e-06, "loss": 0.0112, "step": 440 }, { "epoch": 64.29, "learning_rate": 8.94e-06, "loss": 0.0097, "step": 450 }, { "epoch": 65.71, "learning_rate": 9.14e-06, "loss": 0.0085, "step": 460 }, { "epoch": 67.14, "learning_rate": 9.340000000000002e-06, "loss": 0.0075, "step": 470 }, { "epoch": 68.57, "learning_rate": 9.54e-06, "loss": 0.0069, "step": 480 }, { "epoch": 70.0, "learning_rate": 9.74e-06, "loss": 0.0062, "step": 490 }, { "epoch": 71.43, "learning_rate": 9.940000000000001e-06, "loss": 0.0057, "step": 500 }, { "epoch": 71.43, "eval_loss": 1.5055396556854248, "eval_runtime": 189.4887, "eval_samples_per_second": 2.702, "eval_steps_per_second": 0.169, "eval_wer": 67.28964891041163, "step": 500 }, { "epoch": 71.43, "step": 500, "total_flos": 1.99723386175488e+18, "train_loss": 0.8564610816463828, "train_runtime": 2065.7487, "train_samples_per_second": 15.491, "train_steps_per_second": 0.242 } ], "max_steps": 500, "num_train_epochs": 72, "total_flos": 1.99723386175488e+18, "trial_name": null, "trial_params": null }