{ "best_metric": 60.18099547511312, "best_model_checkpoint": "./whisper-small-bn/checkpoint-1000", "epoch": 3.3333333333333335, "eval_steps": 1000, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "grad_norm": 27.802248001098633, "learning_rate": 3.2e-07, "loss": 2.2811, "step": 20 }, { "epoch": 0.13, "grad_norm": 18.253421783447266, "learning_rate": 7.2e-07, "loss": 2.1964, "step": 40 }, { "epoch": 0.2, "grad_norm": 13.585515975952148, "learning_rate": 1.12e-06, "loss": 1.8635, "step": 60 }, { "epoch": 0.27, "grad_norm": 22.400362014770508, "learning_rate": 1.52e-06, "loss": 1.703, "step": 80 }, { "epoch": 0.33, "grad_norm": 13.77535343170166, "learning_rate": 1.9200000000000003e-06, "loss": 1.5336, "step": 100 }, { "epoch": 0.4, "grad_norm": 14.989215850830078, "learning_rate": 2.3200000000000002e-06, "loss": 1.4044, "step": 120 }, { "epoch": 0.47, "grad_norm": 22.01858139038086, "learning_rate": 2.7200000000000002e-06, "loss": 1.2402, "step": 140 }, { "epoch": 0.53, "grad_norm": 16.482311248779297, "learning_rate": 3.12e-06, "loss": 1.0922, "step": 160 }, { "epoch": 0.6, "grad_norm": 17.529874801635742, "learning_rate": 3.52e-06, "loss": 0.9583, "step": 180 }, { "epoch": 0.67, "grad_norm": 18.252761840820312, "learning_rate": 3.920000000000001e-06, "loss": 0.8783, "step": 200 }, { "epoch": 0.73, "grad_norm": 16.48067283630371, "learning_rate": 4.32e-06, "loss": 0.7643, "step": 220 }, { "epoch": 0.8, "grad_norm": 15.062904357910156, "learning_rate": 4.7200000000000005e-06, "loss": 0.6991, "step": 240 }, { "epoch": 0.87, "grad_norm": 14.231403350830078, "learning_rate": 5.12e-06, "loss": 0.6594, "step": 260 }, { "epoch": 0.93, "grad_norm": 18.425230026245117, "learning_rate": 5.5200000000000005e-06, "loss": 0.5724, "step": 280 }, { "epoch": 1.0, "grad_norm": 11.786844253540039, "learning_rate": 5.92e-06, "loss": 0.5687, "step": 300 }, { "epoch": 1.07, "grad_norm": 15.346015930175781, "learning_rate": 6.3200000000000005e-06, "loss": 0.4696, "step": 320 }, { "epoch": 1.13, "grad_norm": 7.936849594116211, "learning_rate": 6.720000000000001e-06, "loss": 0.4569, "step": 340 }, { "epoch": 1.2, "grad_norm": 13.894779205322266, "learning_rate": 7.1200000000000004e-06, "loss": 0.4089, "step": 360 }, { "epoch": 1.27, "grad_norm": 11.665081977844238, "learning_rate": 7.520000000000001e-06, "loss": 0.4168, "step": 380 }, { "epoch": 1.33, "grad_norm": 16.7696475982666, "learning_rate": 7.92e-06, "loss": 0.4433, "step": 400 }, { "epoch": 1.4, "grad_norm": 15.60254192352295, "learning_rate": 8.32e-06, "loss": 0.4013, "step": 420 }, { "epoch": 1.47, "grad_norm": 13.72472858428955, "learning_rate": 8.720000000000001e-06, "loss": 0.3903, "step": 440 }, { "epoch": 1.53, "grad_norm": 10.506444931030273, "learning_rate": 9.12e-06, "loss": 0.3581, "step": 460 }, { "epoch": 1.6, "grad_norm": 13.643900871276855, "learning_rate": 9.52e-06, "loss": 0.3605, "step": 480 }, { "epoch": 1.67, "grad_norm": 11.669689178466797, "learning_rate": 9.920000000000002e-06, "loss": 0.3548, "step": 500 }, { "epoch": 1.73, "grad_norm": 9.907474517822266, "learning_rate": 9.936000000000001e-06, "loss": 0.3535, "step": 520 }, { "epoch": 1.8, "grad_norm": 11.366904258728027, "learning_rate": 9.856000000000002e-06, "loss": 0.2943, "step": 540 }, { "epoch": 1.87, "grad_norm": 7.5114240646362305, "learning_rate": 9.776000000000001e-06, "loss": 0.3057, "step": 560 }, { "epoch": 1.93, "grad_norm": 11.717443466186523, "learning_rate": 9.696000000000002e-06, "loss": 0.2919, "step": 580 }, { "epoch": 2.0, "grad_norm": 9.275129318237305, "learning_rate": 9.616e-06, "loss": 0.3115, "step": 600 }, { "epoch": 2.07, "grad_norm": 9.388382911682129, "learning_rate": 9.536000000000002e-06, "loss": 0.1926, "step": 620 }, { "epoch": 2.13, "grad_norm": 8.574433326721191, "learning_rate": 9.456e-06, "loss": 0.2193, "step": 640 }, { "epoch": 2.2, "grad_norm": 5.512338638305664, "learning_rate": 9.376000000000001e-06, "loss": 0.18, "step": 660 }, { "epoch": 2.27, "grad_norm": 9.644317626953125, "learning_rate": 9.296e-06, "loss": 0.1916, "step": 680 }, { "epoch": 2.33, "grad_norm": 9.321474075317383, "learning_rate": 9.216000000000001e-06, "loss": 0.2055, "step": 700 }, { "epoch": 2.4, "grad_norm": 4.868476390838623, "learning_rate": 9.136e-06, "loss": 0.1843, "step": 720 }, { "epoch": 2.47, "grad_norm": 6.592445373535156, "learning_rate": 9.056000000000001e-06, "loss": 0.1713, "step": 740 }, { "epoch": 2.53, "grad_norm": 8.138116836547852, "learning_rate": 8.976e-06, "loss": 0.1792, "step": 760 }, { "epoch": 2.6, "grad_norm": 9.782510757446289, "learning_rate": 8.896000000000001e-06, "loss": 0.1796, "step": 780 }, { "epoch": 2.67, "grad_norm": 5.867671012878418, "learning_rate": 8.816000000000002e-06, "loss": 0.1652, "step": 800 }, { "epoch": 2.73, "grad_norm": 6.637588977813721, "learning_rate": 8.736e-06, "loss": 0.1735, "step": 820 }, { "epoch": 2.8, "grad_norm": 11.652266502380371, "learning_rate": 8.656000000000001e-06, "loss": 0.1734, "step": 840 }, { "epoch": 2.87, "grad_norm": 6.051203727722168, "learning_rate": 8.576e-06, "loss": 0.1557, "step": 860 }, { "epoch": 2.93, "grad_norm": 11.018319129943848, "learning_rate": 8.496000000000001e-06, "loss": 0.1668, "step": 880 }, { "epoch": 3.0, "grad_norm": 7.878730297088623, "learning_rate": 8.416e-06, "loss": 0.1706, "step": 900 }, { "epoch": 3.07, "grad_norm": 6.223515510559082, "learning_rate": 8.336000000000001e-06, "loss": 0.0917, "step": 920 }, { "epoch": 3.13, "grad_norm": 7.449841022491455, "learning_rate": 8.256e-06, "loss": 0.0853, "step": 940 }, { "epoch": 3.2, "grad_norm": 3.824934244155884, "learning_rate": 8.176000000000001e-06, "loss": 0.1033, "step": 960 }, { "epoch": 3.27, "grad_norm": 6.369926929473877, "learning_rate": 8.096e-06, "loss": 0.0997, "step": 980 }, { "epoch": 3.33, "grad_norm": 6.938141822814941, "learning_rate": 8.016e-06, "loss": 0.095, "step": 1000 }, { "epoch": 3.33, "eval_loss": 0.27550819516181946, "eval_runtime": 345.1723, "eval_samples_per_second": 0.29, "eval_steps_per_second": 0.29, "eval_wer": 60.18099547511312, "step": 1000 } ], "logging_steps": 20, "max_steps": 3000, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "total_flos": 8.6575620096e+17, "train_batch_size": 3, "trial_name": null, "trial_params": null }