{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.997802197802198, "global_step": 6810, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.32, "learning_rate": 0.00017999999999999998, "loss": 11.6291, "step": 300 }, { "epoch": 1.32, "eval_loss": 3.028644323348999, "eval_runtime": 643.9816, "eval_samples_per_second": 8.087, "eval_wer": 1.0, "step": 300 }, { "epoch": 2.64, "learning_rate": 0.0002952456418383518, "loss": 3.0796, "step": 600 }, { "epoch": 2.64, "eval_loss": 2.9614171981811523, "eval_runtime": 650.4972, "eval_samples_per_second": 8.006, "eval_wer": 1.0, "step": 600 }, { "epoch": 3.96, "learning_rate": 0.0002809825673534073, "loss": 1.6756, "step": 900 }, { "epoch": 3.96, "eval_loss": 0.6643654108047485, "eval_runtime": 668.5248, "eval_samples_per_second": 7.79, "eval_wer": 0.5919852387016294, "step": 900 }, { "epoch": 5.29, "learning_rate": 0.00026671949286846274, "loss": 0.5169, "step": 1200 }, { "epoch": 5.29, "eval_loss": 0.48998841643333435, "eval_runtime": 670.8355, "eval_samples_per_second": 7.763, "eval_wer": 0.4740247927440942, "step": 1200 }, { "epoch": 6.61, "learning_rate": 0.0002524564183835182, "loss": 0.3643, "step": 1500 }, { "epoch": 6.61, "eval_loss": 0.4465296268463135, "eval_runtime": 685.664, "eval_samples_per_second": 7.596, "eval_wer": 0.43660178279061307, "step": 1500 }, { "epoch": 7.93, "learning_rate": 0.00023819334389857367, "loss": 0.277, "step": 1800 }, { "epoch": 7.93, "eval_loss": 0.4225097894668579, "eval_runtime": 675.9712, "eval_samples_per_second": 7.704, "eval_wer": 0.4157592453026326, "step": 1800 }, { "epoch": 9.25, "learning_rate": 0.00022393026941362913, "loss": 0.2271, "step": 2100 }, { "epoch": 9.25, "eval_loss": 0.4406384825706482, "eval_runtime": 685.3056, "eval_samples_per_second": 7.6, "eval_wer": 0.39260375789391616, "step": 2100 }, { "epoch": 10.57, "learning_rate": 0.0002096671949286846, "loss": 0.1895, "step": 2400 }, { "epoch": 10.57, "eval_loss": 0.4478096067905426, "eval_runtime": 689.1958, "eval_samples_per_second": 7.557, "eval_wer": 0.3899529613555446, "step": 2400 }, { "epoch": 11.89, "learning_rate": 0.0001954041204437401, "loss": 0.1653, "step": 2700 }, { "epoch": 11.89, "eval_loss": 0.4374055862426758, "eval_runtime": 693.8794, "eval_samples_per_second": 7.506, "eval_wer": 0.3792977987993451, "step": 2700 }, { "epoch": 13.22, "learning_rate": 0.00018114104595879556, "loss": 0.1366, "step": 3000 }, { "epoch": 13.22, "eval_loss": 0.4575331211090088, "eval_runtime": 703.7067, "eval_samples_per_second": 7.401, "eval_wer": 0.37355440629954, "step": 3000 }, { "epoch": 14.54, "learning_rate": 0.000166877971473851, "loss": 0.1219, "step": 3300 }, { "epoch": 14.54, "eval_loss": 0.4847738444805145, "eval_runtime": 705.6195, "eval_samples_per_second": 7.381, "eval_wer": 0.36983809350554847, "step": 3300 }, { "epoch": 15.86, "learning_rate": 0.00015261489698890646, "loss": 0.1102, "step": 3600 }, { "epoch": 15.86, "eval_loss": 0.4509291350841522, "eval_runtime": 713.2815, "eval_samples_per_second": 7.301, "eval_wer": 0.35738974505574467, "step": 3600 }, { "epoch": 17.18, "learning_rate": 0.00013835182250396196, "loss": 0.1017, "step": 3900 }, { "epoch": 17.18, "eval_loss": 0.45042145252227783, "eval_runtime": 714.3616, "eval_samples_per_second": 7.29, "eval_wer": 0.34715039372125056, "step": 3900 }, { "epoch": 18.5, "learning_rate": 0.00012408874801901742, "loss": 0.0937, "step": 4200 }, { "epoch": 18.5, "eval_loss": 0.4522610008716583, "eval_runtime": 719.9965, "eval_samples_per_second": 7.233, "eval_wer": 0.3469164999090413, "step": 4200 }, { "epoch": 19.82, "learning_rate": 0.00010982567353407289, "loss": 0.0829, "step": 4500 }, { "epoch": 19.82, "eval_loss": 0.4488724172115326, "eval_runtime": 720.1162, "eval_samples_per_second": 7.232, "eval_wer": 0.3452012786195067, "step": 4500 }, { "epoch": 21.15, "learning_rate": 9.556259904912835e-05, "loss": 0.0752, "step": 4800 }, { "epoch": 21.15, "eval_loss": 0.4762090742588043, "eval_runtime": 730.1963, "eval_samples_per_second": 7.132, "eval_wer": 0.33800254684373293, "step": 4800 }, { "epoch": 22.47, "learning_rate": 8.129952456418383e-05, "loss": 0.0698, "step": 5100 }, { "epoch": 22.47, "eval_loss": 0.480989009141922, "eval_runtime": 727.7194, "eval_samples_per_second": 7.157, "eval_wer": 0.33597546713791937, "step": 5100 }, { "epoch": 23.79, "learning_rate": 6.70364500792393e-05, "loss": 0.0642, "step": 5400 }, { "epoch": 23.79, "eval_loss": 0.49236759543418884, "eval_runtime": 737.7861, "eval_samples_per_second": 7.059, "eval_wer": 0.3343382104524546, "step": 5400 }, { "epoch": 25.11, "learning_rate": 5.2773375594294765e-05, "loss": 0.058, "step": 5700 }, { "epoch": 25.11, "eval_loss": 0.4960506558418274, "eval_runtime": 740.7836, "eval_samples_per_second": 7.03, "eval_wer": 0.3302840510408275, "step": 5700 }, { "epoch": 26.43, "learning_rate": 3.851030110935023e-05, "loss": 0.0553, "step": 6000 }, { "epoch": 26.43, "eval_loss": 0.4915299415588379, "eval_runtime": 748.2447, "eval_samples_per_second": 6.96, "eval_wer": 0.32506042256815404, "step": 6000 }, { "epoch": 27.75, "learning_rate": 2.4247226624405704e-05, "loss": 0.0545, "step": 6300 }, { "epoch": 27.75, "eval_loss": 0.5011223554611206, "eval_runtime": 762.4791, "eval_samples_per_second": 6.83, "eval_wer": 0.3210062631565269, "step": 6300 }, { "epoch": 29.07, "learning_rate": 9.984152139461172e-06, "loss": 0.0523, "step": 6600 }, { "epoch": 29.07, "eval_loss": 0.49885958433151245, "eval_runtime": 743.9824, "eval_samples_per_second": 7.0, "eval_wer": 0.3229034018555576, "step": 6600 }, { "epoch": 30.0, "step": 6810, "total_flos": 4.998078272539758e+19, "train_runtime": 61300.1017, "train_samples_per_second": 0.111 } ], "max_steps": 6810, "num_train_epochs": 30, "total_flos": 4.998078272539758e+19, "trial_name": null, "trial_params": null }