{ "best_metric": null, "best_model_checkpoint": null, "epoch": 75.0, "eval_steps": 100, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.57, "learning_rate": 2.5e-05, "loss": 8.1262, "step": 100 }, { "epoch": 3.57, "eval_loss": 4.0765790939331055, "eval_runtime": 21.8341, "eval_samples_per_second": 18.091, "eval_steps_per_second": 0.595, "eval_wer": 1.0, "step": 100 }, { "epoch": 7.14, "learning_rate": 5e-05, "loss": 3.5989, "step": 200 }, { "epoch": 7.14, "eval_loss": 3.226550340652466, "eval_runtime": 21.5714, "eval_samples_per_second": 18.311, "eval_steps_per_second": 0.603, "eval_wer": 1.0, "step": 200 }, { "epoch": 10.71, "learning_rate": 7.5e-05, "loss": 3.0472, "step": 300 }, { "epoch": 10.71, "eval_loss": 2.8889501094818115, "eval_runtime": 21.7358, "eval_samples_per_second": 18.173, "eval_steps_per_second": 0.598, "eval_wer": 1.0, "step": 300 }, { "epoch": 14.29, "learning_rate": 0.0001, "loss": 1.7327, "step": 400 }, { "epoch": 14.29, "eval_loss": 0.6693879961967468, "eval_runtime": 22.0147, "eval_samples_per_second": 17.943, "eval_steps_per_second": 0.591, "eval_wer": 0.6622969506982046, "step": 400 }, { "epoch": 17.86, "learning_rate": 0.000125, "loss": 0.4239, "step": 500 }, { "epoch": 17.86, "eval_loss": 0.4327137768268585, "eval_runtime": 21.6438, "eval_samples_per_second": 18.25, "eval_steps_per_second": 0.601, "eval_wer": 0.5483043602165859, "step": 500 }, { "epoch": 21.43, "learning_rate": 0.00015, "loss": 0.2337, "step": 600 }, { "epoch": 21.43, "eval_loss": 0.38920775055885315, "eval_runtime": 21.8228, "eval_samples_per_second": 18.1, "eval_steps_per_second": 0.596, "eval_wer": 0.5195212311199772, "step": 600 }, { "epoch": 25.0, "learning_rate": 0.000175, "loss": 0.1724, "step": 700 }, { "epoch": 25.0, "eval_loss": 0.391787052154541, "eval_runtime": 21.5976, "eval_samples_per_second": 18.289, "eval_steps_per_second": 0.602, "eval_wer": 0.5027073240239385, "step": 700 }, { "epoch": 28.57, "learning_rate": 0.0002, "loss": 0.1392, "step": 800 }, { "epoch": 28.57, "eval_loss": 0.36863410472869873, "eval_runtime": 21.5216, "eval_samples_per_second": 18.354, "eval_steps_per_second": 0.604, "eval_wer": 0.4670846394984326, "step": 800 }, { "epoch": 32.14, "learning_rate": 0.00022500000000000002, "loss": 0.1196, "step": 900 }, { "epoch": 32.14, "eval_loss": 0.3849872052669525, "eval_runtime": 21.7525, "eval_samples_per_second": 18.159, "eval_steps_per_second": 0.598, "eval_wer": 0.45397549159304645, "step": 900 }, { "epoch": 35.71, "learning_rate": 0.00025, "loss": 0.1095, "step": 1000 }, { "epoch": 35.71, "eval_loss": 0.3586702346801758, "eval_runtime": 21.8949, "eval_samples_per_second": 18.041, "eval_steps_per_second": 0.594, "eval_wer": 0.4516956397834141, "step": 1000 }, { "epoch": 39.29, "learning_rate": 0.000275, "loss": 0.1026, "step": 1100 }, { "epoch": 39.29, "eval_loss": 0.36062636971473694, "eval_runtime": 21.8096, "eval_samples_per_second": 18.111, "eval_steps_per_second": 0.596, "eval_wer": 0.44029638073525224, "step": 1100 }, { "epoch": 42.86, "learning_rate": 0.0003, "loss": 0.0972, "step": 1200 }, { "epoch": 42.86, "eval_loss": 0.40814533829689026, "eval_runtime": 21.9118, "eval_samples_per_second": 18.027, "eval_steps_per_second": 0.593, "eval_wer": 0.43317184383015106, "step": 1200 }, { "epoch": 46.43, "learning_rate": 0.00032500000000000004, "loss": 0.0932, "step": 1300 }, { "epoch": 46.43, "eval_loss": 0.3904629945755005, "eval_runtime": 21.7505, "eval_samples_per_second": 18.16, "eval_steps_per_second": 0.598, "eval_wer": 0.426902251353662, "step": 1300 }, { "epoch": 50.0, "learning_rate": 0.00035, "loss": 0.0897, "step": 1400 }, { "epoch": 50.0, "eval_loss": 0.37547701597213745, "eval_runtime": 22.1102, "eval_samples_per_second": 17.865, "eval_steps_per_second": 0.588, "eval_wer": 0.42747221430607013, "step": 1400 }, { "epoch": 53.57, "learning_rate": 0.000375, "loss": 0.0846, "step": 1500 }, { "epoch": 53.57, "eval_loss": 0.36823779344558716, "eval_runtime": 21.896, "eval_samples_per_second": 18.04, "eval_steps_per_second": 0.594, "eval_wer": 0.42091764035337703, "step": 1500 }, { "epoch": 57.14, "learning_rate": 0.0004, "loss": 0.0854, "step": 1600 }, { "epoch": 57.14, "eval_loss": 0.3795730471611023, "eval_runtime": 21.5795, "eval_samples_per_second": 18.304, "eval_steps_per_second": 0.602, "eval_wer": 0.41635793673411226, "step": 1600 }, { "epoch": 60.71, "learning_rate": 0.000425, "loss": 0.0845, "step": 1700 }, { "epoch": 60.71, "eval_loss": 0.35857513546943665, "eval_runtime": 21.7949, "eval_samples_per_second": 18.124, "eval_steps_per_second": 0.596, "eval_wer": 0.3941293815901966, "step": 1700 }, { "epoch": 64.29, "learning_rate": 0.00045000000000000004, "loss": 0.0854, "step": 1800 }, { "epoch": 64.29, "eval_loss": 0.376005083322525, "eval_runtime": 21.5391, "eval_samples_per_second": 18.339, "eval_steps_per_second": 0.604, "eval_wer": 0.41664291821031635, "step": 1800 }, { "epoch": 67.86, "learning_rate": 0.000475, "loss": 0.0846, "step": 1900 }, { "epoch": 67.86, "eval_loss": 0.3710671663284302, "eval_runtime": 21.5817, "eval_samples_per_second": 18.303, "eval_steps_per_second": 0.602, "eval_wer": 0.41208321459105157, "step": 1900 }, { "epoch": 71.43, "learning_rate": 0.0005, "loss": 0.0827, "step": 2000 }, { "epoch": 71.43, "eval_loss": 0.40105244517326355, "eval_runtime": 21.6021, "eval_samples_per_second": 18.285, "eval_steps_per_second": 0.602, "eval_wer": 0.4160729552579082, "step": 2000 }, { "epoch": 75.0, "learning_rate": 0.0, "loss": 0.0665, "step": 2100 }, { "epoch": 75.0, "eval_loss": 0.3478299379348755, "eval_runtime": 21.8418, "eval_samples_per_second": 18.085, "eval_steps_per_second": 0.595, "eval_wer": 0.36534625249358793, "step": 2100 }, { "epoch": 75.0, "step": 2100, "total_flos": 5.419723804611084e+19, "train_loss": 0.8885498528253465, "train_runtime": 26470.8294, "train_samples_per_second": 10.064, "train_steps_per_second": 0.079 } ], "logging_steps": 100, "max_steps": 2100, "num_train_epochs": 75, "save_steps": 100, "total_flos": 5.419723804611084e+19, "trial_name": null, "trial_params": null }