{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.98461538461538, "global_step": 3200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.12, "learning_rate": 0.00019600000000000002, "loss": 4.3333, "step": 100 }, { "epoch": 6.25, "learning_rate": 0.00039600000000000003, "loss": 2.4826, "step": 200 }, { "epoch": 9.37, "learning_rate": 0.000596, "loss": 2.2099, "step": 300 }, { "epoch": 12.49, "learning_rate": 0.000796, "loss": 2.1639, "step": 400 }, { "epoch": 15.62, "learning_rate": 0.000996, "loss": 2.1851, "step": 500 }, { "epoch": 15.62, "eval_loss": 1.8067339658737183, "eval_runtime": 25.4413, "eval_samples_per_second": 20.007, "eval_steps_per_second": 2.516, "eval_wer": 0.9256004686584651, "step": 500 }, { "epoch": 18.74, "learning_rate": 0.0009637037037037037, "loss": 2.1912, "step": 600 }, { "epoch": 21.86, "learning_rate": 0.0009266666666666667, "loss": 2.1441, "step": 700 }, { "epoch": 24.98, "learning_rate": 0.0008896296296296296, "loss": 2.1784, "step": 800 }, { "epoch": 28.12, "learning_rate": 0.0008525925925925926, "loss": 2.1836, "step": 900 }, { "epoch": 31.25, "learning_rate": 0.0008155555555555556, "loss": 2.1586, "step": 1000 }, { "epoch": 31.25, "eval_loss": 1.7883163690567017, "eval_runtime": 24.5739, "eval_samples_per_second": 20.713, "eval_steps_per_second": 2.604, "eval_wer": 0.9179847685998829, "step": 1000 }, { "epoch": 34.37, "learning_rate": 0.0007785185185185186, "loss": 2.1329, "step": 1100 }, { "epoch": 37.49, "learning_rate": 0.0007414814814814815, "loss": 2.1071, "step": 1200 }, { "epoch": 40.62, "learning_rate": 0.0007044444444444445, "loss": 2.0866, "step": 1300 }, { "epoch": 43.74, "learning_rate": 0.0006674074074074075, "loss": 2.0434, "step": 1400 }, { "epoch": 46.86, "learning_rate": 0.0006303703703703703, "loss": 2.0302, "step": 1500 }, { "epoch": 46.86, "eval_loss": 1.7570589780807495, "eval_runtime": 24.6634, "eval_samples_per_second": 20.638, "eval_steps_per_second": 2.595, "eval_wer": 0.9191564147627417, "step": 1500 }, { "epoch": 49.98, "learning_rate": 0.0005933333333333334, "loss": 2.0, "step": 1600 }, { "epoch": 53.12, "learning_rate": 0.0005562962962962963, "loss": 1.9953, "step": 1700 }, { "epoch": 56.25, "learning_rate": 0.0005192592592592593, "loss": 1.9243, "step": 1800 }, { "epoch": 59.37, "learning_rate": 0.0004822222222222222, "loss": 1.9093, "step": 1900 }, { "epoch": 62.49, "learning_rate": 0.0004451851851851852, "loss": 1.8706, "step": 2000 }, { "epoch": 62.49, "eval_loss": 1.631394624710083, "eval_runtime": 24.8239, "eval_samples_per_second": 20.504, "eval_steps_per_second": 2.578, "eval_wer": 0.8857644991212654, "step": 2000 }, { "epoch": 65.62, "learning_rate": 0.00040814814814814815, "loss": 1.8333, "step": 2100 }, { "epoch": 68.74, "learning_rate": 0.0003711111111111111, "loss": 1.7878, "step": 2200 }, { "epoch": 71.86, "learning_rate": 0.0003340740740740741, "loss": 1.7619, "step": 2300 }, { "epoch": 74.98, "learning_rate": 0.00029703703703703706, "loss": 1.7315, "step": 2400 }, { "epoch": 78.12, "learning_rate": 0.00026000000000000003, "loss": 1.7008, "step": 2500 }, { "epoch": 78.12, "eval_loss": 1.6130626201629639, "eval_runtime": 25.5604, "eval_samples_per_second": 19.914, "eval_steps_per_second": 2.504, "eval_wer": 0.8678968951376684, "step": 2500 }, { "epoch": 81.25, "learning_rate": 0.00022296296296296297, "loss": 1.6485, "step": 2600 }, { "epoch": 84.37, "learning_rate": 0.00018592592592592594, "loss": 1.6268, "step": 2700 }, { "epoch": 87.49, "learning_rate": 0.0001488888888888889, "loss": 1.5698, "step": 2800 }, { "epoch": 90.62, "learning_rate": 0.00011185185185185186, "loss": 1.5581, "step": 2900 }, { "epoch": 93.74, "learning_rate": 7.481481481481483e-05, "loss": 1.4982, "step": 3000 }, { "epoch": 93.74, "eval_loss": 1.6539617776870728, "eval_runtime": 24.6055, "eval_samples_per_second": 20.686, "eval_steps_per_second": 2.601, "eval_wer": 0.8649677797305214, "step": 3000 }, { "epoch": 96.86, "learning_rate": 3.777777777777778e-05, "loss": 1.4848, "step": 3100 }, { "epoch": 99.98, "learning_rate": 7.407407407407407e-07, "loss": 1.4475, "step": 3200 }, { "epoch": 99.98, "step": 3200, "total_flos": 3.685380703950235e+19, "train_loss": 1.9993535804748535, "train_runtime": 10079.8406, "train_samples_per_second": 10.268, "train_steps_per_second": 0.317 } ], "max_steps": 3200, "num_train_epochs": 100, "total_flos": 3.685380703950235e+19, "trial_name": null, "trial_params": null }