{ "best_metric": null, "best_model_checkpoint": null, "epoch": 36.7816091954023, "global_step": 6400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.45, "learning_rate": 0.00023999999999999998, "loss": NaN, "step": 400 }, { "epoch": 3.45, "eval_loss": 3.315488815307617, "eval_runtime": 135.9943, "eval_samples_per_second": 11.192, "eval_wer": 0.9802160088826083, "step": 400 }, { "epoch": 6.89, "learning_rate": 0.00025054945054945053, "loss": NaN, "step": 800 }, { "epoch": 6.89, "eval_loss": 1.239622712135315, "eval_runtime": 136.5249, "eval_samples_per_second": 11.148, "eval_wer": 0.8971434339356011, "step": 800 }, { "epoch": 10.34, "learning_rate": 0.0001846153846153846, "loss": NaN, "step": 1200 }, { "epoch": 10.34, "eval_loss": 0.5218076705932617, "eval_runtime": 136.039, "eval_samples_per_second": 11.188, "eval_wer": 0.5810033309780963, "step": 1200 }, { "epoch": 13.79, "learning_rate": 0.00011868131868131866, "loss": NaN, "step": 1600 }, { "epoch": 13.79, "eval_loss": 0.45067664980888367, "eval_runtime": 136.2412, "eval_samples_per_second": 11.171, "eval_wer": 0.5031795700010094, "step": 1600 }, { "epoch": 17.24, "learning_rate": 5.274725274725275e-05, "loss": NaN, "step": 2000 }, { "epoch": 17.24, "eval_loss": 0.4806972146034241, "eval_runtime": 135.9031, "eval_samples_per_second": 11.199, "eval_wer": 0.48622186332895934, "step": 2000 }, { "epoch": 13.79, "learning_rate": 0.00021176470588235295, "loss": 1.706, "step": 2400 }, { "epoch": 13.79, "eval_loss": 0.4303271174430847, "eval_runtime": 143.5562, "eval_samples_per_second": 10.602, "eval_wer": 0.4874331280912486, "step": 2400 }, { "epoch": 16.09, "learning_rate": 0.00019318885448916406, "loss": 0.2803, "step": 2800 }, { "epoch": 16.09, "eval_loss": 0.39102795720100403, "eval_runtime": 144.4788, "eval_samples_per_second": 10.534, "eval_wer": 0.4415060058544463, "step": 2800 }, { "epoch": 18.39, "learning_rate": 0.0001746130030959752, "loss": 0.2162, "step": 3200 }, { "epoch": 18.39, "eval_loss": 0.3744983971118927, "eval_runtime": 146.0666, "eval_samples_per_second": 10.42, "eval_wer": 0.41728071060866057, "step": 3200 }, { "epoch": 20.69, "learning_rate": 0.00015603715170278636, "loss": 0.1768, "step": 3600 }, { "epoch": 20.69, "eval_loss": 0.4224047064781189, "eval_runtime": 145.7761, "eval_samples_per_second": 10.441, "eval_wer": 0.4194004239426668, "step": 3600 }, { "epoch": 22.99, "learning_rate": 0.00013746130030959752, "loss": 0.1502, "step": 4000 }, { "epoch": 22.99, "eval_loss": 0.37262818217277527, "eval_runtime": 145.6131, "eval_samples_per_second": 10.452, "eval_wer": 0.4009286363177551, "step": 4000 }, { "epoch": 25.29, "learning_rate": 0.00011888544891640867, "loss": 0.1214, "step": 4400 }, { "epoch": 25.29, "eval_loss": 0.3383936583995819, "eval_runtime": 146.7077, "eval_samples_per_second": 10.374, "eval_wer": 0.3776117896436863, "step": 4400 }, { "epoch": 27.59, "learning_rate": 0.00010030959752321979, "loss": 0.105, "step": 4800 }, { "epoch": 27.59, "eval_loss": 0.3606471121311188, "eval_runtime": 146.6063, "eval_samples_per_second": 10.382, "eval_wer": 0.3755930150398708, "step": 4800 }, { "epoch": 29.89, "learning_rate": 8.173374613003094e-05, "loss": 0.0917, "step": 5200 }, { "epoch": 29.89, "eval_loss": 0.35728928446769714, "eval_runtime": 147.1176, "eval_samples_per_second": 10.345, "eval_wer": 0.3658019582113657, "step": 5200 }, { "epoch": 32.18, "learning_rate": 6.315789473684209e-05, "loss": 0.0782, "step": 5600 }, { "epoch": 32.18, "eval_loss": 0.36559608578681946, "eval_runtime": 148.3984, "eval_samples_per_second": 10.256, "eval_wer": 0.3630766124962148, "step": 5600 }, { "epoch": 34.48, "learning_rate": 4.4582043343653246e-05, "loss": 0.0713, "step": 6000 }, { "epoch": 34.48, "eval_loss": 0.3449805676937103, "eval_runtime": 147.7614, "eval_samples_per_second": 10.3, "eval_wer": 0.35611184011305136, "step": 6000 }, { "epoch": 36.78, "learning_rate": 2.6006191950464395e-05, "loss": 0.0666, "step": 6400 }, { "epoch": 36.78, "eval_loss": 0.3429848849773407, "eval_runtime": 147.6625, "eval_samples_per_second": 10.307, "eval_wer": 0.3475320480468356, "step": 6400 } ], "max_steps": 6960, "num_train_epochs": 40, "total_flos": 3.238027552454268e+19, "trial_name": null, "trial_params": null }