{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 3900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.56, "learning_rate": 1.3439999999999998e-05, "loss": 21.7509, "step": 100 }, { "epoch": 5.13, "learning_rate": 2.7439999999999998e-05, "loss": 5.8287, "step": 200 }, { "epoch": 7.69, "learning_rate": 4.1439999999999996e-05, "loss": 3.6855, "step": 300 }, { "epoch": 10.26, "learning_rate": 5.544e-05, "loss": 3.0684, "step": 400 }, { "epoch": 12.82, "learning_rate": 6.944e-05, "loss": 2.9421, "step": 500 }, { "epoch": 12.82, "eval_loss": 2.889408826828003, "eval_runtime": 17.3993, "eval_samples_per_second": 25.288, "eval_steps_per_second": 1.609, "eval_wer": 1.0, "step": 500 }, { "epoch": 15.38, "learning_rate": 6.802352941176471e-05, "loss": 2.847, "step": 600 }, { "epoch": 17.95, "learning_rate": 6.596470588235294e-05, "loss": 2.5861, "step": 700 }, { "epoch": 20.51, "learning_rate": 6.390588235294118e-05, "loss": 1.8427, "step": 800 }, { "epoch": 23.08, "learning_rate": 6.184705882352941e-05, "loss": 1.3659, "step": 900 }, { "epoch": 25.64, "learning_rate": 5.978823529411764e-05, "loss": 1.1872, "step": 1000 }, { "epoch": 25.64, "eval_loss": 0.6688352227210999, "eval_runtime": 17.312, "eval_samples_per_second": 25.416, "eval_steps_per_second": 1.617, "eval_wer": 0.7459733087896917, "step": 1000 }, { "epoch": 28.21, "learning_rate": 5.772941176470588e-05, "loss": 1.087, "step": 1100 }, { "epoch": 30.77, "learning_rate": 5.5670588235294114e-05, "loss": 1.0304, "step": 1200 }, { "epoch": 33.33, "learning_rate": 5.361176470588235e-05, "loss": 0.9762, "step": 1300 }, { "epoch": 35.9, "learning_rate": 5.1552941176470585e-05, "loss": 0.9043, "step": 1400 }, { "epoch": 38.46, "learning_rate": 4.949411764705882e-05, "loss": 0.8894, "step": 1500 }, { "epoch": 38.46, "eval_loss": 0.4868045449256897, "eval_runtime": 16.7955, "eval_samples_per_second": 26.197, "eval_steps_per_second": 1.667, "eval_wer": 0.6516336861481823, "step": 1500 }, { "epoch": 41.03, "learning_rate": 4.7435294117647055e-05, "loss": 0.861, "step": 1600 }, { "epoch": 43.59, "learning_rate": 4.5376470588235284e-05, "loss": 0.8411, "step": 1700 }, { "epoch": 46.15, "learning_rate": 4.331764705882352e-05, "loss": 0.8349, "step": 1800 }, { "epoch": 48.72, "learning_rate": 4.1258823529411755e-05, "loss": 0.7912, "step": 1900 }, { "epoch": 51.28, "learning_rate": 3.92e-05, "loss": 0.769, "step": 2000 }, { "epoch": 51.28, "eval_loss": 0.495985209941864, "eval_runtime": 17.6095, "eval_samples_per_second": 24.987, "eval_steps_per_second": 1.59, "eval_wer": 0.650713299585826, "step": 2000 }, { "epoch": 53.85, "learning_rate": 3.714117647058823e-05, "loss": 0.7639, "step": 2100 }, { "epoch": 56.41, "learning_rate": 3.508235294117647e-05, "loss": 0.7421, "step": 2200 }, { "epoch": 58.97, "learning_rate": 3.3023529411764704e-05, "loss": 0.728, "step": 2300 }, { "epoch": 61.54, "learning_rate": 3.096470588235294e-05, "loss": 0.7105, "step": 2400 }, { "epoch": 64.1, "learning_rate": 2.8905882352941175e-05, "loss": 0.6936, "step": 2500 }, { "epoch": 64.1, "eval_loss": 0.47812411189079285, "eval_runtime": 17.7238, "eval_samples_per_second": 24.825, "eval_steps_per_second": 1.58, "eval_wer": 0.5384261389783709, "step": 2500 }, { "epoch": 66.67, "learning_rate": 2.684705882352941e-05, "loss": 0.6813, "step": 2600 }, { "epoch": 69.23, "learning_rate": 2.4788235294117646e-05, "loss": 0.6514, "step": 2700 }, { "epoch": 71.79, "learning_rate": 2.272941176470588e-05, "loss": 0.6539, "step": 2800 }, { "epoch": 74.36, "learning_rate": 2.0670588235294117e-05, "loss": 0.6423, "step": 2900 }, { "epoch": 76.92, "learning_rate": 1.8611764705882352e-05, "loss": 0.624, "step": 3000 }, { "epoch": 76.92, "eval_loss": 0.4642501175403595, "eval_runtime": 16.6285, "eval_samples_per_second": 26.461, "eval_steps_per_second": 1.684, "eval_wer": 0.5430280717901519, "step": 3000 }, { "epoch": 79.49, "learning_rate": 1.6552941176470588e-05, "loss": 0.624, "step": 3100 }, { "epoch": 82.05, "learning_rate": 1.4494117647058823e-05, "loss": 0.6071, "step": 3200 }, { "epoch": 84.62, "learning_rate": 1.2435294117647057e-05, "loss": 0.6175, "step": 3300 }, { "epoch": 87.18, "learning_rate": 1.0376470588235292e-05, "loss": 0.6023, "step": 3400 }, { "epoch": 89.74, "learning_rate": 8.317647058823528e-06, "loss": 0.5966, "step": 3500 }, { "epoch": 89.74, "eval_loss": 0.4529646337032318, "eval_runtime": 16.5674, "eval_samples_per_second": 26.558, "eval_steps_per_second": 1.69, "eval_wer": 0.5591348366313852, "step": 3500 }, { "epoch": 92.31, "learning_rate": 6.258823529411764e-06, "loss": 0.5868, "step": 3600 }, { "epoch": 94.87, "learning_rate": 4.2e-06, "loss": 0.5817, "step": 3700 }, { "epoch": 97.44, "learning_rate": 2.141176470588235e-06, "loss": 0.5687, "step": 3800 }, { "epoch": 100.0, "learning_rate": 8.235294117647057e-08, "loss": 0.5709, "step": 3900 }, { "epoch": 100.0, "step": 3900, "total_flos": 1.7166559536300583e+19, "train_loss": 1.7521888146033653, "train_runtime": 6573.2529, "train_samples_per_second": 18.56, "train_steps_per_second": 0.593 } ], "max_steps": 3900, "num_train_epochs": 100, "total_flos": 1.7166559536300583e+19, "trial_name": null, "trial_params": null }