{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.25, "learning_rate": 6.789999999999999e-06, "loss": 15.499, "step": 100 }, { "epoch": 2.5, "learning_rate": 1.379e-05, "loss": 5.6988, "step": 200 }, { "epoch": 3.75, "learning_rate": 2.0789999999999996e-05, "loss": 3.9135, "step": 300 }, { "epoch": 5.0, "learning_rate": 2.779e-05, "loss": 3.315, "step": 400 }, { "epoch": 6.25, "learning_rate": 3.479e-05, "loss": 3.0713, "step": 500 }, { "epoch": 7.5, "learning_rate": 4.178999999999999e-05, "loss": 2.9727, "step": 600 }, { "epoch": 8.75, "learning_rate": 4.878999999999999e-05, "loss": 2.8912, "step": 700 }, { "epoch": 10.0, "learning_rate": 5.579e-05, "loss": 2.8514, "step": 800 }, { "epoch": 11.25, "learning_rate": 6.279e-05, "loss": 2.5765, "step": 900 }, { "epoch": 12.5, "learning_rate": 6.979e-05, "loss": 1.785, "step": 1000 }, { "epoch": 12.5, "eval_loss": 0.7464718222618103, "eval_runtime": 42.2657, "eval_samples_per_second": 28.013, "eval_steps_per_second": 1.751, "eval_wer": 0.6812214190883611, "step": 1000 }, { "epoch": 13.75, "learning_rate": 6.903e-05, "loss": 1.4773, "step": 1100 }, { "epoch": 15.0, "learning_rate": 6.803e-05, "loss": 1.3507, "step": 1200 }, { "epoch": 16.25, "learning_rate": 6.702999999999999e-05, "loss": 1.2818, "step": 1300 }, { "epoch": 17.5, "learning_rate": 6.602999999999999e-05, "loss": 1.233, "step": 1400 }, { "epoch": 18.75, "learning_rate": 6.502999999999999e-05, "loss": 1.161, "step": 1500 }, { "epoch": 20.0, "learning_rate": 6.403e-05, "loss": 1.1004, "step": 1600 }, { "epoch": 21.25, "learning_rate": 6.303e-05, "loss": 1.0246, "step": 1700 }, { "epoch": 22.5, "learning_rate": 6.202999999999999e-05, "loss": 0.9693, "step": 1800 }, { "epoch": 23.75, "learning_rate": 6.103e-05, "loss": 0.9507, "step": 1900 }, { "epoch": 25.0, "learning_rate": 6.002999999999999e-05, "loss": 0.8989, "step": 2000 }, { "epoch": 25.0, "eval_loss": 0.24948318302631378, "eval_runtime": 40.7976, "eval_samples_per_second": 29.021, "eval_steps_per_second": 1.814, "eval_wer": 0.27319663667207555, "step": 2000 }, { "epoch": 26.25, "learning_rate": 5.904e-05, "loss": 0.8743, "step": 2100 }, { "epoch": 27.5, "learning_rate": 5.804e-05, "loss": 0.8558, "step": 2200 }, { "epoch": 28.75, "learning_rate": 5.7039999999999996e-05, "loss": 0.8228, "step": 2300 }, { "epoch": 30.0, "learning_rate": 5.604e-05, "loss": 0.8154, "step": 2400 }, { "epoch": 31.25, "learning_rate": 5.5039999999999995e-05, "loss": 0.7932, "step": 2500 }, { "epoch": 32.5, "learning_rate": 5.404e-05, "loss": 0.7755, "step": 2600 }, { "epoch": 33.75, "learning_rate": 5.3039999999999994e-05, "loss": 0.7585, "step": 2700 }, { "epoch": 35.0, "learning_rate": 5.2039999999999996e-05, "loss": 0.7472, "step": 2800 }, { "epoch": 36.25, "learning_rate": 5.103999999999999e-05, "loss": 0.734, "step": 2900 }, { "epoch": 37.5, "learning_rate": 5.0039999999999995e-05, "loss": 0.7118, "step": 3000 }, { "epoch": 37.5, "eval_loss": 0.21255508065223694, "eval_runtime": 40.7166, "eval_samples_per_second": 29.079, "eval_steps_per_second": 1.817, "eval_wer": 0.22835226434577371, "step": 3000 }, { "epoch": 38.75, "learning_rate": 4.904e-05, "loss": 0.7163, "step": 3100 }, { "epoch": 40.0, "learning_rate": 4.8039999999999994e-05, "loss": 0.7066, "step": 3200 }, { "epoch": 41.25, "learning_rate": 4.704e-05, "loss": 0.7067, "step": 3300 }, { "epoch": 42.5, "learning_rate": 4.603999999999999e-05, "loss": 0.688, "step": 3400 }, { "epoch": 43.75, "learning_rate": 4.5039999999999996e-05, "loss": 0.6777, "step": 3500 }, { "epoch": 45.0, "learning_rate": 4.403999999999999e-05, "loss": 0.6621, "step": 3600 }, { "epoch": 46.25, "learning_rate": 4.3039999999999994e-05, "loss": 0.6494, "step": 3700 }, { "epoch": 47.5, "learning_rate": 4.203999999999999e-05, "loss": 0.646, "step": 3800 }, { "epoch": 48.75, "learning_rate": 4.104e-05, "loss": 0.6356, "step": 3900 }, { "epoch": 50.0, "learning_rate": 4.0039999999999996e-05, "loss": 0.6367, "step": 4000 }, { "epoch": 50.0, "eval_loss": 0.20487311482429504, "eval_runtime": 43.4458, "eval_samples_per_second": 27.252, "eval_steps_per_second": 1.703, "eval_wer": 0.20489747750405665, "step": 4000 }, { "epoch": 51.25, "learning_rate": 3.904e-05, "loss": 0.6272, "step": 4100 }, { "epoch": 52.5, "learning_rate": 3.804e-05, "loss": 0.6187, "step": 4200 }, { "epoch": 53.75, "learning_rate": 3.704e-05, "loss": 0.6116, "step": 4300 }, { "epoch": 55.0, "learning_rate": 3.604e-05, "loss": 0.6074, "step": 4400 }, { "epoch": 56.25, "learning_rate": 3.504999999999999e-05, "loss": 0.5945, "step": 4500 }, { "epoch": 57.5, "learning_rate": 3.4049999999999994e-05, "loss": 0.6074, "step": 4600 }, { "epoch": 58.75, "learning_rate": 3.305e-05, "loss": 0.6011, "step": 4700 }, { "epoch": 60.0, "learning_rate": 3.205e-05, "loss": 0.5815, "step": 4800 }, { "epoch": 61.25, "learning_rate": 3.1049999999999996e-05, "loss": 0.5704, "step": 4900 }, { "epoch": 62.5, "learning_rate": 3.0049999999999995e-05, "loss": 0.5763, "step": 5000 }, { "epoch": 62.5, "eval_loss": 0.21160605549812317, "eval_runtime": 41.2219, "eval_samples_per_second": 28.723, "eval_steps_per_second": 1.795, "eval_wer": 0.20548753503466588, "step": 5000 }, { "epoch": 63.75, "learning_rate": 2.9049999999999995e-05, "loss": 0.5742, "step": 5100 }, { "epoch": 65.0, "learning_rate": 2.8049999999999997e-05, "loss": 0.58, "step": 5200 }, { "epoch": 66.25, "learning_rate": 2.705e-05, "loss": 0.5542, "step": 5300 }, { "epoch": 67.5, "learning_rate": 2.605e-05, "loss": 0.5472, "step": 5400 }, { "epoch": 68.75, "learning_rate": 2.505e-05, "loss": 0.5433, "step": 5500 }, { "epoch": 70.0, "learning_rate": 2.405e-05, "loss": 0.5448, "step": 5600 }, { "epoch": 71.25, "learning_rate": 2.3049999999999998e-05, "loss": 0.5469, "step": 5700 }, { "epoch": 72.5, "learning_rate": 2.2049999999999997e-05, "loss": 0.5424, "step": 5800 }, { "epoch": 73.75, "learning_rate": 2.1049999999999997e-05, "loss": 0.5221, "step": 5900 }, { "epoch": 75.0, "learning_rate": 2.0049999999999996e-05, "loss": 0.5196, "step": 6000 }, { "epoch": 75.0, "eval_loss": 0.21106497943401337, "eval_runtime": 41.193, "eval_samples_per_second": 28.743, "eval_steps_per_second": 1.796, "eval_wer": 0.19103112553473964, "step": 6000 }, { "epoch": 76.25, "learning_rate": 1.905e-05, "loss": 0.5215, "step": 6100 }, { "epoch": 77.5, "learning_rate": 1.8049999999999998e-05, "loss": 0.5141, "step": 6200 }, { "epoch": 78.75, "learning_rate": 1.7049999999999998e-05, "loss": 0.5087, "step": 6300 }, { "epoch": 80.0, "learning_rate": 1.6049999999999997e-05, "loss": 0.5136, "step": 6400 }, { "epoch": 81.25, "learning_rate": 1.5049999999999998e-05, "loss": 0.4973, "step": 6500 }, { "epoch": 82.5, "learning_rate": 1.4049999999999998e-05, "loss": 0.5064, "step": 6600 }, { "epoch": 83.75, "learning_rate": 1.3049999999999999e-05, "loss": 0.4985, "step": 6700 }, { "epoch": 85.0, "learning_rate": 1.205e-05, "loss": 0.4926, "step": 6800 }, { "epoch": 86.25, "learning_rate": 1.105e-05, "loss": 0.4973, "step": 6900 }, { "epoch": 87.5, "learning_rate": 1.0049999999999999e-05, "loss": 0.4949, "step": 7000 }, { "epoch": 87.5, "eval_loss": 0.21311460435390472, "eval_runtime": 43.2323, "eval_samples_per_second": 27.387, "eval_steps_per_second": 1.712, "eval_wer": 0.19309632689187195, "step": 7000 }, { "epoch": 88.75, "learning_rate": 9.05e-06, "loss": 0.5008, "step": 7100 }, { "epoch": 90.0, "learning_rate": 8.05e-06, "loss": 0.4834, "step": 7200 }, { "epoch": 91.25, "learning_rate": 7.049999999999999e-06, "loss": 0.4717, "step": 7300 }, { "epoch": 92.5, "learning_rate": 6.049999999999999e-06, "loss": 0.485, "step": 7400 }, { "epoch": 93.75, "learning_rate": 5.05e-06, "loss": 0.4835, "step": 7500 }, { "epoch": 95.0, "learning_rate": 4.049999999999999e-06, "loss": 0.4787, "step": 7600 }, { "epoch": 96.25, "learning_rate": 3.05e-06, "loss": 0.4747, "step": 7700 }, { "epoch": 97.5, "learning_rate": 2.05e-06, "loss": 0.4804, "step": 7800 }, { "epoch": 98.75, "learning_rate": 1.05e-06, "loss": 0.4731, "step": 7900 }, { "epoch": 100.0, "learning_rate": 6e-08, "loss": 0.4797, "step": 8000 }, { "epoch": 100.0, "eval_loss": 0.20929841697216034, "eval_runtime": 42.6795, "eval_samples_per_second": 27.742, "eval_steps_per_second": 1.734, "eval_wer": 0.190736096769435, "step": 8000 }, { "epoch": 100.0, "step": 8000, "total_flos": 2.997146399308124e+19, "train_loss": 1.1495286922454835, "train_runtime": 11669.3128, "train_samples_per_second": 21.921, "train_steps_per_second": 0.686 } ], "max_steps": 8000, "num_train_epochs": 100, "total_flos": 2.997146399308124e+19, "trial_name": null, "trial_params": null }