{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 7800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.64, "learning_rate": 7.275e-06, "loss": 16.819, "step": 100 }, { "epoch": 1.28, "learning_rate": 1.4775e-05, "loss": 6.1908, "step": 200 }, { "epoch": 1.92, "learning_rate": 2.2274999999999996e-05, "loss": 4.1987, "step": 300 }, { "epoch": 2.56, "learning_rate": 2.9775e-05, "loss": 3.489, "step": 400 }, { "epoch": 2.56, "eval_loss": 3.3590216636657715, "eval_runtime": 66.6533, "eval_samples_per_second": 32.226, "eval_steps_per_second": 2.025, "eval_wer": 1.0, "step": 400 }, { "epoch": 3.21, "learning_rate": 3.7275e-05, "loss": 3.2231, "step": 500 }, { "epoch": 3.85, "learning_rate": 4.4775e-05, "loss": 3.1038, "step": 600 }, { "epoch": 4.49, "learning_rate": 5.227499999999999e-05, "loss": 3.0217, "step": 700 }, { "epoch": 5.13, "learning_rate": 5.9774999999999996e-05, "loss": 2.9903, "step": 800 }, { "epoch": 5.13, "eval_loss": 2.970390796661377, "eval_runtime": 66.5925, "eval_samples_per_second": 32.256, "eval_steps_per_second": 2.027, "eval_wer": 1.0000977995110025, "step": 800 }, { "epoch": 5.77, "learning_rate": 6.7275e-05, "loss": 2.906, "step": 900 }, { "epoch": 6.41, "learning_rate": 7.477499999999999e-05, "loss": 2.5346, "step": 1000 }, { "epoch": 7.05, "learning_rate": 7.393014705882353e-05, "loss": 1.8923, "step": 1100 }, { "epoch": 7.69, "learning_rate": 7.282720588235294e-05, "loss": 1.6712, "step": 1200 }, { "epoch": 7.69, "eval_loss": 0.6178616285324097, "eval_runtime": 66.5699, "eval_samples_per_second": 32.267, "eval_steps_per_second": 2.028, "eval_wer": 0.6566259168704156, "step": 1200 }, { "epoch": 8.33, "learning_rate": 7.172426470588234e-05, "loss": 1.5317, "step": 1300 }, { "epoch": 8.97, "learning_rate": 7.062132352941176e-05, "loss": 1.4151, "step": 1400 }, { "epoch": 9.62, "learning_rate": 6.951838235294117e-05, "loss": 1.3153, "step": 1500 }, { "epoch": 10.26, "learning_rate": 6.841544117647059e-05, "loss": 1.2635, "step": 1600 }, { "epoch": 10.26, "eval_loss": 0.31762251257896423, "eval_runtime": 67.311, "eval_samples_per_second": 31.912, "eval_steps_per_second": 2.006, "eval_wer": 0.45310513447432765, "step": 1600 }, { "epoch": 10.9, "learning_rate": 6.731249999999999e-05, "loss": 1.1929, "step": 1700 }, { "epoch": 11.54, "learning_rate": 6.62095588235294e-05, "loss": 1.166, "step": 1800 }, { "epoch": 12.18, "learning_rate": 6.510661764705882e-05, "loss": 1.0971, "step": 1900 }, { "epoch": 12.82, "learning_rate": 6.400367647058824e-05, "loss": 1.0819, "step": 2000 }, { "epoch": 12.82, "eval_loss": 0.2516830265522003, "eval_runtime": 66.7082, "eval_samples_per_second": 32.2, "eval_steps_per_second": 2.024, "eval_wer": 0.35080684596577016, "step": 2000 }, { "epoch": 13.46, "learning_rate": 6.290073529411764e-05, "loss": 1.0892, "step": 2100 }, { "epoch": 14.1, "learning_rate": 6.179779411764705e-05, "loss": 1.0534, "step": 2200 }, { "epoch": 14.74, "learning_rate": 6.069485294117646e-05, "loss": 1.012, "step": 2300 }, { "epoch": 15.38, "learning_rate": 5.9591911764705876e-05, "loss": 1.0136, "step": 2400 }, { "epoch": 15.38, "eval_loss": 0.22566433250904083, "eval_runtime": 66.3228, "eval_samples_per_second": 32.387, "eval_steps_per_second": 2.036, "eval_wer": 0.3123716381418093, "step": 2400 }, { "epoch": 16.03, "learning_rate": 5.848897058823529e-05, "loss": 0.9991, "step": 2500 }, { "epoch": 16.67, "learning_rate": 5.73860294117647e-05, "loss": 0.9722, "step": 2600 }, { "epoch": 17.31, "learning_rate": 5.6283088235294115e-05, "loss": 0.9794, "step": 2700 }, { "epoch": 17.95, "learning_rate": 5.5180147058823523e-05, "loss": 0.9625, "step": 2800 }, { "epoch": 17.95, "eval_loss": 0.19747723639011383, "eval_runtime": 67.1687, "eval_samples_per_second": 31.979, "eval_steps_per_second": 2.01, "eval_wer": 0.2311002444987775, "step": 2800 }, { "epoch": 18.59, "learning_rate": 5.407720588235294e-05, "loss": 0.9505, "step": 2900 }, { "epoch": 19.23, "learning_rate": 5.297426470588235e-05, "loss": 0.9337, "step": 3000 }, { "epoch": 19.87, "learning_rate": 5.187132352941176e-05, "loss": 0.936, "step": 3100 }, { "epoch": 20.51, "learning_rate": 5.076838235294117e-05, "loss": 0.901, "step": 3200 }, { "epoch": 20.51, "eval_loss": 0.19860759377479553, "eval_runtime": 66.128, "eval_samples_per_second": 32.482, "eval_steps_per_second": 2.041, "eval_wer": 0.20968215158924206, "step": 3200 }, { "epoch": 21.15, "learning_rate": 4.9665441176470586e-05, "loss": 0.9179, "step": 3300 }, { "epoch": 21.79, "learning_rate": 4.8562499999999995e-05, "loss": 0.8963, "step": 3400 }, { "epoch": 22.44, "learning_rate": 4.745955882352941e-05, "loss": 0.8826, "step": 3500 }, { "epoch": 23.08, "learning_rate": 4.635661764705882e-05, "loss": 0.8842, "step": 3600 }, { "epoch": 23.08, "eval_loss": 0.19037693738937378, "eval_runtime": 67.9694, "eval_samples_per_second": 31.602, "eval_steps_per_second": 1.986, "eval_wer": 0.2039119804400978, "step": 3600 }, { "epoch": 23.72, "learning_rate": 4.5253676470588234e-05, "loss": 0.8683, "step": 3700 }, { "epoch": 24.36, "learning_rate": 4.416176470588235e-05, "loss": 0.8599, "step": 3800 }, { "epoch": 25.0, "learning_rate": 4.305882352941176e-05, "loss": 0.8658, "step": 3900 }, { "epoch": 25.64, "learning_rate": 4.1955882352941173e-05, "loss": 0.8542, "step": 4000 }, { "epoch": 25.64, "eval_loss": 0.1846681386232376, "eval_runtime": 67.5681, "eval_samples_per_second": 31.79, "eval_steps_per_second": 1.998, "eval_wer": 0.19814180929095354, "step": 4000 }, { "epoch": 26.28, "learning_rate": 4.085294117647058e-05, "loss": 0.8468, "step": 4100 }, { "epoch": 26.92, "learning_rate": 3.975e-05, "loss": 0.8462, "step": 4200 }, { "epoch": 27.56, "learning_rate": 3.8647058823529406e-05, "loss": 0.8284, "step": 4300 }, { "epoch": 28.21, "learning_rate": 3.754411764705882e-05, "loss": 0.8244, "step": 4400 }, { "epoch": 28.21, "eval_loss": 0.1804967224597931, "eval_runtime": 66.5917, "eval_samples_per_second": 32.256, "eval_steps_per_second": 2.027, "eval_wer": 0.18474327628361858, "step": 4400 }, { "epoch": 28.85, "learning_rate": 3.6441176470588236e-05, "loss": 0.8288, "step": 4500 }, { "epoch": 29.49, "learning_rate": 3.5338235294117645e-05, "loss": 0.7904, "step": 4600 }, { "epoch": 30.13, "learning_rate": 3.423529411764706e-05, "loss": 0.8157, "step": 4700 }, { "epoch": 30.77, "learning_rate": 3.313235294117647e-05, "loss": 0.7689, "step": 4800 }, { "epoch": 30.77, "eval_loss": 0.17355979979038239, "eval_runtime": 69.3305, "eval_samples_per_second": 30.982, "eval_steps_per_second": 1.947, "eval_wer": 0.18317848410757948, "step": 4800 }, { "epoch": 31.41, "learning_rate": 3.202941176470588e-05, "loss": 0.7948, "step": 4900 }, { "epoch": 32.05, "learning_rate": 3.092647058823529e-05, "loss": 0.8035, "step": 5000 }, { "epoch": 32.69, "learning_rate": 2.9823529411764704e-05, "loss": 0.7754, "step": 5100 }, { "epoch": 33.33, "learning_rate": 2.8720588235294116e-05, "loss": 0.7825, "step": 5200 }, { "epoch": 33.33, "eval_loss": 0.1698261797428131, "eval_runtime": 67.6625, "eval_samples_per_second": 31.746, "eval_steps_per_second": 1.995, "eval_wer": 0.18210268948655256, "step": 5200 }, { "epoch": 33.97, "learning_rate": 2.7617647058823528e-05, "loss": 0.7748, "step": 5300 }, { "epoch": 34.62, "learning_rate": 2.651470588235294e-05, "loss": 0.7704, "step": 5400 }, { "epoch": 35.26, "learning_rate": 2.5411764705882348e-05, "loss": 0.7597, "step": 5500 }, { "epoch": 35.9, "learning_rate": 2.430882352941176e-05, "loss": 0.7817, "step": 5600 }, { "epoch": 35.9, "eval_loss": 0.17581327259540558, "eval_runtime": 67.4489, "eval_samples_per_second": 31.846, "eval_steps_per_second": 2.002, "eval_wer": 0.18034229828850856, "step": 5600 }, { "epoch": 36.54, "learning_rate": 2.3205882352941172e-05, "loss": 0.744, "step": 5700 }, { "epoch": 37.18, "learning_rate": 2.2102941176470584e-05, "loss": 0.7508, "step": 5800 }, { "epoch": 37.82, "learning_rate": 2.1e-05, "loss": 0.746, "step": 5900 }, { "epoch": 38.46, "learning_rate": 1.989705882352941e-05, "loss": 0.7488, "step": 6000 }, { "epoch": 38.46, "eval_loss": 0.16634251177310944, "eval_runtime": 67.3054, "eval_samples_per_second": 31.914, "eval_steps_per_second": 2.006, "eval_wer": 0.17603911980440098, "step": 6000 }, { "epoch": 39.1, "learning_rate": 1.8794117647058823e-05, "loss": 0.7486, "step": 6100 }, { "epoch": 39.74, "learning_rate": 1.7691176470588234e-05, "loss": 0.7281, "step": 6200 }, { "epoch": 40.38, "learning_rate": 1.6588235294117646e-05, "loss": 0.7317, "step": 6300 }, { "epoch": 41.03, "learning_rate": 1.5485294117647058e-05, "loss": 0.7171, "step": 6400 }, { "epoch": 41.03, "eval_loss": 0.16363976895809174, "eval_runtime": 68.4211, "eval_samples_per_second": 31.394, "eval_steps_per_second": 1.973, "eval_wer": 0.17212713936430318, "step": 6400 }, { "epoch": 41.67, "learning_rate": 1.438235294117647e-05, "loss": 0.7196, "step": 6500 }, { "epoch": 42.31, "learning_rate": 1.3279411764705882e-05, "loss": 0.7292, "step": 6600 }, { "epoch": 42.95, "learning_rate": 1.2176470588235294e-05, "loss": 0.7319, "step": 6700 }, { "epoch": 43.59, "learning_rate": 1.1073529411764704e-05, "loss": 0.7222, "step": 6800 }, { "epoch": 43.59, "eval_loss": 0.16627496480941772, "eval_runtime": 67.2415, "eval_samples_per_second": 31.945, "eval_steps_per_second": 2.008, "eval_wer": 0.17290953545232274, "step": 6800 }, { "epoch": 44.23, "learning_rate": 9.970588235294116e-06, "loss": 0.709, "step": 6900 }, { "epoch": 44.87, "learning_rate": 8.86764705882353e-06, "loss": 0.7162, "step": 7000 }, { "epoch": 45.51, "learning_rate": 7.764705882352941e-06, "loss": 0.7282, "step": 7100 }, { "epoch": 46.15, "learning_rate": 6.6838235294117634e-06, "loss": 0.7156, "step": 7200 }, { "epoch": 46.15, "eval_loss": 0.16330638527870178, "eval_runtime": 67.815, "eval_samples_per_second": 31.674, "eval_steps_per_second": 1.991, "eval_wer": 0.17154034229828852, "step": 7200 }, { "epoch": 46.79, "learning_rate": 5.580882352941176e-06, "loss": 0.7001, "step": 7300 }, { "epoch": 47.44, "learning_rate": 4.477941176470588e-06, "loss": 0.7034, "step": 7400 }, { "epoch": 48.08, "learning_rate": 3.3749999999999995e-06, "loss": 0.6991, "step": 7500 }, { "epoch": 48.72, "learning_rate": 2.2720588235294113e-06, "loss": 0.7121, "step": 7600 }, { "epoch": 48.72, "eval_loss": 0.16656027734279633, "eval_runtime": 66.7215, "eval_samples_per_second": 32.194, "eval_steps_per_second": 2.023, "eval_wer": 0.17183374083129585, "step": 7600 }, { "epoch": 49.36, "learning_rate": 1.1691176470588234e-06, "loss": 0.7012, "step": 7700 }, { "epoch": 50.0, "learning_rate": 6.617647058823529e-08, "loss": 0.6972, "step": 7800 }, { "epoch": 50.0, "step": 7800, "total_flos": 2.5302348778981876e+19, "train_loss": 1.4054935827010717, "train_runtime": 11681.5311, "train_samples_per_second": 21.243, "train_steps_per_second": 0.668 } ], "max_steps": 7800, "num_train_epochs": 50, "total_flos": 2.5302348778981876e+19, "trial_name": null, "trial_params": null }