{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 9650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.52, "learning_rate": 3.2333333333333334e-06, "loss": 12.3601, "step": 100 }, { "epoch": 1.04, "learning_rate": 6.566666666666667e-06, "loss": 3.4654, "step": 200 }, { "epoch": 1.55, "learning_rate": 9.900000000000002e-06, "loss": 3.3012, "step": 300 }, { "epoch": 2.07, "learning_rate": 1.3233333333333334e-05, "loss": 2.514, "step": 400 }, { "epoch": 2.07, "eval_loss": 1.4589147567749023, "eval_runtime": 164.3831, "eval_samples_per_second": 16.382, "eval_steps_per_second": 1.028, "eval_wer": 0.8530606592189087, "step": 400 }, { "epoch": 2.59, "learning_rate": 1.6566666666666665e-05, "loss": 1.8333, "step": 500 }, { "epoch": 3.11, "learning_rate": 1.9900000000000003e-05, "loss": 1.6083, "step": 600 }, { "epoch": 3.63, "learning_rate": 2.3233333333333333e-05, "loss": 1.4999, "step": 700 }, { "epoch": 4.15, "learning_rate": 2.6566666666666668e-05, "loss": 1.4289, "step": 800 }, { "epoch": 4.15, "eval_loss": 0.8939768075942993, "eval_runtime": 163.9044, "eval_samples_per_second": 16.43, "eval_steps_per_second": 1.031, "eval_wer": 0.6475394700397008, "step": 800 }, { "epoch": 4.66, "learning_rate": 2.9900000000000002e-05, "loss": 1.3809, "step": 900 }, { "epoch": 5.18, "learning_rate": 3.323333333333333e-05, "loss": 1.3202, "step": 1000 }, { "epoch": 5.7, "learning_rate": 3.656666666666666e-05, "loss": 1.2969, "step": 1100 }, { "epoch": 6.22, "learning_rate": 3.99e-05, "loss": 1.276, "step": 1200 }, { "epoch": 6.22, "eval_loss": 0.7743439078330994, "eval_runtime": 165.0198, "eval_samples_per_second": 16.319, "eval_steps_per_second": 1.024, "eval_wer": 0.6089003785430708, "step": 1200 }, { "epoch": 6.74, "learning_rate": 4.323333333333334e-05, "loss": 1.2398, "step": 1300 }, { "epoch": 7.25, "learning_rate": 4.656666666666667e-05, "loss": 1.2339, "step": 1400 }, { "epoch": 7.77, "learning_rate": 4.99e-05, "loss": 1.2393, "step": 1500 }, { "epoch": 8.29, "learning_rate": 4.940490797546012e-05, "loss": 1.2213, "step": 1600 }, { "epoch": 8.29, "eval_loss": 0.6918994784355164, "eval_runtime": 163.5897, "eval_samples_per_second": 16.462, "eval_steps_per_second": 1.033, "eval_wer": 0.4973225002308189, "step": 1600 }, { "epoch": 8.81, "learning_rate": 4.879141104294479e-05, "loss": 1.2088, "step": 1700 }, { "epoch": 9.33, "learning_rate": 4.817791411042945e-05, "loss": 1.189, "step": 1800 }, { "epoch": 9.84, "learning_rate": 4.756441717791411e-05, "loss": 1.1805, "step": 1900 }, { "epoch": 10.36, "learning_rate": 4.695092024539878e-05, "loss": 1.1522, "step": 2000 }, { "epoch": 10.36, "eval_loss": 0.6634941697120667, "eval_runtime": 162.8319, "eval_samples_per_second": 16.539, "eval_steps_per_second": 1.038, "eval_wer": 0.4588219001015603, "step": 2000 }, { "epoch": 10.88, "learning_rate": 4.633742331288344e-05, "loss": 1.1468, "step": 2100 }, { "epoch": 11.4, "learning_rate": 4.5723926380368096e-05, "loss": 1.129, "step": 2200 }, { "epoch": 11.92, "learning_rate": 4.511042944785277e-05, "loss": 1.1133, "step": 2300 }, { "epoch": 12.44, "learning_rate": 4.4496932515337425e-05, "loss": 1.0914, "step": 2400 }, { "epoch": 12.44, "eval_loss": 0.6839072704315186, "eval_runtime": 163.9109, "eval_samples_per_second": 16.43, "eval_steps_per_second": 1.031, "eval_wer": 0.45859108115594127, "step": 2400 }, { "epoch": 12.95, "learning_rate": 4.388343558282208e-05, "loss": 1.1104, "step": 2500 }, { "epoch": 13.47, "learning_rate": 4.3269938650306755e-05, "loss": 1.0732, "step": 2600 }, { "epoch": 13.99, "learning_rate": 4.265644171779141e-05, "loss": 1.0746, "step": 2700 }, { "epoch": 14.51, "learning_rate": 4.204294478527608e-05, "loss": 1.0499, "step": 2800 }, { "epoch": 14.51, "eval_loss": 0.7151051163673401, "eval_runtime": 165.6452, "eval_samples_per_second": 16.258, "eval_steps_per_second": 1.02, "eval_wer": 0.44672698735112176, "step": 2800 }, { "epoch": 15.03, "learning_rate": 4.142944785276074e-05, "loss": 1.0641, "step": 2900 }, { "epoch": 15.54, "learning_rate": 4.08159509202454e-05, "loss": 1.0386, "step": 3000 }, { "epoch": 16.06, "learning_rate": 4.0202453987730065e-05, "loss": 1.0328, "step": 3100 }, { "epoch": 16.58, "learning_rate": 3.958895705521473e-05, "loss": 1.0238, "step": 3200 }, { "epoch": 16.58, "eval_loss": 0.6823991537094116, "eval_runtime": 163.1559, "eval_samples_per_second": 16.506, "eval_steps_per_second": 1.036, "eval_wer": 0.44358784969070264, "step": 3200 }, { "epoch": 17.1, "learning_rate": 3.897546012269939e-05, "loss": 1.0055, "step": 3300 }, { "epoch": 17.62, "learning_rate": 3.836196319018405e-05, "loss": 1.006, "step": 3400 }, { "epoch": 18.13, "learning_rate": 3.774846625766871e-05, "loss": 1.0008, "step": 3500 }, { "epoch": 18.65, "learning_rate": 3.7134969325153375e-05, "loss": 0.9963, "step": 3600 }, { "epoch": 18.65, "eval_loss": 0.6872182488441467, "eval_runtime": 163.684, "eval_samples_per_second": 16.452, "eval_steps_per_second": 1.032, "eval_wer": 0.44368017726895026, "step": 3600 }, { "epoch": 19.17, "learning_rate": 3.652147239263804e-05, "loss": 0.9852, "step": 3700 }, { "epoch": 19.69, "learning_rate": 3.59079754601227e-05, "loss": 0.9905, "step": 3800 }, { "epoch": 20.21, "learning_rate": 3.529447852760737e-05, "loss": 0.9621, "step": 3900 }, { "epoch": 20.73, "learning_rate": 3.468711656441718e-05, "loss": 0.9728, "step": 4000 }, { "epoch": 20.73, "eval_loss": 0.704698383808136, "eval_runtime": 163.0374, "eval_samples_per_second": 16.518, "eval_steps_per_second": 1.037, "eval_wer": 0.4243837134151971, "step": 4000 }, { "epoch": 21.24, "learning_rate": 3.407361963190184e-05, "loss": 0.9534, "step": 4100 }, { "epoch": 21.76, "learning_rate": 3.34601226993865e-05, "loss": 0.9599, "step": 4200 }, { "epoch": 22.28, "learning_rate": 3.284662576687117e-05, "loss": 0.9427, "step": 4300 }, { "epoch": 22.8, "learning_rate": 3.223312883435583e-05, "loss": 0.9373, "step": 4400 }, { "epoch": 22.8, "eval_loss": 0.656895637512207, "eval_runtime": 162.8754, "eval_samples_per_second": 16.534, "eval_steps_per_second": 1.038, "eval_wer": 0.41889022250946356, "step": 4400 }, { "epoch": 23.32, "learning_rate": 3.161963190184049e-05, "loss": 0.9348, "step": 4500 }, { "epoch": 23.83, "learning_rate": 3.100613496932516e-05, "loss": 0.9295, "step": 4600 }, { "epoch": 24.35, "learning_rate": 3.0392638036809817e-05, "loss": 0.9162, "step": 4700 }, { "epoch": 24.87, "learning_rate": 2.977914110429448e-05, "loss": 0.9028, "step": 4800 }, { "epoch": 24.87, "eval_loss": 0.6622523665428162, "eval_runtime": 163.2025, "eval_samples_per_second": 16.501, "eval_steps_per_second": 1.036, "eval_wer": 0.40938048194995846, "step": 4800 }, { "epoch": 25.39, "learning_rate": 2.9165644171779143e-05, "loss": 0.9002, "step": 4900 }, { "epoch": 25.91, "learning_rate": 2.8552147239263805e-05, "loss": 0.8869, "step": 5000 }, { "epoch": 26.42, "learning_rate": 2.7938650306748466e-05, "loss": 0.8741, "step": 5100 }, { "epoch": 26.94, "learning_rate": 2.732515337423313e-05, "loss": 0.8759, "step": 5200 }, { "epoch": 26.94, "eval_loss": 0.6723098158836365, "eval_runtime": 162.4177, "eval_samples_per_second": 16.581, "eval_steps_per_second": 1.041, "eval_wer": 0.41519711937955867, "step": 5200 }, { "epoch": 27.46, "learning_rate": 2.6711656441717792e-05, "loss": 0.8702, "step": 5300 }, { "epoch": 27.98, "learning_rate": 2.6098159509202457e-05, "loss": 0.8609, "step": 5400 }, { "epoch": 28.5, "learning_rate": 2.5484662576687118e-05, "loss": 0.8593, "step": 5500 }, { "epoch": 29.02, "learning_rate": 2.4871165644171783e-05, "loss": 0.8824, "step": 5600 }, { "epoch": 29.02, "eval_loss": 0.6467483043670654, "eval_runtime": 163.0935, "eval_samples_per_second": 16.512, "eval_steps_per_second": 1.036, "eval_wer": 0.40167112916628195, "step": 5600 }, { "epoch": 29.53, "learning_rate": 2.425766871165644e-05, "loss": 0.8717, "step": 5700 }, { "epoch": 30.05, "learning_rate": 2.3644171779141105e-05, "loss": 0.8514, "step": 5800 }, { "epoch": 30.57, "learning_rate": 2.303067484662577e-05, "loss": 0.851, "step": 5900 }, { "epoch": 31.09, "learning_rate": 2.2423312883435586e-05, "loss": 0.8371, "step": 6000 }, { "epoch": 31.09, "eval_loss": 0.6910686492919922, "eval_runtime": 162.9708, "eval_samples_per_second": 16.524, "eval_steps_per_second": 1.037, "eval_wer": 0.4079955682762441, "step": 6000 }, { "epoch": 31.61, "learning_rate": 2.1809815950920244e-05, "loss": 0.8276, "step": 6100 }, { "epoch": 32.12, "learning_rate": 2.119631901840491e-05, "loss": 0.8282, "step": 6200 }, { "epoch": 32.64, "learning_rate": 2.0582822085889574e-05, "loss": 0.8195, "step": 6300 }, { "epoch": 33.16, "learning_rate": 1.9969325153374235e-05, "loss": 0.8205, "step": 6400 }, { "epoch": 33.16, "eval_loss": 0.7144636511802673, "eval_runtime": 163.1126, "eval_samples_per_second": 16.51, "eval_steps_per_second": 1.036, "eval_wer": 0.4062875080786631, "step": 6400 }, { "epoch": 33.68, "learning_rate": 1.9355828220858896e-05, "loss": 0.8073, "step": 6500 }, { "epoch": 34.2, "learning_rate": 1.874233128834356e-05, "loss": 0.8088, "step": 6600 }, { "epoch": 34.72, "learning_rate": 1.8128834355828222e-05, "loss": 0.7828, "step": 6700 }, { "epoch": 35.23, "learning_rate": 1.7515337423312884e-05, "loss": 0.7837, "step": 6800 }, { "epoch": 35.23, "eval_loss": 0.7037481069564819, "eval_runtime": 163.3942, "eval_samples_per_second": 16.482, "eval_steps_per_second": 1.034, "eval_wer": 0.3929923368110054, "step": 6800 }, { "epoch": 35.75, "learning_rate": 1.6901840490797545e-05, "loss": 0.7893, "step": 6900 }, { "epoch": 36.27, "learning_rate": 1.628834355828221e-05, "loss": 0.7777, "step": 7000 }, { "epoch": 36.79, "learning_rate": 1.5674846625766874e-05, "loss": 0.7659, "step": 7100 }, { "epoch": 37.31, "learning_rate": 1.5067484662576689e-05, "loss": 0.7708, "step": 7200 }, { "epoch": 37.31, "eval_loss": 0.6925445199012756, "eval_runtime": 163.9386, "eval_samples_per_second": 16.427, "eval_steps_per_second": 1.031, "eval_wer": 0.38399039793186224, "step": 7200 }, { "epoch": 37.82, "learning_rate": 1.445398773006135e-05, "loss": 0.7568, "step": 7300 }, { "epoch": 38.34, "learning_rate": 1.3840490797546013e-05, "loss": 0.7467, "step": 7400 }, { "epoch": 38.86, "learning_rate": 1.3226993865030676e-05, "loss": 0.7456, "step": 7500 }, { "epoch": 39.38, "learning_rate": 1.2613496932515337e-05, "loss": 0.7359, "step": 7600 }, { "epoch": 39.38, "eval_loss": 0.7034336924552917, "eval_runtime": 163.3955, "eval_samples_per_second": 16.481, "eval_steps_per_second": 1.034, "eval_wer": 0.3829286307820146, "step": 7600 }, { "epoch": 39.9, "learning_rate": 1.2e-05, "loss": 0.7464, "step": 7700 }, { "epoch": 40.41, "learning_rate": 1.1386503067484663e-05, "loss": 0.7381, "step": 7800 }, { "epoch": 40.93, "learning_rate": 1.0773006134969325e-05, "loss": 0.726, "step": 7900 }, { "epoch": 41.45, "learning_rate": 1.015950920245399e-05, "loss": 0.7153, "step": 8000 }, { "epoch": 41.45, "eval_loss": 0.7030363082885742, "eval_runtime": 163.6509, "eval_samples_per_second": 16.456, "eval_steps_per_second": 1.033, "eval_wer": 0.37937401901948115, "step": 8000 }, { "epoch": 41.97, "learning_rate": 9.54601226993865e-06, "loss": 0.708, "step": 8100 }, { "epoch": 42.49, "learning_rate": 8.932515337423314e-06, "loss": 0.7127, "step": 8200 }, { "epoch": 43.01, "learning_rate": 8.319018404907975e-06, "loss": 0.7117, "step": 8300 }, { "epoch": 43.52, "learning_rate": 7.705521472392638e-06, "loss": 0.7127, "step": 8400 }, { "epoch": 43.52, "eval_loss": 0.6823467016220093, "eval_runtime": 163.2294, "eval_samples_per_second": 16.498, "eval_steps_per_second": 1.035, "eval_wer": 0.37609638999169054, "step": 8400 }, { "epoch": 44.04, "learning_rate": 7.092024539877301e-06, "loss": 0.7081, "step": 8500 }, { "epoch": 44.56, "learning_rate": 6.478527607361963e-06, "loss": 0.6977, "step": 8600 }, { "epoch": 45.08, "learning_rate": 5.865030674846626e-06, "loss": 0.6943, "step": 8700 }, { "epoch": 45.6, "learning_rate": 5.251533742331288e-06, "loss": 0.6884, "step": 8800 }, { "epoch": 45.6, "eval_loss": 0.6854104399681091, "eval_runtime": 163.0739, "eval_samples_per_second": 16.514, "eval_steps_per_second": 1.036, "eval_wer": 0.3710645369771951, "step": 8800 }, { "epoch": 46.11, "learning_rate": 4.6380368098159506e-06, "loss": 0.6803, "step": 8900 }, { "epoch": 46.63, "learning_rate": 4.0245398773006136e-06, "loss": 0.6902, "step": 9000 }, { "epoch": 47.15, "learning_rate": 3.411042944785276e-06, "loss": 0.6827, "step": 9100 }, { "epoch": 47.67, "learning_rate": 2.7975460122699388e-06, "loss": 0.6835, "step": 9200 }, { "epoch": 47.67, "eval_loss": 0.6723486185073853, "eval_runtime": 162.6892, "eval_samples_per_second": 16.553, "eval_steps_per_second": 1.039, "eval_wer": 0.36649432185393777, "step": 9200 }, { "epoch": 48.19, "learning_rate": 2.1840490797546013e-06, "loss": 0.6825, "step": 9300 }, { "epoch": 48.7, "learning_rate": 1.570552147239264e-06, "loss": 0.6611, "step": 9400 }, { "epoch": 49.22, "learning_rate": 9.570552147239263e-07, "loss": 0.6666, "step": 9500 }, { "epoch": 49.74, "learning_rate": 3.43558282208589e-07, "loss": 0.6703, "step": 9600 }, { "epoch": 49.74, "eval_loss": 0.6773473024368286, "eval_runtime": 162.1677, "eval_samples_per_second": 16.606, "eval_steps_per_second": 1.042, "eval_wer": 0.3667713045886806, "step": 9600 }, { "epoch": 50.0, "step": 9650, "total_flos": 1.2240973116802213e+20, "train_loss": 1.1252575924730053, "train_runtime": 34837.5253, "train_samples_per_second": 8.864, "train_steps_per_second": 0.277 } ], "max_steps": 9650, "num_train_epochs": 50, "total_flos": 1.2240973116802213e+20, "trial_name": null, "trial_params": null }