{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 9650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.52, "learning_rate": 3.2333333333333334e-06, "loss": 11.6585, "step": 100 }, { "epoch": 1.04, "learning_rate": 6.566666666666667e-06, "loss": 3.4349, "step": 200 }, { "epoch": 1.55, "learning_rate": 9.900000000000002e-06, "loss": 3.0733, "step": 300 }, { "epoch": 2.07, "learning_rate": 1.3233333333333334e-05, "loss": 2.0674, "step": 400 }, { "epoch": 2.07, "eval_loss": 1.341134786605835, "eval_runtime": 163.433, "eval_samples_per_second": 16.478, "eval_steps_per_second": 1.034, "eval_wer": 0.8834933444645424, "step": 400 }, { "epoch": 2.59, "learning_rate": 1.6566666666666665e-05, "loss": 1.6791, "step": 500 }, { "epoch": 3.11, "learning_rate": 1.9900000000000003e-05, "loss": 1.5037, "step": 600 }, { "epoch": 3.63, "learning_rate": 2.3233333333333333e-05, "loss": 1.3966, "step": 700 }, { "epoch": 4.15, "learning_rate": 2.6566666666666668e-05, "loss": 1.324, "step": 800 }, { "epoch": 4.15, "eval_loss": 0.9311084747314453, "eval_runtime": 162.7031, "eval_samples_per_second": 16.552, "eval_steps_per_second": 1.039, "eval_wer": 0.7142062056490887, "step": 800 }, { "epoch": 4.66, "learning_rate": 2.9900000000000002e-05, "loss": 1.2854, "step": 900 }, { "epoch": 5.18, "learning_rate": 3.323333333333333e-05, "loss": 1.237, "step": 1000 }, { "epoch": 5.7, "learning_rate": 3.656666666666666e-05, "loss": 1.2105, "step": 1100 }, { "epoch": 6.22, "learning_rate": 3.99e-05, "loss": 1.2023, "step": 1200 }, { "epoch": 6.22, "eval_loss": 0.8060462474822998, "eval_runtime": 163.1532, "eval_samples_per_second": 16.506, "eval_steps_per_second": 1.036, "eval_wer": 0.6170400259728213, "step": 1200 }, { "epoch": 6.74, "learning_rate": 4.323333333333334e-05, "loss": 1.1816, "step": 1300 }, { "epoch": 7.25, "learning_rate": 4.656666666666667e-05, "loss": 1.1895, "step": 1400 }, { "epoch": 7.77, "learning_rate": 4.99e-05, "loss": 1.1743, "step": 1500 }, { "epoch": 8.29, "learning_rate": 4.940490797546012e-05, "loss": 1.1573, "step": 1600 }, { "epoch": 8.29, "eval_loss": 0.741489589214325, "eval_runtime": 162.3319, "eval_samples_per_second": 16.589, "eval_steps_per_second": 1.041, "eval_wer": 0.49724038773711793, "step": 1600 }, { "epoch": 8.81, "learning_rate": 4.879141104294479e-05, "loss": 1.1409, "step": 1700 }, { "epoch": 9.33, "learning_rate": 4.817791411042945e-05, "loss": 1.1314, "step": 1800 }, { "epoch": 9.84, "learning_rate": 4.756441717791411e-05, "loss": 1.1364, "step": 1900 }, { "epoch": 10.36, "learning_rate": 4.695092024539878e-05, "loss": 1.1117, "step": 2000 }, { "epoch": 10.36, "eval_loss": 0.7248229384422302, "eval_runtime": 162.1078, "eval_samples_per_second": 16.612, "eval_steps_per_second": 1.043, "eval_wer": 0.4587913362088957, "step": 2000 }, { "epoch": 10.88, "learning_rate": 4.633742331288344e-05, "loss": 1.1017, "step": 2100 }, { "epoch": 11.4, "learning_rate": 4.5723926380368096e-05, "loss": 1.0979, "step": 2200 }, { "epoch": 11.92, "learning_rate": 4.511042944785277e-05, "loss": 1.0932, "step": 2300 }, { "epoch": 12.44, "learning_rate": 4.4496932515337425e-05, "loss": 1.0672, "step": 2400 }, { "epoch": 12.44, "eval_loss": 0.6728883981704712, "eval_runtime": 161.2322, "eval_samples_per_second": 16.703, "eval_steps_per_second": 1.048, "eval_wer": 0.4349983766986689, "step": 2400 }, { "epoch": 12.95, "learning_rate": 4.388343558282208e-05, "loss": 1.0797, "step": 2500 }, { "epoch": 13.47, "learning_rate": 4.3269938650306755e-05, "loss": 1.0521, "step": 2600 }, { "epoch": 13.99, "learning_rate": 4.265644171779141e-05, "loss": 1.051, "step": 2700 }, { "epoch": 14.51, "learning_rate": 4.204294478527608e-05, "loss": 1.0336, "step": 2800 }, { "epoch": 14.51, "eval_loss": 0.7116607427597046, "eval_runtime": 160.91, "eval_samples_per_second": 16.736, "eval_steps_per_second": 1.05, "eval_wer": 0.4345809563563842, "step": 2800 }, { "epoch": 15.03, "learning_rate": 4.142944785276074e-05, "loss": 1.0411, "step": 2900 }, { "epoch": 15.54, "learning_rate": 4.08159509202454e-05, "loss": 1.0135, "step": 3000 }, { "epoch": 16.06, "learning_rate": 4.0202453987730065e-05, "loss": 1.0102, "step": 3100 }, { "epoch": 16.58, "learning_rate": 3.958895705521473e-05, "loss": 1.0025, "step": 3200 }, { "epoch": 16.58, "eval_loss": 0.7018883228302002, "eval_runtime": 160.9879, "eval_samples_per_second": 16.728, "eval_steps_per_second": 1.05, "eval_wer": 0.4271601502713232, "step": 3200 }, { "epoch": 17.1, "learning_rate": 3.897546012269939e-05, "loss": 0.9754, "step": 3300 }, { "epoch": 17.62, "learning_rate": 3.836196319018405e-05, "loss": 0.9695, "step": 3400 }, { "epoch": 18.13, "learning_rate": 3.774846625766871e-05, "loss": 0.9663, "step": 3500 }, { "epoch": 18.65, "learning_rate": 3.7134969325153375e-05, "loss": 0.9578, "step": 3600 }, { "epoch": 18.65, "eval_loss": 0.6791622042655945, "eval_runtime": 162.2664, "eval_samples_per_second": 16.596, "eval_steps_per_second": 1.041, "eval_wer": 0.41176197764482164, "step": 3600 }, { "epoch": 19.17, "learning_rate": 3.652147239263804e-05, "loss": 0.9392, "step": 3700 }, { "epoch": 19.69, "learning_rate": 3.59079754601227e-05, "loss": 0.9504, "step": 3800 }, { "epoch": 20.21, "learning_rate": 3.529447852760737e-05, "loss": 0.9389, "step": 3900 }, { "epoch": 20.73, "learning_rate": 3.468098159509203e-05, "loss": 0.9272, "step": 4000 }, { "epoch": 20.73, "eval_loss": 0.6863027811050415, "eval_runtime": 161.4481, "eval_samples_per_second": 16.68, "eval_steps_per_second": 1.047, "eval_wer": 0.41561152080144703, "step": 4000 }, { "epoch": 21.24, "learning_rate": 3.4067484662576685e-05, "loss": 0.926, "step": 4100 }, { "epoch": 21.76, "learning_rate": 3.3453987730061356e-05, "loss": 0.9145, "step": 4200 }, { "epoch": 22.28, "learning_rate": 3.284662576687117e-05, "loss": 0.9118, "step": 4300 }, { "epoch": 22.8, "learning_rate": 3.223312883435583e-05, "loss": 0.9321, "step": 4400 }, { "epoch": 22.8, "eval_loss": 0.6534942984580994, "eval_runtime": 160.5933, "eval_samples_per_second": 16.769, "eval_steps_per_second": 1.052, "eval_wer": 0.3971986457028895, "step": 4400 }, { "epoch": 23.32, "learning_rate": 3.161963190184049e-05, "loss": 0.9064, "step": 4500 }, { "epoch": 23.83, "learning_rate": 3.100613496932516e-05, "loss": 0.8915, "step": 4600 }, { "epoch": 24.35, "learning_rate": 3.0392638036809817e-05, "loss": 0.8907, "step": 4700 }, { "epoch": 24.87, "learning_rate": 2.977914110429448e-05, "loss": 0.8802, "step": 4800 }, { "epoch": 24.87, "eval_loss": 0.6766195297241211, "eval_runtime": 161.8159, "eval_samples_per_second": 16.642, "eval_steps_per_second": 1.044, "eval_wer": 0.39056630026436623, "step": 4800 }, { "epoch": 25.39, "learning_rate": 2.9171779141104295e-05, "loss": 0.8834, "step": 4900 }, { "epoch": 25.91, "learning_rate": 2.855828220858896e-05, "loss": 0.866, "step": 5000 }, { "epoch": 26.42, "learning_rate": 2.794478527607362e-05, "loss": 0.8497, "step": 5100 }, { "epoch": 26.94, "learning_rate": 2.7331288343558282e-05, "loss": 0.844, "step": 5200 }, { "epoch": 26.94, "eval_loss": 0.678193986415863, "eval_runtime": 161.4084, "eval_samples_per_second": 16.684, "eval_steps_per_second": 1.047, "eval_wer": 0.39492602383933956, "step": 5200 }, { "epoch": 27.46, "learning_rate": 2.6717791411042947e-05, "loss": 0.8466, "step": 5300 }, { "epoch": 27.98, "learning_rate": 2.6104294478527608e-05, "loss": 0.831, "step": 5400 }, { "epoch": 28.5, "learning_rate": 2.5490797546012273e-05, "loss": 0.82, "step": 5500 }, { "epoch": 29.02, "learning_rate": 2.4877300613496934e-05, "loss": 0.8387, "step": 5600 }, { "epoch": 29.02, "eval_loss": 0.6915661692619324, "eval_runtime": 161.098, "eval_samples_per_second": 16.717, "eval_steps_per_second": 1.049, "eval_wer": 0.3921432215574417, "step": 5600 }, { "epoch": 29.53, "learning_rate": 2.4263803680981595e-05, "loss": 0.8205, "step": 5700 }, { "epoch": 30.05, "learning_rate": 2.3650306748466257e-05, "loss": 0.8139, "step": 5800 }, { "epoch": 30.57, "learning_rate": 2.303680981595092e-05, "loss": 0.8197, "step": 5900 }, { "epoch": 31.09, "learning_rate": 2.2423312883435586e-05, "loss": 0.8042, "step": 6000 }, { "epoch": 31.09, "eval_loss": 0.6805949807167053, "eval_runtime": 161.7661, "eval_samples_per_second": 16.647, "eval_steps_per_second": 1.045, "eval_wer": 0.3796669913269329, "step": 6000 }, { "epoch": 31.61, "learning_rate": 2.1809815950920244e-05, "loss": 0.7911, "step": 6100 }, { "epoch": 32.12, "learning_rate": 2.119631901840491e-05, "loss": 0.7926, "step": 6200 }, { "epoch": 32.64, "learning_rate": 2.0582822085889574e-05, "loss": 0.7922, "step": 6300 }, { "epoch": 33.16, "learning_rate": 1.9969325153374235e-05, "loss": 0.793, "step": 6400 }, { "epoch": 33.16, "eval_loss": 0.7119700908660889, "eval_runtime": 161.5775, "eval_samples_per_second": 16.667, "eval_steps_per_second": 1.046, "eval_wer": 0.383052734103242, "step": 6400 }, { "epoch": 33.68, "learning_rate": 1.9355828220858896e-05, "loss": 0.7779, "step": 6500 }, { "epoch": 34.2, "learning_rate": 1.874233128834356e-05, "loss": 0.7739, "step": 6600 }, { "epoch": 34.72, "learning_rate": 1.8128834355828222e-05, "loss": 0.7579, "step": 6700 }, { "epoch": 35.23, "learning_rate": 1.7515337423312884e-05, "loss": 0.7567, "step": 6800 }, { "epoch": 35.23, "eval_loss": 0.686227560043335, "eval_runtime": 161.2767, "eval_samples_per_second": 16.698, "eval_steps_per_second": 1.048, "eval_wer": 0.3808264922777237, "step": 6800 }, { "epoch": 35.75, "learning_rate": 1.6901840490797545e-05, "loss": 0.7623, "step": 6900 }, { "epoch": 36.27, "learning_rate": 1.628834355828221e-05, "loss": 0.7603, "step": 7000 }, { "epoch": 36.79, "learning_rate": 1.5674846625766874e-05, "loss": 0.7416, "step": 7100 }, { "epoch": 37.31, "learning_rate": 1.5061349693251534e-05, "loss": 0.7463, "step": 7200 }, { "epoch": 37.31, "eval_loss": 0.6893351078033447, "eval_runtime": 161.5435, "eval_samples_per_second": 16.67, "eval_steps_per_second": 1.046, "eval_wer": 0.3709011641389546, "step": 7200 }, { "epoch": 37.82, "learning_rate": 1.445398773006135e-05, "loss": 0.7333, "step": 7300 }, { "epoch": 38.34, "learning_rate": 1.3840490797546013e-05, "loss": 0.7142, "step": 7400 }, { "epoch": 38.86, "learning_rate": 1.3226993865030676e-05, "loss": 0.7291, "step": 7500 }, { "epoch": 39.38, "learning_rate": 1.2613496932515337e-05, "loss": 0.7053, "step": 7600 }, { "epoch": 39.38, "eval_loss": 0.7095528841018677, "eval_runtime": 160.1545, "eval_samples_per_second": 16.815, "eval_steps_per_second": 1.055, "eval_wer": 0.3700663234543852, "step": 7600 }, { "epoch": 39.9, "learning_rate": 1.2e-05, "loss": 0.727, "step": 7700 }, { "epoch": 40.41, "learning_rate": 1.1386503067484663e-05, "loss": 0.7157, "step": 7800 }, { "epoch": 40.93, "learning_rate": 1.0773006134969325e-05, "loss": 0.6954, "step": 7900 }, { "epoch": 41.45, "learning_rate": 1.015950920245399e-05, "loss": 0.6906, "step": 8000 }, { "epoch": 41.45, "eval_loss": 0.6921066045761108, "eval_runtime": 161.634, "eval_samples_per_second": 16.661, "eval_steps_per_second": 1.046, "eval_wer": 0.3675618014006771, "step": 8000 }, { "epoch": 41.97, "learning_rate": 9.54601226993865e-06, "loss": 0.6817, "step": 8100 }, { "epoch": 42.49, "learning_rate": 8.932515337423314e-06, "loss": 0.6861, "step": 8200 }, { "epoch": 43.01, "learning_rate": 8.319018404907975e-06, "loss": 0.6942, "step": 8300 }, { "epoch": 43.52, "learning_rate": 7.705521472392638e-06, "loss": 0.6891, "step": 8400 }, { "epoch": 43.52, "eval_loss": 0.7166885733604431, "eval_runtime": 160.1699, "eval_samples_per_second": 16.813, "eval_steps_per_second": 1.055, "eval_wer": 0.3662631603357915, "step": 8400 }, { "epoch": 44.04, "learning_rate": 7.092024539877301e-06, "loss": 0.6844, "step": 8500 }, { "epoch": 44.56, "learning_rate": 6.478527607361963e-06, "loss": 0.6693, "step": 8600 }, { "epoch": 45.08, "learning_rate": 5.865030674846626e-06, "loss": 0.6703, "step": 8700 }, { "epoch": 45.6, "learning_rate": 5.251533742331288e-06, "loss": 0.658, "step": 8800 }, { "epoch": 45.6, "eval_loss": 0.6833298802375793, "eval_runtime": 159.9929, "eval_samples_per_second": 16.832, "eval_steps_per_second": 1.056, "eval_wer": 0.3580075135661611, "step": 8800 }, { "epoch": 46.11, "learning_rate": 4.6380368098159506e-06, "loss": 0.6549, "step": 8900 }, { "epoch": 46.63, "learning_rate": 4.0245398773006136e-06, "loss": 0.6665, "step": 9000 }, { "epoch": 47.15, "learning_rate": 3.411042944785276e-06, "loss": 0.6537, "step": 9100 }, { "epoch": 47.67, "learning_rate": 2.7975460122699388e-06, "loss": 0.6576, "step": 9200 }, { "epoch": 47.67, "eval_loss": 0.6914297342300415, "eval_runtime": 160.7459, "eval_samples_per_second": 16.753, "eval_steps_per_second": 1.051, "eval_wer": 0.3569407726914336, "step": 9200 }, { "epoch": 48.19, "learning_rate": 2.1840490797546013e-06, "loss": 0.6601, "step": 9300 }, { "epoch": 48.7, "learning_rate": 1.570552147239264e-06, "loss": 0.6327, "step": 9400 }, { "epoch": 49.22, "learning_rate": 9.570552147239263e-07, "loss": 0.6448, "step": 9500 }, { "epoch": 49.74, "learning_rate": 3.43558282208589e-07, "loss": 0.6358, "step": 9600 }, { "epoch": 49.74, "eval_loss": 0.6921652555465698, "eval_runtime": 161.3961, "eval_samples_per_second": 16.686, "eval_steps_per_second": 1.047, "eval_wer": 0.3551319512082, "step": 9600 }, { "epoch": 50.0, "step": 9650, "total_flos": 1.2240549585337914e+20, "train_loss": 1.0753348940518237, "train_runtime": 32646.4121, "train_samples_per_second": 9.459, "train_steps_per_second": 0.296 } ], "max_steps": 9650, "num_train_epochs": 50, "total_flos": 1.2240549585337914e+20, "trial_name": null, "trial_params": null }