{ "best_metric": null, "best_model_checkpoint": null, "epoch": 17.999072356215212, "global_step": 6462, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.56, "eval_loss": 2.8832294940948486, "eval_runtime": 56.6037, "eval_samples_per_second": 9.045, "eval_steps_per_second": 1.131, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.11, "eval_loss": 1.1704888343811035, "eval_runtime": 56.4819, "eval_samples_per_second": 9.065, "eval_steps_per_second": 1.133, "eval_wer": 0.7788489628980426, "step": 400 }, { "epoch": 1.39, "learning_rate": 0.00029939999999999996, "loss": 3.3987, "step": 500 }, { "epoch": 1.67, "eval_loss": 0.7739295959472656, "eval_runtime": 57.0638, "eval_samples_per_second": 8.972, "eval_steps_per_second": 1.122, "eval_wer": 0.5895413380075957, "step": 600 }, { "epoch": 2.23, "eval_loss": 0.6045235395431519, "eval_runtime": 56.2609, "eval_samples_per_second": 9.1, "eval_steps_per_second": 1.138, "eval_wer": 0.4902132632193982, "step": 800 }, { "epoch": 2.78, "learning_rate": 0.0002748909761824891, "loss": 0.8313, "step": 1000 }, { "epoch": 2.78, "eval_loss": 0.5234911441802979, "eval_runtime": 56.3102, "eval_samples_per_second": 9.092, "eval_steps_per_second": 1.137, "eval_wer": 0.43938066023955596, "step": 1000 }, { "epoch": 3.34, "eval_loss": 0.48238545656204224, "eval_runtime": 56.3832, "eval_samples_per_second": 9.081, "eval_steps_per_second": 1.135, "eval_wer": 0.4002337131171487, "step": 1200 }, { "epoch": 3.9, "eval_loss": 0.4378102719783783, "eval_runtime": 56.2821, "eval_samples_per_second": 9.097, "eval_steps_per_second": 1.137, "eval_wer": 0.37540169442009935, "step": 1400 }, { "epoch": 4.18, "learning_rate": 0.00024973163367997313, "loss": 0.5342, "step": 1500 }, { "epoch": 4.46, "eval_loss": 0.44333964586257935, "eval_runtime": 56.3381, "eval_samples_per_second": 9.088, "eval_steps_per_second": 1.136, "eval_wer": 0.36342389716622847, "step": 1600 }, { "epoch": 5.01, "eval_loss": 0.41031020879745483, "eval_runtime": 56.4477, "eval_samples_per_second": 9.07, "eval_steps_per_second": 1.134, "eval_wer": 0.34852468594799885, "step": 1800 }, { "epoch": 5.57, "learning_rate": 0.00022457229117745723, "loss": 0.3792, "step": 2000 }, { "epoch": 5.57, "eval_loss": 0.3816453218460083, "eval_runtime": 56.4551, "eval_samples_per_second": 9.069, "eval_steps_per_second": 1.134, "eval_wer": 0.33099620216184633, "step": 2000 }, { "epoch": 6.13, "eval_loss": 0.3952919840812683, "eval_runtime": 56.4457, "eval_samples_per_second": 9.071, "eval_steps_per_second": 1.134, "eval_wer": 0.32252410166520595, "step": 2200 }, { "epoch": 6.68, "eval_loss": 0.39945441484451294, "eval_runtime": 56.6634, "eval_samples_per_second": 9.036, "eval_steps_per_second": 1.129, "eval_wer": 0.31317557697925796, "step": 2400 }, { "epoch": 6.96, "learning_rate": 0.00019941294867494128, "loss": 0.2924, "step": 2500 }, { "epoch": 7.24, "eval_loss": 0.3906857967376709, "eval_runtime": 56.3831, "eval_samples_per_second": 9.081, "eval_steps_per_second": 1.135, "eval_wer": 0.2930178206251826, "step": 2600 }, { "epoch": 7.8, "eval_loss": 0.35171157121658325, "eval_runtime": 56.7082, "eval_samples_per_second": 9.029, "eval_steps_per_second": 1.129, "eval_wer": 0.2740286298568507, "step": 2800 }, { "epoch": 8.36, "learning_rate": 0.00017425360617242535, "loss": 0.2217, "step": 3000 }, { "epoch": 8.36, "eval_loss": 0.33607447147369385, "eval_runtime": 56.5616, "eval_samples_per_second": 9.052, "eval_steps_per_second": 1.132, "eval_wer": 0.2591294186386211, "step": 3000 }, { "epoch": 8.91, "eval_loss": 0.3340049088001251, "eval_runtime": 56.5741, "eval_samples_per_second": 9.05, "eval_steps_per_second": 1.131, "eval_wer": 0.2451066316096991, "step": 3200 }, { "epoch": 9.47, "eval_loss": 0.3125685453414917, "eval_runtime": 56.4726, "eval_samples_per_second": 9.066, "eval_steps_per_second": 1.133, "eval_wer": 0.2448144902132632, "step": 3400 }, { "epoch": 9.75, "learning_rate": 0.00014909426366990943, "loss": 0.1714, "step": 3500 }, { "epoch": 10.03, "eval_loss": 0.34412676095962524, "eval_runtime": 56.5983, "eval_samples_per_second": 9.046, "eval_steps_per_second": 1.131, "eval_wer": 0.2556237218813906, "step": 3600 }, { "epoch": 10.58, "eval_loss": 0.3404456079006195, "eval_runtime": 56.5608, "eval_samples_per_second": 9.052, "eval_steps_per_second": 1.132, "eval_wer": 0.2521180251241601, "step": 3800 }, { "epoch": 11.14, "learning_rate": 0.0001239349211673935, "loss": 0.1395, "step": 4000 }, { "epoch": 11.14, "eval_loss": 0.3728441894054413, "eval_runtime": 56.7823, "eval_samples_per_second": 9.017, "eval_steps_per_second": 1.127, "eval_wer": 0.25182588372772424, "step": 4000 }, { "epoch": 11.7, "eval_loss": 0.3828706741333008, "eval_runtime": 56.4473, "eval_samples_per_second": 9.07, "eval_steps_per_second": 1.134, "eval_wer": 0.23955594507741748, "step": 4200 }, { "epoch": 12.26, "eval_loss": 0.3465881943702698, "eval_runtime": 56.3386, "eval_samples_per_second": 9.088, "eval_steps_per_second": 1.136, "eval_wer": 0.23605024832018698, "step": 4400 }, { "epoch": 12.53, "learning_rate": 9.877557866487755e-05, "loss": 0.1069, "step": 4500 }, { "epoch": 12.81, "eval_loss": 0.3187991976737976, "eval_runtime": 56.3638, "eval_samples_per_second": 9.084, "eval_steps_per_second": 1.135, "eval_wer": 0.2240724510663161, "step": 4600 }, { "epoch": 13.37, "eval_loss": 0.3395535349845886, "eval_runtime": 56.3257, "eval_samples_per_second": 9.09, "eval_steps_per_second": 1.136, "eval_wer": 0.21969033011977798, "step": 4800 }, { "epoch": 13.93, "learning_rate": 7.361623616236162e-05, "loss": 0.0845, "step": 5000 }, { "epoch": 13.93, "eval_loss": 0.3364916741847992, "eval_runtime": 56.321, "eval_samples_per_second": 9.091, "eval_steps_per_second": 1.136, "eval_wer": 0.2205667543090856, "step": 5000 }, { "epoch": 14.48, "eval_loss": 0.3458584249019623, "eval_runtime": 56.3485, "eval_samples_per_second": 9.086, "eval_steps_per_second": 1.136, "eval_wer": 0.22085889570552147, "step": 5200 }, { "epoch": 15.04, "eval_loss": 0.3429270386695862, "eval_runtime": 57.9716, "eval_samples_per_second": 8.832, "eval_steps_per_second": 1.104, "eval_wer": 0.2193981887233421, "step": 5400 }, { "epoch": 15.32, "learning_rate": 4.845689365984569e-05, "loss": 0.0675, "step": 5500 }, { "epoch": 15.6, "eval_loss": 0.3433798849582672, "eval_runtime": 56.3488, "eval_samples_per_second": 9.086, "eval_steps_per_second": 1.136, "eval_wer": 0.2182296231375986, "step": 5600 }, { "epoch": 16.16, "eval_loss": 0.3434172570705414, "eval_runtime": 56.3181, "eval_samples_per_second": 9.091, "eval_steps_per_second": 1.136, "eval_wer": 0.20829681565877886, "step": 5800 }, { "epoch": 16.71, "learning_rate": 2.329755115732975e-05, "loss": 0.0561, "step": 6000 }, { "epoch": 16.71, "eval_loss": 0.33747875690460205, "eval_runtime": 56.3763, "eval_samples_per_second": 9.082, "eval_steps_per_second": 1.135, "eval_wer": 0.20362255331580484, "step": 6000 }, { "epoch": 17.27, "eval_loss": 0.3445747494697571, "eval_runtime": 56.6382, "eval_samples_per_second": 9.04, "eval_steps_per_second": 1.13, "eval_wer": 0.19865614957639496, "step": 6200 }, { "epoch": 17.83, "eval_loss": 0.33622780442237854, "eval_runtime": 56.3321, "eval_samples_per_second": 9.089, "eval_steps_per_second": 1.136, "eval_wer": 0.19777972538708735, "step": 6400 }, { "epoch": 18.0, "step": 6462, "total_flos": 3.4883692787272507e+19, "train_loss": 0.4896983008235647, "train_runtime": 62213.9785, "train_samples_per_second": 2.494, "train_steps_per_second": 0.104 } ], "max_steps": 6462, "num_train_epochs": 18, "total_flos": 3.4883692787272507e+19, "trial_name": null, "trial_params": null }