{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 2550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.96, "learning_rate": 7.361999999999999e-05, "loss": 10.1224, "step": 100 }, { "epoch": 1.96, "eval_loss": 3.5429043769836426, "eval_runtime": 12.8681, "eval_samples_per_second": 26.5, "eval_steps_per_second": 3.342, "eval_wer": 1.0, "step": 100 }, { "epoch": 3.92, "learning_rate": 7.062e-05, "loss": 3.2411, "step": 200 }, { "epoch": 3.92, "eval_loss": 3.1785788536071777, "eval_runtime": 12.8876, "eval_samples_per_second": 26.46, "eval_steps_per_second": 3.337, "eval_wer": 1.0, "step": 200 }, { "epoch": 5.88, "learning_rate": 6.761999999999999e-05, "loss": 3.1283, "step": 300 }, { "epoch": 5.88, "eval_loss": 3.057115316390991, "eval_runtime": 12.9515, "eval_samples_per_second": 26.329, "eval_steps_per_second": 3.32, "eval_wer": 1.0, "step": 300 }, { "epoch": 7.84, "learning_rate": 6.462e-05, "loss": 3.0044, "step": 400 }, { "epoch": 7.84, "eval_loss": 2.956029176712036, "eval_runtime": 13.0792, "eval_samples_per_second": 26.072, "eval_steps_per_second": 3.288, "eval_wer": 0.9996309963099631, "step": 400 }, { "epoch": 9.8, "learning_rate": 6.162e-05, "loss": 2.9388, "step": 500 }, { "epoch": 9.8, "eval_loss": 2.897726535797119, "eval_runtime": 13.249, "eval_samples_per_second": 25.738, "eval_steps_per_second": 3.246, "eval_wer": 1.0011070110701108, "step": 500 }, { "epoch": 11.76, "learning_rate": 5.861999999999999e-05, "loss": 2.86, "step": 600 }, { "epoch": 11.76, "eval_loss": 2.694431781768799, "eval_runtime": 12.8219, "eval_samples_per_second": 26.595, "eval_steps_per_second": 3.354, "eval_wer": 0.9952029520295202, "step": 600 }, { "epoch": 13.73, "learning_rate": 5.562e-05, "loss": 2.5538, "step": 700 }, { "epoch": 13.73, "eval_loss": 2.0967445373535156, "eval_runtime": 12.8896, "eval_samples_per_second": 26.456, "eval_steps_per_second": 3.336, "eval_wer": 0.9435424354243542, "step": 700 }, { "epoch": 15.69, "learning_rate": 5.2619999999999994e-05, "loss": 2.1214, "step": 800 }, { "epoch": 15.69, "eval_loss": 1.4815592765808105, "eval_runtime": 12.9225, "eval_samples_per_second": 26.388, "eval_steps_per_second": 3.328, "eval_wer": 0.8428044280442805, "step": 800 }, { "epoch": 17.65, "learning_rate": 4.9619999999999996e-05, "loss": 1.8136, "step": 900 }, { "epoch": 17.65, "eval_loss": 1.245869517326355, "eval_runtime": 12.7527, "eval_samples_per_second": 26.739, "eval_steps_per_second": 3.372, "eval_wer": 0.8047970479704797, "step": 900 }, { "epoch": 19.61, "learning_rate": 4.662e-05, "loss": 1.6795, "step": 1000 }, { "epoch": 19.61, "eval_loss": 1.1232149600982666, "eval_runtime": 12.9316, "eval_samples_per_second": 26.37, "eval_steps_per_second": 3.325, "eval_wer": 0.7649446494464944, "step": 1000 }, { "epoch": 21.57, "learning_rate": 4.362e-05, "loss": 1.5571, "step": 1100 }, { "epoch": 21.57, "eval_loss": 1.0510376691818237, "eval_runtime": 12.7516, "eval_samples_per_second": 26.742, "eval_steps_per_second": 3.372, "eval_wer": 0.7431734317343174, "step": 1100 }, { "epoch": 23.53, "learning_rate": 4.0619999999999994e-05, "loss": 1.4975, "step": 1200 }, { "epoch": 23.53, "eval_loss": 1.0297844409942627, "eval_runtime": 12.7119, "eval_samples_per_second": 26.825, "eval_steps_per_second": 3.383, "eval_wer": 0.696309963099631, "step": 1200 }, { "epoch": 25.49, "learning_rate": 3.762e-05, "loss": 1.4485, "step": 1300 }, { "epoch": 25.49, "eval_loss": 0.9775241017341614, "eval_runtime": 12.651, "eval_samples_per_second": 26.954, "eval_steps_per_second": 3.399, "eval_wer": 0.707380073800738, "step": 1300 }, { "epoch": 27.45, "learning_rate": 3.462e-05, "loss": 1.3924, "step": 1400 }, { "epoch": 27.45, "eval_loss": 0.9797706007957458, "eval_runtime": 12.6253, "eval_samples_per_second": 27.009, "eval_steps_per_second": 3.406, "eval_wer": 0.6955719557195572, "step": 1400 }, { "epoch": 29.41, "learning_rate": 3.161999999999999e-05, "loss": 1.3604, "step": 1500 }, { "epoch": 29.41, "eval_loss": 0.9344653487205505, "eval_runtime": 12.7334, "eval_samples_per_second": 26.78, "eval_steps_per_second": 3.377, "eval_wer": 0.7092250922509226, "step": 1500 }, { "epoch": 31.37, "learning_rate": 2.8619999999999997e-05, "loss": 1.3224, "step": 1600 }, { "epoch": 31.37, "eval_loss": 0.9535229802131653, "eval_runtime": 12.7291, "eval_samples_per_second": 26.789, "eval_steps_per_second": 3.378, "eval_wer": 0.6830258302583025, "step": 1600 }, { "epoch": 33.33, "learning_rate": 2.562e-05, "loss": 1.2816, "step": 1700 }, { "epoch": 33.33, "eval_loss": 0.9177776575088501, "eval_runtime": 12.8107, "eval_samples_per_second": 26.618, "eval_steps_per_second": 3.357, "eval_wer": 0.6678966789667896, "step": 1700 }, { "epoch": 35.29, "learning_rate": 2.2619999999999997e-05, "loss": 1.2623, "step": 1800 }, { "epoch": 35.29, "eval_loss": 0.924865186214447, "eval_runtime": 12.5463, "eval_samples_per_second": 27.179, "eval_steps_per_second": 3.427, "eval_wer": 0.6678966789667896, "step": 1800 }, { "epoch": 37.25, "learning_rate": 1.962e-05, "loss": 1.2421, "step": 1900 }, { "epoch": 37.25, "eval_loss": 0.9123861193656921, "eval_runtime": 12.7025, "eval_samples_per_second": 26.845, "eval_steps_per_second": 3.385, "eval_wer": 0.6734317343173432, "step": 1900 }, { "epoch": 39.22, "learning_rate": 1.6619999999999997e-05, "loss": 1.2208, "step": 2000 }, { "epoch": 39.22, "eval_loss": 0.8961586356163025, "eval_runtime": 12.5233, "eval_samples_per_second": 27.229, "eval_steps_per_second": 3.434, "eval_wer": 0.666420664206642, "step": 2000 }, { "epoch": 41.18, "learning_rate": 1.362e-05, "loss": 1.2145, "step": 2100 }, { "epoch": 41.18, "eval_loss": 0.8903014063835144, "eval_runtime": 12.6636, "eval_samples_per_second": 26.928, "eval_steps_per_second": 3.396, "eval_wer": 0.6734317343173432, "step": 2100 }, { "epoch": 43.14, "learning_rate": 1.062e-05, "loss": 1.1888, "step": 2200 }, { "epoch": 43.14, "eval_loss": 0.8883377313613892, "eval_runtime": 12.7667, "eval_samples_per_second": 26.71, "eval_steps_per_second": 3.368, "eval_wer": 0.6708487084870849, "step": 2200 }, { "epoch": 45.1, "learning_rate": 7.619999999999999e-06, "loss": 1.1933, "step": 2300 }, { "epoch": 45.1, "eval_loss": 0.8928494453430176, "eval_runtime": 12.6877, "eval_samples_per_second": 26.876, "eval_steps_per_second": 3.389, "eval_wer": 0.6723247232472325, "step": 2300 }, { "epoch": 47.06, "learning_rate": 4.62e-06, "loss": 1.1838, "step": 2400 }, { "epoch": 47.06, "eval_loss": 0.8868067860603333, "eval_runtime": 12.5901, "eval_samples_per_second": 27.085, "eval_steps_per_second": 3.415, "eval_wer": 0.6678966789667896, "step": 2400 }, { "epoch": 49.02, "learning_rate": 1.62e-06, "loss": 1.1634, "step": 2500 }, { "epoch": 49.02, "eval_loss": 0.888584554195404, "eval_runtime": 12.7157, "eval_samples_per_second": 26.817, "eval_steps_per_second": 3.382, "eval_wer": 0.6656826568265682, "step": 2500 }, { "epoch": 50.0, "step": 2550, "total_flos": 4.920553620909019e+18, "train_loss": 2.1006999116785385, "train_runtime": 2771.7539, "train_samples_per_second": 14.612, "train_steps_per_second": 0.92 } ], "max_steps": 2550, "num_train_epochs": 50, "total_flos": 4.920553620909019e+18, "trial_name": null, "trial_params": null }