{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.997489539748955, "global_step": 2980, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.33, "learning_rate": 4.9000000000000005e-05, "loss": 5.3579, "step": 100 }, { "epoch": 0.67, "learning_rate": 9.900000000000001e-05, "loss": 3.2697, "step": 200 }, { "epoch": 1.01, "learning_rate": 0.000149, "loss": 2.4711, "step": 300 }, { "epoch": 1.34, "learning_rate": 0.000199, "loss": 2.1202, "step": 400 }, { "epoch": 1.68, "learning_rate": 0.000249, "loss": 2.0846, "step": 500 }, { "epoch": 1.68, "eval_loss": 1.1640619039535522, "eval_runtime": 414.2546, "eval_samples_per_second": 25.076, "eval_steps_per_second": 3.136, "eval_wer": 0.8072035074088462, "step": 500 }, { "epoch": 2.01, "learning_rate": 0.000299, "loss": 2.1202, "step": 600 }, { "epoch": 2.35, "learning_rate": 0.00034899999999999997, "loss": 2.1037, "step": 700 }, { "epoch": 2.68, "learning_rate": 0.00039900000000000005, "loss": 2.0882, "step": 800 }, { "epoch": 3.02, "learning_rate": 0.000449, "loss": 2.126, "step": 900 }, { "epoch": 3.35, "learning_rate": 0.000499, "loss": 2.1201, "step": 1000 }, { "epoch": 3.35, "eval_loss": 1.1776171922683716, "eval_runtime": 424.2371, "eval_samples_per_second": 24.486, "eval_steps_per_second": 3.062, "eval_wer": 0.8329171060177221, "step": 1000 }, { "epoch": 3.69, "learning_rate": 0.000549, "loss": 2.1435, "step": 1100 }, { "epoch": 4.03, "learning_rate": 0.000599, "loss": 2.152, "step": 1200 }, { "epoch": 4.36, "learning_rate": 0.0006490000000000001, "loss": 2.11, "step": 1300 }, { "epoch": 4.7, "learning_rate": 0.000699, "loss": 2.1503, "step": 1400 }, { "epoch": 5.03, "learning_rate": 0.000749, "loss": 2.1972, "step": 1500 }, { "epoch": 5.03, "eval_loss": 1.2631869316101074, "eval_runtime": 410.677, "eval_samples_per_second": 25.295, "eval_steps_per_second": 3.163, "eval_wer": 0.8723754555376732, "step": 1500 }, { "epoch": 5.37, "learning_rate": 0.000799, "loss": 2.1788, "step": 1600 }, { "epoch": 5.7, "learning_rate": 0.000849, "loss": 2.1774, "step": 1700 }, { "epoch": 6.04, "learning_rate": 0.0008990000000000001, "loss": 2.2205, "step": 1800 }, { "epoch": 6.37, "learning_rate": 0.000949, "loss": 2.2466, "step": 1900 }, { "epoch": 6.71, "learning_rate": 0.000999, "loss": 2.2643, "step": 2000 }, { "epoch": 6.71, "eval_loss": 1.372324824333191, "eval_runtime": 411.6031, "eval_samples_per_second": 25.238, "eval_steps_per_second": 3.156, "eval_wer": 0.8982740440645984, "step": 2000 }, { "epoch": 7.05, "learning_rate": 0.0009000000000000001, "loss": 2.2996, "step": 2100 }, { "epoch": 7.38, "learning_rate": 0.0007979591836734694, "loss": 2.2775, "step": 2200 }, { "epoch": 7.72, "learning_rate": 0.0006959183673469388, "loss": 2.2478, "step": 2300 }, { "epoch": 8.05, "learning_rate": 0.0005938775510204082, "loss": 2.2288, "step": 2400 }, { "epoch": 8.39, "learning_rate": 0.0004918367346938776, "loss": 2.1649, "step": 2500 }, { "epoch": 8.39, "eval_loss": 1.25503671169281, "eval_runtime": 416.24, "eval_samples_per_second": 24.957, "eval_steps_per_second": 3.121, "eval_wer": 0.884196311301034, "step": 2500 }, { "epoch": 8.72, "learning_rate": 0.000389795918367347, "loss": 2.1054, "step": 2600 }, { "epoch": 9.06, "learning_rate": 0.00028775510204081633, "loss": 2.0872, "step": 2700 }, { "epoch": 9.39, "learning_rate": 0.00018571428571428572, "loss": 1.9953, "step": 2800 }, { "epoch": 9.73, "learning_rate": 8.36734693877551e-05, "loss": 1.9474, "step": 2900 }, { "epoch": 10.0, "step": 2980, "total_flos": 1.006981652948512e+20, "train_loss": 2.3014913469352978, "train_runtime": 21335.6515, "train_samples_per_second": 17.909, "train_steps_per_second": 0.14 } ], "max_steps": 2980, "num_train_epochs": 10, "total_flos": 1.006981652948512e+20, "trial_name": null, "trial_params": null }