{ "best_metric": 4.878868882500295, "best_model_checkpoint": "./checkpoint-1000", "epoch": 1.1523333333333334, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.6013176087541454e-06, "loss": 0.8269, "step": 25 }, { "epoch": 0.02, "learning_rate": 2.0137381186086706e-06, "loss": 0.3045, "step": 50 }, { "epoch": 0.03, "learning_rate": 2.2420279328577924e-06, "loss": 0.2313, "step": 75 }, { "epoch": 0.03, "learning_rate": 2.40069543942923e-06, "loss": 0.1914, "step": 100 }, { "epoch": 0.04, "learning_rate": 2.522426425046553e-06, "loss": 0.1792, "step": 125 }, { "epoch": 0.05, "learning_rate": 2.61759653240662e-06, "loss": 0.1751, "step": 150 }, { "epoch": 0.06, "learning_rate": 2.701259274807719e-06, "loss": 0.1753, "step": 175 }, { "epoch": 0.07, "learning_rate": 2.773422504448636e-06, "loss": 0.1429, "step": 200 }, { "epoch": 0.07, "learning_rate": 2.836868816606194e-06, "loss": 0.1708, "step": 225 }, { "epoch": 0.08, "learning_rate": 2.893478968555679e-06, "loss": 0.156, "step": 250 }, { "epoch": 0.09, "learning_rate": 2.944583870724746e-06, "loss": 0.1573, "step": 275 }, { "epoch": 0.1, "learning_rate": 2.991160014398659e-06, "loss": 0.157, "step": 300 }, { "epoch": 0.11, "learning_rate": 2.978888888888889e-06, "loss": 0.1659, "step": 325 }, { "epoch": 0.12, "learning_rate": 2.9511111111111114e-06, "loss": 0.1362, "step": 350 }, { "epoch": 0.12, "learning_rate": 2.9233333333333334e-06, "loss": 0.1465, "step": 375 }, { "epoch": 0.13, "learning_rate": 2.8955555555555555e-06, "loss": 0.1432, "step": 400 }, { "epoch": 0.14, "learning_rate": 2.867777777777778e-06, "loss": 0.1306, "step": 425 }, { "epoch": 0.15, "learning_rate": 2.84e-06, "loss": 0.1388, "step": 450 }, { "epoch": 0.16, "learning_rate": 2.812222222222222e-06, "loss": 0.1434, "step": 475 }, { "epoch": 0.17, "learning_rate": 2.7844444444444444e-06, "loss": 0.1425, "step": 500 }, { "epoch": 0.17, "learning_rate": 2.756666666666667e-06, "loss": 0.1411, "step": 525 }, { "epoch": 1.0, "learning_rate": 2.7288888888888893e-06, "loss": 0.1316, "step": 550 }, { "epoch": 1.01, "learning_rate": 2.702222222222222e-06, "loss": 0.1212, "step": 575 }, { "epoch": 1.02, "learning_rate": 2.674444444444444e-06, "loss": 0.1117, "step": 600 }, { "epoch": 1.03, "learning_rate": 2.6466666666666666e-06, "loss": 0.1, "step": 625 }, { "epoch": 1.04, "learning_rate": 2.618888888888889e-06, "loss": 0.0931, "step": 650 }, { "epoch": 1.04, "learning_rate": 2.5911111111111115e-06, "loss": 0.0876, "step": 675 }, { "epoch": 1.05, "learning_rate": 2.5633333333333335e-06, "loss": 0.0943, "step": 700 }, { "epoch": 1.06, "learning_rate": 2.5355555555555555e-06, "loss": 0.0866, "step": 725 }, { "epoch": 1.07, "learning_rate": 2.507777777777778e-06, "loss": 0.0817, "step": 750 }, { "epoch": 1.08, "learning_rate": 2.48e-06, "loss": 0.0977, "step": 775 }, { "epoch": 1.09, "learning_rate": 2.452222222222222e-06, "loss": 0.0878, "step": 800 }, { "epoch": 1.09, "learning_rate": 2.4244444444444444e-06, "loss": 0.0819, "step": 825 }, { "epoch": 1.1, "learning_rate": 2.396666666666667e-06, "loss": 0.0818, "step": 850 }, { "epoch": 1.11, "learning_rate": 2.3688888888888893e-06, "loss": 0.0829, "step": 875 }, { "epoch": 1.12, "learning_rate": 2.3411111111111113e-06, "loss": 0.0801, "step": 900 }, { "epoch": 1.13, "learning_rate": 2.3133333333333333e-06, "loss": 0.0743, "step": 925 }, { "epoch": 1.14, "learning_rate": 2.285555555555556e-06, "loss": 0.0758, "step": 950 }, { "epoch": 1.14, "learning_rate": 2.257777777777778e-06, "loss": 0.0771, "step": 975 }, { "epoch": 1.15, "learning_rate": 2.23e-06, "loss": 0.0774, "step": 1000 }, { "epoch": 1.15, "eval_loss": 0.148193359375, "eval_runtime": 2293.5239, "eval_samples_per_second": 3.79, "eval_steps_per_second": 0.237, "eval_wer": 4.878868882500295, "step": 1000 } ], "max_steps": 3000, "num_train_epochs": 9223372036854775807, "total_flos": 6.794147330330198e+19, "trial_name": null, "trial_params": null }