{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.962822997027284, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 1.904275808936826e-05, "loss": 0.3678, "step": 500 }, { "epoch": 0.1, "learning_rate": 1.807973805855162e-05, "loss": 0.3141, "step": 1000 }, { "epoch": 0.14, "learning_rate": 1.7116718027734977e-05, "loss": 0.2928, "step": 1500 }, { "epoch": 0.19, "learning_rate": 1.6153697996918337e-05, "loss": 0.2788, "step": 2000 }, { "epoch": 0.24, "learning_rate": 1.5190677966101697e-05, "loss": 0.2664, "step": 2500 }, { "epoch": 0.29, "learning_rate": 1.4227657935285056e-05, "loss": 0.2551, "step": 3000 }, { "epoch": 0.34, "learning_rate": 1.3264637904468413e-05, "loss": 0.2465, "step": 3500 }, { "epoch": 0.39, "learning_rate": 1.2301617873651772e-05, "loss": 0.2373, "step": 4000 }, { "epoch": 0.43, "learning_rate": 1.1338597842835134e-05, "loss": 0.2305, "step": 4500 }, { "epoch": 0.48, "learning_rate": 1.037557781201849e-05, "loss": 0.2229, "step": 5000 }, { "epoch": 0.53, "learning_rate": 9.416409861325117e-06, "loss": 0.2154, "step": 5500 }, { "epoch": 0.58, "learning_rate": 8.455315870570109e-06, "loss": 0.2104, "step": 6000 }, { "epoch": 0.63, "learning_rate": 7.492295839753467e-06, "loss": 0.2043, "step": 6500 }, { "epoch": 0.67, "learning_rate": 6.529275808936827e-06, "loss": 0.1985, "step": 7000 }, { "epoch": 0.72, "learning_rate": 5.568181818181818e-06, "loss": 0.1953, "step": 7500 }, { "epoch": 0.77, "learning_rate": 4.605161787365178e-06, "loss": 0.1903, "step": 8000 }, { "epoch": 0.82, "learning_rate": 3.645993836671803e-06, "loss": 0.186, "step": 8500 }, { "epoch": 0.87, "learning_rate": 2.682973805855162e-06, "loss": 0.1837, "step": 9000 }, { "epoch": 0.91, "learning_rate": 1.7238058551617876e-06, "loss": 0.1803, "step": 9500 }, { "epoch": 0.96, "learning_rate": 7.607858243451465e-07, "loss": 0.1775, "step": 10000 } ], "max_steps": 10386, "num_train_epochs": 1, "total_flos": 1.1258147859595264e+16, "trial_name": null, "trial_params": null }