{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "global_step": 8280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.81, "learning_rate": 2.818840579710145e-05, "loss": 1.483, "step": 500 }, { "epoch": 3.62, "learning_rate": 2.63768115942029e-05, "loss": 0.5081, "step": 1000 }, { "epoch": 5.43, "learning_rate": 2.456521739130435e-05, "loss": 0.1953, "step": 1500 }, { "epoch": 7.25, "learning_rate": 2.2753623188405797e-05, "loss": 0.0984, "step": 2000 }, { "epoch": 9.06, "learning_rate": 2.0942028985507246e-05, "loss": 0.0663, "step": 2500 }, { "epoch": 10.87, "learning_rate": 1.9130434782608694e-05, "loss": 0.0443, "step": 3000 }, { "epoch": 12.68, "learning_rate": 1.7318840579710146e-05, "loss": 0.0337, "step": 3500 }, { "epoch": 14.49, "learning_rate": 1.5507246376811594e-05, "loss": 0.0289, "step": 4000 }, { "epoch": 16.3, "learning_rate": 1.3695652173913042e-05, "loss": 0.0179, "step": 4500 }, { "epoch": 18.12, "learning_rate": 1.1884057971014492e-05, "loss": 0.0134, "step": 5000 }, { "epoch": 19.93, "learning_rate": 1.0072463768115942e-05, "loss": 0.0119, "step": 5500 }, { "epoch": 21.74, "learning_rate": 8.260869565217392e-06, "loss": 0.0066, "step": 6000 }, { "epoch": 23.55, "learning_rate": 6.4492753623188414e-06, "loss": 0.0095, "step": 6500 }, { "epoch": 25.36, "learning_rate": 4.63768115942029e-06, "loss": 0.0045, "step": 7000 }, { "epoch": 27.17, "learning_rate": 2.8260869565217393e-06, "loss": 0.0037, "step": 7500 }, { "epoch": 28.99, "learning_rate": 1.0144927536231885e-06, "loss": 0.0019, "step": 8000 }, { "epoch": 30.0, "step": 8280, "total_flos": 2.5286585356944e+16, "train_loss": 0.1526769723581231, "train_runtime": 2830.1977, "train_samples_per_second": 87.535, "train_steps_per_second": 2.926 } ], "max_steps": 8280, "num_train_epochs": 30, "total_flos": 2.5286585356944e+16, "trial_name": null, "trial_params": null }