{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.76923076923077, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.23, "learning_rate": 0.000975, "loss": 1.1621, "step": 20 }, { "epoch": 2.46, "learning_rate": 0.00095, "loss": 0.7214, "step": 40 }, { "epoch": 3.69, "learning_rate": 0.000925, "loss": 0.5522, "step": 60 }, { "epoch": 4.92, "learning_rate": 0.0009000000000000001, "loss": 0.4059, "step": 80 }, { "epoch": 6.15, "learning_rate": 0.000875, "loss": 0.3237, "step": 100 }, { "epoch": 7.38, "learning_rate": 0.00085, "loss": 0.249, "step": 120 }, { "epoch": 8.62, "learning_rate": 0.000825, "loss": 0.1859, "step": 140 }, { "epoch": 9.85, "learning_rate": 0.0008, "loss": 0.1988, "step": 160 }, { "epoch": 11.08, "learning_rate": 0.0007750000000000001, "loss": 0.1458, "step": 180 }, { "epoch": 12.31, "learning_rate": 0.00075, "loss": 0.1279, "step": 200 }, { "epoch": 13.54, "learning_rate": 0.000725, "loss": 0.1002, "step": 220 }, { "epoch": 14.77, "learning_rate": 0.0007, "loss": 0.1024, "step": 240 }, { "epoch": 16.0, "learning_rate": 0.000675, "loss": 0.0808, "step": 260 }, { "epoch": 17.23, "learning_rate": 0.0006500000000000001, "loss": 0.0743, "step": 280 }, { "epoch": 18.46, "learning_rate": 0.000625, "loss": 0.0638, "step": 300 }, { "epoch": 19.69, "learning_rate": 0.0006, "loss": 0.0444, "step": 320 }, { "epoch": 20.92, "learning_rate": 0.000575, "loss": 0.0441, "step": 340 }, { "epoch": 22.15, "learning_rate": 0.00055, "loss": 0.035, "step": 360 }, { "epoch": 23.38, "learning_rate": 0.0005250000000000001, "loss": 0.0305, "step": 380 }, { "epoch": 24.62, "learning_rate": 0.0005, "loss": 0.0272, "step": 400 }, { "epoch": 25.85, "learning_rate": 0.000475, "loss": 0.0252, "step": 420 }, { "epoch": 27.08, "learning_rate": 0.00045000000000000004, "loss": 0.021, "step": 440 }, { "epoch": 28.31, "learning_rate": 0.000425, "loss": 0.0188, "step": 460 }, { "epoch": 29.54, "learning_rate": 0.0004, "loss": 0.0186, "step": 480 }, { "epoch": 30.77, "learning_rate": 0.000375, "loss": 0.0178, "step": 500 } ], "max_steps": 800, "num_train_epochs": 50, "total_flos": 6.497819467776e+17, "trial_name": null, "trial_params": null }