{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9998530492285083, "global_step": 13608, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.6737692872887585e-08, "loss": 3.3894, "step": 1 }, { "epoch": 0.11, "learning_rate": 1.8368846436443794e-05, "loss": 3.3892, "step": 500 }, { "epoch": 0.22, "learning_rate": 3.673769287288759e-05, "loss": 3.4112, "step": 1000 }, { "epoch": 0.33, "learning_rate": 4.9432514085082064e-05, "loss": 3.4369, "step": 1500 }, { "epoch": 0.44, "learning_rate": 4.739119784437005e-05, "loss": 3.4317, "step": 2000 }, { "epoch": 0.55, "learning_rate": 4.534988160365804e-05, "loss": 3.4256, "step": 2500 }, { "epoch": 0.66, "learning_rate": 4.330856536294603e-05, "loss": 3.4214, "step": 3000 }, { "epoch": 0.77, "learning_rate": 4.127133175471544e-05, "loss": 3.4182, "step": 3500 }, { "epoch": 0.88, "learning_rate": 3.9230015514003435e-05, "loss": 3.4124, "step": 4000 }, { "epoch": 0.99, "learning_rate": 3.718869927329142e-05, "loss": 3.4087, "step": 4500 }, { "epoch": 1.1, "learning_rate": 3.51473830325794e-05, "loss": 3.4057, "step": 5000 }, { "epoch": 1.21, "learning_rate": 3.31060667918674e-05, "loss": 3.399, "step": 5500 }, { "epoch": 1.32, "learning_rate": 3.1064750551155385e-05, "loss": 3.3956, "step": 6000 }, { "epoch": 1.43, "learning_rate": 2.9023434310443376e-05, "loss": 3.3923, "step": 6500 }, { "epoch": 1.54, "learning_rate": 2.6982118069731367e-05, "loss": 3.3893, "step": 7000 }, { "epoch": 1.65, "learning_rate": 2.4940801829019355e-05, "loss": 3.3858, "step": 7500 }, { "epoch": 1.76, "learning_rate": 2.2899485588307342e-05, "loss": 3.3829, "step": 8000 }, { "epoch": 1.87, "learning_rate": 2.085816934759533e-05, "loss": 3.3815, "step": 8500 }, { "epoch": 1.98, "learning_rate": 1.8820935739364743e-05, "loss": 3.3786, "step": 9000 }, { "epoch": 2.09, "learning_rate": 1.677961949865273e-05, "loss": 3.3752, "step": 9500 }, { "epoch": 2.2, "learning_rate": 1.4738303257940722e-05, "loss": 3.3681, "step": 10000 }, { "epoch": 2.31, "learning_rate": 1.269698701722871e-05, "loss": 3.3666, "step": 10500 }, { "epoch": 2.42, "learning_rate": 1.0655670776516699e-05, "loss": 3.365, "step": 11000 }, { "epoch": 2.54, "learning_rate": 8.618437168286112e-06, "loss": 3.3638, "step": 11500 }, { "epoch": 2.65, "learning_rate": 6.5771209275741e-06, "loss": 3.3629, "step": 12000 }, { "epoch": 2.76, "learning_rate": 4.535804686862089e-06, "loss": 3.362, "step": 12500 }, { "epoch": 2.87, "learning_rate": 2.4944884461500775e-06, "loss": 3.3622, "step": 13000 }, { "epoch": 2.98, "learning_rate": 4.531722054380665e-07, "loss": 3.3615, "step": 13500 }, { "epoch": 3.0, "step": 13608, "total_flos": 7.655079399630727e+19, "train_loss": 3.390678170251538, "train_runtime": 24048.3069, "train_samples_per_second": 1738.566, "train_steps_per_second": 0.566 } ], "max_steps": 13608, "num_train_epochs": 3, "total_flos": 7.655079399630727e+19, "trial_name": null, "trial_params": null }