{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "global_step": 2850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21, "learning_rate": 1e-05, "loss": 0.65, "step": 100 }, { "epoch": 0.42, "learning_rate": 2e-05, "loss": 0.4943, "step": 200 }, { "epoch": 0.63, "learning_rate": 3e-05, "loss": 0.4019, "step": 300 }, { "epoch": 0.84, "learning_rate": 4e-05, "loss": 0.4193, "step": 400 }, { "epoch": 1.05, "learning_rate": 5e-05, "loss": 0.3961, "step": 500 }, { "epoch": 1.26, "learning_rate": 4.787234042553192e-05, "loss": 0.324, "step": 600 }, { "epoch": 1.47, "learning_rate": 4.574468085106383e-05, "loss": 0.3471, "step": 700 }, { "epoch": 1.68, "learning_rate": 4.3617021276595746e-05, "loss": 0.287, "step": 800 }, { "epoch": 1.89, "learning_rate": 4.148936170212766e-05, "loss": 0.2373, "step": 900 }, { "epoch": 2.11, "learning_rate": 3.936170212765958e-05, "loss": 0.2729, "step": 1000 }, { "epoch": 2.32, "learning_rate": 3.723404255319149e-05, "loss": 0.1955, "step": 1100 }, { "epoch": 2.53, "learning_rate": 3.5106382978723407e-05, "loss": 0.1929, "step": 1200 }, { "epoch": 2.74, "learning_rate": 3.2978723404255317e-05, "loss": 0.1849, "step": 1300 }, { "epoch": 2.95, "learning_rate": 3.085106382978723e-05, "loss": 0.2012, "step": 1400 }, { "epoch": 3.16, "learning_rate": 2.8723404255319154e-05, "loss": 0.152, "step": 1500 }, { "epoch": 3.37, "learning_rate": 2.6595744680851064e-05, "loss": 0.1388, "step": 1600 }, { "epoch": 3.58, "learning_rate": 2.446808510638298e-05, "loss": 0.1337, "step": 1700 }, { "epoch": 3.79, "learning_rate": 2.2340425531914894e-05, "loss": 0.1571, "step": 1800 }, { "epoch": 4.0, "learning_rate": 2.0212765957446807e-05, "loss": 0.1088, "step": 1900 }, { "epoch": 4.21, "learning_rate": 1.8085106382978724e-05, "loss": 0.0701, "step": 2000 }, { "epoch": 4.42, "learning_rate": 1.595744680851064e-05, "loss": 0.0853, "step": 2100 }, { "epoch": 4.63, "learning_rate": 1.3829787234042554e-05, "loss": 0.0923, "step": 2200 }, { "epoch": 4.84, "learning_rate": 1.170212765957447e-05, "loss": 0.0695, "step": 2300 }, { "epoch": 5.05, "learning_rate": 9.574468085106383e-06, "loss": 0.0867, "step": 2400 }, { "epoch": 5.26, "learning_rate": 7.446808510638298e-06, "loss": 0.0371, "step": 2500 }, { "epoch": 5.47, "learning_rate": 5.319148936170213e-06, "loss": 0.0201, "step": 2600 }, { "epoch": 5.68, "learning_rate": 3.1914893617021277e-06, "loss": 0.0141, "step": 2700 }, { "epoch": 5.89, "learning_rate": 1.0638297872340427e-06, "loss": 0.1063, "step": 2800 } ], "max_steps": 2850, "num_train_epochs": 6, "total_flos": 5994196063211520.0, "trial_name": null, "trial_params": null }