{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 685, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 7.82608695652174e-05, "loss": 1.5533, "step": 27 }, { "epoch": 0.39, "learning_rate": 0.0001536231884057971, "loss": 1.3582, "step": 54 }, { "epoch": 0.59, "learning_rate": 0.00019642857142857144, "loss": 1.32, "step": 81 }, { "epoch": 0.79, "learning_rate": 0.00018766233766233769, "loss": 1.2552, "step": 108 }, { "epoch": 0.99, "learning_rate": 0.0001788961038961039, "loss": 1.2336, "step": 135 }, { "epoch": 1.18, "learning_rate": 0.00017012987012987013, "loss": 1.1885, "step": 162 }, { "epoch": 1.38, "learning_rate": 0.00016136363636363635, "loss": 1.2022, "step": 189 }, { "epoch": 1.58, "learning_rate": 0.0001525974025974026, "loss": 1.1927, "step": 216 }, { "epoch": 1.77, "learning_rate": 0.00014383116883116883, "loss": 1.1789, "step": 243 }, { "epoch": 1.97, "learning_rate": 0.00013506493506493507, "loss": 1.1918, "step": 270 }, { "epoch": 2.17, "learning_rate": 0.0001262987012987013, "loss": 1.1426, "step": 297 }, { "epoch": 2.36, "learning_rate": 0.00011753246753246753, "loss": 1.1104, "step": 324 }, { "epoch": 2.56, "learning_rate": 0.00010876623376623376, "loss": 1.148, "step": 351 }, { "epoch": 2.76, "learning_rate": 0.0001, "loss": 1.1244, "step": 378 }, { "epoch": 2.96, "learning_rate": 9.123376623376624e-05, "loss": 1.0997, "step": 405 }, { "epoch": 3.15, "learning_rate": 8.246753246753248e-05, "loss": 1.0676, "step": 432 }, { "epoch": 3.35, "learning_rate": 7.37012987012987e-05, "loss": 1.0429, "step": 459 }, { "epoch": 3.55, "learning_rate": 6.493506493506494e-05, "loss": 1.0455, "step": 486 }, { "epoch": 3.74, "learning_rate": 5.616883116883117e-05, "loss": 1.0255, "step": 513 }, { "epoch": 3.94, "learning_rate": 4.740259740259741e-05, "loss": 1.0583, "step": 540 }, { "epoch": 4.14, "learning_rate": 3.8636363636363636e-05, "loss": 1.0279, "step": 567 }, { "epoch": 4.34, "learning_rate": 2.9870129870129872e-05, "loss": 0.9746, "step": 594 }, { "epoch": 4.53, "learning_rate": 2.1103896103896105e-05, "loss": 0.9944, "step": 621 }, { "epoch": 4.73, "learning_rate": 1.2337662337662339e-05, "loss": 0.9703, "step": 648 }, { "epoch": 4.93, "learning_rate": 3.5714285714285714e-06, "loss": 0.9644, "step": 675 } ], "max_steps": 685, "num_train_epochs": 5, "total_flos": 5.700380906225664e+16, "trial_name": null, "trial_params": null }