{ "best_metric": null, "best_model_checkpoint": null, "epoch": 69.0909090909091, "eval_steps": 500, "global_step": 350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.02, "learning_rate": 5.194805194805195e-06, "loss": 1.8662, "step": 11 }, { "epoch": 4.04, "learning_rate": 1.0909090909090909e-05, "loss": 1.5883, "step": 22 }, { "epoch": 6.05, "learning_rate": 1.6623376623376623e-05, "loss": 1.0967, "step": 33 }, { "epoch": 8.07, "learning_rate": 2.2337662337662336e-05, "loss": 0.7579, "step": 44 }, { "epoch": 10.09, "learning_rate": 2.8051948051948052e-05, "loss": 0.5478, "step": 55 }, { "epoch": 13.02, "learning_rate": 3.324675324675325e-05, "loss": 0.4326, "step": 66 }, { "epoch": 15.04, "learning_rate": 3.8441558441558445e-05, "loss": 0.3292, "step": 77 }, { "epoch": 17.05, "learning_rate": 4.415584415584416e-05, "loss": 0.2404, "step": 88 }, { "epoch": 19.07, "learning_rate": 4.987012987012987e-05, "loss": 0.1739, "step": 99 }, { "epoch": 21.09, "learning_rate": 5.558441558441558e-05, "loss": 0.1312, "step": 110 }, { "epoch": 24.02, "learning_rate": 6.12987012987013e-05, "loss": 0.0961, "step": 121 }, { "epoch": 26.04, "learning_rate": 6.701298701298702e-05, "loss": 0.0595, "step": 132 }, { "epoch": 28.05, "learning_rate": 7.16883116883117e-05, "loss": 0.1897, "step": 143 }, { "epoch": 30.07, "learning_rate": 7.74025974025974e-05, "loss": 0.0537, "step": 154 }, { "epoch": 32.09, "learning_rate": 8.311688311688312e-05, "loss": 0.0264, "step": 165 }, { "epoch": 35.02, "learning_rate": 8.883116883116883e-05, "loss": 0.0199, "step": 176 }, { "epoch": 37.04, "learning_rate": 9.454545454545455e-05, "loss": 0.0176, "step": 187 }, { "epoch": 39.05, "learning_rate": 0.00010025974025974026, "loss": 0.015, "step": 198 }, { "epoch": 41.07, "learning_rate": 0.00010597402597402598, "loss": 0.0134, "step": 209 }, { "epoch": 43.09, "learning_rate": 0.00011168831168831168, "loss": 0.0094, "step": 220 }, { "epoch": 46.02, "learning_rate": 0.0001174025974025974, "loss": 0.0064, "step": 231 }, { "epoch": 48.04, "learning_rate": 0.0001231168831168831, "loss": 0.0084, "step": 242 }, { "epoch": 50.05, "learning_rate": 0.00012883116883116884, "loss": 0.0077, "step": 253 }, { "epoch": 52.07, "learning_rate": 0.00013454545454545455, "loss": 0.0082, "step": 264 }, { "epoch": 54.09, "learning_rate": 0.00014025974025974028, "loss": 0.0058, "step": 275 }, { "epoch": 57.02, "learning_rate": 0.00014597402597402599, "loss": 0.006, "step": 286 }, { "epoch": 59.04, "learning_rate": 0.0001516883116883117, "loss": 0.0095, "step": 297 }, { "epoch": 61.05, "learning_rate": 0.00015740259740259742, "loss": 0.006, "step": 308 }, { "epoch": 63.07, "learning_rate": 0.00016311688311688313, "loss": 0.0068, "step": 319 }, { "epoch": 65.09, "learning_rate": 0.00016883116883116884, "loss": 0.0086, "step": 330 }, { "epoch": 68.02, "learning_rate": 0.00017454545454545454, "loss": 0.0036, "step": 341 } ], "logging_steps": 11, "max_steps": 3850, "num_train_epochs": 70, "save_steps": 500, "total_flos": 6.12217506496512e+16, "trial_name": null, "trial_params": null }