{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 17160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 4.8543123543123546e-05, "loss": 1.1321, "step": 500 }, { "epoch": 0.17, "learning_rate": 4.708624708624709e-05, "loss": 1.1111, "step": 1000 }, { "epoch": 0.26, "learning_rate": 4.562937062937063e-05, "loss": 1.0592, "step": 1500 }, { "epoch": 0.35, "learning_rate": 4.4172494172494175e-05, "loss": 1.0563, "step": 2000 }, { "epoch": 0.44, "learning_rate": 4.271561771561772e-05, "loss": 1.0382, "step": 2500 }, { "epoch": 0.52, "learning_rate": 4.125874125874126e-05, "loss": 1.0295, "step": 3000 }, { "epoch": 0.61, "learning_rate": 3.9801864801864804e-05, "loss": 1.0196, "step": 3500 }, { "epoch": 0.7, "learning_rate": 3.834498834498835e-05, "loss": 1.0207, "step": 4000 }, { "epoch": 0.79, "learning_rate": 3.688811188811189e-05, "loss": 1.0309, "step": 4500 }, { "epoch": 0.87, "learning_rate": 3.5431235431235434e-05, "loss": 1.0039, "step": 5000 }, { "epoch": 0.96, "learning_rate": 3.397435897435898e-05, "loss": 1.0012, "step": 5500 }, { "epoch": 1.05, "learning_rate": 3.251748251748252e-05, "loss": 0.9437, "step": 6000 }, { "epoch": 1.14, "learning_rate": 3.106060606060606e-05, "loss": 0.871, "step": 6500 }, { "epoch": 1.22, "learning_rate": 2.9603729603729606e-05, "loss": 0.8709, "step": 7000 }, { "epoch": 1.31, "learning_rate": 2.8146853146853146e-05, "loss": 0.8775, "step": 7500 }, { "epoch": 1.4, "learning_rate": 2.6689976689976692e-05, "loss": 0.8558, "step": 8000 }, { "epoch": 1.49, "learning_rate": 2.5233100233100232e-05, "loss": 0.8449, "step": 8500 }, { "epoch": 1.57, "learning_rate": 2.377622377622378e-05, "loss": 0.8832, "step": 9000 }, { "epoch": 1.66, "learning_rate": 2.231934731934732e-05, "loss": 0.8747, "step": 9500 }, { "epoch": 1.75, "learning_rate": 2.0862470862470865e-05, "loss": 0.8595, "step": 10000 }, { "epoch": 1.84, "learning_rate": 1.9405594405594408e-05, "loss": 0.8546, "step": 10500 }, { "epoch": 1.92, "learning_rate": 1.794871794871795e-05, "loss": 0.8857, "step": 11000 }, { "epoch": 2.01, "learning_rate": 1.649184149184149e-05, "loss": 0.8425, "step": 11500 }, { "epoch": 2.1, "learning_rate": 1.5034965034965034e-05, "loss": 0.806, "step": 12000 }, { "epoch": 2.19, "learning_rate": 1.357808857808858e-05, "loss": 0.8023, "step": 12500 }, { "epoch": 2.27, "learning_rate": 1.2121212121212122e-05, "loss": 0.7991, "step": 13000 }, { "epoch": 2.36, "learning_rate": 1.0664335664335665e-05, "loss": 0.8154, "step": 13500 }, { "epoch": 2.45, "learning_rate": 9.207459207459208e-06, "loss": 0.8205, "step": 14000 }, { "epoch": 2.53, "learning_rate": 7.750582750582751e-06, "loss": 0.82, "step": 14500 }, { "epoch": 2.62, "learning_rate": 6.2937062937062944e-06, "loss": 0.8207, "step": 15000 }, { "epoch": 2.71, "learning_rate": 4.836829836829837e-06, "loss": 0.8266, "step": 15500 }, { "epoch": 2.8, "learning_rate": 3.3799533799533803e-06, "loss": 0.8146, "step": 16000 }, { "epoch": 2.88, "learning_rate": 1.9230769230769234e-06, "loss": 0.81, "step": 16500 }, { "epoch": 2.97, "learning_rate": 4.662004662004662e-07, "loss": 0.7898, "step": 17000 }, { "epoch": 3.0, "step": 17160, "total_flos": 5.399805267070157e+17, "train_loss": 0.9076518683444648, "train_runtime": 7155.0947, "train_samples_per_second": 19.186, "train_steps_per_second": 2.398 } ], "logging_steps": 500, "max_steps": 17160, "num_train_epochs": 3, "save_steps": 500, "total_flos": 5.399805267070157e+17, "trial_name": null, "trial_params": null }