{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0434782608695652, "eval_steps": 4, "global_step": 15, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06956521739130435, "grad_norm": 7.7945876121521, "learning_rate": 1e-05, "loss": 177.4688, "step": 1 }, { "epoch": 0.06956521739130435, "eval_loss": 11.082473754882812, "eval_runtime": 7.3657, "eval_samples_per_second": 13.169, "eval_steps_per_second": 1.765, "step": 1 }, { "epoch": 0.1391304347826087, "grad_norm": 7.366751194000244, "learning_rate": 2e-05, "loss": 177.3516, "step": 2 }, { "epoch": 0.20869565217391303, "grad_norm": 6.917609214782715, "learning_rate": 3e-05, "loss": 177.4609, "step": 3 }, { "epoch": 0.2782608695652174, "grad_norm": 7.315999984741211, "learning_rate": 4e-05, "loss": 177.3516, "step": 4 }, { "epoch": 0.2782608695652174, "eval_loss": 11.079252243041992, "eval_runtime": 0.1082, "eval_samples_per_second": 896.219, "eval_steps_per_second": 120.112, "step": 4 }, { "epoch": 0.34782608695652173, "grad_norm": 7.8040056228637695, "learning_rate": 5e-05, "loss": 177.4141, "step": 5 }, { "epoch": 0.41739130434782606, "grad_norm": 8.207115173339844, "learning_rate": 6e-05, "loss": 177.3047, "step": 6 }, { "epoch": 0.48695652173913045, "grad_norm": 7.859698295593262, "learning_rate": 7e-05, "loss": 177.2422, "step": 7 }, { "epoch": 0.5565217391304348, "grad_norm": 7.5123820304870605, "learning_rate": 8e-05, "loss": 177.1875, "step": 8 }, { "epoch": 0.5565217391304348, "eval_loss": 11.06894302368164, "eval_runtime": 0.1066, "eval_samples_per_second": 909.671, "eval_steps_per_second": 121.915, "step": 8 }, { "epoch": 0.6260869565217392, "grad_norm": 7.649663925170898, "learning_rate": 9e-05, "loss": 177.1172, "step": 9 }, { "epoch": 0.6956521739130435, "grad_norm": 7.564890384674072, "learning_rate": 0.0001, "loss": 177.0859, "step": 10 }, { "epoch": 0.7652173913043478, "grad_norm": 8.557611465454102, "learning_rate": 9.045084971874738e-05, "loss": 177.0078, "step": 11 }, { "epoch": 0.8347826086956521, "grad_norm": 7.719607353210449, "learning_rate": 6.545084971874738e-05, "loss": 176.9453, "step": 12 }, { "epoch": 0.8347826086956521, "eval_loss": 11.049612998962402, "eval_runtime": 0.1071, "eval_samples_per_second": 905.76, "eval_steps_per_second": 121.391, "step": 12 }, { "epoch": 0.9043478260869565, "grad_norm": 8.3920259475708, "learning_rate": 3.4549150281252636e-05, "loss": 176.8594, "step": 13 }, { "epoch": 0.9739130434782609, "grad_norm": 8.231620788574219, "learning_rate": 9.549150281252633e-06, "loss": 176.8672, "step": 14 }, { "epoch": 1.0434782608695652, "grad_norm": 8.611029624938965, "learning_rate": 0.0, "loss": 176.7188, "step": 15 } ], "logging_steps": 1, "max_steps": 15, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 19967571394560.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }