{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.6, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 0.0009866666666666667, "loss": 3067311560851.456, "step": 500 }, { "epoch": 0.4, "eval_loss": 131732.34375, "eval_runtime": 1192.6725, "eval_samples_per_second": 16.768, "eval_steps_per_second": 2.096, "step": 500 }, { "epoch": 0.8, "learning_rate": 0.0009733333333333334, "loss": 130631.296, "step": 1000 }, { "epoch": 0.8, "eval_loss": 129769.484375, "eval_runtime": 1199.9356, "eval_samples_per_second": 16.667, "eval_steps_per_second": 2.083, "step": 1000 }, { "epoch": 1.2, "learning_rate": 0.00096, "loss": 129394.928, "step": 1500 }, { "epoch": 1.2, "eval_loss": 129025.2265625, "eval_runtime": 1188.6216, "eval_samples_per_second": 16.825, "eval_steps_per_second": 2.103, "step": 1500 }, { "epoch": 1.6, "learning_rate": 0.0009466666666666667, "loss": 128776.992, "step": 2000 }, { "epoch": 1.6, "eval_loss": 128457.265625, "eval_runtime": 1211.4822, "eval_samples_per_second": 16.508, "eval_steps_per_second": 2.064, "step": 2000 } ], "logging_steps": 500, "max_steps": 37500, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }