{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0008923956733683103, "eval_steps": 2000, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 8.451894760131836, "learning_rate": 1.9999999959757473e-05, "loss": 1.835, "step": 200 }, { "epoch": 0.0, "grad_norm": 2.7373883724212646, "learning_rate": 1.9999999832252933e-05, "loss": 1.6278, "step": 400 }, { "epoch": 0.0, "grad_norm": 3.7490854263305664, "learning_rate": 1.9999999617416517e-05, "loss": 1.6314, "step": 600 }, { "epoch": 0.0, "grad_norm": 10.143038749694824, "learning_rate": 1.999999931524823e-05, "loss": 1.5416, "step": 800 }, { "epoch": 0.0, "grad_norm": 2.783194065093994, "learning_rate": 1.999999892574807e-05, "loss": 1.5775, "step": 1000 }, { "epoch": 0.0, "grad_norm": 2.1446919441223145, "learning_rate": 1.9999998448916044e-05, "loss": 1.6922, "step": 1200 } ], "logging_steps": 200, "max_steps": 6723475, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "total_flos": 1.5519816481284096e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }