{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.978102189781022, "eval_steps": 500, "global_step": 204, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "grad_norm": 1.607978105545044, "learning_rate": 0.00019672131147540985, "loss": 1.9632, "step": 25 }, { "epoch": 0.73, "grad_norm": 0.4215347468852997, "learning_rate": 0.00016939890710382515, "loss": 1.2822, "step": 50 }, { "epoch": 1.09, "grad_norm": 0.6183569431304932, "learning_rate": 0.00014316939890710384, "loss": 1.2407, "step": 75 }, { "epoch": 1.46, "grad_norm": 0.49381059408187866, "learning_rate": 0.00011584699453551913, "loss": 1.1787, "step": 100 }, { "epoch": 1.82, "grad_norm": 0.47651711106300354, "learning_rate": 8.852459016393443e-05, "loss": 1.1866, "step": 125 }, { "epoch": 2.19, "grad_norm": 0.5027020573616028, "learning_rate": 6.120218579234973e-05, "loss": 1.1836, "step": 150 }, { "epoch": 2.55, "grad_norm": 0.5709903240203857, "learning_rate": 3.387978142076503e-05, "loss": 1.1499, "step": 175 }, { "epoch": 2.92, "grad_norm": 0.6197895407676697, "learning_rate": 6.557377049180328e-06, "loss": 1.1439, "step": 200 } ], "logging_steps": 25, "max_steps": 204, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 3.568349359374336e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }