{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2507601166034542, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018807008745259066, "grad_norm": 0.7771628499031067, "learning_rate": 1.9877931524601816e-05, "loss": 10.1462, "num_input_tokens_seen": 19660800, "step": 150 }, { "epoch": 0.03761401749051813, "grad_norm": 0.841651201248169, "learning_rate": 1.97525187074119e-05, "loss": 9.8974, "num_input_tokens_seen": 39321600, "step": 300 }, { "epoch": 0.0564210262357772, "grad_norm": 0.7740743160247803, "learning_rate": 1.9627105890221982e-05, "loss": 9.6653, "num_input_tokens_seen": 58982400, "step": 450 }, { "epoch": 0.07522803498103627, "grad_norm": 0.8001583218574524, "learning_rate": 1.9501693073032066e-05, "loss": 9.4506, "num_input_tokens_seen": 78643200, "step": 600 }, { "epoch": 0.09403504372629533, "grad_norm": 0.8240243792533875, "learning_rate": 1.937628025584215e-05, "loss": 9.2539, "num_input_tokens_seen": 98304000, "step": 750 }, { "epoch": 0.1128420524715544, "grad_norm": 0.7667157053947449, "learning_rate": 1.9250867438652233e-05, "loss": 9.0693, "num_input_tokens_seen": 117964800, "step": 900 }, { "epoch": 0.13164906121681347, "grad_norm": 0.8228150010108948, "learning_rate": 1.9125454621462316e-05, "loss": 8.8967, "num_input_tokens_seen": 137625600, "step": 1050 }, { "epoch": 0.15045606996207253, "grad_norm": 0.8424794673919678, "learning_rate": 1.9000041804272396e-05, "loss": 8.7364, "num_input_tokens_seen": 157286400, "step": 1200 }, { "epoch": 0.1692630787073316, "grad_norm": 0.7859320640563965, "learning_rate": 1.8874628987082483e-05, "loss": 8.5903, "num_input_tokens_seen": 176947200, "step": 1350 }, { "epoch": 0.18807008745259066, "grad_norm": 0.7881995439529419, "learning_rate": 1.8749216169892567e-05, "loss": 8.4497, "num_input_tokens_seen": 196608000, "step": 1500 }, { "epoch": 0.20687709619784972, "grad_norm": 0.8333655595779419, "learning_rate": 1.8623803352702647e-05, "loss": 8.3184, "num_input_tokens_seen": 216268800, "step": 1650 }, { "epoch": 0.2256841049431088, "grad_norm": 0.7956089973449707, "learning_rate": 1.849839053551273e-05, "loss": 8.1998, "num_input_tokens_seen": 235929600, "step": 1800 }, { "epoch": 0.24449111368836787, "grad_norm": 0.8068310022354126, "learning_rate": 1.8372977718322814e-05, "loss": 8.0871, "num_input_tokens_seen": 255590400, "step": 1950 } ], "logging_steps": 150, "max_steps": 23925, "num_input_tokens_seen": 262144000, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4047671132160000.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }