{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8218, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06084205402774397, "grad_norm": 9.958785057067871, "learning_rate": 4.6957897298612804e-05, "loss": 2.8566, "step": 500 }, { "epoch": 0.12168410805548795, "grad_norm": 6.4962873458862305, "learning_rate": 4.3915794597225605e-05, "loss": 2.445, "step": 1000 }, { "epoch": 0.18252616208323194, "grad_norm": 6.847861289978027, "learning_rate": 4.0873691895838406e-05, "loss": 2.2613, "step": 1500 }, { "epoch": 0.2433682161109759, "grad_norm": 7.166022300720215, "learning_rate": 3.783158919445121e-05, "loss": 2.1305, "step": 2000 }, { "epoch": 0.3042102701387199, "grad_norm": 7.0767316818237305, "learning_rate": 3.4789486493064e-05, "loss": 2.0557, "step": 2500 }, { "epoch": 0.3650523241664639, "grad_norm": 5.804446220397949, "learning_rate": 3.174738379167681e-05, "loss": 2.0253, "step": 3000 }, { "epoch": 0.42589437819420783, "grad_norm": 4.874488830566406, "learning_rate": 2.8705281090289608e-05, "loss": 1.9551, "step": 3500 }, { "epoch": 0.4867364322219518, "grad_norm": 5.902616024017334, "learning_rate": 2.566317838890241e-05, "loss": 1.8709, "step": 4000 }, { "epoch": 0.5475784862496957, "grad_norm": 6.240393161773682, "learning_rate": 2.262107568751521e-05, "loss": 1.8769, "step": 4500 }, { "epoch": 0.6084205402774397, "grad_norm": 6.906947135925293, "learning_rate": 1.9578972986128012e-05, "loss": 1.8443, "step": 5000 }, { "epoch": 0.6692625943051838, "grad_norm": 5.4539971351623535, "learning_rate": 1.6536870284740814e-05, "loss": 1.7751, "step": 5500 }, { "epoch": 0.7301046483329278, "grad_norm": 5.705854892730713, "learning_rate": 1.3494767583353615e-05, "loss": 1.7458, "step": 6000 }, { "epoch": 0.7909467023606717, "grad_norm": 5.6163649559021, "learning_rate": 1.0452664881966416e-05, "loss": 1.6901, "step": 6500 }, { "epoch": 0.8517887563884157, "grad_norm": 6.5511016845703125, "learning_rate": 7.410562180579217e-06, "loss": 1.6807, "step": 7000 }, { "epoch": 0.9126308104161597, "grad_norm": 5.792708396911621, "learning_rate": 4.368459479192018e-06, "loss": 1.6844, "step": 7500 }, { "epoch": 0.9734728644439036, "grad_norm": 6.069802761077881, "learning_rate": 1.3263567778048189e-06, "loss": 1.674, "step": 8000 }, { "epoch": 1.0, "step": 8218, "total_flos": 1.063927729790976e+16, "train_loss": 1.9656052448999624, "train_runtime": 5505.0716, "train_samples_per_second": 14.927, "train_steps_per_second": 1.493 } ], "logging_steps": 500, "max_steps": 8218, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.063927729790976e+16, "train_batch_size": 10, "trial_name": null, "trial_params": null }