{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 500, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.0004987337087656614, "loss": 1.7079, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.0004949476630105669, "loss": 1.5881, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.0004886802166271364, "loss": 1.537, "step": 30 }, { "epoch": 0.13, "learning_rate": 0.0004799948609147061, "loss": 1.6139, "step": 40 }, { "epoch": 0.16, "learning_rate": 0.000468979581391722, "loss": 1.5905, "step": 50 }, { "epoch": 0.19, "learning_rate": 0.0004557459664734141, "loss": 1.5532, "step": 60 }, { "epoch": 0.22, "learning_rate": 0.0004404280770443398, "loss": 1.5624, "step": 70 }, { "epoch": 0.26, "learning_rate": 0.00042318108837739987, "loss": 1.566, "step": 80 }, { "epoch": 0.29, "learning_rate": 0.0004041797181571358, "loss": 1.6084, "step": 90 }, { "epoch": 0.32, "learning_rate": 0.00038361645653195025, "loss": 1.5778, "step": 100 }, { "epoch": 0.35, "learning_rate": 0.0003616996161254065, "loss": 1.5952, "step": 110 }, { "epoch": 0.38, "learning_rate": 0.0003386512217606339, "loss": 1.526, "step": 120 }, { "epoch": 0.42, "learning_rate": 0.00031470476127563017, "loss": 1.5188, "step": 130 }, { "epoch": 0.45, "learning_rate": 0.0002901028202144401, "loss": 1.5547, "step": 140 }, { "epoch": 0.48, "learning_rate": 0.00026509462435557154, "loss": 1.4073, "step": 150 }, { "epoch": 0.51, "learning_rate": 0.0002399335149726463, "loss": 1.543, "step": 160 }, { "epoch": 0.54, "learning_rate": 0.0002148743824037269, "loss": 1.4798, "step": 170 }, { "epoch": 0.58, "learning_rate": 0.00019017108392811063, "loss": 1.479, "step": 180 }, { "epoch": 0.61, "learning_rate": 0.00016607387210834887, "loss": 1.5438, "step": 190 }, { "epoch": 0.64, "learning_rate": 0.00014282685964923643, "loss": 1.5179, "step": 200 }, { "epoch": 0.67, "learning_rate": 0.00012066554645558578, "loss": 1.4537, "step": 210 }, { "epoch": 0.7, "learning_rate": 9.981443394050524e-05, "loss": 1.5523, "step": 220 }, { "epoch": 0.74, "learning_rate": 8.048475075202727e-05, "loss": 1.5372, "step": 230 }, { "epoch": 0.77, "learning_rate": 6.28723129572247e-05, "loss": 1.5376, "step": 240 }, { "epoch": 0.8, "learning_rate": 4.715554036085673e-05, "loss": 1.5009, "step": 250 }, { "epoch": 0.83, "learning_rate": 3.3493649053890325e-05, "loss": 1.5572, "step": 260 }, { "epoch": 0.86, "learning_rate": 2.2025038501977485e-05, "loss": 1.5053, "step": 270 }, { "epoch": 0.9, "learning_rate": 1.286588951321363e-05, "loss": 1.5444, "step": 280 }, { "epoch": 0.93, "learning_rate": 6.108987288226536e-06, "loss": 1.5053, "step": 290 }, { "epoch": 0.96, "learning_rate": 1.822781475486507e-06, "loss": 1.4731, "step": 300 }, { "epoch": 0.99, "learning_rate": 5.069275378746796e-08, "loss": 1.5291, "step": 310 }, { "epoch": 1.0, "step": 312, "total_flos": 1.0452011423603098e+17, "train_loss": 1.5403484671543806, "train_runtime": 1808.2639, "train_samples_per_second": 2.765, "train_steps_per_second": 0.173 } ], "logging_steps": 10, "max_steps": 312, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 1.0452011423603098e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }