{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9985401459854014, "eval_steps": 500, "global_step": 513, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 0.19102843105793, "learning_rate": 9.615384615384617e-05, "loss": 1.8608, "step": 25 }, { "epoch": 0.1, "grad_norm": 0.22166840732097626, "learning_rate": 0.00019230769230769233, "loss": 1.81, "step": 50 }, { "epoch": 0.15, "grad_norm": 0.3510190546512604, "learning_rate": 0.00019002169197396965, "loss": 1.7424, "step": 75 }, { "epoch": 0.19, "grad_norm": 0.2323824167251587, "learning_rate": 0.000179175704989154, "loss": 1.7622, "step": 100 }, { "epoch": 0.24, "grad_norm": 0.23122742772102356, "learning_rate": 0.0001683297180043384, "loss": 1.717, "step": 125 }, { "epoch": 0.29, "grad_norm": 0.2081485390663147, "learning_rate": 0.00015748373101952278, "loss": 1.7318, "step": 150 }, { "epoch": 0.34, "grad_norm": 0.2549290060997009, "learning_rate": 0.00014663774403470716, "loss": 1.7101, "step": 175 }, { "epoch": 0.39, "grad_norm": 0.21858647465705872, "learning_rate": 0.00013579175704989157, "loss": 1.7282, "step": 200 }, { "epoch": 0.44, "grad_norm": 0.21650561690330505, "learning_rate": 0.00012494577006507592, "loss": 1.6976, "step": 225 }, { "epoch": 0.49, "grad_norm": 0.2338039129972458, "learning_rate": 0.0001140997830802603, "loss": 1.7133, "step": 250 }, { "epoch": 0.54, "grad_norm": 0.22612518072128296, "learning_rate": 0.0001032537960954447, "loss": 1.7139, "step": 275 }, { "epoch": 0.58, "grad_norm": 0.2349502444267273, "learning_rate": 9.240780911062907e-05, "loss": 1.6824, "step": 300 }, { "epoch": 0.63, "grad_norm": 0.23057827353477478, "learning_rate": 8.156182212581345e-05, "loss": 1.6819, "step": 325 }, { "epoch": 0.68, "grad_norm": 0.2516602873802185, "learning_rate": 7.071583514099784e-05, "loss": 1.7191, "step": 350 }, { "epoch": 0.73, "grad_norm": 0.22540146112442017, "learning_rate": 5.9869848156182215e-05, "loss": 1.702, "step": 375 }, { "epoch": 0.78, "grad_norm": 0.2385682761669159, "learning_rate": 4.90238611713666e-05, "loss": 1.709, "step": 400 }, { "epoch": 0.83, "grad_norm": 0.22341802716255188, "learning_rate": 3.817787418655098e-05, "loss": 1.6827, "step": 425 }, { "epoch": 0.88, "grad_norm": 0.22832414507865906, "learning_rate": 2.7331887201735356e-05, "loss": 1.6588, "step": 450 }, { "epoch": 0.92, "grad_norm": 0.2418232560157776, "learning_rate": 1.648590021691974e-05, "loss": 1.6636, "step": 475 }, { "epoch": 0.97, "grad_norm": 0.2324737012386322, "learning_rate": 5.639913232104121e-06, "loss": 1.688, "step": 500 } ], "logging_steps": 25, "max_steps": 513, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.668156353497006e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }