{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.927536231884059, "eval_steps": 500, "global_step": 660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.39, "learning_rate": 0.0002, "loss": 2.1782, "step": 20 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 1.9533, "step": 40 }, { "epoch": 1.16, "learning_rate": 0.0002, "loss": 1.8756, "step": 60 }, { "epoch": 1.55, "learning_rate": 0.0002, "loss": 1.7662, "step": 80 }, { "epoch": 1.93, "learning_rate": 0.0002, "loss": 1.7196, "step": 100 }, { "epoch": 2.32, "learning_rate": 0.0002, "loss": 1.5842, "step": 120 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 1.49, "step": 140 }, { "epoch": 3.09, "learning_rate": 0.0002, "loss": 1.4432, "step": 160 }, { "epoch": 3.48, "learning_rate": 0.0002, "loss": 1.2434, "step": 180 }, { "epoch": 3.86, "learning_rate": 0.0002, "loss": 1.3193, "step": 200 }, { "epoch": 4.25, "learning_rate": 0.0002, "loss": 1.1336, "step": 220 }, { "epoch": 4.64, "learning_rate": 0.0002, "loss": 1.1327, "step": 240 }, { "epoch": 5.02, "learning_rate": 0.0002, "loss": 1.1099, "step": 260 }, { "epoch": 5.41, "learning_rate": 0.0002, "loss": 0.9778, "step": 280 }, { "epoch": 5.8, "learning_rate": 0.0002, "loss": 0.9912, "step": 300 }, { "epoch": 6.18, "learning_rate": 0.0002, "loss": 0.8791, "step": 320 }, { "epoch": 6.57, "learning_rate": 0.0002, "loss": 0.8689, "step": 340 }, { "epoch": 6.96, "learning_rate": 0.0002, "loss": 0.7917, "step": 360 }, { "epoch": 7.34, "learning_rate": 0.0002, "loss": 0.7627, "step": 380 }, { "epoch": 7.73, "learning_rate": 0.0002, "loss": 0.6916, "step": 400 }, { "epoch": 8.12, "learning_rate": 0.0002, "loss": 0.717, "step": 420 }, { "epoch": 8.5, "learning_rate": 0.0002, "loss": 0.5919, "step": 440 }, { "epoch": 8.89, "learning_rate": 0.0002, "loss": 0.6064, "step": 460 }, { "epoch": 9.28, "learning_rate": 0.0002, "loss": 0.5224, "step": 480 }, { "epoch": 9.66, "learning_rate": 0.0002, "loss": 0.5103, "step": 500 }, { "epoch": 10.05, "learning_rate": 0.0002, "loss": 0.5647, "step": 520 }, { "epoch": 10.43, "learning_rate": 0.0002, "loss": 0.4083, "step": 540 }, { "epoch": 10.82, "learning_rate": 0.0002, "loss": 0.472, "step": 560 }, { "epoch": 11.21, "learning_rate": 0.0002, "loss": 0.3785, "step": 580 }, { "epoch": 11.59, "learning_rate": 0.0002, "loss": 0.4072, "step": 600 }, { "epoch": 11.98, "learning_rate": 0.0002, "loss": 0.3689, "step": 620 }, { "epoch": 12.37, "learning_rate": 0.0002, "loss": 0.3327, "step": 640 }, { "epoch": 12.93, "learning_rate": 0.0002, "loss": 0.29, "step": 660 } ], "logging_steps": 20, "max_steps": 660, "num_input_tokens_seen": 0, "num_train_epochs": 13, "save_steps": 20, "total_flos": 9.918366904902205e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }