{ "best_metric": 0.20319828391075134, "best_model_checkpoint": "./results/checkpoint-3500", "epoch": 2.708978328173375, "eval_steps": 500, "global_step": 3500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 0.00019999868960045492, "loss": 0.7358, "step": 200 }, { "epoch": 0.31, "learning_rate": 0.00019845929936213215, "loss": 0.7562, "step": 400 }, { "epoch": 0.39, "eval_loss": 0.481257826089859, "eval_runtime": 49.5763, "eval_samples_per_second": 12.183, "eval_steps_per_second": 3.046, "step": 500 }, { "epoch": 0.46, "learning_rate": 0.00019405971991583108, "loss": 0.7465, "step": 600 }, { "epoch": 0.62, "learning_rate": 0.000186927756656608, "loss": 0.71, "step": 800 }, { "epoch": 0.77, "learning_rate": 0.00017727058924629164, "loss": 0.71, "step": 1000 }, { "epoch": 0.77, "eval_loss": 0.40068519115448, "eval_runtime": 49.5272, "eval_samples_per_second": 12.195, "eval_steps_per_second": 3.049, "step": 1000 }, { "epoch": 0.93, "learning_rate": 0.00016536875315675275, "loss": 0.6628, "step": 1200 }, { "epoch": 1.08, "learning_rate": 0.00015156799026670633, "loss": 0.6006, "step": 1400 }, { "epoch": 1.16, "eval_loss": 0.3466373383998871, "eval_runtime": 49.4923, "eval_samples_per_second": 12.204, "eval_steps_per_second": 3.051, "step": 1500 }, { "epoch": 1.24, "learning_rate": 0.00013626920524778533, "loss": 0.5302, "step": 1600 }, { "epoch": 1.39, "learning_rate": 0.00011991681950141926, "loss": 0.5161, "step": 1800 }, { "epoch": 1.55, "learning_rate": 0.00010298586095833151, "loss": 0.4935, "step": 2000 }, { "epoch": 1.55, "eval_loss": 0.28296753764152527, "eval_runtime": 49.4347, "eval_samples_per_second": 12.218, "eval_steps_per_second": 3.055, "step": 2000 }, { "epoch": 1.7, "learning_rate": 8.596816477497136e-05, "loss": 0.5046, "step": 2200 }, { "epoch": 1.86, "learning_rate": 6.93580857891615e-05, "loss": 0.5042, "step": 2400 }, { "epoch": 1.93, "eval_loss": 0.2403416633605957, "eval_runtime": 49.3424, "eval_samples_per_second": 12.241, "eval_steps_per_second": 3.06, "step": 2500 }, { "epoch": 2.01, "learning_rate": 5.3638137780368736e-05, "loss": 0.4561, "step": 2600 }, { "epoch": 2.17, "learning_rate": 3.9264976706293624e-05, "loss": 0.3455, "step": 2800 }, { "epoch": 2.32, "learning_rate": 2.6656135095147604e-05, "loss": 0.356, "step": 3000 }, { "epoch": 2.32, "eval_loss": 0.2102939337491989, "eval_runtime": 49.4972, "eval_samples_per_second": 12.203, "eval_steps_per_second": 3.051, "step": 3000 }, { "epoch": 2.48, "learning_rate": 1.6177892952323237e-05, "loss": 0.3659, "step": 3200 }, { "epoch": 2.63, "learning_rate": 8.134637525034839e-06, "loss": 0.3393, "step": 3400 }, { "epoch": 2.71, "eval_loss": 0.20319828391075134, "eval_runtime": 49.6088, "eval_samples_per_second": 12.175, "eval_steps_per_second": 3.044, "step": 3500 } ], "logging_steps": 200, "max_steps": 3876, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 7247048796733440.0, "trial_name": null, "trial_params": null }