{ "best_metric": 2.2219202518463135, "best_model_checkpoint": "output/shadowraze/checkpoint-81", "epoch": 9.0, "global_step": 81, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.56, "learning_rate": 5.668773501204858e-05, "loss": 2.801, "step": 5 }, { "epoch": 1.0, "eval_loss": 2.403702974319458, "eval_runtime": 0.5725, "eval_samples_per_second": 22.709, "eval_steps_per_second": 3.494, "step": 9 }, { "epoch": 1.11, "learning_rate": 4.137086214086682e-06, "loss": 2.5593, "step": 10 }, { "epoch": 1.67, "learning_rate": 0.00010290000000000001, "loss": 2.5034, "step": 15 }, { "epoch": 2.0, "eval_loss": 2.3624541759490967, "eval_runtime": 0.5663, "eval_samples_per_second": 22.957, "eval_steps_per_second": 3.532, "step": 18 }, { "epoch": 2.22, "learning_rate": 0.0001211506487979619, "loss": 2.4263, "step": 20 }, { "epoch": 2.78, "learning_rate": 1.6049351202038163e-05, "loss": 2.3294, "step": 25 }, { "epoch": 3.0, "eval_loss": 2.2799673080444336, "eval_runtime": 0.5719, "eval_samples_per_second": 22.73, "eval_steps_per_second": 3.497, "step": 27 }, { "epoch": 3.33, "learning_rate": 3.4300000000000014e-05, "loss": 2.3824, "step": 30 }, { "epoch": 3.89, "learning_rate": 0.00013306291378591332, "loss": 2.2699, "step": 35 }, { "epoch": 4.0, "eval_loss": 2.2671029567718506, "eval_runtime": 0.5765, "eval_samples_per_second": 22.55, "eval_steps_per_second": 3.469, "step": 36 }, { "epoch": 4.44, "learning_rate": 8.051226498795145e-05, "loss": 2.1985, "step": 40 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 2.1072, "step": 45 }, { "epoch": 5.0, "eval_loss": 2.232370615005493, "eval_runtime": 0.5566, "eval_samples_per_second": 23.356, "eval_steps_per_second": 3.593, "step": 45 }, { "epoch": 5.56, "learning_rate": 8.051226498795124e-05, "loss": 2.1033, "step": 50 }, { "epoch": 6.0, "eval_loss": 2.2706549167633057, "eval_runtime": 0.5754, "eval_samples_per_second": 22.595, "eval_steps_per_second": 3.476, "step": 54 }, { "epoch": 6.11, "learning_rate": 0.00013306291378591335, "loss": 2.0514, "step": 55 }, { "epoch": 6.67, "learning_rate": 3.429999999999998e-05, "loss": 2.0029, "step": 60 }, { "epoch": 7.0, "eval_loss": 2.2322123050689697, "eval_runtime": 0.5836, "eval_samples_per_second": 22.275, "eval_steps_per_second": 3.427, "step": 63 }, { "epoch": 7.22, "learning_rate": 1.604935120203803e-05, "loss": 2.0429, "step": 65 }, { "epoch": 7.78, "learning_rate": 0.00012115064879796188, "loss": 1.9633, "step": 70 }, { "epoch": 8.0, "eval_loss": 2.246337652206421, "eval_runtime": 0.582, "eval_samples_per_second": 22.338, "eval_steps_per_second": 3.437, "step": 72 }, { "epoch": 8.33, "learning_rate": 0.00010289999999999993, "loss": 1.9794, "step": 75 }, { "epoch": 8.89, "learning_rate": 4.137086214086705e-06, "loss": 1.8826, "step": 80 }, { "epoch": 9.0, "eval_loss": 2.2219202518463135, "eval_runtime": 0.5809, "eval_samples_per_second": 22.378, "eval_steps_per_second": 3.443, "step": 81 } ], "max_steps": 90, "num_train_epochs": 10, "total_flos": 82306990080000.0, "trial_name": null, "trial_params": null }