{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9230769230769231, "eval_steps": 10, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 2.272727272727273e-05, "loss": 1.7377, "step": 10 }, { "epoch": 0.19, "eval_loss": 1.3826087713241577, "eval_runtime": 164.229, "eval_samples_per_second": 0.304, "eval_steps_per_second": 0.043, "step": 10 }, { "epoch": 0.38, "learning_rate": 2.0202020202020203e-05, "loss": 1.0247, "step": 20 }, { "epoch": 0.38, "eval_loss": 0.6683715581893921, "eval_runtime": 163.0119, "eval_samples_per_second": 0.307, "eval_steps_per_second": 0.043, "step": 20 }, { "epoch": 0.58, "learning_rate": 1.7676767676767676e-05, "loss": 0.4914, "step": 30 }, { "epoch": 0.58, "eval_loss": 0.34242257475852966, "eval_runtime": 164.0724, "eval_samples_per_second": 0.305, "eval_steps_per_second": 0.043, "step": 30 }, { "epoch": 0.77, "learning_rate": 1.5151515151515153e-05, "loss": 0.3506, "step": 40 }, { "epoch": 0.77, "eval_loss": 0.3160565197467804, "eval_runtime": 163.8993, "eval_samples_per_second": 0.305, "eval_steps_per_second": 0.043, "step": 40 }, { "epoch": 0.96, "learning_rate": 1.2626262626262628e-05, "loss": 0.3133, "step": 50 }, { "epoch": 0.96, "eval_loss": 0.3000437915325165, "eval_runtime": 163.2552, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.043, "step": 50 }, { "epoch": 1.15, "learning_rate": 1.0101010101010101e-05, "loss": 0.3185, "step": 60 }, { "epoch": 1.15, "eval_loss": 0.2911369800567627, "eval_runtime": 162.9849, "eval_samples_per_second": 0.307, "eval_steps_per_second": 0.043, "step": 60 }, { "epoch": 1.35, "learning_rate": 7.5757575757575764e-06, "loss": 0.2703, "step": 70 }, { "epoch": 1.35, "eval_loss": 0.2851818799972534, "eval_runtime": 163.5768, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.043, "step": 70 }, { "epoch": 1.54, "learning_rate": 5.050505050505051e-06, "loss": 0.2451, "step": 80 }, { "epoch": 1.54, "eval_loss": 0.27840811014175415, "eval_runtime": 163.9369, "eval_samples_per_second": 0.305, "eval_steps_per_second": 0.043, "step": 80 }, { "epoch": 1.73, "learning_rate": 2.5252525252525253e-06, "loss": 0.2702, "step": 90 }, { "epoch": 1.73, "eval_loss": 0.2748894989490509, "eval_runtime": 162.9364, "eval_samples_per_second": 0.307, "eval_steps_per_second": 0.043, "step": 90 }, { "epoch": 1.92, "learning_rate": 0.0, "loss": 0.2489, "step": 100 }, { "epoch": 1.92, "eval_loss": 0.27365821599960327, "eval_runtime": 164.1174, "eval_samples_per_second": 0.305, "eval_steps_per_second": 0.043, "step": 100 } ], "logging_steps": 10, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10, "total_flos": 4359942624509952.0, "trial_name": null, "trial_params": null }