{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9833024118738405, "eval_steps": 500, "global_step": 201, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.523809523809525e-07, "loss": 1.5933, "step": 1 }, { "epoch": 0.07, "learning_rate": 4.761904761904762e-06, "loss": 1.4782, "step": 5 }, { "epoch": 0.15, "learning_rate": 9.523809523809525e-06, "loss": 1.5256, "step": 10 }, { "epoch": 0.22, "learning_rate": 1.4285714285714287e-05, "loss": 1.4709, "step": 15 }, { "epoch": 0.3, "learning_rate": 1.904761904761905e-05, "loss": 1.5224, "step": 20 }, { "epoch": 0.37, "learning_rate": 1.9975640502598243e-05, "loss": 1.5039, "step": 25 }, { "epoch": 0.45, "learning_rate": 1.9876883405951378e-05, "loss": 1.4679, "step": 30 }, { "epoch": 0.52, "learning_rate": 1.9702957262759964e-05, "loss": 1.4341, "step": 35 }, { "epoch": 0.59, "learning_rate": 1.945518575599317e-05, "loss": 1.4291, "step": 40 }, { "epoch": 0.67, "learning_rate": 1.913545457642601e-05, "loss": 1.4145, "step": 45 }, { "epoch": 0.74, "learning_rate": 1.874619707139396e-05, "loss": 1.4082, "step": 50 }, { "epoch": 0.82, "learning_rate": 1.8290375725550417e-05, "loss": 1.3682, "step": 55 }, { "epoch": 0.89, "learning_rate": 1.777145961456971e-05, "loss": 1.3838, "step": 60 }, { "epoch": 0.96, "learning_rate": 1.7193398003386514e-05, "loss": 1.3204, "step": 65 }, { "epoch": 0.99, "eval_loss": 1.3371310234069824, "eval_runtime": 60.5562, "eval_samples_per_second": 8.207, "eval_steps_per_second": 2.064, "step": 67 }, { "epoch": 1.04, "learning_rate": 1.6560590289905074e-05, "loss": 1.3366, "step": 70 }, { "epoch": 1.11, "learning_rate": 1.5877852522924733e-05, "loss": 1.357, "step": 75 }, { "epoch": 1.19, "learning_rate": 1.5150380749100545e-05, "loss": 1.3083, "step": 80 }, { "epoch": 1.26, "learning_rate": 1.4383711467890776e-05, "loss": 1.3313, "step": 85 }, { "epoch": 1.34, "learning_rate": 1.3583679495453e-05, "loss": 1.3263, "step": 90 }, { "epoch": 1.41, "learning_rate": 1.2756373558169992e-05, "loss": 1.343, "step": 95 }, { "epoch": 1.48, "learning_rate": 1.190808995376545e-05, "loss": 1.3439, "step": 100 }, { "epoch": 1.56, "learning_rate": 1.1045284632676535e-05, "loss": 1.2764, "step": 105 }, { "epoch": 1.63, "learning_rate": 1.0174524064372837e-05, "loss": 1.3138, "step": 110 }, { "epoch": 1.71, "learning_rate": 9.302435262558748e-06, "loss": 1.3108, "step": 115 }, { "epoch": 1.78, "learning_rate": 8.43565534959769e-06, "loss": 1.2986, "step": 120 }, { "epoch": 1.86, "learning_rate": 7.580781044003324e-06, "loss": 1.2803, "step": 125 }, { "epoch": 1.93, "learning_rate": 6.744318455428436e-06, "loss": 1.3083, "step": 130 }, { "epoch": 1.99, "eval_loss": 1.2949973344802856, "eval_runtime": 60.6302, "eval_samples_per_second": 8.197, "eval_steps_per_second": 2.062, "step": 134 }, { "epoch": 2.0, "learning_rate": 5.932633569242e-06, "loss": 1.3041, "step": 135 }, { "epoch": 2.08, "learning_rate": 5.151903797536631e-06, "loss": 1.2992, "step": 140 }, { "epoch": 2.15, "learning_rate": 4.408070965292534e-06, "loss": 1.2984, "step": 145 }, { "epoch": 2.23, "learning_rate": 3.7067960895016277e-06, "loss": 1.284, "step": 150 }, { "epoch": 2.3, "learning_rate": 3.0534162954100264e-06, "loss": 1.3143, "step": 155 }, { "epoch": 2.37, "learning_rate": 2.45290419777228e-06, "loss": 1.3138, "step": 160 }, { "epoch": 2.45, "learning_rate": 1.9098300562505266e-06, "loss": 1.3192, "step": 165 }, { "epoch": 2.52, "learning_rate": 1.4283269929788779e-06, "loss": 1.3079, "step": 170 }, { "epoch": 2.6, "learning_rate": 1.012059537008332e-06, "loss": 1.3073, "step": 175 }, { "epoch": 2.67, "learning_rate": 6.641957350279838e-07, "loss": 1.3209, "step": 180 }, { "epoch": 2.75, "learning_rate": 3.8738304061681107e-07, "loss": 1.3137, "step": 185 }, { "epoch": 2.82, "learning_rate": 1.8372816552336025e-07, "loss": 1.2787, "step": 190 }, { "epoch": 2.89, "learning_rate": 5.4781046317267103e-08, "loss": 1.2959, "step": 195 }, { "epoch": 2.97, "learning_rate": 1.5230484360873043e-09, "loss": 1.2921, "step": 200 }, { "epoch": 2.98, "eval_loss": 1.2931873798370361, "eval_runtime": 60.4523, "eval_samples_per_second": 8.221, "eval_steps_per_second": 2.068, "step": 201 }, { "epoch": 2.98, "step": 201, "total_flos": 1.0527084092262973e+18, "train_loss": 1.3530203068434303, "train_runtime": 5442.6995, "train_samples_per_second": 2.376, "train_steps_per_second": 0.037 } ], "logging_steps": 5, "max_steps": 201, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 1.0527084092262973e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }