{ "best_metric": 1.9292813539505005, "best_model_checkpoint": "output/big-russian-boss/checkpoint-248", "epoch": 2.0, "global_step": 248, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.00013665032034568096, "loss": 2.5025, "step": 5 }, { "epoch": 0.08, "learning_rate": 0.0001350100903542216, "loss": 2.3418, "step": 10 }, { "epoch": 0.12, "learning_rate": 0.0001323055957706572, "loss": 2.1866, "step": 15 }, { "epoch": 0.16, "learning_rate": 0.00012858017786751834, "loss": 2.2697, "step": 20 }, { "epoch": 0.2, "learning_rate": 0.00012389353887294475, "loss": 2.131, "step": 25 }, { "epoch": 0.24, "learning_rate": 0.00011832078520391764, "loss": 2.0523, "step": 30 }, { "epoch": 0.28, "learning_rate": 0.00011195122383741646, "loss": 2.0523, "step": 35 }, { "epoch": 0.32, "learning_rate": 0.00010488693110842961, "loss": 2.0586, "step": 40 }, { "epoch": 0.36, "learning_rate": 9.724111687075534e-05, "loss": 2.1215, "step": 45 }, { "epoch": 0.4, "learning_rate": 8.913631023597238e-05, "loss": 2.0016, "step": 50 }, { "epoch": 0.44, "learning_rate": 8.070239596528567e-05, "loss": 2.0287, "step": 55 }, { "epoch": 0.48, "learning_rate": 7.207453298233569e-05, "loss": 1.9277, "step": 60 }, { "epoch": 0.52, "learning_rate": 6.339098836414812e-05, "loss": 1.9488, "step": 65 }, { "epoch": 0.56, "learning_rate": 5.4790921521917906e-05, "loss": 1.9926, "step": 70 }, { "epoch": 0.6, "learning_rate": 4.641215408156227e-05, "loss": 1.8875, "step": 75 }, { "epoch": 0.65, "learning_rate": 3.838896120314628e-05, "loss": 1.9105, "step": 80 }, { "epoch": 0.69, "learning_rate": 3.084991973471496e-05, "loss": 1.8968, "step": 85 }, { "epoch": 0.73, "learning_rate": 2.3915847685255567e-05, "loss": 1.9608, "step": 90 }, { "epoch": 0.77, "learning_rate": 1.7697868038067536e-05, "loss": 1.8985, "step": 95 }, { "epoch": 0.81, "learning_rate": 1.2295627933180862e-05, "loss": 1.9667, "step": 100 }, { "epoch": 0.85, "learning_rate": 7.795701757575202e-06, "loss": 2.0107, "step": 105 }, { "epoch": 0.89, "learning_rate": 4.270203734710291e-06, "loss": 1.7971, "step": 110 }, { "epoch": 0.93, "learning_rate": 1.7756322475164567e-06, "loss": 2.0397, "step": 115 }, { "epoch": 0.97, "learning_rate": 3.519644153159924e-07, "loss": 1.9473, "step": 120 }, { "epoch": 1.0, "eval_loss": 1.9632209539413452, "eval_runtime": 8.8591, "eval_samples_per_second": 20.544, "eval_steps_per_second": 2.596, "step": 124 }, { "epoch": 1.01, "learning_rate": 2.2015434664866374e-08, "loss": 2.0051, "step": 125 }, { "epoch": 1.05, "learning_rate": 7.910729510915516e-07, "loss": 1.9063, "step": 130 }, { "epoch": 1.09, "learning_rate": 2.6468123211622144e-06, "loss": 1.9292, "step": 135 }, { "epoch": 1.13, "learning_rate": 5.559494122852188e-06, "loss": 1.8741, "step": 140 }, { "epoch": 1.17, "learning_rate": 9.482440748992502e-06, "loss": 1.9671, "step": 145 }, { "epoch": 1.21, "learning_rate": 1.4352784446073873e-05, "loss": 1.9278, "step": 150 }, { "epoch": 1.25, "learning_rate": 2.009247481060283e-05, "loss": 1.8776, "step": 155 }, { "epoch": 1.29, "learning_rate": 2.660952959723034e-05, "loss": 1.8322, "step": 160 }, { "epoch": 1.33, "learning_rate": 3.379950879364461e-05, "loss": 1.9671, "step": 165 }, { "epoch": 1.37, "learning_rate": 4.154718833922634e-05, "loss": 1.8845, "step": 170 }, { "epoch": 1.41, "learning_rate": 4.972840666504281e-05, "loss": 1.9002, "step": 175 }, { "epoch": 1.45, "learning_rate": 5.8212054463186014e-05, "loss": 1.8912, "step": 180 }, { "epoch": 1.49, "learning_rate": 6.686217579811531e-05, "loss": 1.9383, "step": 185 }, { "epoch": 1.53, "learning_rate": 7.554014688833781e-05, "loss": 1.9313, "step": 190 }, { "epoch": 1.57, "learning_rate": 8.410689764204419e-05, "loss": 1.8199, "step": 195 }, { "epoch": 1.61, "learning_rate": 9.242514034515462e-05, "loss": 1.8346, "step": 200 }, { "epoch": 1.65, "learning_rate": 0.0001003615697856113, "loss": 1.8638, "step": 205 }, { "epoch": 1.69, "learning_rate": 0.00010778899955550273, "loss": 1.8885, "step": 210 }, { "epoch": 1.73, "learning_rate": 0.0001145884002954007, "loss": 1.8695, "step": 215 }, { "epoch": 1.77, "learning_rate": 0.00012065080721672542, "loss": 1.8793, "step": 220 }, { "epoch": 1.81, "learning_rate": 0.00012587906633286313, "loss": 1.8785, "step": 225 }, { "epoch": 1.85, "learning_rate": 0.00013018939141455285, "loss": 1.8644, "step": 230 }, { "epoch": 1.9, "learning_rate": 0.00013351270671832803, "loss": 1.85, "step": 235 }, { "epoch": 1.94, "learning_rate": 0.00013579575396992113, "loss": 1.8876, "step": 240 }, { "epoch": 1.98, "learning_rate": 0.0001370019458625086, "loss": 1.8228, "step": 245 }, { "epoch": 2.0, "eval_loss": 1.9292813539505005, "eval_runtime": 8.8397, "eval_samples_per_second": 20.589, "eval_steps_per_second": 2.602, "step": 248 } ], "max_steps": 248, "num_train_epochs": 2, "total_flos": 258156527616000.0, "trial_name": null, "trial_params": null }