{ "best_metric": 1.3656095266342163, "best_model_checkpoint": "output/bring-me-the-horizon/checkpoint-216", "epoch": 8.0, "global_step": 216, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 0.00012505669320030482, "loss": 2.9107, "step": 5 }, { "epoch": 0.38, "learning_rate": 9.292589525111794e-05, "loss": 2.6407, "step": 10 }, { "epoch": 0.58, "learning_rate": 5.218294542987356e-05, "loss": 2.5181, "step": 15 }, { "epoch": 0.77, "learning_rate": 1.725216267546246e-05, "loss": 2.4529, "step": 20 }, { "epoch": 0.96, "learning_rate": 5.001712368734975e-07, "loss": 2.466, "step": 25 }, { "epoch": 1.0, "eval_loss": 2.72078013420105, "eval_runtime": 0.543, "eval_samples_per_second": 75.509, "eval_steps_per_second": 11.05, "step": 26 }, { "epoch": 1.15, "learning_rate": 7.857716640189785e-06, "loss": 2.5427, "step": 30 }, { "epoch": 1.35, "learning_rate": 3.671999039779749e-05, "loss": 2.3173, "step": 35 }, { "epoch": 1.54, "learning_rate": 7.686881626551516e-05, "loss": 2.2888, "step": 40 }, { "epoch": 1.73, "learning_rate": 0.00011409021435531856, "loss": 2.3614, "step": 45 }, { "epoch": 1.92, "learning_rate": 0.00013520660867542716, "loss": 2.1993, "step": 50 }, { "epoch": 2.0, "eval_loss": 2.672280788421631, "eval_runtime": 0.5476, "eval_samples_per_second": 74.866, "eval_steps_per_second": 10.956, "step": 52 }, { "epoch": 2.12, "learning_rate": 0.00013274211424821946, "loss": 2.2735, "step": 55 }, { "epoch": 2.31, "learning_rate": 0.00010756924162575734, "loss": 2.1555, "step": 60 }, { "epoch": 2.5, "learning_rate": 6.860000000000001e-05, "loss": 2.1802, "step": 65 }, { "epoch": 2.69, "learning_rate": 2.9630758374242683e-05, "loss": 1.978, "step": 70 }, { "epoch": 2.88, "learning_rate": 4.457885751780558e-06, "loss": 2.4223, "step": 75 }, { "epoch": 3.0, "eval_loss": 1.926320195198059, "eval_runtime": 0.4807, "eval_samples_per_second": 74.883, "eval_steps_per_second": 10.4, "step": 78 }, { "epoch": 3.08, "learning_rate": 1.9933913245728396e-06, "loss": 2.1228, "step": 80 }, { "epoch": 3.27, "learning_rate": 2.310978564468141e-05, "loss": 1.9814, "step": 85 }, { "epoch": 3.46, "learning_rate": 6.033118373448485e-05, "loss": 2.1607, "step": 90 }, { "epoch": 3.65, "learning_rate": 0.00010048000960220248, "loss": 2.1746, "step": 95 }, { "epoch": 3.85, "learning_rate": 0.00012934228335981018, "loss": 1.8565, "step": 100 }, { "epoch": 4.0, "eval_loss": 1.8828926086425781, "eval_runtime": 0.4815, "eval_samples_per_second": 74.766, "eval_steps_per_second": 10.384, "step": 104 }, { "epoch": 4.04, "learning_rate": 0.0001366998287631265, "loss": 2.1214, "step": 105 }, { "epoch": 4.23, "learning_rate": 0.00011994783732453755, "loss": 1.898, "step": 110 }, { "epoch": 4.42, "learning_rate": 8.501705457012643e-05, "loss": 1.8994, "step": 115 }, { "epoch": 4.62, "learning_rate": 4.42741047488822e-05, "loss": 1.9389, "step": 120 }, { "epoch": 4.81, "learning_rate": 1.2143306799695228e-05, "loss": 1.7737, "step": 125 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.7275, "step": 130 }, { "epoch": 5.0, "eval_loss": 1.8320238590240479, "eval_runtime": 0.4801, "eval_samples_per_second": 74.982, "eval_steps_per_second": 10.414, "step": 130 }, { "epoch": 5.19, "learning_rate": 1.2143306799695106e-05, "loss": 1.6759, "step": 135 }, { "epoch": 5.38, "learning_rate": 4.4274104748882125e-05, "loss": 1.934, "step": 140 }, { "epoch": 5.58, "learning_rate": 8.501705457012647e-05, "loss": 1.7796, "step": 145 }, { "epoch": 5.77, "learning_rate": 0.00011994783732453749, "loss": 1.5568, "step": 150 }, { "epoch": 5.96, "learning_rate": 0.00013669982876312649, "loss": 1.7681, "step": 155 }, { "epoch": 6.0, "eval_loss": 1.8055299520492554, "eval_runtime": 0.4812, "eval_samples_per_second": 74.816, "eval_steps_per_second": 10.391, "step": 156 }, { "epoch": 6.15, "learning_rate": 0.00012934228335981018, "loss": 1.5298, "step": 160 }, { "epoch": 6.35, "learning_rate": 0.00010048000960220254, "loss": 1.5527, "step": 165 }, { "epoch": 6.54, "learning_rate": 6.033118373448493e-05, "loss": 1.6001, "step": 170 }, { "epoch": 6.73, "learning_rate": 2.3109785644681573e-05, "loss": 1.5066, "step": 175 }, { "epoch": 6.92, "learning_rate": 1.9933913245728244e-06, "loss": 1.5358, "step": 180 }, { "epoch": 7.0, "eval_loss": 1.7874510288238525, "eval_runtime": 0.4807, "eval_samples_per_second": 74.894, "eval_steps_per_second": 10.402, "step": 182 }, { "epoch": 7.12, "learning_rate": 4.457885751780527e-06, "loss": 1.45, "step": 185 }, { "epoch": 7.31, "learning_rate": 2.963075837424261e-05, "loss": 1.4034, "step": 190 }, { "epoch": 7.5, "learning_rate": 6.859999999999982e-05, "loss": 1.3896, "step": 195 }, { "epoch": 7.69, "learning_rate": 0.00010756924162575728, "loss": 1.3993, "step": 200 }, { "epoch": 7.88, "learning_rate": 0.00013274211424821943, "loss": 1.4986, "step": 205 }, { "epoch": 8.0, "eval_loss": 1.761746883392334, "eval_runtime": 0.4842, "eval_samples_per_second": 74.353, "eval_steps_per_second": 10.327, "step": 208 }, { "epoch": 7.78, "learning_rate": 0.00012115064879796188, "loss": 1.688, "step": 210 }, { "epoch": 7.96, "learning_rate": 0.00013673615134109727, "loss": 1.5172, "step": 215 }, { "epoch": 8.0, "eval_loss": 1.3656095266342163, "eval_runtime": 0.4451, "eval_samples_per_second": 74.143, "eval_steps_per_second": 11.234, "step": 216 } ], "max_steps": 2916, "num_train_epochs": 108, "total_flos": 222490165248000.0, "trial_name": null, "trial_params": null }