{ "best_metric": 3.0450499057769775, "best_model_checkpoint": "output/metallica/checkpoint-315", "epoch": 5.0, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 0.00013507870183531476, "loss": 3.3881, "step": 5 }, { "epoch": 0.16, "learning_rate": 0.00012884599993319768, "loss": 3.3444, "step": 10 }, { "epoch": 0.24, "learning_rate": 0.00011888735840752609, "loss": 3.3036, "step": 15 }, { "epoch": 0.32, "learning_rate": 0.0001058186737011911, "loss": 3.3353, "step": 20 }, { "epoch": 0.4, "learning_rate": 9.044818420726556e-05, "loss": 3.2497, "step": 25 }, { "epoch": 0.48, "learning_rate": 7.372648442002871e-05, "loss": 3.2277, "step": 30 }, { "epoch": 0.56, "learning_rate": 5.668773501204858e-05, "loss": 3.0355, "step": 35 }, { "epoch": 0.63, "learning_rate": 4.0385704725240065e-05, "loss": 2.898, "step": 40 }, { "epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 2.9592, "step": 45 }, { "epoch": 0.79, "learning_rate": 1.3916710004507539e-05, "loss": 2.9574, "step": 50 }, { "epoch": 0.87, "learning_rate": 5.38673186569003e-06, "loss": 2.9963, "step": 55 }, { "epoch": 0.95, "learning_rate": 7.662053209561833e-07, "loss": 2.9366, "step": 60 }, { "epoch": 1.0, "eval_loss": 3.13543438911438, "eval_runtime": 3.4469, "eval_samples_per_second": 47.289, "eval_steps_per_second": 6.092, "step": 63 }, { "epoch": 1.03, "learning_rate": 3.408888099334633e-07, "loss": 2.948, "step": 65 }, { "epoch": 1.11, "learning_rate": 4.137086214086682e-06, "loss": 2.811, "step": 70 }, { "epoch": 1.19, "learning_rate": 1.1920020081922749e-05, "loss": 2.9051, "step": 75 }, { "epoch": 1.27, "learning_rate": 2.320835154085542e-05, "loss": 2.813, "step": 80 }, { "epoch": 1.35, "learning_rate": 3.7303948905573005e-05, "loss": 2.6726, "step": 85 }, { "epoch": 1.43, "learning_rate": 5.333506393059682e-05, "loss": 2.817, "step": 90 }, { "epoch": 1.51, "learning_rate": 7.031024545323179e-05, "loss": 2.8553, "step": 95 }, { "epoch": 1.59, "learning_rate": 8.71796561146101e-05, "loss": 2.7581, "step": 100 }, { "epoch": 1.67, "learning_rate": 0.00010290000000000001, "loss": 2.8057, "step": 105 }, { "epoch": 1.75, "learning_rate": 0.0001164990457207046, "loss": 2.7321, "step": 110 }, { "epoch": 1.83, "learning_rate": 0.00012713575447996587, "loss": 2.67, "step": 115 }, { "epoch": 1.9, "learning_rate": 0.00013415229447692924, "loss": 2.7242, "step": 120 }, { "epoch": 1.98, "learning_rate": 0.00013711472479561806, "loss": 2.7872, "step": 125 }, { "epoch": 2.0, "eval_loss": 3.1001694202423096, "eval_runtime": 3.559, "eval_samples_per_second": 45.8, "eval_steps_per_second": 5.901, "step": 126 }, { "epoch": 2.06, "learning_rate": 0.00013583983266641012, "loss": 2.5963, "step": 130 }, { "epoch": 2.14, "learning_rate": 0.00013040646433810595, "loss": 2.4975, "step": 135 }, { "epoch": 2.22, "learning_rate": 0.0001211506487979619, "loss": 2.5517, "step": 140 }, { "epoch": 2.3, "learning_rate": 0.00010864481591530664, "loss": 2.454, "step": 145 }, { "epoch": 2.38, "learning_rate": 9.36623942715347e-05, "loss": 2.4655, "step": 150 }, { "epoch": 2.46, "learning_rate": 7.712997813881747e-05, "loss": 2.5144, "step": 155 }, { "epoch": 2.54, "learning_rate": 6.007002186118257e-05, "loss": 2.4411, "step": 160 }, { "epoch": 2.62, "learning_rate": 4.3537605728465284e-05, "loss": 2.49, "step": 165 }, { "epoch": 2.7, "learning_rate": 2.8555184084693446e-05, "loss": 2.2522, "step": 170 }, { "epoch": 2.78, "learning_rate": 1.6049351202038163e-05, "loss": 2.4637, "step": 175 }, { "epoch": 2.86, "learning_rate": 6.793535661894062e-06, "loss": 2.4223, "step": 180 }, { "epoch": 2.94, "learning_rate": 1.3601673335899086e-06, "loss": 2.255, "step": 185 }, { "epoch": 3.0, "eval_loss": 3.0634868144989014, "eval_runtime": 3.6817, "eval_samples_per_second": 44.273, "eval_steps_per_second": 5.704, "step": 189 }, { "epoch": 3.02, "learning_rate": 8.527520438192717e-08, "loss": 2.3823, "step": 190 }, { "epoch": 3.1, "learning_rate": 3.047705523070765e-06, "loss": 2.3682, "step": 195 }, { "epoch": 3.17, "learning_rate": 1.0064245520034058e-05, "loss": 2.3272, "step": 200 }, { "epoch": 3.25, "learning_rate": 2.0700954279295363e-05, "loss": 2.2495, "step": 205 }, { "epoch": 3.33, "learning_rate": 3.4300000000000014e-05, "loss": 2.0865, "step": 210 }, { "epoch": 3.41, "learning_rate": 5.0020343885389815e-05, "loss": 2.279, "step": 215 }, { "epoch": 3.49, "learning_rate": 6.688975454676822e-05, "loss": 2.1852, "step": 220 }, { "epoch": 3.57, "learning_rate": 8.386493606940314e-05, "loss": 2.2131, "step": 225 }, { "epoch": 3.65, "learning_rate": 9.989605109442691e-05, "loss": 2.2336, "step": 230 }, { "epoch": 3.73, "learning_rate": 0.00011399164845914455, "loss": 2.2808, "step": 235 }, { "epoch": 3.81, "learning_rate": 0.00012527997991807721, "loss": 2.4415, "step": 240 }, { "epoch": 3.89, "learning_rate": 0.00013306291378591332, "loss": 2.1738, "step": 245 }, { "epoch": 3.97, "learning_rate": 0.00013685911119006654, "loss": 2.3683, "step": 250 }, { "epoch": 4.0, "eval_loss": 3.110128879547119, "eval_runtime": 3.7075, "eval_samples_per_second": 43.965, "eval_steps_per_second": 5.664, "step": 252 }, { "epoch": 4.05, "learning_rate": 0.00013643379467904383, "loss": 2.0957, "step": 255 }, { "epoch": 4.13, "learning_rate": 0.00013181326813430994, "loss": 2.1122, "step": 260 }, { "epoch": 4.21, "learning_rate": 0.00012328328999549248, "loss": 2.0913, "step": 265 }, { "epoch": 4.29, "learning_rate": 0.00011137140040750914, "loss": 2.0899, "step": 270 }, { "epoch": 4.37, "learning_rate": 9.681429527476003e-05, "loss": 2.0885, "step": 275 }, { "epoch": 4.44, "learning_rate": 8.051226498795145e-05, "loss": 2.054, "step": 280 }, { "epoch": 4.52, "learning_rate": 6.347351557997137e-05, "loss": 2.0165, "step": 285 }, { "epoch": 4.6, "learning_rate": 4.675181579273458e-05, "loss": 1.945, "step": 290 }, { "epoch": 4.68, "learning_rate": 3.138132629880886e-05, "loss": 1.9216, "step": 295 }, { "epoch": 4.76, "learning_rate": 1.8312641592473912e-05, "loss": 1.9411, "step": 300 }, { "epoch": 4.84, "learning_rate": 8.354000066802353e-06, "loss": 1.7585, "step": 305 }, { "epoch": 4.92, "learning_rate": 2.121298164685252e-06, "loss": 1.947, "step": 310 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 1.9567, "step": 315 }, { "epoch": 5.0, "eval_loss": 3.0450499057769775, "eval_runtime": 3.7841, "eval_samples_per_second": 43.076, "eval_steps_per_second": 5.55, "step": 315 } ], "max_steps": 315, "num_train_epochs": 5, "total_flos": 327268270080000.0, "trial_name": null, "trial_params": null }