{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "global_step": 25284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 4.9011232399936727e-05, "loss": 3.3688, "step": 500 }, { "epoch": 0.24, "learning_rate": 4.802246479987344e-05, "loss": 3.2716, "step": 1000 }, { "epoch": 0.36, "learning_rate": 4.703369719981016e-05, "loss": 3.2238, "step": 1500 }, { "epoch": 0.47, "learning_rate": 4.604492959974688e-05, "loss": 3.2109, "step": 2000 }, { "epoch": 0.59, "learning_rate": 4.5056161999683596e-05, "loss": 3.1767, "step": 2500 }, { "epoch": 0.71, "learning_rate": 4.406739439962031e-05, "loss": 3.1522, "step": 3000 }, { "epoch": 0.83, "learning_rate": 4.307862679955704e-05, "loss": 3.1347, "step": 3500 }, { "epoch": 0.95, "learning_rate": 4.2089859199493755e-05, "loss": 3.129, "step": 4000 }, { "epoch": 1.07, "learning_rate": 4.110109159943047e-05, "loss": 3.0598, "step": 4500 }, { "epoch": 1.19, "learning_rate": 4.011232399936719e-05, "loss": 3.0041, "step": 5000 }, { "epoch": 1.31, "learning_rate": 3.9123556399303914e-05, "loss": 3.0206, "step": 5500 }, { "epoch": 1.42, "learning_rate": 3.813478879924063e-05, "loss": 3.0094, "step": 6000 }, { "epoch": 1.54, "learning_rate": 3.714602119917735e-05, "loss": 3.0046, "step": 6500 }, { "epoch": 1.66, "learning_rate": 3.6157253599114066e-05, "loss": 3.0031, "step": 7000 }, { "epoch": 1.78, "learning_rate": 3.516848599905079e-05, "loss": 2.9904, "step": 7500 }, { "epoch": 1.9, "learning_rate": 3.41797183989875e-05, "loss": 2.9906, "step": 8000 }, { "epoch": 2.02, "learning_rate": 3.3190950798924225e-05, "loss": 2.9812, "step": 8500 }, { "epoch": 2.14, "learning_rate": 3.220218319886094e-05, "loss": 2.8911, "step": 9000 }, { "epoch": 2.25, "learning_rate": 3.1213415598797666e-05, "loss": 2.9061, "step": 9500 }, { "epoch": 2.37, "learning_rate": 3.022464799873438e-05, "loss": 2.9192, "step": 10000 }, { "epoch": 2.49, "learning_rate": 2.92358803986711e-05, "loss": 2.9018, "step": 10500 }, { "epoch": 2.61, "learning_rate": 2.8247112798607815e-05, "loss": 2.9167, "step": 11000 }, { "epoch": 2.73, "learning_rate": 2.725834519854454e-05, "loss": 2.923, "step": 11500 }, { "epoch": 2.85, "learning_rate": 2.6269577598481253e-05, "loss": 2.8988, "step": 12000 }, { "epoch": 2.97, "learning_rate": 2.5280809998417977e-05, "loss": 2.8976, "step": 12500 }, { "epoch": 3.08, "learning_rate": 2.429204239835469e-05, "loss": 2.8552, "step": 13000 }, { "epoch": 3.2, "learning_rate": 2.3303274798291412e-05, "loss": 2.8315, "step": 13500 }, { "epoch": 3.32, "learning_rate": 2.231450719822813e-05, "loss": 2.8407, "step": 14000 }, { "epoch": 3.44, "learning_rate": 2.1325739598164847e-05, "loss": 2.8406, "step": 14500 }, { "epoch": 3.56, "learning_rate": 2.0336971998101567e-05, "loss": 2.8386, "step": 15000 }, { "epoch": 3.68, "learning_rate": 1.9348204398038285e-05, "loss": 2.8402, "step": 15500 }, { "epoch": 3.8, "learning_rate": 1.8359436797975006e-05, "loss": 2.8409, "step": 16000 }, { "epoch": 3.92, "learning_rate": 1.7370669197911723e-05, "loss": 2.8446, "step": 16500 }, { "epoch": 4.03, "learning_rate": 1.6381901597848444e-05, "loss": 2.8165, "step": 17000 }, { "epoch": 4.15, "learning_rate": 1.539313399778516e-05, "loss": 2.7863, "step": 17500 }, { "epoch": 4.27, "learning_rate": 1.440436639772188e-05, "loss": 2.7812, "step": 18000 }, { "epoch": 4.39, "learning_rate": 1.3415598797658599e-05, "loss": 2.7759, "step": 18500 }, { "epoch": 4.51, "learning_rate": 1.2426831197595318e-05, "loss": 2.7893, "step": 19000 }, { "epoch": 4.63, "learning_rate": 1.1438063597532037e-05, "loss": 2.8047, "step": 19500 }, { "epoch": 4.75, "learning_rate": 1.0449295997468755e-05, "loss": 2.7915, "step": 20000 }, { "epoch": 4.86, "learning_rate": 9.460528397405474e-06, "loss": 2.8009, "step": 20500 }, { "epoch": 4.98, "learning_rate": 8.471760797342193e-06, "loss": 2.7765, "step": 21000 }, { "epoch": 5.1, "learning_rate": 7.482993197278912e-06, "loss": 2.7556, "step": 21500 }, { "epoch": 5.22, "learning_rate": 6.494225597215631e-06, "loss": 2.7601, "step": 22000 }, { "epoch": 5.34, "learning_rate": 5.50545799715235e-06, "loss": 2.7608, "step": 22500 }, { "epoch": 5.46, "learning_rate": 4.516690397089068e-06, "loss": 2.7527, "step": 23000 }, { "epoch": 5.58, "learning_rate": 3.5279227970257872e-06, "loss": 2.7481, "step": 23500 }, { "epoch": 5.7, "learning_rate": 2.539155196962506e-06, "loss": 2.7657, "step": 24000 }, { "epoch": 5.81, "learning_rate": 1.550387596899225e-06, "loss": 2.7518, "step": 24500 }, { "epoch": 5.93, "learning_rate": 5.616199968359437e-07, "loss": 2.7425, "step": 25000 }, { "epoch": 6.0, "step": 25284, "total_flos": 1.3213015474176e+16, "train_loss": 2.915600132591418, "train_runtime": 6526.9461, "train_samples_per_second": 3.874, "train_steps_per_second": 3.874 } ], "max_steps": 25284, "num_train_epochs": 6, "total_flos": 1.3213015474176e+16, "trial_name": null, "trial_params": null }