{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.7799511002445, "eval_steps": 2000, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 8e-05, "loss": 0.8756, "step": 10 }, { "epoch": 0.39, "learning_rate": 8e-05, "loss": 0.8401, "step": 20 }, { "epoch": 0.59, "learning_rate": 8e-05, "loss": 0.8574, "step": 30 }, { "epoch": 0.78, "learning_rate": 8e-05, "loss": 0.8282, "step": 40 }, { "epoch": 0.98, "learning_rate": 8e-05, "loss": 0.7797, "step": 50 }, { "epoch": 1.17, "learning_rate": 8e-05, "loss": 0.7198, "step": 60 }, { "epoch": 1.37, "learning_rate": 8e-05, "loss": 0.6546, "step": 70 }, { "epoch": 1.56, "learning_rate": 8e-05, "loss": 0.6477, "step": 80 }, { "epoch": 1.76, "learning_rate": 8e-05, "loss": 0.6173, "step": 90 }, { "epoch": 1.96, "learning_rate": 8e-05, "loss": 0.6119, "step": 100 }, { "epoch": 2.15, "learning_rate": 8e-05, "loss": 0.5535, "step": 110 }, { "epoch": 2.35, "learning_rate": 8e-05, "loss": 0.4959, "step": 120 }, { "epoch": 2.54, "learning_rate": 8e-05, "loss": 0.5077, "step": 130 }, { "epoch": 2.74, "learning_rate": 8e-05, "loss": 0.5021, "step": 140 }, { "epoch": 2.93, "learning_rate": 8e-05, "loss": 0.4717, "step": 150 }, { "epoch": 3.13, "learning_rate": 8e-05, "loss": 0.427, "step": 160 }, { "epoch": 3.33, "learning_rate": 8e-05, "loss": 0.361, "step": 170 }, { "epoch": 3.52, "learning_rate": 8e-05, "loss": 0.3622, "step": 180 }, { "epoch": 3.72, "learning_rate": 8e-05, "loss": 0.3425, "step": 190 }, { "epoch": 3.91, "learning_rate": 8e-05, "loss": 0.3531, "step": 200 }, { "epoch": 4.11, "learning_rate": 8e-05, "loss": 0.3376, "step": 210 }, { "epoch": 4.3, "learning_rate": 8e-05, "loss": 0.2356, "step": 220 }, { "epoch": 4.5, "learning_rate": 8e-05, "loss": 0.2514, "step": 230 }, { "epoch": 4.69, "learning_rate": 8e-05, "loss": 0.2369, "step": 240 }, { "epoch": 4.89, "learning_rate": 8e-05, "loss": 0.2485, "step": 250 }, { "epoch": 5.09, "learning_rate": 8e-05, "loss": 0.2164, "step": 260 }, { "epoch": 5.28, "learning_rate": 8e-05, "loss": 0.1498, "step": 270 }, { "epoch": 5.48, "learning_rate": 8e-05, "loss": 0.1495, "step": 280 }, { "epoch": 5.67, "learning_rate": 8e-05, "loss": 0.166, "step": 290 }, { "epoch": 5.87, "learning_rate": 8e-05, "loss": 0.2012, "step": 300 }, { "epoch": 6.06, "learning_rate": 8e-05, "loss": 0.1569, "step": 310 }, { "epoch": 6.26, "learning_rate": 8e-05, "loss": 0.0878, "step": 320 }, { "epoch": 6.45, "learning_rate": 8e-05, "loss": 0.0969, "step": 330 }, { "epoch": 6.65, "learning_rate": 8e-05, "loss": 0.1229, "step": 340 }, { "epoch": 6.85, "learning_rate": 8e-05, "loss": 0.1199, "step": 350 }, { "epoch": 7.04, "learning_rate": 8e-05, "loss": 0.0996, "step": 360 }, { "epoch": 7.24, "learning_rate": 8e-05, "loss": 0.0568, "step": 370 }, { "epoch": 7.43, "learning_rate": 8e-05, "loss": 0.0792, "step": 380 }, { "epoch": 7.63, "learning_rate": 8e-05, "loss": 0.0781, "step": 390 }, { "epoch": 7.82, "learning_rate": 8e-05, "loss": 0.0852, "step": 400 }, { "epoch": 8.02, "learning_rate": 8e-05, "loss": 0.0611, "step": 410 }, { "epoch": 8.22, "learning_rate": 8e-05, "loss": 0.0399, "step": 420 }, { "epoch": 8.41, "learning_rate": 8e-05, "loss": 0.0597, "step": 430 }, { "epoch": 8.61, "learning_rate": 8e-05, "loss": 0.0502, "step": 440 }, { "epoch": 8.8, "learning_rate": 8e-05, "loss": 0.0535, "step": 450 }, { "epoch": 9.0, "learning_rate": 8e-05, "loss": 0.0347, "step": 460 }, { "epoch": 9.19, "learning_rate": 8e-05, "loss": 0.0323, "step": 470 }, { "epoch": 9.39, "learning_rate": 8e-05, "loss": 0.039, "step": 480 }, { "epoch": 9.58, "learning_rate": 8e-05, "loss": 0.0335, "step": 490 }, { "epoch": 9.78, "learning_rate": 8e-05, "loss": 0.0364, "step": 500 } ], "logging_steps": 10, "max_steps": 100000, "num_train_epochs": 1961, "save_steps": 500, "total_flos": 3.9296655213613056e+17, "trial_name": null, "trial_params": null }