{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.061835151351194675, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.999226963512678e-06, "loss": 1.1361, "step": 5 }, { "epoch": 0.0, "learning_rate": 9.998453927025357e-06, "loss": 0.7353, "step": 10 }, { "epoch": 0.0, "learning_rate": 9.997680890538034e-06, "loss": 0.6909, "step": 15 }, { "epoch": 0.0, "learning_rate": 9.996907854050712e-06, "loss": 0.6498, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.99613481756339e-06, "loss": 0.6414, "step": 25 }, { "epoch": 0.0, "learning_rate": 9.995361781076068e-06, "loss": 0.6415, "step": 30 }, { "epoch": 0.01, "learning_rate": 9.994588744588745e-06, "loss": 0.6317, "step": 35 }, { "epoch": 0.01, "learning_rate": 9.993815708101423e-06, "loss": 0.6378, "step": 40 }, { "epoch": 0.01, "learning_rate": 9.9930426716141e-06, "loss": 0.6347, "step": 45 }, { "epoch": 0.01, "learning_rate": 9.992269635126779e-06, "loss": 0.5924, "step": 50 }, { "epoch": 0.01, "learning_rate": 9.991496598639456e-06, "loss": 0.6046, "step": 55 }, { "epoch": 0.01, "learning_rate": 9.990723562152135e-06, "loss": 0.6045, "step": 60 }, { "epoch": 0.01, "learning_rate": 9.989950525664813e-06, "loss": 0.6, "step": 65 }, { "epoch": 0.01, "learning_rate": 9.98917748917749e-06, "loss": 0.5504, "step": 70 }, { "epoch": 0.01, "learning_rate": 9.988404452690169e-06, "loss": 0.5747, "step": 75 }, { "epoch": 0.01, "learning_rate": 9.987631416202846e-06, "loss": 0.5526, "step": 80 }, { "epoch": 0.01, "learning_rate": 9.986858379715523e-06, "loss": 0.5958, "step": 85 }, { "epoch": 0.01, "learning_rate": 9.9860853432282e-06, "loss": 0.608, "step": 90 }, { "epoch": 0.01, "learning_rate": 9.985312306740878e-06, "loss": 0.5988, "step": 95 }, { "epoch": 0.02, "learning_rate": 9.984539270253557e-06, "loss": 0.5861, "step": 100 }, { "epoch": 0.02, "learning_rate": 9.983766233766234e-06, "loss": 0.5749, "step": 105 }, { "epoch": 0.02, "learning_rate": 9.982993197278913e-06, "loss": 0.5498, "step": 110 }, { "epoch": 0.02, "learning_rate": 9.98222016079159e-06, "loss": 0.5841, "step": 115 }, { "epoch": 0.02, "learning_rate": 9.981447124304268e-06, "loss": 0.5973, "step": 120 }, { "epoch": 0.02, "learning_rate": 9.980674087816947e-06, "loss": 0.5954, "step": 125 }, { "epoch": 0.02, "learning_rate": 9.979901051329624e-06, "loss": 0.527, "step": 130 }, { "epoch": 0.02, "learning_rate": 9.979128014842301e-06, "loss": 0.5321, "step": 135 }, { "epoch": 0.02, "learning_rate": 9.978354978354979e-06, "loss": 0.5781, "step": 140 }, { "epoch": 0.02, "learning_rate": 9.977581941867656e-06, "loss": 0.5119, "step": 145 }, { "epoch": 0.02, "learning_rate": 9.976808905380335e-06, "loss": 0.5271, "step": 150 }, { "epoch": 0.02, "learning_rate": 9.976035868893012e-06, "loss": 0.5814, "step": 155 }, { "epoch": 0.02, "learning_rate": 9.97526283240569e-06, "loss": 0.518, "step": 160 }, { "epoch": 0.03, "learning_rate": 9.974489795918369e-06, "loss": 0.5335, "step": 165 }, { "epoch": 0.03, "learning_rate": 9.973716759431046e-06, "loss": 0.5062, "step": 170 }, { "epoch": 0.03, "learning_rate": 9.972943722943725e-06, "loss": 0.5253, "step": 175 }, { "epoch": 0.03, "learning_rate": 9.972170686456402e-06, "loss": 0.5856, "step": 180 }, { "epoch": 0.03, "learning_rate": 9.97139764996908e-06, "loss": 0.5196, "step": 185 }, { "epoch": 0.03, "learning_rate": 9.970624613481757e-06, "loss": 0.4764, "step": 190 }, { "epoch": 0.03, "learning_rate": 9.969851576994434e-06, "loss": 0.5254, "step": 195 }, { "epoch": 0.03, "learning_rate": 9.969078540507111e-06, "loss": 0.5442, "step": 200 }, { "epoch": 0.03, "eval_accuracy": 0.5804400673190799, "eval_accuracy_sklearn": 0.5804400673190799, "eval_f1": 0.5294915349019279, "eval_loss": 0.7918509840965271, "eval_precision": 0.6370946036872561, "eval_recall": 0.45298409281186464, "eval_runtime": 4914.2737, "eval_samples_per_second": 16.323, "eval_steps_per_second": 2.04, "step": 200 }, { "epoch": 0.03, "learning_rate": 9.96830550401979e-06, "loss": 0.5163, "step": 205 }, { "epoch": 0.03, "learning_rate": 9.967532467532468e-06, "loss": 0.5044, "step": 210 }, { "epoch": 0.03, "learning_rate": 9.966759431045147e-06, "loss": 0.5078, "step": 215 }, { "epoch": 0.03, "learning_rate": 9.965986394557824e-06, "loss": 0.4623, "step": 220 }, { "epoch": 0.03, "learning_rate": 9.965213358070501e-06, "loss": 0.5359, "step": 225 }, { "epoch": 0.04, "learning_rate": 9.96444032158318e-06, "loss": 0.5068, "step": 230 }, { "epoch": 0.04, "learning_rate": 9.963667285095858e-06, "loss": 0.5029, "step": 235 }, { "epoch": 0.04, "learning_rate": 9.962894248608535e-06, "loss": 0.5084, "step": 240 }, { "epoch": 0.04, "learning_rate": 9.962121212121212e-06, "loss": 0.4783, "step": 245 }, { "epoch": 0.04, "learning_rate": 9.96134817563389e-06, "loss": 0.5216, "step": 250 }, { "epoch": 0.04, "learning_rate": 9.960575139146569e-06, "loss": 0.54, "step": 255 }, { "epoch": 0.04, "learning_rate": 9.959802102659246e-06, "loss": 0.5494, "step": 260 }, { "epoch": 0.04, "learning_rate": 9.959029066171925e-06, "loss": 0.5401, "step": 265 }, { "epoch": 0.04, "learning_rate": 9.958256029684602e-06, "loss": 0.5073, "step": 270 }, { "epoch": 0.04, "learning_rate": 9.95748299319728e-06, "loss": 0.4598, "step": 275 }, { "epoch": 0.04, "learning_rate": 9.956709956709958e-06, "loss": 0.4913, "step": 280 }, { "epoch": 0.04, "learning_rate": 9.955936920222636e-06, "loss": 0.4947, "step": 285 }, { "epoch": 0.04, "learning_rate": 9.955163883735313e-06, "loss": 0.4806, "step": 290 }, { "epoch": 0.05, "learning_rate": 9.95439084724799e-06, "loss": 0.4659, "step": 295 }, { "epoch": 0.05, "learning_rate": 9.953617810760668e-06, "loss": 0.4555, "step": 300 }, { "epoch": 0.05, "learning_rate": 9.952844774273347e-06, "loss": 0.4606, "step": 305 }, { "epoch": 0.05, "learning_rate": 9.952071737786024e-06, "loss": 0.4905, "step": 310 }, { "epoch": 0.05, "learning_rate": 9.951298701298701e-06, "loss": 0.4423, "step": 315 }, { "epoch": 0.05, "learning_rate": 9.95052566481138e-06, "loss": 0.4855, "step": 320 }, { "epoch": 0.05, "learning_rate": 9.949752628324058e-06, "loss": 0.486, "step": 325 }, { "epoch": 0.05, "learning_rate": 9.948979591836737e-06, "loss": 0.4774, "step": 330 }, { "epoch": 0.05, "learning_rate": 9.948206555349414e-06, "loss": 0.4909, "step": 335 }, { "epoch": 0.05, "learning_rate": 9.947433518862091e-06, "loss": 0.47, "step": 340 }, { "epoch": 0.05, "learning_rate": 9.946660482374768e-06, "loss": 0.4496, "step": 345 }, { "epoch": 0.05, "learning_rate": 9.945887445887446e-06, "loss": 0.5146, "step": 350 }, { "epoch": 0.05, "learning_rate": 9.945114409400125e-06, "loss": 0.4876, "step": 355 }, { "epoch": 0.06, "learning_rate": 9.944341372912802e-06, "loss": 0.4747, "step": 360 }, { "epoch": 0.06, "learning_rate": 9.94356833642548e-06, "loss": 0.4614, "step": 365 }, { "epoch": 0.06, "learning_rate": 9.942795299938158e-06, "loss": 0.4755, "step": 370 }, { "epoch": 0.06, "learning_rate": 9.942022263450836e-06, "loss": 0.4785, "step": 375 }, { "epoch": 0.06, "learning_rate": 9.941249226963513e-06, "loss": 0.4581, "step": 380 }, { "epoch": 0.06, "learning_rate": 9.940476190476192e-06, "loss": 0.4671, "step": 385 }, { "epoch": 0.06, "learning_rate": 9.93970315398887e-06, "loss": 0.4327, "step": 390 }, { "epoch": 0.06, "learning_rate": 9.938930117501547e-06, "loss": 0.4906, "step": 395 }, { "epoch": 0.06, "learning_rate": 9.938157081014226e-06, "loss": 0.5006, "step": 400 }, { "epoch": 0.06, "eval_accuracy": 0.5519790562862308, "eval_accuracy_sklearn": 0.5519790562862308, "eval_f1": 0.39240549130993435, "eval_loss": 0.969095766544342, "eval_precision": 0.6691460531626593, "eval_recall": 0.27759837340031096, "eval_runtime": 4903.2377, "eval_samples_per_second": 16.36, "eval_steps_per_second": 2.045, "step": 400 } ], "max_steps": 64680, "num_train_epochs": 10, "total_flos": 4.75071167594496e+16, "trial_name": null, "trial_params": null }