{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.092752727026792, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.999226963512678e-06, "loss": 1.1361, "step": 5 }, { "epoch": 0.0, "learning_rate": 9.998453927025357e-06, "loss": 0.7353, "step": 10 }, { "epoch": 0.0, "learning_rate": 9.997680890538034e-06, "loss": 0.6909, "step": 15 }, { "epoch": 0.0, "learning_rate": 9.996907854050712e-06, "loss": 0.6498, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.99613481756339e-06, "loss": 0.6414, "step": 25 }, { "epoch": 0.0, "learning_rate": 9.995361781076068e-06, "loss": 0.6415, "step": 30 }, { "epoch": 0.01, "learning_rate": 9.994588744588745e-06, "loss": 0.6317, "step": 35 }, { "epoch": 0.01, "learning_rate": 9.993815708101423e-06, "loss": 0.6378, "step": 40 }, { "epoch": 0.01, "learning_rate": 9.9930426716141e-06, "loss": 0.6347, "step": 45 }, { "epoch": 0.01, "learning_rate": 9.992269635126779e-06, "loss": 0.5924, "step": 50 }, { "epoch": 0.01, "learning_rate": 9.991496598639456e-06, "loss": 0.6046, "step": 55 }, { "epoch": 0.01, "learning_rate": 9.990723562152135e-06, "loss": 0.6045, "step": 60 }, { "epoch": 0.01, "learning_rate": 9.989950525664813e-06, "loss": 0.6, "step": 65 }, { "epoch": 0.01, "learning_rate": 9.98917748917749e-06, "loss": 0.5504, "step": 70 }, { "epoch": 0.01, "learning_rate": 9.988404452690169e-06, "loss": 0.5747, "step": 75 }, { "epoch": 0.01, "learning_rate": 9.987631416202846e-06, "loss": 0.5526, "step": 80 }, { "epoch": 0.01, "learning_rate": 9.986858379715523e-06, "loss": 0.5958, "step": 85 }, { "epoch": 0.01, "learning_rate": 9.9860853432282e-06, "loss": 0.608, "step": 90 }, { "epoch": 0.01, "learning_rate": 9.985312306740878e-06, "loss": 0.5988, "step": 95 }, { "epoch": 0.02, "learning_rate": 9.984539270253557e-06, "loss": 0.5861, "step": 100 }, { "epoch": 0.02, "learning_rate": 9.983766233766234e-06, "loss": 0.5749, "step": 105 }, { "epoch": 0.02, "learning_rate": 9.982993197278913e-06, "loss": 0.5498, "step": 110 }, { "epoch": 0.02, "learning_rate": 9.98222016079159e-06, "loss": 0.5841, "step": 115 }, { "epoch": 0.02, "learning_rate": 9.981447124304268e-06, "loss": 0.5973, "step": 120 }, { "epoch": 0.02, "learning_rate": 9.980674087816947e-06, "loss": 0.5954, "step": 125 }, { "epoch": 0.02, "learning_rate": 9.979901051329624e-06, "loss": 0.527, "step": 130 }, { "epoch": 0.02, "learning_rate": 9.979128014842301e-06, "loss": 0.5321, "step": 135 }, { "epoch": 0.02, "learning_rate": 9.978354978354979e-06, "loss": 0.5781, "step": 140 }, { "epoch": 0.02, "learning_rate": 9.977581941867656e-06, "loss": 0.5119, "step": 145 }, { "epoch": 0.02, "learning_rate": 9.976808905380335e-06, "loss": 0.5271, "step": 150 }, { "epoch": 0.02, "learning_rate": 9.976035868893012e-06, "loss": 0.5814, "step": 155 }, { "epoch": 0.02, "learning_rate": 9.97526283240569e-06, "loss": 0.518, "step": 160 }, { "epoch": 0.03, "learning_rate": 9.974489795918369e-06, "loss": 0.5335, "step": 165 }, { "epoch": 0.03, "learning_rate": 9.973716759431046e-06, "loss": 0.5062, "step": 170 }, { "epoch": 0.03, "learning_rate": 9.972943722943725e-06, "loss": 0.5253, "step": 175 }, { "epoch": 0.03, "learning_rate": 9.972170686456402e-06, "loss": 0.5856, "step": 180 }, { "epoch": 0.03, "learning_rate": 9.97139764996908e-06, "loss": 0.5196, "step": 185 }, { "epoch": 0.03, "learning_rate": 9.970624613481757e-06, "loss": 0.4764, "step": 190 }, { "epoch": 0.03, "learning_rate": 9.969851576994434e-06, "loss": 0.5254, "step": 195 }, { "epoch": 0.03, "learning_rate": 9.969078540507111e-06, "loss": 0.5442, "step": 200 }, { "epoch": 0.03, "eval_accuracy": 0.5804400673190799, "eval_accuracy_sklearn": 0.5804400673190799, "eval_f1": 0.5294915349019279, "eval_loss": 0.7918509840965271, "eval_precision": 0.6370946036872561, "eval_recall": 0.45298409281186464, "eval_runtime": 4914.2737, "eval_samples_per_second": 16.323, "eval_steps_per_second": 2.04, "step": 200 }, { "epoch": 0.03, "learning_rate": 9.96830550401979e-06, "loss": 0.5163, "step": 205 }, { "epoch": 0.03, "learning_rate": 9.967532467532468e-06, "loss": 0.5044, "step": 210 }, { "epoch": 0.03, "learning_rate": 9.966759431045147e-06, "loss": 0.5078, "step": 215 }, { "epoch": 0.03, "learning_rate": 9.965986394557824e-06, "loss": 0.4623, "step": 220 }, { "epoch": 0.03, "learning_rate": 9.965213358070501e-06, "loss": 0.5359, "step": 225 }, { "epoch": 0.04, "learning_rate": 9.96444032158318e-06, "loss": 0.5068, "step": 230 }, { "epoch": 0.04, "learning_rate": 9.963667285095858e-06, "loss": 0.5029, "step": 235 }, { "epoch": 0.04, "learning_rate": 9.962894248608535e-06, "loss": 0.5084, "step": 240 }, { "epoch": 0.04, "learning_rate": 9.962121212121212e-06, "loss": 0.4783, "step": 245 }, { "epoch": 0.04, "learning_rate": 9.96134817563389e-06, "loss": 0.5216, "step": 250 }, { "epoch": 0.04, "learning_rate": 9.960575139146569e-06, "loss": 0.54, "step": 255 }, { "epoch": 0.04, "learning_rate": 9.959802102659246e-06, "loss": 0.5494, "step": 260 }, { "epoch": 0.04, "learning_rate": 9.959029066171925e-06, "loss": 0.5401, "step": 265 }, { "epoch": 0.04, "learning_rate": 9.958256029684602e-06, "loss": 0.5073, "step": 270 }, { "epoch": 0.04, "learning_rate": 9.95748299319728e-06, "loss": 0.4598, "step": 275 }, { "epoch": 0.04, "learning_rate": 9.956709956709958e-06, "loss": 0.4913, "step": 280 }, { "epoch": 0.04, "learning_rate": 9.955936920222636e-06, "loss": 0.4947, "step": 285 }, { "epoch": 0.04, "learning_rate": 9.955163883735313e-06, "loss": 0.4806, "step": 290 }, { "epoch": 0.05, "learning_rate": 9.95439084724799e-06, "loss": 0.4659, "step": 295 }, { "epoch": 0.05, "learning_rate": 9.953617810760668e-06, "loss": 0.4555, "step": 300 }, { "epoch": 0.05, "learning_rate": 9.952844774273347e-06, "loss": 0.4606, "step": 305 }, { "epoch": 0.05, "learning_rate": 9.952071737786024e-06, "loss": 0.4905, "step": 310 }, { "epoch": 0.05, "learning_rate": 9.951298701298701e-06, "loss": 0.4423, "step": 315 }, { "epoch": 0.05, "learning_rate": 9.95052566481138e-06, "loss": 0.4855, "step": 320 }, { "epoch": 0.05, "learning_rate": 9.949752628324058e-06, "loss": 0.486, "step": 325 }, { "epoch": 0.05, "learning_rate": 9.948979591836737e-06, "loss": 0.4774, "step": 330 }, { "epoch": 0.05, "learning_rate": 9.948206555349414e-06, "loss": 0.4909, "step": 335 }, { "epoch": 0.05, "learning_rate": 9.947433518862091e-06, "loss": 0.47, "step": 340 }, { "epoch": 0.05, "learning_rate": 9.946660482374768e-06, "loss": 0.4496, "step": 345 }, { "epoch": 0.05, "learning_rate": 9.945887445887446e-06, "loss": 0.5146, "step": 350 }, { "epoch": 0.05, "learning_rate": 9.945114409400125e-06, "loss": 0.4876, "step": 355 }, { "epoch": 0.06, "learning_rate": 9.944341372912802e-06, "loss": 0.4747, "step": 360 }, { "epoch": 0.06, "learning_rate": 9.94356833642548e-06, "loss": 0.4614, "step": 365 }, { "epoch": 0.06, "learning_rate": 9.942795299938158e-06, "loss": 0.4755, "step": 370 }, { "epoch": 0.06, "learning_rate": 9.942022263450836e-06, "loss": 0.4785, "step": 375 }, { "epoch": 0.06, "learning_rate": 9.941249226963513e-06, "loss": 0.4581, "step": 380 }, { "epoch": 0.06, "learning_rate": 9.940476190476192e-06, "loss": 0.4671, "step": 385 }, { "epoch": 0.06, "learning_rate": 9.93970315398887e-06, "loss": 0.4327, "step": 390 }, { "epoch": 0.06, "learning_rate": 9.938930117501547e-06, "loss": 0.4906, "step": 395 }, { "epoch": 0.06, "learning_rate": 9.938157081014226e-06, "loss": 0.5006, "step": 400 }, { "epoch": 0.06, "eval_accuracy": 0.5519790562862308, "eval_accuracy_sklearn": 0.5519790562862308, "eval_f1": 0.39240549130993435, "eval_loss": 0.969095766544342, "eval_precision": 0.6691460531626593, "eval_recall": 0.27759837340031096, "eval_runtime": 4903.2377, "eval_samples_per_second": 16.36, "eval_steps_per_second": 2.045, "step": 400 }, { "epoch": 0.06, "learning_rate": 9.937384044526903e-06, "loss": 0.4531, "step": 405 }, { "epoch": 0.06, "learning_rate": 9.93661100803958e-06, "loss": 0.4467, "step": 410 }, { "epoch": 0.06, "learning_rate": 9.935837971552257e-06, "loss": 0.4753, "step": 415 }, { "epoch": 0.06, "learning_rate": 9.935064935064936e-06, "loss": 0.428, "step": 420 }, { "epoch": 0.07, "learning_rate": 9.934291898577614e-06, "loss": 0.4418, "step": 425 }, { "epoch": 0.07, "learning_rate": 9.933518862090291e-06, "loss": 0.5087, "step": 430 }, { "epoch": 0.07, "learning_rate": 9.93274582560297e-06, "loss": 0.4775, "step": 435 }, { "epoch": 0.07, "learning_rate": 9.931972789115647e-06, "loss": 0.4923, "step": 440 }, { "epoch": 0.07, "learning_rate": 9.931199752628325e-06, "loss": 0.4721, "step": 445 }, { "epoch": 0.07, "learning_rate": 9.930426716141004e-06, "loss": 0.4718, "step": 450 }, { "epoch": 0.07, "learning_rate": 9.929653679653681e-06, "loss": 0.4392, "step": 455 }, { "epoch": 0.07, "learning_rate": 9.928880643166358e-06, "loss": 0.4315, "step": 460 }, { "epoch": 0.07, "learning_rate": 9.928107606679036e-06, "loss": 0.4641, "step": 465 }, { "epoch": 0.07, "learning_rate": 9.927334570191713e-06, "loss": 0.4417, "step": 470 }, { "epoch": 0.07, "learning_rate": 9.926561533704392e-06, "loss": 0.454, "step": 475 }, { "epoch": 0.07, "learning_rate": 9.925788497217069e-06, "loss": 0.4414, "step": 480 }, { "epoch": 0.07, "learning_rate": 9.925015460729748e-06, "loss": 0.4802, "step": 485 }, { "epoch": 0.08, "learning_rate": 9.924242424242425e-06, "loss": 0.4262, "step": 490 }, { "epoch": 0.08, "learning_rate": 9.923469387755103e-06, "loss": 0.4543, "step": 495 }, { "epoch": 0.08, "learning_rate": 9.922696351267782e-06, "loss": 0.407, "step": 500 }, { "epoch": 0.08, "learning_rate": 9.921923314780459e-06, "loss": 0.4328, "step": 505 }, { "epoch": 0.08, "learning_rate": 9.921150278293136e-06, "loss": 0.4589, "step": 510 }, { "epoch": 0.08, "learning_rate": 9.920377241805814e-06, "loss": 0.4588, "step": 515 }, { "epoch": 0.08, "learning_rate": 9.919604205318491e-06, "loss": 0.4273, "step": 520 }, { "epoch": 0.08, "learning_rate": 9.91883116883117e-06, "loss": 0.4688, "step": 525 }, { "epoch": 0.08, "learning_rate": 9.918058132343847e-06, "loss": 0.4324, "step": 530 }, { "epoch": 0.08, "learning_rate": 9.917285095856525e-06, "loss": 0.4846, "step": 535 }, { "epoch": 0.08, "learning_rate": 9.916512059369204e-06, "loss": 0.4366, "step": 540 }, { "epoch": 0.08, "learning_rate": 9.91573902288188e-06, "loss": 0.442, "step": 545 }, { "epoch": 0.09, "learning_rate": 9.91496598639456e-06, "loss": 0.4241, "step": 550 }, { "epoch": 0.09, "learning_rate": 9.914192949907237e-06, "loss": 0.4574, "step": 555 }, { "epoch": 0.09, "learning_rate": 9.913419913419914e-06, "loss": 0.4173, "step": 560 }, { "epoch": 0.09, "learning_rate": 9.912646876932592e-06, "loss": 0.4202, "step": 565 }, { "epoch": 0.09, "learning_rate": 9.911873840445269e-06, "loss": 0.4694, "step": 570 }, { "epoch": 0.09, "learning_rate": 9.911100803957948e-06, "loss": 0.4595, "step": 575 }, { "epoch": 0.09, "learning_rate": 9.910327767470625e-06, "loss": 0.464, "step": 580 }, { "epoch": 0.09, "learning_rate": 9.909554730983303e-06, "loss": 0.4688, "step": 585 }, { "epoch": 0.09, "learning_rate": 9.908781694495982e-06, "loss": 0.4932, "step": 590 }, { "epoch": 0.09, "learning_rate": 9.908008658008659e-06, "loss": 0.4648, "step": 595 }, { "epoch": 0.09, "learning_rate": 9.907235621521336e-06, "loss": 0.5136, "step": 600 }, { "epoch": 0.09, "eval_accuracy": 0.595349996883376, "eval_accuracy_sklearn": 0.595349996883376, "eval_f1": 0.6681389238209163, "eval_loss": 0.8079590201377869, "eval_precision": 0.5834404685379616, "eval_recall": 0.7816050711637363, "eval_runtime": 4915.3389, "eval_samples_per_second": 16.319, "eval_steps_per_second": 2.04, "step": 600 } ], "max_steps": 64680, "num_train_epochs": 10, "total_flos": 7.12606751391744e+16, "trial_name": null, "trial_params": null }