{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.994219653179191, "eval_steps": 500, "global_step": 777, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.9999666208982518e-05, "loss": 28.454, "step": 10 }, { "epoch": 0.08, "learning_rate": 1.9987985862949325e-05, "loss": 8.849, "step": 20 }, { "epoch": 0.12, "learning_rate": 1.9959638242644855e-05, "loss": 8.2489, "step": 30 }, { "epoch": 0.15, "learning_rate": 1.991467065265775e-05, "loss": 4.2986, "step": 40 }, { "epoch": 0.19, "learning_rate": 1.985315813185629e-05, "loss": 4.8588, "step": 50 }, { "epoch": 0.23, "learning_rate": 1.9775203328168643e-05, "loss": 9.355, "step": 60 }, { "epoch": 0.27, "learning_rate": 1.9680936327290924e-05, "loss": 5.7211, "step": 70 }, { "epoch": 0.31, "learning_rate": 1.957051443560902e-05, "loss": 10.0795, "step": 80 }, { "epoch": 0.35, "learning_rate": 1.9444121917696335e-05, "loss": 3.6404, "step": 90 }, { "epoch": 0.39, "learning_rate": 1.930196968882556e-05, "loss": 30.7862, "step": 100 }, { "epoch": 0.42, "learning_rate": 1.9144294963007542e-05, "loss": 6.9002, "step": 110 }, { "epoch": 0.46, "learning_rate": 1.8971360857144616e-05, "loss": 8.5785, "step": 120 }, { "epoch": 0.5, "learning_rate": 1.8783455951958948e-05, "loss": 5.644, "step": 130 }, { "epoch": 0.54, "learning_rate": 1.8580893810428562e-05, "loss": 14.8062, "step": 140 }, { "epoch": 0.58, "learning_rate": 1.8364012454534687e-05, "loss": 14.919, "step": 150 }, { "epoch": 0.62, "learning_rate": 1.813317380119356e-05, "loss": 6.5775, "step": 160 }, { "epoch": 0.66, "learning_rate": 1.7888763058314016e-05, "loss": 6.5815, "step": 170 }, { "epoch": 0.69, "learning_rate": 1.763118808198859e-05, "loss": 4.1731, "step": 180 }, { "epoch": 0.73, "learning_rate": 1.736087869589092e-05, "loss": 9.1267, "step": 190 }, { "epoch": 0.77, "learning_rate": 1.7078285974015103e-05, "loss": 3.9908, "step": 200 }, { "epoch": 0.81, "learning_rate": 1.678388148795397e-05, "loss": 5.6814, "step": 210 }, { "epoch": 0.85, "learning_rate": 1.6478156519972354e-05, "loss": 5.0232, "step": 220 }, { "epoch": 0.89, "learning_rate": 1.6161621243188528e-05, "loss": 3.5724, "step": 230 }, { "epoch": 0.92, "learning_rate": 1.5834803870231846e-05, "loss": 11.7568, "step": 240 }, { "epoch": 0.96, "learning_rate": 1.549824977179731e-05, "loss": 5.393, "step": 250 }, { "epoch": 1.0, "learning_rate": 1.5152520566567873e-05, "loss": 6.7241, "step": 260 }, { "epoch": 1.04, "learning_rate": 1.4798193184023233e-05, "loss": 3.7898, "step": 270 }, { "epoch": 1.08, "learning_rate": 1.4435858901698995e-05, "loss": 15.7335, "step": 280 }, { "epoch": 1.12, "learning_rate": 1.4066122358502772e-05, "loss": 2.6745, "step": 290 }, { "epoch": 1.16, "learning_rate": 1.3689600545733713e-05, "loss": 5.8789, "step": 300 }, { "epoch": 1.19, "learning_rate": 1.330692177748925e-05, "loss": 2.7451, "step": 310 }, { "epoch": 1.23, "learning_rate": 1.2918724642177054e-05, "loss": 3.0655, "step": 320 }, { "epoch": 1.27, "learning_rate": 1.252565693688198e-05, "loss": 1.6089, "step": 330 }, { "epoch": 1.31, "learning_rate": 1.2128374586366159e-05, "loss": 9.6673, "step": 340 }, { "epoch": 1.35, "learning_rate": 1.172754054850619e-05, "loss": 1.9807, "step": 350 }, { "epoch": 1.39, "learning_rate": 1.1323823707993937e-05, "loss": 0.7278, "step": 360 }, { "epoch": 1.43, "learning_rate": 1.091789776014706e-05, "loss": 5.7635, "step": 370 }, { "epoch": 1.46, "learning_rate": 1.0510440086691911e-05, "loss": 3.151, "step": 380 }, { "epoch": 1.5, "learning_rate": 1.0102130625394776e-05, "loss": 16.2403, "step": 390 }, { "epoch": 1.54, "learning_rate": 9.693650735427808e-06, "loss": 2.06, "step": 400 }, { "epoch": 1.58, "learning_rate": 9.285682060362974e-06, "loss": 3.2416, "step": 410 }, { "epoch": 1.62, "learning_rate": 8.878905390691437e-06, "loss": 1.7038, "step": 420 }, { "epoch": 1.66, "learning_rate": 8.473999527766503e-06, "loss": 1.4826, "step": 430 }, { "epoch": 1.7, "learning_rate": 8.071640151065902e-06, "loss": 5.8947, "step": 440 }, { "epoch": 1.73, "learning_rate": 7.672498690663632e-06, "loss": 6.4248, "step": 450 }, { "epoch": 1.77, "learning_rate": 7.277241206792944e-06, "loss": 4.6138, "step": 460 }, { "epoch": 1.81, "learning_rate": 6.886527278370131e-06, "loss": 2.0887, "step": 470 }, { "epoch": 1.85, "learning_rate": 6.501008902333912e-06, "loss": 3.9643, "step": 480 }, { "epoch": 1.89, "learning_rate": 6.121329405637111e-06, "loss": 4.3215, "step": 490 }, { "epoch": 1.93, "learning_rate": 5.748122371706198e-06, "loss": 3.4914, "step": 500 }, { "epoch": 1.97, "learning_rate": 5.382010583160201e-06, "loss": 13.7674, "step": 510 }, { "epoch": 2.0, "learning_rate": 5.0236049825532355e-06, "loss": 3.2402, "step": 520 }, { "epoch": 2.04, "learning_rate": 4.673503652874977e-06, "loss": 0.7369, "step": 530 }, { "epoch": 2.08, "learning_rate": 4.33229081951025e-06, "loss": 0.707, "step": 540 }, { "epoch": 2.12, "learning_rate": 4.000535875323307e-06, "loss": 1.7793, "step": 550 }, { "epoch": 2.16, "learning_rate": 3.6787924304935696e-06, "loss": 0.5325, "step": 560 }, { "epoch": 2.2, "learning_rate": 3.3675973886884506e-06, "loss": 0.4873, "step": 570 }, { "epoch": 2.24, "learning_rate": 3.0674700511149057e-06, "loss": 0.3436, "step": 580 }, { "epoch": 2.27, "learning_rate": 2.7789112499447312e-06, "loss": 0.3319, "step": 590 }, { "epoch": 2.31, "learning_rate": 2.502402512559773e-06, "loss": 0.4382, "step": 600 }, { "epoch": 2.35, "learning_rate": 2.2384052580116465e-06, "loss": 0.2655, "step": 610 }, { "epoch": 2.39, "learning_rate": 1.9873600270368664e-06, "loss": 1.3714, "step": 620 }, { "epoch": 2.43, "learning_rate": 1.749685746912323e-06, "loss": 0.1256, "step": 630 }, { "epoch": 2.47, "learning_rate": 1.52577903237781e-06, "loss": 0.2344, "step": 640 }, { "epoch": 2.5, "learning_rate": 1.3160135237922011e-06, "loss": 0.4926, "step": 650 }, { "epoch": 2.54, "learning_rate": 1.1207392636277502e-06, "loss": 0.369, "step": 660 }, { "epoch": 2.58, "learning_rate": 9.402821123429017e-07, "loss": 0.1464, "step": 670 }, { "epoch": 2.62, "learning_rate": 7.749432046084471e-07, "loss": 0.5273, "step": 680 }, { "epoch": 2.66, "learning_rate": 6.249984467943737e-07, "loss": 0.4998, "step": 690 }, { "epoch": 2.7, "learning_rate": 4.906980565560004e-07, "loss": 2.2237, "step": 700 }, { "epoch": 2.74, "learning_rate": 3.722661452877163e-07, "loss": 0.2594, "step": 710 }, { "epoch": 2.77, "learning_rate": 2.699003441410508e-07, "loss": 0.3388, "step": 720 }, { "epoch": 2.81, "learning_rate": 1.8377147423120467e-07, "loss": 0.4497, "step": 730 }, { "epoch": 2.85, "learning_rate": 1.1402326158234e-07, "loss": 0.3246, "step": 740 }, { "epoch": 2.89, "learning_rate": 6.077209728732492e-08, "loss": 2.0268, "step": 750 }, { "epoch": 2.93, "learning_rate": 2.4106843282165615e-08, "loss": 0.1469, "step": 760 }, { "epoch": 2.97, "learning_rate": 4.088684059220249e-09, "loss": 0.5057, "step": 770 }, { "epoch": 2.99, "step": 777, "total_flos": 1.0509820178622054e+17, "train_loss": 4.876755037362972, "train_runtime": 2316.9087, "train_samples_per_second": 21.497, "train_steps_per_second": 0.335 } ], "logging_steps": 10, "max_steps": 777, "num_train_epochs": 3, "save_steps": 300, "total_flos": 1.0509820178622054e+17, "trial_name": null, "trial_params": null }