{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.29631824579598487, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9999729068921297e-05, "loss": 1.8898, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.9998916281557476e-05, "loss": 1.7273, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.999756165552527e-05, "loss": 1.6799, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.999566522018553e-05, "loss": 1.6431, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.999322701664249e-05, "loss": 1.6153, "step": 50 }, { "epoch": 0.02, "learning_rate": 4.9990247097742984e-05, "loss": 1.5933, "step": 60 }, { "epoch": 0.02, "learning_rate": 4.9986725528075205e-05, "loss": 1.5913, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.998266238396737e-05, "loss": 1.5434, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.997805775348605e-05, "loss": 1.5304, "step": 90 }, { "epoch": 0.03, "learning_rate": 4.997291173643424e-05, "loss": 1.5531, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.996722444434921e-05, "loss": 1.5446, "step": 110 }, { "epoch": 0.04, "learning_rate": 4.99609960005001e-05, "loss": 1.5352, "step": 120 }, { "epoch": 0.04, "learning_rate": 4.995422653988524e-05, "loss": 1.5303, "step": 130 }, { "epoch": 0.04, "learning_rate": 4.994691620922919e-05, "loss": 1.5449, "step": 140 }, { "epoch": 0.04, "learning_rate": 4.993906516697964e-05, "loss": 1.5114, "step": 150 }, { "epoch": 0.05, "learning_rate": 4.9930673583303865e-05, "loss": 1.5043, "step": 160 }, { "epoch": 0.05, "learning_rate": 4.992174164008515e-05, "loss": 1.5476, "step": 170 }, { "epoch": 0.05, "learning_rate": 4.991226953091877e-05, "loss": 1.5107, "step": 180 }, { "epoch": 0.06, "learning_rate": 4.9902257461107824e-05, "loss": 1.5104, "step": 190 }, { "epoch": 0.06, "learning_rate": 4.9891705647658795e-05, "loss": 1.5298, "step": 200 }, { "epoch": 0.06, "learning_rate": 4.988061431927681e-05, "loss": 1.4907, "step": 210 }, { "epoch": 0.07, "learning_rate": 4.986898371636071e-05, "loss": 1.5127, "step": 220 }, { "epoch": 0.07, "learning_rate": 4.985681409099784e-05, "loss": 1.5037, "step": 230 }, { "epoch": 0.07, "learning_rate": 4.984410570695858e-05, "loss": 1.5029, "step": 240 }, { "epoch": 0.07, "learning_rate": 4.983085883969063e-05, "loss": 1.4725, "step": 250 }, { "epoch": 0.08, "learning_rate": 4.981707377631303e-05, "loss": 1.5148, "step": 260 }, { "epoch": 0.08, "learning_rate": 4.9802750815609936e-05, "loss": 1.4993, "step": 270 }, { "epoch": 0.08, "learning_rate": 4.978789026802419e-05, "loss": 1.5006, "step": 280 }, { "epoch": 0.09, "learning_rate": 4.9772492455650494e-05, "loss": 1.4885, "step": 290 }, { "epoch": 0.09, "learning_rate": 4.975655771222855e-05, "loss": 1.4898, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.9740086383135706e-05, "loss": 1.4906, "step": 310 }, { "epoch": 0.09, "learning_rate": 4.97230788253796e-05, "loss": 1.4796, "step": 320 }, { "epoch": 0.1, "learning_rate": 4.970553540759028e-05, "loss": 1.4861, "step": 330 }, { "epoch": 0.1, "learning_rate": 4.968745651001231e-05, "loss": 1.4827, "step": 340 }, { "epoch": 0.1, "learning_rate": 4.9668842524496526e-05, "loss": 1.4884, "step": 350 }, { "epoch": 0.11, "learning_rate": 4.964969385449149e-05, "loss": 1.4873, "step": 360 }, { "epoch": 0.11, "learning_rate": 4.96300109150348e-05, "loss": 1.4848, "step": 370 }, { "epoch": 0.11, "learning_rate": 4.960979413274404e-05, "loss": 1.4881, "step": 380 }, { "epoch": 0.12, "learning_rate": 4.9589043945807594e-05, "loss": 1.4618, "step": 390 }, { "epoch": 0.12, "learning_rate": 4.9567760803975105e-05, "loss": 1.4858, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.954594516854773e-05, "loss": 1.4777, "step": 410 }, { "epoch": 0.12, "learning_rate": 4.952359751236817e-05, "loss": 1.4828, "step": 420 }, { "epoch": 0.13, "learning_rate": 4.950071831981038e-05, "loss": 1.4571, "step": 430 }, { "epoch": 0.13, "learning_rate": 4.9477308086769117e-05, "loss": 1.4724, "step": 440 }, { "epoch": 0.13, "learning_rate": 4.945336732064915e-05, "loss": 1.4771, "step": 450 }, { "epoch": 0.14, "learning_rate": 4.9428896540354294e-05, "loss": 1.4604, "step": 460 }, { "epoch": 0.14, "learning_rate": 4.940389627627613e-05, "loss": 1.4815, "step": 470 }, { "epoch": 0.14, "learning_rate": 4.937836707028255e-05, "loss": 1.4859, "step": 480 }, { "epoch": 0.15, "learning_rate": 4.935230947570597e-05, "loss": 1.4715, "step": 490 }, { "epoch": 0.15, "learning_rate": 4.932572405733137e-05, "loss": 1.4759, "step": 500 }, { "epoch": 0.15, "learning_rate": 4.929861139138404e-05, "loss": 1.4678, "step": 510 }, { "epoch": 0.15, "learning_rate": 4.9270972065517083e-05, "loss": 1.4754, "step": 520 }, { "epoch": 0.16, "learning_rate": 4.924280667879869e-05, "loss": 1.462, "step": 530 }, { "epoch": 0.16, "learning_rate": 4.921411584169915e-05, "loss": 1.4704, "step": 540 }, { "epoch": 0.16, "learning_rate": 4.918490017607761e-05, "loss": 1.4661, "step": 550 }, { "epoch": 0.17, "learning_rate": 4.915516031516863e-05, "loss": 1.471, "step": 560 }, { "epoch": 0.17, "learning_rate": 4.912489690356841e-05, "loss": 1.451, "step": 570 }, { "epoch": 0.17, "learning_rate": 4.909411059722084e-05, "loss": 1.4411, "step": 580 }, { "epoch": 0.17, "learning_rate": 4.9062802063403316e-05, "loss": 1.456, "step": 590 }, { "epoch": 0.18, "learning_rate": 4.90309719807122e-05, "loss": 1.4678, "step": 600 }, { "epoch": 0.18, "learning_rate": 4.8998621039048205e-05, "loss": 1.479, "step": 610 }, { "epoch": 0.18, "learning_rate": 4.896574993960136e-05, "loss": 1.4471, "step": 620 }, { "epoch": 0.19, "learning_rate": 4.893235939483587e-05, "loss": 1.453, "step": 630 }, { "epoch": 0.19, "learning_rate": 4.8898450128474626e-05, "loss": 1.4696, "step": 640 }, { "epoch": 0.19, "learning_rate": 4.886402287548357e-05, "loss": 1.4526, "step": 650 }, { "epoch": 0.2, "learning_rate": 4.8829078382055725e-05, "loss": 1.4429, "step": 660 }, { "epoch": 0.2, "learning_rate": 4.8793617405595025e-05, "loss": 1.4491, "step": 670 }, { "epoch": 0.2, "learning_rate": 4.8757640714699924e-05, "loss": 1.4411, "step": 680 }, { "epoch": 0.2, "learning_rate": 4.872114908914671e-05, "loss": 1.4543, "step": 690 }, { "epoch": 0.21, "learning_rate": 4.8684143319872636e-05, "loss": 1.4556, "step": 700 }, { "epoch": 0.21, "learning_rate": 4.864662420895873e-05, "loss": 1.4506, "step": 710 }, { "epoch": 0.21, "learning_rate": 4.860859256961244e-05, "loss": 1.4671, "step": 720 }, { "epoch": 0.22, "learning_rate": 4.857004922615002e-05, "loss": 1.4469, "step": 730 }, { "epoch": 0.22, "learning_rate": 4.8530995013978645e-05, "loss": 1.4554, "step": 740 }, { "epoch": 0.22, "learning_rate": 4.84914307795783e-05, "loss": 1.4671, "step": 750 }, { "epoch": 0.23, "learning_rate": 4.845135738048343e-05, "loss": 1.445, "step": 760 }, { "epoch": 0.23, "learning_rate": 4.841077568526439e-05, "loss": 1.4469, "step": 770 }, { "epoch": 0.23, "learning_rate": 4.836968657350857e-05, "loss": 1.4677, "step": 780 }, { "epoch": 0.23, "learning_rate": 4.832809093580135e-05, "loss": 1.4653, "step": 790 }, { "epoch": 0.24, "learning_rate": 4.8285989673706826e-05, "loss": 1.4342, "step": 800 }, { "epoch": 0.24, "learning_rate": 4.824338369974822e-05, "loss": 1.458, "step": 810 }, { "epoch": 0.24, "learning_rate": 4.8200273937388126e-05, "loss": 1.4541, "step": 820 }, { "epoch": 0.25, "learning_rate": 4.81566613210085e-05, "loss": 1.4324, "step": 830 }, { "epoch": 0.25, "learning_rate": 4.81125467958904e-05, "loss": 1.4405, "step": 840 }, { "epoch": 0.25, "learning_rate": 4.80679313181935e-05, "loss": 1.4408, "step": 850 }, { "epoch": 0.25, "learning_rate": 4.8022815854935356e-05, "loss": 1.4395, "step": 860 }, { "epoch": 0.26, "learning_rate": 4.797720138397045e-05, "loss": 1.4359, "step": 870 }, { "epoch": 0.26, "learning_rate": 4.793108889396902e-05, "loss": 1.442, "step": 880 }, { "epoch": 0.26, "learning_rate": 4.7884479384395594e-05, "loss": 1.4566, "step": 890 }, { "epoch": 0.27, "learning_rate": 4.7837373865487345e-05, "loss": 1.4257, "step": 900 }, { "epoch": 0.27, "learning_rate": 4.77897733582322e-05, "loss": 1.4755, "step": 910 }, { "epoch": 0.27, "learning_rate": 4.774167889434671e-05, "loss": 1.4476, "step": 920 }, { "epoch": 0.28, "learning_rate": 4.769309151625366e-05, "loss": 1.4531, "step": 930 }, { "epoch": 0.28, "learning_rate": 4.7644012277059516e-05, "loss": 1.447, "step": 940 }, { "epoch": 0.28, "learning_rate": 4.7594442240531574e-05, "loss": 1.4201, "step": 950 }, { "epoch": 0.28, "learning_rate": 4.754438248107491e-05, "loss": 1.4323, "step": 960 }, { "epoch": 0.29, "learning_rate": 4.7493834083709104e-05, "loss": 1.4432, "step": 970 }, { "epoch": 0.29, "learning_rate": 4.7442798144044695e-05, "loss": 1.4339, "step": 980 }, { "epoch": 0.29, "learning_rate": 4.739127576825945e-05, "loss": 1.4477, "step": 990 }, { "epoch": 0.3, "learning_rate": 4.733926807307441e-05, "loss": 1.4242, "step": 1000 } ], "max_steps": 6748, "num_train_epochs": 2, "total_flos": 1.731069029080105e+18, "trial_name": null, "trial_params": null }