{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.29631824579598487,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.9999729068921297e-05,
      "loss": 1.8898,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9998916281557476e-05,
      "loss": 1.7273,
      "step": 20
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999756165552527e-05,
      "loss": 1.6799,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999566522018553e-05,
      "loss": 1.6431,
      "step": 40
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.999322701664249e-05,
      "loss": 1.6153,
      "step": 50
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9990247097742984e-05,
      "loss": 1.5933,
      "step": 60
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9986725528075205e-05,
      "loss": 1.5913,
      "step": 70
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.998266238396737e-05,
      "loss": 1.5434,
      "step": 80
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997805775348605e-05,
      "loss": 1.5304,
      "step": 90
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.997291173643424e-05,
      "loss": 1.5531,
      "step": 100
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.996722444434921e-05,
      "loss": 1.5446,
      "step": 110
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.99609960005001e-05,
      "loss": 1.5352,
      "step": 120
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.995422653988524e-05,
      "loss": 1.5303,
      "step": 130
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.994691620922919e-05,
      "loss": 1.5449,
      "step": 140
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.993906516697964e-05,
      "loss": 1.5114,
      "step": 150
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9930673583303865e-05,
      "loss": 1.5043,
      "step": 160
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.992174164008515e-05,
      "loss": 1.5476,
      "step": 170
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.991226953091877e-05,
      "loss": 1.5107,
      "step": 180
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9902257461107824e-05,
      "loss": 1.5104,
      "step": 190
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9891705647658795e-05,
      "loss": 1.5298,
      "step": 200
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.988061431927681e-05,
      "loss": 1.4907,
      "step": 210
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.986898371636071e-05,
      "loss": 1.5127,
      "step": 220
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.985681409099784e-05,
      "loss": 1.5037,
      "step": 230
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.984410570695858e-05,
      "loss": 1.5029,
      "step": 240
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.983085883969063e-05,
      "loss": 1.4725,
      "step": 250
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.981707377631303e-05,
      "loss": 1.5148,
      "step": 260
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.9802750815609936e-05,
      "loss": 1.4993,
      "step": 270
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.978789026802419e-05,
      "loss": 1.5006,
      "step": 280
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9772492455650494e-05,
      "loss": 1.4885,
      "step": 290
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.975655771222855e-05,
      "loss": 1.4898,
      "step": 300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9740086383135706e-05,
      "loss": 1.4906,
      "step": 310
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.97230788253796e-05,
      "loss": 1.4796,
      "step": 320
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.970553540759028e-05,
      "loss": 1.4861,
      "step": 330
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.968745651001231e-05,
      "loss": 1.4827,
      "step": 340
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9668842524496526e-05,
      "loss": 1.4884,
      "step": 350
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.964969385449149e-05,
      "loss": 1.4873,
      "step": 360
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.96300109150348e-05,
      "loss": 1.4848,
      "step": 370
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.960979413274404e-05,
      "loss": 1.4881,
      "step": 380
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.9589043945807594e-05,
      "loss": 1.4618,
      "step": 390
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.9567760803975105e-05,
      "loss": 1.4858,
      "step": 400
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.954594516854773e-05,
      "loss": 1.4777,
      "step": 410
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.952359751236817e-05,
      "loss": 1.4828,
      "step": 420
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.950071831981038e-05,
      "loss": 1.4571,
      "step": 430
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.9477308086769117e-05,
      "loss": 1.4724,
      "step": 440
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.945336732064915e-05,
      "loss": 1.4771,
      "step": 450
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.9428896540354294e-05,
      "loss": 1.4604,
      "step": 460
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.940389627627613e-05,
      "loss": 1.4815,
      "step": 470
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.937836707028255e-05,
      "loss": 1.4859,
      "step": 480
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.935230947570597e-05,
      "loss": 1.4715,
      "step": 490
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.932572405733137e-05,
      "loss": 1.4759,
      "step": 500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.929861139138404e-05,
      "loss": 1.4678,
      "step": 510
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.9270972065517083e-05,
      "loss": 1.4754,
      "step": 520
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.924280667879869e-05,
      "loss": 1.462,
      "step": 530
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.921411584169915e-05,
      "loss": 1.4704,
      "step": 540
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.918490017607761e-05,
      "loss": 1.4661,
      "step": 550
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.915516031516863e-05,
      "loss": 1.471,
      "step": 560
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.912489690356841e-05,
      "loss": 1.451,
      "step": 570
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.909411059722084e-05,
      "loss": 1.4411,
      "step": 580
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.9062802063403316e-05,
      "loss": 1.456,
      "step": 590
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.90309719807122e-05,
      "loss": 1.4678,
      "step": 600
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.8998621039048205e-05,
      "loss": 1.479,
      "step": 610
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.896574993960136e-05,
      "loss": 1.4471,
      "step": 620
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.893235939483587e-05,
      "loss": 1.453,
      "step": 630
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8898450128474626e-05,
      "loss": 1.4696,
      "step": 640
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.886402287548357e-05,
      "loss": 1.4526,
      "step": 650
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.8829078382055725e-05,
      "loss": 1.4429,
      "step": 660
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.8793617405595025e-05,
      "loss": 1.4491,
      "step": 670
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.8757640714699924e-05,
      "loss": 1.4411,
      "step": 680
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.872114908914671e-05,
      "loss": 1.4543,
      "step": 690
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.8684143319872636e-05,
      "loss": 1.4556,
      "step": 700
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.864662420895873e-05,
      "loss": 1.4506,
      "step": 710
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.860859256961244e-05,
      "loss": 1.4671,
      "step": 720
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.857004922615002e-05,
      "loss": 1.4469,
      "step": 730
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.8530995013978645e-05,
      "loss": 1.4554,
      "step": 740
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.84914307795783e-05,
      "loss": 1.4671,
      "step": 750
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.845135738048343e-05,
      "loss": 1.445,
      "step": 760
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.841077568526439e-05,
      "loss": 1.4469,
      "step": 770
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.836968657350857e-05,
      "loss": 1.4677,
      "step": 780
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.832809093580135e-05,
      "loss": 1.4653,
      "step": 790
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.8285989673706826e-05,
      "loss": 1.4342,
      "step": 800
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.824338369974822e-05,
      "loss": 1.458,
      "step": 810
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.8200273937388126e-05,
      "loss": 1.4541,
      "step": 820
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.81566613210085e-05,
      "loss": 1.4324,
      "step": 830
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.81125467958904e-05,
      "loss": 1.4405,
      "step": 840
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.80679313181935e-05,
      "loss": 1.4408,
      "step": 850
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.8022815854935356e-05,
      "loss": 1.4395,
      "step": 860
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.797720138397045e-05,
      "loss": 1.4359,
      "step": 870
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.793108889396902e-05,
      "loss": 1.442,
      "step": 880
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.7884479384395594e-05,
      "loss": 1.4566,
      "step": 890
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.7837373865487345e-05,
      "loss": 1.4257,
      "step": 900
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.77897733582322e-05,
      "loss": 1.4755,
      "step": 910
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.774167889434671e-05,
      "loss": 1.4476,
      "step": 920
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.769309151625366e-05,
      "loss": 1.4531,
      "step": 930
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.7644012277059516e-05,
      "loss": 1.447,
      "step": 940
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.7594442240531574e-05,
      "loss": 1.4201,
      "step": 950
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.754438248107491e-05,
      "loss": 1.4323,
      "step": 960
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.7493834083709104e-05,
      "loss": 1.4432,
      "step": 970
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.7442798144044695e-05,
      "loss": 1.4339,
      "step": 980
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.739127576825945e-05,
      "loss": 1.4477,
      "step": 990
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.733926807307441e-05,
      "loss": 1.4242,
      "step": 1000
    }
  ],
  "max_steps": 6748,
  "num_train_epochs": 2,
  "total_flos": 1.731069029080105e+18,
  "trial_name": null,
  "trial_params": null
}