|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.1852729831839395, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9999729068921297e-05, |
|
"loss": 1.8898, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9998916281557476e-05, |
|
"loss": 1.7273, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.999756165552527e-05, |
|
"loss": 1.6799, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.999566522018553e-05, |
|
"loss": 1.6431, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.999322701664249e-05, |
|
"loss": 1.6153, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9990247097742984e-05, |
|
"loss": 1.5933, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9986725528075205e-05, |
|
"loss": 1.5913, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.998266238396737e-05, |
|
"loss": 1.5434, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.997805775348605e-05, |
|
"loss": 1.5304, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.997291173643424e-05, |
|
"loss": 1.5531, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.996722444434921e-05, |
|
"loss": 1.5446, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.99609960005001e-05, |
|
"loss": 1.5352, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.995422653988524e-05, |
|
"loss": 1.5303, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.994691620922919e-05, |
|
"loss": 1.5449, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.993906516697964e-05, |
|
"loss": 1.5114, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9930673583303865e-05, |
|
"loss": 1.5043, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.992174164008515e-05, |
|
"loss": 1.5476, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.991226953091877e-05, |
|
"loss": 1.5107, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9902257461107824e-05, |
|
"loss": 1.5104, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9891705647658795e-05, |
|
"loss": 1.5298, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.988061431927681e-05, |
|
"loss": 1.4907, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.986898371636071e-05, |
|
"loss": 1.5127, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.985681409099784e-05, |
|
"loss": 1.5037, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.984410570695858e-05, |
|
"loss": 1.5029, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.983085883969063e-05, |
|
"loss": 1.4725, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.981707377631303e-05, |
|
"loss": 1.5148, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9802750815609936e-05, |
|
"loss": 1.4993, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.978789026802419e-05, |
|
"loss": 1.5006, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9772492455650494e-05, |
|
"loss": 1.4885, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.975655771222855e-05, |
|
"loss": 1.4898, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9740086383135706e-05, |
|
"loss": 1.4906, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.97230788253796e-05, |
|
"loss": 1.4796, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.970553540759028e-05, |
|
"loss": 1.4861, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.968745651001231e-05, |
|
"loss": 1.4827, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9668842524496526e-05, |
|
"loss": 1.4884, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.964969385449149e-05, |
|
"loss": 1.4873, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.96300109150348e-05, |
|
"loss": 1.4848, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.960979413274404e-05, |
|
"loss": 1.4881, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9589043945807594e-05, |
|
"loss": 1.4618, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9567760803975105e-05, |
|
"loss": 1.4858, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.954594516854773e-05, |
|
"loss": 1.4777, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.952359751236817e-05, |
|
"loss": 1.4828, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.950071831981038e-05, |
|
"loss": 1.4571, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9477308086769117e-05, |
|
"loss": 1.4724, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.945336732064915e-05, |
|
"loss": 1.4771, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9428896540354294e-05, |
|
"loss": 1.4604, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.940389627627613e-05, |
|
"loss": 1.4815, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.937836707028255e-05, |
|
"loss": 1.4859, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.935230947570597e-05, |
|
"loss": 1.4715, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.932572405733137e-05, |
|
"loss": 1.4759, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.929861139138404e-05, |
|
"loss": 1.4678, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9270972065517083e-05, |
|
"loss": 1.4754, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.924280667879869e-05, |
|
"loss": 1.462, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.921411584169915e-05, |
|
"loss": 1.4704, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.918490017607761e-05, |
|
"loss": 1.4661, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.915516031516863e-05, |
|
"loss": 1.471, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.912489690356841e-05, |
|
"loss": 1.451, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.909411059722084e-05, |
|
"loss": 1.4411, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9062802063403316e-05, |
|
"loss": 1.456, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.90309719807122e-05, |
|
"loss": 1.4678, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.8998621039048205e-05, |
|
"loss": 1.479, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.896574993960136e-05, |
|
"loss": 1.4471, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.893235939483587e-05, |
|
"loss": 1.453, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.8898450128474626e-05, |
|
"loss": 1.4696, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.886402287548357e-05, |
|
"loss": 1.4526, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8829078382055725e-05, |
|
"loss": 1.4429, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8793617405595025e-05, |
|
"loss": 1.4491, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8757640714699924e-05, |
|
"loss": 1.4411, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.872114908914671e-05, |
|
"loss": 1.4543, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8684143319872636e-05, |
|
"loss": 1.4556, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.864662420895873e-05, |
|
"loss": 1.4506, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.860859256961244e-05, |
|
"loss": 1.4671, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.857004922615002e-05, |
|
"loss": 1.4469, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.8530995013978645e-05, |
|
"loss": 1.4554, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.84914307795783e-05, |
|
"loss": 1.4671, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.845135738048343e-05, |
|
"loss": 1.445, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.841077568526439e-05, |
|
"loss": 1.4469, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.836968657350857e-05, |
|
"loss": 1.4677, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.832809093580135e-05, |
|
"loss": 1.4653, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.8285989673706826e-05, |
|
"loss": 1.4342, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.824338369974822e-05, |
|
"loss": 1.458, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.8200273937388126e-05, |
|
"loss": 1.4541, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.81566613210085e-05, |
|
"loss": 1.4324, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.81125467958904e-05, |
|
"loss": 1.4405, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.80679313181935e-05, |
|
"loss": 1.4408, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.8022815854935356e-05, |
|
"loss": 1.4395, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.797720138397045e-05, |
|
"loss": 1.4359, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.793108889396902e-05, |
|
"loss": 1.442, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.7884479384395594e-05, |
|
"loss": 1.4566, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.7837373865487345e-05, |
|
"loss": 1.4257, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.77897733582322e-05, |
|
"loss": 1.4755, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.774167889434671e-05, |
|
"loss": 1.4476, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.769309151625366e-05, |
|
"loss": 1.4531, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.7644012277059516e-05, |
|
"loss": 1.447, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.7594442240531574e-05, |
|
"loss": 1.4201, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.754438248107491e-05, |
|
"loss": 1.4323, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.7493834083709104e-05, |
|
"loss": 1.4432, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.7442798144044695e-05, |
|
"loss": 1.4339, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.739127576825945e-05, |
|
"loss": 1.4477, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.733926807307441e-05, |
|
"loss": 1.4242, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.728677618572965e-05, |
|
"loss": 1.4341, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.723380124395985e-05, |
|
"loss": 1.4526, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.7180344395969675e-05, |
|
"loss": 1.4402, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.712640680040884e-05, |
|
"loss": 1.4257, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.707198962634701e-05, |
|
"loss": 1.4232, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.70170940532485e-05, |
|
"loss": 1.4485, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.6961721270946635e-05, |
|
"loss": 1.456, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.690587247961804e-05, |
|
"loss": 1.4555, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.684954888975657e-05, |
|
"loss": 1.4376, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6792751722147104e-05, |
|
"loss": 1.4353, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6735482207839074e-05, |
|
"loss": 1.4226, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.6677741588119784e-05, |
|
"loss": 1.4315, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.66195311144875e-05, |
|
"loss": 1.4303, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.6560852048624345e-05, |
|
"loss": 1.4288, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.650170566236892e-05, |
|
"loss": 1.4539, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.6442093237688756e-05, |
|
"loss": 1.4527, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.6382016066652556e-05, |
|
"loss": 1.4406, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.632147545140212e-05, |
|
"loss": 1.4233, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.626047270412419e-05, |
|
"loss": 1.426, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.619900914702198e-05, |
|
"loss": 1.4577, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.613708611228652e-05, |
|
"loss": 1.4313, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.607470494206776e-05, |
|
"loss": 1.4129, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.601186698844554e-05, |
|
"loss": 1.4368, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.594857361340021e-05, |
|
"loss": 1.4342, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.588482618878316e-05, |
|
"loss": 1.4438, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.582062609628709e-05, |
|
"loss": 1.4263, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.575597472741601e-05, |
|
"loss": 1.4379, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.569087348345512e-05, |
|
"loss": 1.4221, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.562532377544046e-05, |
|
"loss": 1.4414, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.5559327024128265e-05, |
|
"loss": 1.4395, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.549288465996421e-05, |
|
"loss": 1.4278, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.542599812305243e-05, |
|
"loss": 1.4344, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.535866886312423e-05, |
|
"loss": 1.4352, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.529089833950675e-05, |
|
"loss": 1.4133, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.5222688021091266e-05, |
|
"loss": 1.4506, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.5154039386301385e-05, |
|
"loss": 1.4295, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.5084953923061016e-05, |
|
"loss": 1.4389, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.5015433128762065e-05, |
|
"loss": 1.4247, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.494547851023205e-05, |
|
"loss": 1.4347, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.487509158370139e-05, |
|
"loss": 1.4133, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.480427387477056e-05, |
|
"loss": 1.4296, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.473302691837702e-05, |
|
"loss": 1.4353, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.466135225876194e-05, |
|
"loss": 1.4377, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.458925144943676e-05, |
|
"loss": 1.4168, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.451672605314948e-05, |
|
"loss": 1.4334, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.444377764185082e-05, |
|
"loss": 1.44, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.43704077966601e-05, |
|
"loss": 1.4375, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.4296618107831036e-05, |
|
"loss": 1.447, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.422241017471722e-05, |
|
"loss": 1.4151, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.414778560573749e-05, |
|
"loss": 1.4388, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.4072746018341036e-05, |
|
"loss": 1.4228, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.399729303897238e-05, |
|
"loss": 1.4104, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.392142830303608e-05, |
|
"loss": 1.4441, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.384515345486131e-05, |
|
"loss": 1.4282, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.376847014766623e-05, |
|
"loss": 1.4271, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.369138004352212e-05, |
|
"loss": 1.4223, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.3613884813317406e-05, |
|
"loss": 1.425, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.3535986136721377e-05, |
|
"loss": 1.4392, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.3457685702147834e-05, |
|
"loss": 1.4097, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.3378985206718484e-05, |
|
"loss": 1.4405, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.329988635622611e-05, |
|
"loss": 1.4311, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.322039086509769e-05, |
|
"loss": 1.4358, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.3140500456357145e-05, |
|
"loss": 1.4114, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.306021686158805e-05, |
|
"loss": 1.4165, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.297954182089609e-05, |
|
"loss": 1.4309, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.289847708287129e-05, |
|
"loss": 1.4215, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.2817024404550246e-05, |
|
"loss": 1.4124, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.2735185551377895e-05, |
|
"loss": 1.4001, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.265296229716935e-05, |
|
"loss": 1.4302, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.25703564240714e-05, |
|
"loss": 1.4211, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.2487369722523906e-05, |
|
"loss": 1.4423, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.240400399122101e-05, |
|
"loss": 1.4299, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.232026103707209e-05, |
|
"loss": 1.4214, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.223614267516268e-05, |
|
"loss": 1.4348, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.215165072871505e-05, |
|
"loss": 1.4315, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.206678702904874e-05, |
|
"loss": 1.4098, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.198155341554084e-05, |
|
"loss": 1.4242, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1895951735586145e-05, |
|
"loss": 1.4272, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1809983844557085e-05, |
|
"loss": 1.4452, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.172365160576355e-05, |
|
"loss": 1.431, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.163695689041245e-05, |
|
"loss": 1.4389, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.154990157756722e-05, |
|
"loss": 1.413, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.1462487554107036e-05, |
|
"loss": 1.3893, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.137471671468596e-05, |
|
"loss": 1.4052, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.128659096169183e-05, |
|
"loss": 1.4173, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.1198112205205096e-05, |
|
"loss": 1.4012, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.110928236295734e-05, |
|
"loss": 1.4119, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.102010336028975e-05, |
|
"loss": 1.4111, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0930577130111424e-05, |
|
"loss": 1.4156, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.084070561285739e-05, |
|
"loss": 1.4419, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0750490756446624e-05, |
|
"loss": 1.4121, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0659934516239795e-05, |
|
"loss": 1.4204, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.056903885499689e-05, |
|
"loss": 1.4032, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.047780574283466e-05, |
|
"loss": 1.4207, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.038623715718397e-05, |
|
"loss": 1.4095, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.029433508274686e-05, |
|
"loss": 1.4228, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0202101511453586e-05, |
|
"loss": 1.4141, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.010953844241943e-05, |
|
"loss": 1.4323, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.001664788190135e-05, |
|
"loss": 1.4087, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.992343184325453e-05, |
|
"loss": 1.4186, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.982989234688873e-05, |
|
"loss": 1.4264, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.973603142022448e-05, |
|
"loss": 1.4417, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.964185109764915e-05, |
|
"loss": 1.4075, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.954735342047285e-05, |
|
"loss": 1.4143, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.945254043688419e-05, |
|
"loss": 1.4176, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.935741420190587e-05, |
|
"loss": 1.4214, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.926197677735018e-05, |
|
"loss": 1.4256, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9166230231774276e-05, |
|
"loss": 1.4075, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9070176640435335e-05, |
|
"loss": 1.3887, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.897381808524562e-05, |
|
"loss": 1.4225, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.887715665472729e-05, |
|
"loss": 1.4114, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.8780194443967226e-05, |
|
"loss": 1.4316, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.8682933554571524e-05, |
|
"loss": 1.4168, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.858537609461999e-05, |
|
"loss": 1.4237, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.8487524178620464e-05, |
|
"loss": 1.4373, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.838937992746295e-05, |
|
"loss": 1.4089, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.8290945468373684e-05, |
|
"loss": 1.4319, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.8192222934869e-05, |
|
"loss": 1.4035, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.809321446670909e-05, |
|
"loss": 1.4161, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.799392220985164e-05, |
|
"loss": 1.4136, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.789434831640533e-05, |
|
"loss": 1.4188, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.779449494458312e-05, |
|
"loss": 1.4203, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.769436425865557e-05, |
|
"loss": 1.4263, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.759395842890384e-05, |
|
"loss": 1.4295, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.749327963157274e-05, |
|
"loss": 1.4144, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.739233004882346e-05, |
|
"loss": 1.4162, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.729111186868635e-05, |
|
"loss": 1.4099, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.718962728501348e-05, |
|
"loss": 1.3878, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.708787849743106e-05, |
|
"loss": 1.4399, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.69858677112918e-05, |
|
"loss": 1.4249, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.688359713762707e-05, |
|
"loss": 1.3925, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.6781068993099034e-05, |
|
"loss": 1.4036, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.667828549995255e-05, |
|
"loss": 1.3986, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.657524888596703e-05, |
|
"loss": 1.4298, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.6471961384408155e-05, |
|
"loss": 1.4016, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.636842523397945e-05, |
|
"loss": 1.3992, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.626464267877381e-05, |
|
"loss": 1.4441, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.616061596822478e-05, |
|
"loss": 1.3967, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.6056347357057893e-05, |
|
"loss": 1.4252, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.595183910524173e-05, |
|
"loss": 1.4209, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.5847093477938956e-05, |
|
"loss": 1.4133, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.5742112745457235e-05, |
|
"loss": 1.4313, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.563689918320002e-05, |
|
"loss": 1.4275, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.5531455071617226e-05, |
|
"loss": 1.421, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.542578269615579e-05, |
|
"loss": 1.4402, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.5319884347210186e-05, |
|
"loss": 1.4176, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.521376232007271e-05, |
|
"loss": 1.4117, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.5107418914883794e-05, |
|
"loss": 1.41, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.500085643658211e-05, |
|
"loss": 1.4313, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.489407719485464e-05, |
|
"loss": 1.4035, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.4787083504086605e-05, |
|
"loss": 1.4057, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.467987768331127e-05, |
|
"loss": 1.4125, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.457246205615974e-05, |
|
"loss": 1.4056, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.446483895081054e-05, |
|
"loss": 1.4082, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.4357010699939215e-05, |
|
"loss": 1.3915, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.424897964066769e-05, |
|
"loss": 1.4012, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.4140748114513685e-05, |
|
"loss": 1.4251, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.403231846733994e-05, |
|
"loss": 1.4013, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.392369304930334e-05, |
|
"loss": 1.4076, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.3814874214804034e-05, |
|
"loss": 1.3978, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.3705864322434354e-05, |
|
"loss": 1.408, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.359666573492772e-05, |
|
"loss": 1.3888, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.3487280819107415e-05, |
|
"loss": 1.4052, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.33777119458353e-05, |
|
"loss": 1.4286, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.326796148996042e-05, |
|
"loss": 1.4241, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.315803183026753e-05, |
|
"loss": 1.4049, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.304792534942553e-05, |
|
"loss": 1.3826, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.293764443393582e-05, |
|
"loss": 1.413, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.2827191474080605e-05, |
|
"loss": 1.4161, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.2716568863871044e-05, |
|
"loss": 1.382, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.260577900099539e-05, |
|
"loss": 1.381, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.2494824286767e-05, |
|
"loss": 1.396, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.2383707126072315e-05, |
|
"loss": 1.3923, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.2272429927318707e-05, |
|
"loss": 1.4044, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.21609951023823e-05, |
|
"loss": 1.4073, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.204940506655568e-05, |
|
"loss": 1.4178, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.1937662238495544e-05, |
|
"loss": 1.4179, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.1825769040170285e-05, |
|
"loss": 1.4003, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.1713727896807505e-05, |
|
"loss": 1.4176, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.160154123684143e-05, |
|
"loss": 1.4179, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.1489211491860276e-05, |
|
"loss": 1.4098, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.1376741096553576e-05, |
|
"loss": 1.4087, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.126413248865935e-05, |
|
"loss": 1.3971, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.115138810891134e-05, |
|
"loss": 1.3915, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.103851040098607e-05, |
|
"loss": 1.4041, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.0925501811449855e-05, |
|
"loss": 1.4129, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.081236478970583e-05, |
|
"loss": 1.3948, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.069910178794082e-05, |
|
"loss": 1.4116, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0585715261072206e-05, |
|
"loss": 1.4029, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.04722076666947e-05, |
|
"loss": 1.399, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0358581465027125e-05, |
|
"loss": 1.4061, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.024483911885901e-05, |
|
"loss": 1.4152, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.013098309349729e-05, |
|
"loss": 1.4257, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.0017015856712814e-05, |
|
"loss": 1.417, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.9902939878686915e-05, |
|
"loss": 1.3952, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.978875763195779e-05, |
|
"loss": 1.4252, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.9674471591367005e-05, |
|
"loss": 1.3982, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.9560084234005765e-05, |
|
"loss": 1.3948, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.944559803916128e-05, |
|
"loss": 1.4127, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.9331015488263024e-05, |
|
"loss": 1.4239, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.9216339064828914e-05, |
|
"loss": 1.3889, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.910157125441152e-05, |
|
"loss": 1.403, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.898671454454418e-05, |
|
"loss": 1.4106, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8871771424687078e-05, |
|
"loss": 1.4123, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8756744386173284e-05, |
|
"loss": 1.4137, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8641635922154774e-05, |
|
"loss": 1.4009, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8526448527548372e-05, |
|
"loss": 1.4159, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8411184698981684e-05, |
|
"loss": 1.4071, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.829584693473899e-05, |
|
"loss": 1.41, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.8180437734707064e-05, |
|
"loss": 1.4038, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.8064959600321043e-05, |
|
"loss": 1.4069, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.7949415034510163e-05, |
|
"loss": 1.4096, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.7833806541643544e-05, |
|
"loss": 1.3821, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.7718136627475865e-05, |
|
"loss": 1.3886, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.76024077990931e-05, |
|
"loss": 1.3923, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.748662256485816e-05, |
|
"loss": 1.4072, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.7370783434356512e-05, |
|
"loss": 1.4126, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.7254892918341802e-05, |
|
"loss": 1.4238, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.713895352868144e-05, |
|
"loss": 1.4183, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.702296777830212e-05, |
|
"loss": 1.4056, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6906938181135423e-05, |
|
"loss": 1.4096, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6790867252063247e-05, |
|
"loss": 1.4018, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.6674757506863357e-05, |
|
"loss": 1.3922, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.655861146215483e-05, |
|
"loss": 1.4054, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.6442431635343528e-05, |
|
"loss": 1.3914, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.6326220544567514e-05, |
|
"loss": 1.3851, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.620998070864248e-05, |
|
"loss": 1.4102, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.6093714647007156e-05, |
|
"loss": 1.4069, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.5977424879668705e-05, |
|
"loss": 1.3919, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5861113927148096e-05, |
|
"loss": 1.4073, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.5744784310425467e-05, |
|
"loss": 1.4025, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.562843855088551e-05, |
|
"loss": 1.3805, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.5512079170262793e-05, |
|
"loss": 1.4032, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.5395708690587117e-05, |
|
"loss": 1.4232, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.527932963412885e-05, |
|
"loss": 1.3897, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.5162944523344256e-05, |
|
"loss": 1.4008, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.5046555880820826e-05, |
|
"loss": 1.3936, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.4930166229222597e-05, |
|
"loss": 1.394, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.481377809123547e-05, |
|
"loss": 1.3903, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.469739398951256e-05, |
|
"loss": 1.3869, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.458101644661947e-05, |
|
"loss": 1.429, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.4464647984979667e-05, |
|
"loss": 1.3987, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.4348291126819783e-05, |
|
"loss": 1.38, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.4231948394114936e-05, |
|
"loss": 1.3906, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.4115622308534096e-05, |
|
"loss": 1.3931, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.399931539138541e-05, |
|
"loss": 1.4135, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.388303016356156e-05, |
|
"loss": 1.3952, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.3766769145485125e-05, |
|
"loss": 1.3972, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.3650534857053943e-05, |
|
"loss": 1.3937, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.3534329817586513e-05, |
|
"loss": 1.3936, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.3418156545767365e-05, |
|
"loss": 1.397, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.3302017559592494e-05, |
|
"loss": 1.3849, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.318591537631476e-05, |
|
"loss": 1.4118, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.3069852512389335e-05, |
|
"loss": 1.414, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.2953831483419184e-05, |
|
"loss": 1.4088, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.2837854804100504e-05, |
|
"loss": 1.3773, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.272192498816825e-05, |
|
"loss": 1.3977, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.260604454834162e-05, |
|
"loss": 1.3591, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.2490215996269617e-05, |
|
"loss": 1.4023, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.237444184247661e-05, |
|
"loss": 1.3873, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.2258724596307915e-05, |
|
"loss": 1.3826, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.214306676587539e-05, |
|
"loss": 1.3732, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.2027470858003098e-05, |
|
"loss": 1.3988, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.1911939378172956e-05, |
|
"loss": 1.4036, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.1796474830470447e-05, |
|
"loss": 1.4236, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.1681079717530328e-05, |
|
"loss": 1.4032, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.156575654048239e-05, |
|
"loss": 1.39, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.145050779889725e-05, |
|
"loss": 1.3757, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.1335335990732186e-05, |
|
"loss": 1.3934, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.1220243612276964e-05, |
|
"loss": 1.3979, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.110523315809978e-05, |
|
"loss": 1.4181, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.0990307120993134e-05, |
|
"loss": 1.406, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.0875467991919854e-05, |
|
"loss": 1.4036, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.076071825995906e-05, |
|
"loss": 1.4095, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.0646060412252246e-05, |
|
"loss": 1.4048, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.0531496933949363e-05, |
|
"loss": 1.3874, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.0417030308154953e-05, |
|
"loss": 1.3793, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.0302663015874322e-05, |
|
"loss": 1.4152, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.0188397535959785e-05, |
|
"loss": 1.3738, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.007423634505692e-05, |
|
"loss": 1.4033, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.9960181917550897e-05, |
|
"loss": 1.3753, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.9846236725512835e-05, |
|
"loss": 1.3791, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.973240323864624e-05, |
|
"loss": 1.3837, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.9618683924233467e-05, |
|
"loss": 1.3945, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.9505081247082237e-05, |
|
"loss": 1.3804, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.9391597669472213e-05, |
|
"loss": 1.3964, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.927823565110165e-05, |
|
"loss": 1.3983, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.9164997649034058e-05, |
|
"loss": 1.4169, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.9051886117644963e-05, |
|
"loss": 1.4101, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.89389035085687e-05, |
|
"loss": 1.3823, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.8826052270645276e-05, |
|
"loss": 1.3827, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.8713334849867315e-05, |
|
"loss": 1.4035, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.8600753689327e-05, |
|
"loss": 1.4081, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.8488311229163152e-05, |
|
"loss": 1.3919, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.8376009906508338e-05, |
|
"loss": 1.3854, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.826385215543603e-05, |
|
"loss": 1.3924, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.8151840406907873e-05, |
|
"loss": 1.3851, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.8039977088720972e-05, |
|
"loss": 1.3707, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.7928264625455282e-05, |
|
"loss": 1.3998, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.7816705438421064e-05, |
|
"loss": 1.3931, |
|
"step": 4000 |
|
} |
|
], |
|
"max_steps": 6748, |
|
"num_train_epochs": 2, |
|
"total_flos": 6.922102358209987e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|