{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5599888002239956,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 9.985998319798377e-05,
      "loss": 1.9301,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.971996639596752e-05,
      "loss": 1.8805,
      "step": 20
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.957994959395129e-05,
      "loss": 1.8763,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.943993279193504e-05,
      "loss": 1.8726,
      "step": 40
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.929991598991879e-05,
      "loss": 1.833,
      "step": 50
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.915989918790255e-05,
      "loss": 1.8522,
      "step": 60
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.90198823858863e-05,
      "loss": 1.8277,
      "step": 70
    },
    {
      "epoch": 0.02,
      "learning_rate": 9.887986558387007e-05,
      "loss": 1.8209,
      "step": 80
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.873984878185383e-05,
      "loss": 1.8471,
      "step": 90
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.859983197983759e-05,
      "loss": 1.8477,
      "step": 100
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.845981517782135e-05,
      "loss": 1.8327,
      "step": 110
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.83197983758051e-05,
      "loss": 1.8386,
      "step": 120
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.817978157378887e-05,
      "loss": 1.8212,
      "step": 130
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.803976477177262e-05,
      "loss": 1.8405,
      "step": 140
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.789974796975637e-05,
      "loss": 1.8402,
      "step": 150
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.775973116774013e-05,
      "loss": 1.8235,
      "step": 160
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.761971436572388e-05,
      "loss": 1.8265,
      "step": 170
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.747969756370765e-05,
      "loss": 1.8245,
      "step": 180
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.733968076169141e-05,
      "loss": 1.8216,
      "step": 190
    },
    {
      "epoch": 0.06,
      "learning_rate": 9.719966395967517e-05,
      "loss": 1.8325,
      "step": 200
    },
    {
      "epoch": 0.06,
      "learning_rate": 9.705964715765893e-05,
      "loss": 1.8024,
      "step": 210
    },
    {
      "epoch": 0.06,
      "learning_rate": 9.691963035564268e-05,
      "loss": 1.8275,
      "step": 220
    },
    {
      "epoch": 0.06,
      "learning_rate": 9.677961355362643e-05,
      "loss": 1.83,
      "step": 230
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.66395967516102e-05,
      "loss": 1.848,
      "step": 240
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.649957994959395e-05,
      "loss": 1.8137,
      "step": 250
    },
    {
      "epoch": 0.07,
      "learning_rate": 9.635956314757771e-05,
      "loss": 1.8112,
      "step": 260
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.621954634556148e-05,
      "loss": 1.8196,
      "step": 270
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.607952954354523e-05,
      "loss": 1.8207,
      "step": 280
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.5939512741529e-05,
      "loss": 1.8119,
      "step": 290
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.579949593951275e-05,
      "loss": 1.8277,
      "step": 300
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.56594791374965e-05,
      "loss": 1.8077,
      "step": 310
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.551946233548026e-05,
      "loss": 1.8254,
      "step": 320
    },
    {
      "epoch": 0.09,
      "learning_rate": 9.537944553346401e-05,
      "loss": 1.8276,
      "step": 330
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.523942873144778e-05,
      "loss": 1.8396,
      "step": 340
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.509941192943154e-05,
      "loss": 1.802,
      "step": 350
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.49593951274153e-05,
      "loss": 1.8177,
      "step": 360
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.481937832539906e-05,
      "loss": 1.8064,
      "step": 370
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.467936152338281e-05,
      "loss": 1.8059,
      "step": 380
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.453934472136658e-05,
      "loss": 1.8051,
      "step": 390
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.439932791935033e-05,
      "loss": 1.8205,
      "step": 400
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.425931111733408e-05,
      "loss": 1.804,
      "step": 410
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.411929431531784e-05,
      "loss": 1.8193,
      "step": 420
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.39792775133016e-05,
      "loss": 1.8263,
      "step": 430
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.383926071128536e-05,
      "loss": 1.7961,
      "step": 440
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.369924390926912e-05,
      "loss": 1.8249,
      "step": 450
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.355922710725287e-05,
      "loss": 1.8102,
      "step": 460
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.341921030523664e-05,
      "loss": 1.8218,
      "step": 470
    },
    {
      "epoch": 0.13,
      "learning_rate": 9.327919350322039e-05,
      "loss": 1.7999,
      "step": 480
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.313917670120414e-05,
      "loss": 1.8061,
      "step": 490
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.29991598991879e-05,
      "loss": 1.8169,
      "step": 500
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.285914309717166e-05,
      "loss": 1.8517,
      "step": 510
    },
    {
      "epoch": 0.15,
      "learning_rate": 9.271912629515542e-05,
      "loss": 1.8219,
      "step": 520
    },
    {
      "epoch": 0.15,
      "learning_rate": 9.257910949313919e-05,
      "loss": 1.8291,
      "step": 530
    },
    {
      "epoch": 0.15,
      "learning_rate": 9.243909269112294e-05,
      "loss": 1.8334,
      "step": 540
    },
    {
      "epoch": 0.15,
      "learning_rate": 9.22990758891067e-05,
      "loss": 1.8245,
      "step": 550
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.215905908709045e-05,
      "loss": 1.8263,
      "step": 560
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.20190422850742e-05,
      "loss": 1.7942,
      "step": 570
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.187902548305797e-05,
      "loss": 1.8024,
      "step": 580
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.173900868104172e-05,
      "loss": 1.8355,
      "step": 590
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.159899187902549e-05,
      "loss": 1.8203,
      "step": 600
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.145897507700925e-05,
      "loss": 1.8212,
      "step": 610
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.1318958274993e-05,
      "loss": 1.7932,
      "step": 620
    },
    {
      "epoch": 0.18,
      "learning_rate": 9.117894147297677e-05,
      "loss": 1.809,
      "step": 630
    },
    {
      "epoch": 0.18,
      "learning_rate": 9.103892467096052e-05,
      "loss": 1.8315,
      "step": 640
    },
    {
      "epoch": 0.18,
      "learning_rate": 9.089890786894428e-05,
      "loss": 1.8084,
      "step": 650
    },
    {
      "epoch": 0.18,
      "learning_rate": 9.075889106692804e-05,
      "loss": 1.8332,
      "step": 660
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.061887426491179e-05,
      "loss": 1.8034,
      "step": 670
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.047885746289555e-05,
      "loss": 1.8028,
      "step": 680
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.03388406608793e-05,
      "loss": 1.806,
      "step": 690
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.019882385886307e-05,
      "loss": 1.7862,
      "step": 700
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.005880705684683e-05,
      "loss": 1.8068,
      "step": 710
    },
    {
      "epoch": 0.2,
      "learning_rate": 8.991879025483058e-05,
      "loss": 1.8025,
      "step": 720
    },
    {
      "epoch": 0.2,
      "learning_rate": 8.977877345281435e-05,
      "loss": 1.8468,
      "step": 730
    },
    {
      "epoch": 0.21,
      "learning_rate": 8.96387566507981e-05,
      "loss": 1.7847,
      "step": 740
    },
    {
      "epoch": 0.21,
      "learning_rate": 8.949873984878185e-05,
      "loss": 1.7988,
      "step": 750
    },
    {
      "epoch": 0.21,
      "learning_rate": 8.935872304676562e-05,
      "loss": 1.835,
      "step": 760
    },
    {
      "epoch": 0.22,
      "learning_rate": 8.921870624474937e-05,
      "loss": 1.8026,
      "step": 770
    },
    {
      "epoch": 0.22,
      "learning_rate": 8.907868944273313e-05,
      "loss": 1.8238,
      "step": 780
    },
    {
      "epoch": 0.22,
      "learning_rate": 8.89386726407169e-05,
      "loss": 1.8125,
      "step": 790
    },
    {
      "epoch": 0.22,
      "learning_rate": 8.879865583870065e-05,
      "loss": 1.8421,
      "step": 800
    },
    {
      "epoch": 0.23,
      "learning_rate": 8.865863903668441e-05,
      "loss": 1.8194,
      "step": 810
    },
    {
      "epoch": 0.23,
      "learning_rate": 8.851862223466816e-05,
      "loss": 1.8034,
      "step": 820
    },
    {
      "epoch": 0.23,
      "learning_rate": 8.837860543265192e-05,
      "loss": 1.8106,
      "step": 830
    },
    {
      "epoch": 0.24,
      "learning_rate": 8.823858863063568e-05,
      "loss": 1.8022,
      "step": 840
    },
    {
      "epoch": 0.24,
      "learning_rate": 8.809857182861943e-05,
      "loss": 1.8103,
      "step": 850
    },
    {
      "epoch": 0.24,
      "learning_rate": 8.79585550266032e-05,
      "loss": 1.8371,
      "step": 860
    },
    {
      "epoch": 0.24,
      "learning_rate": 8.781853822458696e-05,
      "loss": 1.826,
      "step": 870
    },
    {
      "epoch": 0.25,
      "learning_rate": 8.767852142257071e-05,
      "loss": 1.8255,
      "step": 880
    },
    {
      "epoch": 0.25,
      "learning_rate": 8.753850462055448e-05,
      "loss": 1.8254,
      "step": 890
    },
    {
      "epoch": 0.25,
      "learning_rate": 8.739848781853823e-05,
      "loss": 1.813,
      "step": 900
    },
    {
      "epoch": 0.25,
      "learning_rate": 8.725847101652199e-05,
      "loss": 1.8175,
      "step": 910
    },
    {
      "epoch": 0.26,
      "learning_rate": 8.711845421450574e-05,
      "loss": 1.8055,
      "step": 920
    },
    {
      "epoch": 0.26,
      "learning_rate": 8.69784374124895e-05,
      "loss": 1.8174,
      "step": 930
    },
    {
      "epoch": 0.26,
      "learning_rate": 8.683842061047326e-05,
      "loss": 1.7836,
      "step": 940
    },
    {
      "epoch": 0.27,
      "learning_rate": 8.669840380845701e-05,
      "loss": 1.8147,
      "step": 950
    },
    {
      "epoch": 0.27,
      "learning_rate": 8.655838700644078e-05,
      "loss": 1.8026,
      "step": 960
    },
    {
      "epoch": 0.27,
      "learning_rate": 8.641837020442454e-05,
      "loss": 1.8242,
      "step": 970
    },
    {
      "epoch": 0.27,
      "learning_rate": 8.627835340240829e-05,
      "loss": 1.8233,
      "step": 980
    },
    {
      "epoch": 0.28,
      "learning_rate": 8.613833660039206e-05,
      "loss": 1.8135,
      "step": 990
    },
    {
      "epoch": 0.28,
      "learning_rate": 8.599831979837581e-05,
      "loss": 1.814,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "learning_rate": 8.585830299635956e-05,
      "loss": 1.8078,
      "step": 1010
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.571828619434332e-05,
      "loss": 1.8456,
      "step": 1020
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.557826939232708e-05,
      "loss": 1.7942,
      "step": 1030
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.543825259031084e-05,
      "loss": 1.7971,
      "step": 1040
    },
    {
      "epoch": 0.29,
      "learning_rate": 8.52982357882946e-05,
      "loss": 1.8072,
      "step": 1050
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.515821898627836e-05,
      "loss": 1.8096,
      "step": 1060
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.501820218426212e-05,
      "loss": 1.83,
      "step": 1070
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.487818538224587e-05,
      "loss": 1.8107,
      "step": 1080
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.473816858022962e-05,
      "loss": 1.805,
      "step": 1090
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.459815177821339e-05,
      "loss": 1.8201,
      "step": 1100
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.445813497619714e-05,
      "loss": 1.8202,
      "step": 1110
    },
    {
      "epoch": 0.31,
      "learning_rate": 8.43181181741809e-05,
      "loss": 1.8011,
      "step": 1120
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.417810137216467e-05,
      "loss": 1.795,
      "step": 1130
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.403808457014842e-05,
      "loss": 1.8051,
      "step": 1140
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.389806776813219e-05,
      "loss": 1.8127,
      "step": 1150
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.375805096611594e-05,
      "loss": 1.8095,
      "step": 1160
    },
    {
      "epoch": 0.33,
      "learning_rate": 8.36180341640997e-05,
      "loss": 1.8118,
      "step": 1170
    },
    {
      "epoch": 0.33,
      "learning_rate": 8.347801736208345e-05,
      "loss": 1.8195,
      "step": 1180
    },
    {
      "epoch": 0.33,
      "learning_rate": 8.33380005600672e-05,
      "loss": 1.8171,
      "step": 1190
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.319798375805097e-05,
      "loss": 1.7997,
      "step": 1200
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.305796695603472e-05,
      "loss": 1.8308,
      "step": 1210
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.291795015401849e-05,
      "loss": 1.8208,
      "step": 1220
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.277793335200225e-05,
      "loss": 1.808,
      "step": 1230
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.2637916549986e-05,
      "loss": 1.8106,
      "step": 1240
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.249789974796977e-05,
      "loss": 1.7959,
      "step": 1250
    },
    {
      "epoch": 0.35,
      "learning_rate": 8.235788294595352e-05,
      "loss": 1.8154,
      "step": 1260
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.221786614393727e-05,
      "loss": 1.8068,
      "step": 1270
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.207784934192103e-05,
      "loss": 1.8405,
      "step": 1280
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.193783253990478e-05,
      "loss": 1.8299,
      "step": 1290
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.179781573788855e-05,
      "loss": 1.7848,
      "step": 1300
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.165779893587231e-05,
      "loss": 1.7936,
      "step": 1310
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.151778213385607e-05,
      "loss": 1.8244,
      "step": 1320
    },
    {
      "epoch": 0.37,
      "learning_rate": 8.137776533183983e-05,
      "loss": 1.8164,
      "step": 1330
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.123774852982358e-05,
      "loss": 1.8106,
      "step": 1340
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.109773172780733e-05,
      "loss": 1.8173,
      "step": 1350
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.09577149257911e-05,
      "loss": 1.8049,
      "step": 1360
    },
    {
      "epoch": 0.38,
      "learning_rate": 8.081769812377485e-05,
      "loss": 1.7982,
      "step": 1370
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.067768132175861e-05,
      "loss": 1.814,
      "step": 1380
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.053766451974238e-05,
      "loss": 1.8146,
      "step": 1390
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.039764771772613e-05,
      "loss": 1.8239,
      "step": 1400
    },
    {
      "epoch": 0.39,
      "learning_rate": 8.02576309157099e-05,
      "loss": 1.802,
      "step": 1410
    },
    {
      "epoch": 0.4,
      "learning_rate": 8.011761411369365e-05,
      "loss": 1.8188,
      "step": 1420
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.997759731167741e-05,
      "loss": 1.828,
      "step": 1430
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.983758050966116e-05,
      "loss": 1.8091,
      "step": 1440
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.969756370764491e-05,
      "loss": 1.8141,
      "step": 1450
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.955754690562868e-05,
      "loss": 1.7822,
      "step": 1460
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.941753010361243e-05,
      "loss": 1.8111,
      "step": 1470
    },
    {
      "epoch": 0.41,
      "learning_rate": 7.92775133015962e-05,
      "loss": 1.8062,
      "step": 1480
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.913749649957996e-05,
      "loss": 1.8261,
      "step": 1490
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.899747969756371e-05,
      "loss": 1.8266,
      "step": 1500
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.885746289554748e-05,
      "loss": 1.7949,
      "step": 1510
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.871744609353123e-05,
      "loss": 1.8335,
      "step": 1520
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.857742929151498e-05,
      "loss": 1.8092,
      "step": 1530
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.843741248949874e-05,
      "loss": 1.8151,
      "step": 1540
    },
    {
      "epoch": 0.43,
      "learning_rate": 7.82973956874825e-05,
      "loss": 1.7941,
      "step": 1550
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.815737888546626e-05,
      "loss": 1.8149,
      "step": 1560
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.801736208345002e-05,
      "loss": 1.7763,
      "step": 1570
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.787734528143377e-05,
      "loss": 1.8153,
      "step": 1580
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.773732847941754e-05,
      "loss": 1.8114,
      "step": 1590
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.759731167740129e-05,
      "loss": 1.8236,
      "step": 1600
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.745729487538504e-05,
      "loss": 1.8221,
      "step": 1610
    },
    {
      "epoch": 0.45,
      "learning_rate": 7.731727807336881e-05,
      "loss": 1.8017,
      "step": 1620
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.717726127135256e-05,
      "loss": 1.7907,
      "step": 1630
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.703724446933632e-05,
      "loss": 1.8237,
      "step": 1640
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.689722766732009e-05,
      "loss": 1.7818,
      "step": 1650
    },
    {
      "epoch": 0.46,
      "learning_rate": 7.675721086530384e-05,
      "loss": 1.7932,
      "step": 1660
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.66171940632876e-05,
      "loss": 1.8069,
      "step": 1670
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.647717726127136e-05,
      "loss": 1.8438,
      "step": 1680
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.633716045925512e-05,
      "loss": 1.7775,
      "step": 1690
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.619714365723887e-05,
      "loss": 1.834,
      "step": 1700
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.605712685522262e-05,
      "loss": 1.7821,
      "step": 1710
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.591711005320639e-05,
      "loss": 1.7951,
      "step": 1720
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.577709325119014e-05,
      "loss": 1.815,
      "step": 1730
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.56370764491739e-05,
      "loss": 1.8183,
      "step": 1740
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.549705964715767e-05,
      "loss": 1.8088,
      "step": 1750
    },
    {
      "epoch": 0.49,
      "learning_rate": 7.535704284514142e-05,
      "loss": 1.8196,
      "step": 1760
    },
    {
      "epoch": 0.5,
      "learning_rate": 7.521702604312518e-05,
      "loss": 1.7952,
      "step": 1770
    },
    {
      "epoch": 0.5,
      "learning_rate": 7.507700924110894e-05,
      "loss": 1.8086,
      "step": 1780
    },
    {
      "epoch": 0.5,
      "learning_rate": 7.493699243909269e-05,
      "loss": 1.8127,
      "step": 1790
    },
    {
      "epoch": 0.5,
      "learning_rate": 7.479697563707645e-05,
      "loss": 1.8047,
      "step": 1800
    },
    {
      "epoch": 0.51,
      "learning_rate": 7.46569588350602e-05,
      "loss": 1.8227,
      "step": 1810
    },
    {
      "epoch": 0.51,
      "learning_rate": 7.451694203304397e-05,
      "loss": 1.8252,
      "step": 1820
    },
    {
      "epoch": 0.51,
      "learning_rate": 7.437692523102773e-05,
      "loss": 1.8022,
      "step": 1830
    },
    {
      "epoch": 0.52,
      "learning_rate": 7.423690842901148e-05,
      "loss": 1.8159,
      "step": 1840
    },
    {
      "epoch": 0.52,
      "learning_rate": 7.409689162699525e-05,
      "loss": 1.8097,
      "step": 1850
    },
    {
      "epoch": 0.52,
      "learning_rate": 7.3956874824979e-05,
      "loss": 1.8104,
      "step": 1860
    },
    {
      "epoch": 0.52,
      "learning_rate": 7.381685802296275e-05,
      "loss": 1.8061,
      "step": 1870
    },
    {
      "epoch": 0.53,
      "learning_rate": 7.367684122094652e-05,
      "loss": 1.8273,
      "step": 1880
    },
    {
      "epoch": 0.53,
      "learning_rate": 7.353682441893027e-05,
      "loss": 1.8178,
      "step": 1890
    },
    {
      "epoch": 0.53,
      "learning_rate": 7.339680761691403e-05,
      "loss": 1.8238,
      "step": 1900
    },
    {
      "epoch": 0.53,
      "learning_rate": 7.32567908148978e-05,
      "loss": 1.7897,
      "step": 1910
    },
    {
      "epoch": 0.54,
      "learning_rate": 7.311677401288155e-05,
      "loss": 1.8134,
      "step": 1920
    },
    {
      "epoch": 0.54,
      "learning_rate": 7.297675721086531e-05,
      "loss": 1.8154,
      "step": 1930
    },
    {
      "epoch": 0.54,
      "learning_rate": 7.283674040884906e-05,
      "loss": 1.8324,
      "step": 1940
    },
    {
      "epoch": 0.55,
      "learning_rate": 7.269672360683283e-05,
      "loss": 1.7943,
      "step": 1950
    },
    {
      "epoch": 0.55,
      "learning_rate": 7.255670680481658e-05,
      "loss": 1.8176,
      "step": 1960
    },
    {
      "epoch": 0.55,
      "learning_rate": 7.241669000280033e-05,
      "loss": 1.8162,
      "step": 1970
    },
    {
      "epoch": 0.55,
      "learning_rate": 7.22766732007841e-05,
      "loss": 1.8369,
      "step": 1980
    },
    {
      "epoch": 0.56,
      "learning_rate": 7.213665639876786e-05,
      "loss": 1.7854,
      "step": 1990
    },
    {
      "epoch": 0.56,
      "learning_rate": 7.199663959675161e-05,
      "loss": 1.8253,
      "step": 2000
    }
  ],
  "max_steps": 7142,
  "num_train_epochs": 2,
  "total_flos": 2.274070628401152e+18,
  "trial_name": null,
  "trial_params": null
}