{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5599888002239956, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.985998319798377e-05, "loss": 1.9301, "step": 10 }, { "epoch": 0.01, "learning_rate": 9.971996639596752e-05, "loss": 1.8805, "step": 20 }, { "epoch": 0.01, "learning_rate": 9.957994959395129e-05, "loss": 1.8763, "step": 30 }, { "epoch": 0.01, "learning_rate": 9.943993279193504e-05, "loss": 1.8726, "step": 40 }, { "epoch": 0.01, "learning_rate": 9.929991598991879e-05, "loss": 1.833, "step": 50 }, { "epoch": 0.02, "learning_rate": 9.915989918790255e-05, "loss": 1.8522, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.90198823858863e-05, "loss": 1.8277, "step": 70 }, { "epoch": 0.02, "learning_rate": 9.887986558387007e-05, "loss": 1.8209, "step": 80 }, { "epoch": 0.03, "learning_rate": 9.873984878185383e-05, "loss": 1.8471, "step": 90 }, { "epoch": 0.03, "learning_rate": 9.859983197983759e-05, "loss": 1.8477, "step": 100 }, { "epoch": 0.03, "learning_rate": 9.845981517782135e-05, "loss": 1.8327, "step": 110 }, { "epoch": 0.03, "learning_rate": 9.83197983758051e-05, "loss": 1.8386, "step": 120 }, { "epoch": 0.04, "learning_rate": 9.817978157378887e-05, "loss": 1.8212, "step": 130 }, { "epoch": 0.04, "learning_rate": 9.803976477177262e-05, "loss": 1.8405, "step": 140 }, { "epoch": 0.04, "learning_rate": 9.789974796975637e-05, "loss": 1.8402, "step": 150 }, { "epoch": 0.04, "learning_rate": 9.775973116774013e-05, "loss": 1.8235, "step": 160 }, { "epoch": 0.05, "learning_rate": 9.761971436572388e-05, "loss": 1.8265, "step": 170 }, { "epoch": 0.05, "learning_rate": 9.747969756370765e-05, "loss": 1.8245, "step": 180 }, { "epoch": 0.05, "learning_rate": 9.733968076169141e-05, "loss": 1.8216, "step": 190 }, { "epoch": 0.06, "learning_rate": 9.719966395967517e-05, "loss": 1.8325, "step": 200 }, { "epoch": 0.06, "learning_rate": 9.705964715765893e-05, "loss": 1.8024, "step": 210 }, { "epoch": 0.06, "learning_rate": 9.691963035564268e-05, "loss": 1.8275, "step": 220 }, { "epoch": 0.06, "learning_rate": 9.677961355362643e-05, "loss": 1.83, "step": 230 }, { "epoch": 0.07, "learning_rate": 9.66395967516102e-05, "loss": 1.848, "step": 240 }, { "epoch": 0.07, "learning_rate": 9.649957994959395e-05, "loss": 1.8137, "step": 250 }, { "epoch": 0.07, "learning_rate": 9.635956314757771e-05, "loss": 1.8112, "step": 260 }, { "epoch": 0.08, "learning_rate": 9.621954634556148e-05, "loss": 1.8196, "step": 270 }, { "epoch": 0.08, "learning_rate": 9.607952954354523e-05, "loss": 1.8207, "step": 280 }, { "epoch": 0.08, "learning_rate": 9.5939512741529e-05, "loss": 1.8119, "step": 290 }, { "epoch": 0.08, "learning_rate": 9.579949593951275e-05, "loss": 1.8277, "step": 300 }, { "epoch": 0.09, "learning_rate": 9.56594791374965e-05, "loss": 1.8077, "step": 310 }, { "epoch": 0.09, "learning_rate": 9.551946233548026e-05, "loss": 1.8254, "step": 320 }, { "epoch": 0.09, "learning_rate": 9.537944553346401e-05, "loss": 1.8276, "step": 330 }, { "epoch": 0.1, "learning_rate": 9.523942873144778e-05, "loss": 1.8396, "step": 340 }, { "epoch": 0.1, "learning_rate": 9.509941192943154e-05, "loss": 1.802, "step": 350 }, { "epoch": 0.1, "learning_rate": 9.49593951274153e-05, "loss": 1.8177, "step": 360 }, { "epoch": 0.1, "learning_rate": 9.481937832539906e-05, "loss": 1.8064, "step": 370 }, { "epoch": 0.11, "learning_rate": 9.467936152338281e-05, "loss": 1.8059, "step": 380 }, { "epoch": 0.11, "learning_rate": 9.453934472136658e-05, "loss": 1.8051, "step": 390 }, { "epoch": 0.11, "learning_rate": 9.439932791935033e-05, "loss": 1.8205, "step": 400 }, { "epoch": 0.11, "learning_rate": 9.425931111733408e-05, "loss": 1.804, "step": 410 }, { "epoch": 0.12, "learning_rate": 9.411929431531784e-05, "loss": 1.8193, "step": 420 }, { "epoch": 0.12, "learning_rate": 9.39792775133016e-05, "loss": 1.8263, "step": 430 }, { "epoch": 0.12, "learning_rate": 9.383926071128536e-05, "loss": 1.7961, "step": 440 }, { "epoch": 0.13, "learning_rate": 9.369924390926912e-05, "loss": 1.8249, "step": 450 }, { "epoch": 0.13, "learning_rate": 9.355922710725287e-05, "loss": 1.8102, "step": 460 }, { "epoch": 0.13, "learning_rate": 9.341921030523664e-05, "loss": 1.8218, "step": 470 }, { "epoch": 0.13, "learning_rate": 9.327919350322039e-05, "loss": 1.7999, "step": 480 }, { "epoch": 0.14, "learning_rate": 9.313917670120414e-05, "loss": 1.8061, "step": 490 }, { "epoch": 0.14, "learning_rate": 9.29991598991879e-05, "loss": 1.8169, "step": 500 }, { "epoch": 0.14, "learning_rate": 9.285914309717166e-05, "loss": 1.8517, "step": 510 }, { "epoch": 0.15, "learning_rate": 9.271912629515542e-05, "loss": 1.8219, "step": 520 }, { "epoch": 0.15, "learning_rate": 9.257910949313919e-05, "loss": 1.8291, "step": 530 }, { "epoch": 0.15, "learning_rate": 9.243909269112294e-05, "loss": 1.8334, "step": 540 }, { "epoch": 0.15, "learning_rate": 9.22990758891067e-05, "loss": 1.8245, "step": 550 }, { "epoch": 0.16, "learning_rate": 9.215905908709045e-05, "loss": 1.8263, "step": 560 }, { "epoch": 0.16, "learning_rate": 9.20190422850742e-05, "loss": 1.7942, "step": 570 }, { "epoch": 0.16, "learning_rate": 9.187902548305797e-05, "loss": 1.8024, "step": 580 }, { "epoch": 0.17, "learning_rate": 9.173900868104172e-05, "loss": 1.8355, "step": 590 }, { "epoch": 0.17, "learning_rate": 9.159899187902549e-05, "loss": 1.8203, "step": 600 }, { "epoch": 0.17, "learning_rate": 9.145897507700925e-05, "loss": 1.8212, "step": 610 }, { "epoch": 0.17, "learning_rate": 9.1318958274993e-05, "loss": 1.7932, "step": 620 }, { "epoch": 0.18, "learning_rate": 9.117894147297677e-05, "loss": 1.809, "step": 630 }, { "epoch": 0.18, "learning_rate": 9.103892467096052e-05, "loss": 1.8315, "step": 640 }, { "epoch": 0.18, "learning_rate": 9.089890786894428e-05, "loss": 1.8084, "step": 650 }, { "epoch": 0.18, "learning_rate": 9.075889106692804e-05, "loss": 1.8332, "step": 660 }, { "epoch": 0.19, "learning_rate": 9.061887426491179e-05, "loss": 1.8034, "step": 670 }, { "epoch": 0.19, "learning_rate": 9.047885746289555e-05, "loss": 1.8028, "step": 680 }, { "epoch": 0.19, "learning_rate": 9.03388406608793e-05, "loss": 1.806, "step": 690 }, { "epoch": 0.2, "learning_rate": 9.019882385886307e-05, "loss": 1.7862, "step": 700 }, { "epoch": 0.2, "learning_rate": 9.005880705684683e-05, "loss": 1.8068, "step": 710 }, { "epoch": 0.2, "learning_rate": 8.991879025483058e-05, "loss": 1.8025, "step": 720 }, { "epoch": 0.2, "learning_rate": 8.977877345281435e-05, "loss": 1.8468, "step": 730 }, { "epoch": 0.21, "learning_rate": 8.96387566507981e-05, "loss": 1.7847, "step": 740 }, { "epoch": 0.21, "learning_rate": 8.949873984878185e-05, "loss": 1.7988, "step": 750 }, { "epoch": 0.21, "learning_rate": 8.935872304676562e-05, "loss": 1.835, "step": 760 }, { "epoch": 0.22, "learning_rate": 8.921870624474937e-05, "loss": 1.8026, "step": 770 }, { "epoch": 0.22, "learning_rate": 8.907868944273313e-05, "loss": 1.8238, "step": 780 }, { "epoch": 0.22, "learning_rate": 8.89386726407169e-05, "loss": 1.8125, "step": 790 }, { "epoch": 0.22, "learning_rate": 8.879865583870065e-05, "loss": 1.8421, "step": 800 }, { "epoch": 0.23, "learning_rate": 8.865863903668441e-05, "loss": 1.8194, "step": 810 }, { "epoch": 0.23, "learning_rate": 8.851862223466816e-05, "loss": 1.8034, "step": 820 }, { "epoch": 0.23, "learning_rate": 8.837860543265192e-05, "loss": 1.8106, "step": 830 }, { "epoch": 0.24, "learning_rate": 8.823858863063568e-05, "loss": 1.8022, "step": 840 }, { "epoch": 0.24, "learning_rate": 8.809857182861943e-05, "loss": 1.8103, "step": 850 }, { "epoch": 0.24, "learning_rate": 8.79585550266032e-05, "loss": 1.8371, "step": 860 }, { "epoch": 0.24, "learning_rate": 8.781853822458696e-05, "loss": 1.826, "step": 870 }, { "epoch": 0.25, "learning_rate": 8.767852142257071e-05, "loss": 1.8255, "step": 880 }, { "epoch": 0.25, "learning_rate": 8.753850462055448e-05, "loss": 1.8254, "step": 890 }, { "epoch": 0.25, "learning_rate": 8.739848781853823e-05, "loss": 1.813, "step": 900 }, { "epoch": 0.25, "learning_rate": 8.725847101652199e-05, "loss": 1.8175, "step": 910 }, { "epoch": 0.26, "learning_rate": 8.711845421450574e-05, "loss": 1.8055, "step": 920 }, { "epoch": 0.26, "learning_rate": 8.69784374124895e-05, "loss": 1.8174, "step": 930 }, { "epoch": 0.26, "learning_rate": 8.683842061047326e-05, "loss": 1.7836, "step": 940 }, { "epoch": 0.27, "learning_rate": 8.669840380845701e-05, "loss": 1.8147, "step": 950 }, { "epoch": 0.27, "learning_rate": 8.655838700644078e-05, "loss": 1.8026, "step": 960 }, { "epoch": 0.27, "learning_rate": 8.641837020442454e-05, "loss": 1.8242, "step": 970 }, { "epoch": 0.27, "learning_rate": 8.627835340240829e-05, "loss": 1.8233, "step": 980 }, { "epoch": 0.28, "learning_rate": 8.613833660039206e-05, "loss": 1.8135, "step": 990 }, { "epoch": 0.28, "learning_rate": 8.599831979837581e-05, "loss": 1.814, "step": 1000 }, { "epoch": 0.28, "learning_rate": 8.585830299635956e-05, "loss": 1.8078, "step": 1010 }, { "epoch": 0.29, "learning_rate": 8.571828619434332e-05, "loss": 1.8456, "step": 1020 }, { "epoch": 0.29, "learning_rate": 8.557826939232708e-05, "loss": 1.7942, "step": 1030 }, { "epoch": 0.29, "learning_rate": 8.543825259031084e-05, "loss": 1.7971, "step": 1040 }, { "epoch": 0.29, "learning_rate": 8.52982357882946e-05, "loss": 1.8072, "step": 1050 }, { "epoch": 0.3, "learning_rate": 8.515821898627836e-05, "loss": 1.8096, "step": 1060 }, { "epoch": 0.3, "learning_rate": 8.501820218426212e-05, "loss": 1.83, "step": 1070 }, { "epoch": 0.3, "learning_rate": 8.487818538224587e-05, "loss": 1.8107, "step": 1080 }, { "epoch": 0.31, "learning_rate": 8.473816858022962e-05, "loss": 1.805, "step": 1090 }, { "epoch": 0.31, "learning_rate": 8.459815177821339e-05, "loss": 1.8201, "step": 1100 }, { "epoch": 0.31, "learning_rate": 8.445813497619714e-05, "loss": 1.8202, "step": 1110 }, { "epoch": 0.31, "learning_rate": 8.43181181741809e-05, "loss": 1.8011, "step": 1120 }, { "epoch": 0.32, "learning_rate": 8.417810137216467e-05, "loss": 1.795, "step": 1130 }, { "epoch": 0.32, "learning_rate": 8.403808457014842e-05, "loss": 1.8051, "step": 1140 }, { "epoch": 0.32, "learning_rate": 8.389806776813219e-05, "loss": 1.8127, "step": 1150 }, { "epoch": 0.32, "learning_rate": 8.375805096611594e-05, "loss": 1.8095, "step": 1160 }, { "epoch": 0.33, "learning_rate": 8.36180341640997e-05, "loss": 1.8118, "step": 1170 }, { "epoch": 0.33, "learning_rate": 8.347801736208345e-05, "loss": 1.8195, "step": 1180 }, { "epoch": 0.33, "learning_rate": 8.33380005600672e-05, "loss": 1.8171, "step": 1190 }, { "epoch": 0.34, "learning_rate": 8.319798375805097e-05, "loss": 1.7997, "step": 1200 }, { "epoch": 0.34, "learning_rate": 8.305796695603472e-05, "loss": 1.8308, "step": 1210 }, { "epoch": 0.34, "learning_rate": 8.291795015401849e-05, "loss": 1.8208, "step": 1220 }, { "epoch": 0.34, "learning_rate": 8.277793335200225e-05, "loss": 1.808, "step": 1230 }, { "epoch": 0.35, "learning_rate": 8.2637916549986e-05, "loss": 1.8106, "step": 1240 }, { "epoch": 0.35, "learning_rate": 8.249789974796977e-05, "loss": 1.7959, "step": 1250 }, { "epoch": 0.35, "learning_rate": 8.235788294595352e-05, "loss": 1.8154, "step": 1260 }, { "epoch": 0.36, "learning_rate": 8.221786614393727e-05, "loss": 1.8068, "step": 1270 }, { "epoch": 0.36, "learning_rate": 8.207784934192103e-05, "loss": 1.8405, "step": 1280 }, { "epoch": 0.36, "learning_rate": 8.193783253990478e-05, "loss": 1.8299, "step": 1290 }, { "epoch": 0.36, "learning_rate": 8.179781573788855e-05, "loss": 1.7848, "step": 1300 }, { "epoch": 0.37, "learning_rate": 8.165779893587231e-05, "loss": 1.7936, "step": 1310 }, { "epoch": 0.37, "learning_rate": 8.151778213385607e-05, "loss": 1.8244, "step": 1320 }, { "epoch": 0.37, "learning_rate": 8.137776533183983e-05, "loss": 1.8164, "step": 1330 }, { "epoch": 0.38, "learning_rate": 8.123774852982358e-05, "loss": 1.8106, "step": 1340 }, { "epoch": 0.38, "learning_rate": 8.109773172780733e-05, "loss": 1.8173, "step": 1350 }, { "epoch": 0.38, "learning_rate": 8.09577149257911e-05, "loss": 1.8049, "step": 1360 }, { "epoch": 0.38, "learning_rate": 8.081769812377485e-05, "loss": 1.7982, "step": 1370 }, { "epoch": 0.39, "learning_rate": 8.067768132175861e-05, "loss": 1.814, "step": 1380 }, { "epoch": 0.39, "learning_rate": 8.053766451974238e-05, "loss": 1.8146, "step": 1390 }, { "epoch": 0.39, "learning_rate": 8.039764771772613e-05, "loss": 1.8239, "step": 1400 }, { "epoch": 0.39, "learning_rate": 8.02576309157099e-05, "loss": 1.802, "step": 1410 }, { "epoch": 0.4, "learning_rate": 8.011761411369365e-05, "loss": 1.8188, "step": 1420 }, { "epoch": 0.4, "learning_rate": 7.997759731167741e-05, "loss": 1.828, "step": 1430 }, { "epoch": 0.4, "learning_rate": 7.983758050966116e-05, "loss": 1.8091, "step": 1440 }, { "epoch": 0.41, "learning_rate": 7.969756370764491e-05, "loss": 1.8141, "step": 1450 }, { "epoch": 0.41, "learning_rate": 7.955754690562868e-05, "loss": 1.7822, "step": 1460 }, { "epoch": 0.41, "learning_rate": 7.941753010361243e-05, "loss": 1.8111, "step": 1470 }, { "epoch": 0.41, "learning_rate": 7.92775133015962e-05, "loss": 1.8062, "step": 1480 }, { "epoch": 0.42, "learning_rate": 7.913749649957996e-05, "loss": 1.8261, "step": 1490 }, { "epoch": 0.42, "learning_rate": 7.899747969756371e-05, "loss": 1.8266, "step": 1500 }, { "epoch": 0.42, "learning_rate": 7.885746289554748e-05, "loss": 1.7949, "step": 1510 }, { "epoch": 0.43, "learning_rate": 7.871744609353123e-05, "loss": 1.8335, "step": 1520 }, { "epoch": 0.43, "learning_rate": 7.857742929151498e-05, "loss": 1.8092, "step": 1530 }, { "epoch": 0.43, "learning_rate": 7.843741248949874e-05, "loss": 1.8151, "step": 1540 }, { "epoch": 0.43, "learning_rate": 7.82973956874825e-05, "loss": 1.7941, "step": 1550 }, { "epoch": 0.44, "learning_rate": 7.815737888546626e-05, "loss": 1.8149, "step": 1560 }, { "epoch": 0.44, "learning_rate": 7.801736208345002e-05, "loss": 1.7763, "step": 1570 }, { "epoch": 0.44, "learning_rate": 7.787734528143377e-05, "loss": 1.8153, "step": 1580 }, { "epoch": 0.45, "learning_rate": 7.773732847941754e-05, "loss": 1.8114, "step": 1590 }, { "epoch": 0.45, "learning_rate": 7.759731167740129e-05, "loss": 1.8236, "step": 1600 }, { "epoch": 0.45, "learning_rate": 7.745729487538504e-05, "loss": 1.8221, "step": 1610 }, { "epoch": 0.45, "learning_rate": 7.731727807336881e-05, "loss": 1.8017, "step": 1620 }, { "epoch": 0.46, "learning_rate": 7.717726127135256e-05, "loss": 1.7907, "step": 1630 }, { "epoch": 0.46, "learning_rate": 7.703724446933632e-05, "loss": 1.8237, "step": 1640 }, { "epoch": 0.46, "learning_rate": 7.689722766732009e-05, "loss": 1.7818, "step": 1650 }, { "epoch": 0.46, "learning_rate": 7.675721086530384e-05, "loss": 1.7932, "step": 1660 }, { "epoch": 0.47, "learning_rate": 7.66171940632876e-05, "loss": 1.8069, "step": 1670 }, { "epoch": 0.47, "learning_rate": 7.647717726127136e-05, "loss": 1.8438, "step": 1680 }, { "epoch": 0.47, "learning_rate": 7.633716045925512e-05, "loss": 1.7775, "step": 1690 }, { "epoch": 0.48, "learning_rate": 7.619714365723887e-05, "loss": 1.834, "step": 1700 }, { "epoch": 0.48, "learning_rate": 7.605712685522262e-05, "loss": 1.7821, "step": 1710 }, { "epoch": 0.48, "learning_rate": 7.591711005320639e-05, "loss": 1.7951, "step": 1720 }, { "epoch": 0.48, "learning_rate": 7.577709325119014e-05, "loss": 1.815, "step": 1730 }, { "epoch": 0.49, "learning_rate": 7.56370764491739e-05, "loss": 1.8183, "step": 1740 }, { "epoch": 0.49, "learning_rate": 7.549705964715767e-05, "loss": 1.8088, "step": 1750 }, { "epoch": 0.49, "learning_rate": 7.535704284514142e-05, "loss": 1.8196, "step": 1760 }, { "epoch": 0.5, "learning_rate": 7.521702604312518e-05, "loss": 1.7952, "step": 1770 }, { "epoch": 0.5, "learning_rate": 7.507700924110894e-05, "loss": 1.8086, "step": 1780 }, { "epoch": 0.5, "learning_rate": 7.493699243909269e-05, "loss": 1.8127, "step": 1790 }, { "epoch": 0.5, "learning_rate": 7.479697563707645e-05, "loss": 1.8047, "step": 1800 }, { "epoch": 0.51, "learning_rate": 7.46569588350602e-05, "loss": 1.8227, "step": 1810 }, { "epoch": 0.51, "learning_rate": 7.451694203304397e-05, "loss": 1.8252, "step": 1820 }, { "epoch": 0.51, "learning_rate": 7.437692523102773e-05, "loss": 1.8022, "step": 1830 }, { "epoch": 0.52, "learning_rate": 7.423690842901148e-05, "loss": 1.8159, "step": 1840 }, { "epoch": 0.52, "learning_rate": 7.409689162699525e-05, "loss": 1.8097, "step": 1850 }, { "epoch": 0.52, "learning_rate": 7.3956874824979e-05, "loss": 1.8104, "step": 1860 }, { "epoch": 0.52, "learning_rate": 7.381685802296275e-05, "loss": 1.8061, "step": 1870 }, { "epoch": 0.53, "learning_rate": 7.367684122094652e-05, "loss": 1.8273, "step": 1880 }, { "epoch": 0.53, "learning_rate": 7.353682441893027e-05, "loss": 1.8178, "step": 1890 }, { "epoch": 0.53, "learning_rate": 7.339680761691403e-05, "loss": 1.8238, "step": 1900 }, { "epoch": 0.53, "learning_rate": 7.32567908148978e-05, "loss": 1.7897, "step": 1910 }, { "epoch": 0.54, "learning_rate": 7.311677401288155e-05, "loss": 1.8134, "step": 1920 }, { "epoch": 0.54, "learning_rate": 7.297675721086531e-05, "loss": 1.8154, "step": 1930 }, { "epoch": 0.54, "learning_rate": 7.283674040884906e-05, "loss": 1.8324, "step": 1940 }, { "epoch": 0.55, "learning_rate": 7.269672360683283e-05, "loss": 1.7943, "step": 1950 }, { "epoch": 0.55, "learning_rate": 7.255670680481658e-05, "loss": 1.8176, "step": 1960 }, { "epoch": 0.55, "learning_rate": 7.241669000280033e-05, "loss": 1.8162, "step": 1970 }, { "epoch": 0.55, "learning_rate": 7.22766732007841e-05, "loss": 1.8369, "step": 1980 }, { "epoch": 0.56, "learning_rate": 7.213665639876786e-05, "loss": 1.7854, "step": 1990 }, { "epoch": 0.56, "learning_rate": 7.199663959675161e-05, "loss": 1.8253, "step": 2000 } ], "max_steps": 7142, "num_train_epochs": 2, "total_flos": 2.274070628401152e+18, "trial_name": null, "trial_params": null }