{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07289804249572203, "global_step": 9500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00099, "loss": 3.0632, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.0009901010101010103, "loss": 2.9421, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.00098, "loss": 2.9549, "step": 300 }, { "epoch": 0.0, "learning_rate": 0.00096989898989899, "loss": 2.9156, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.0009597979797979798, "loss": 2.9743, "step": 500 }, { "epoch": 0.0, "learning_rate": 0.0009496969696969697, "loss": 2.9801, "step": 600 }, { "epoch": 0.01, "learning_rate": 0.0009395959595959597, "loss": 2.9492, "step": 700 }, { "epoch": 0.01, "learning_rate": 0.0009294949494949495, "loss": 3.043, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.0009193939393939395, "loss": 2.9186, "step": 900 }, { "epoch": 0.01, "learning_rate": 0.0009092929292929293, "loss": 2.8856, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.0008992929292929293, "loss": 2.947, "step": 1100 }, { "epoch": 0.01, "learning_rate": 0.0008891919191919192, "loss": 2.9986, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.0008790909090909091, "loss": 2.9525, "step": 1300 }, { "epoch": 0.01, "learning_rate": 0.000868989898989899, "loss": 2.9203, "step": 1400 }, { "epoch": 0.01, "learning_rate": 0.000858888888888889, "loss": 2.9795, "step": 1500 }, { "epoch": 0.01, "learning_rate": 0.0008487878787878788, "loss": 2.9549, "step": 1600 }, { "epoch": 0.01, "learning_rate": 0.0008386868686868687, "loss": 2.9033, "step": 1700 }, { "epoch": 0.01, "learning_rate": 0.0008285858585858585, "loss": 2.9117, "step": 1800 }, { "epoch": 0.01, "learning_rate": 0.0008185858585858587, "loss": 2.9674, "step": 1900 }, { "epoch": 0.02, "learning_rate": 0.0008085858585858587, "loss": 2.9492, "step": 2000 }, { "epoch": 0.02, "learning_rate": 0.0007984848484848485, "loss": 2.9526, "step": 2100 }, { "epoch": 0.02, "learning_rate": 0.0007883838383838384, "loss": 2.9687, "step": 2200 }, { "epoch": 0.02, "learning_rate": 0.0007782828282828282, "loss": 2.9783, "step": 2300 }, { "epoch": 0.02, "learning_rate": 0.0007681818181818182, "loss": 2.9889, "step": 2400 }, { "epoch": 0.02, "learning_rate": 0.0007581818181818182, "loss": 2.9141, "step": 2500 }, { "epoch": 0.02, "learning_rate": 0.0007480808080808081, "loss": 2.9131, "step": 2600 }, { "epoch": 0.02, "learning_rate": 0.000737979797979798, "loss": 2.9087, "step": 2700 }, { "epoch": 0.02, "learning_rate": 0.0007278787878787879, "loss": 2.9485, "step": 2800 }, { "epoch": 0.02, "learning_rate": 0.0007177777777777778, "loss": 2.8469, "step": 2900 }, { "epoch": 0.02, "learning_rate": 0.0007076767676767677, "loss": 2.8715, "step": 3000 }, { "epoch": 0.02, "learning_rate": 0.0006975757575757576, "loss": 2.9388, "step": 3100 }, { "epoch": 0.02, "learning_rate": 0.0006874747474747474, "loss": 2.9958, "step": 3200 }, { "epoch": 0.03, "learning_rate": 0.0006773737373737374, "loss": 2.9552, "step": 3300 }, { "epoch": 0.03, "learning_rate": 0.0006672727272727273, "loss": 2.9126, "step": 3400 }, { "epoch": 0.03, "learning_rate": 0.0006571717171717172, "loss": 3.0015, "step": 3500 }, { "epoch": 0.03, "learning_rate": 0.000647070707070707, "loss": 2.9406, "step": 3600 }, { "epoch": 0.03, "learning_rate": 0.0006369696969696969, "loss": 2.8848, "step": 3700 }, { "epoch": 0.03, "learning_rate": 0.000626868686868687, "loss": 2.9598, "step": 3800 }, { "epoch": 0.03, "learning_rate": 0.0006167676767676768, "loss": 2.8795, "step": 3900 }, { "epoch": 0.03, "learning_rate": 0.0006066666666666667, "loss": 2.9366, "step": 4000 }, { "epoch": 0.03, "learning_rate": 0.0005965656565656565, "loss": 2.9357, "step": 4100 }, { "epoch": 0.03, "learning_rate": 0.0005864646464646464, "loss": 2.8924, "step": 4200 }, { "epoch": 0.03, "learning_rate": 0.0005763636363636365, "loss": 2.8939, "step": 4300 }, { "epoch": 0.03, "learning_rate": 0.0005662626262626263, "loss": 2.8453, "step": 4400 }, { "epoch": 0.03, "learning_rate": 0.0005561616161616162, "loss": 2.8551, "step": 4500 }, { "epoch": 0.04, "learning_rate": 0.000546060606060606, "loss": 2.9476, "step": 4600 }, { "epoch": 0.04, "learning_rate": 0.0005359595959595959, "loss": 2.9256, "step": 4700 }, { "epoch": 0.04, "learning_rate": 0.000525858585858586, "loss": 2.963, "step": 4800 }, { "epoch": 0.04, "learning_rate": 0.0005157575757575758, "loss": 2.8954, "step": 4900 }, { "epoch": 0.04, "learning_rate": 0.0005056565656565657, "loss": 2.8978, "step": 5000 }, { "epoch": 0.04, "learning_rate": 0.0004955555555555556, "loss": 2.8789, "step": 5100 }, { "epoch": 0.04, "learning_rate": 0.0004854545454545455, "loss": 2.8757, "step": 5200 }, { "epoch": 0.04, "learning_rate": 0.00047545454545454545, "loss": 2.9107, "step": 5300 }, { "epoch": 0.04, "learning_rate": 0.0004653535353535354, "loss": 2.8769, "step": 5400 }, { "epoch": 0.04, "learning_rate": 0.00045525252525252524, "loss": 2.8659, "step": 5500 }, { "epoch": 0.04, "learning_rate": 0.00044515151515151516, "loss": 2.8624, "step": 5600 }, { "epoch": 0.04, "learning_rate": 0.0004350505050505051, "loss": 2.8672, "step": 5700 }, { "epoch": 0.04, "learning_rate": 0.00042494949494949495, "loss": 2.8286, "step": 5800 }, { "epoch": 0.05, "learning_rate": 0.00041484848484848487, "loss": 2.9685, "step": 5900 }, { "epoch": 0.05, "learning_rate": 0.00040474747474747474, "loss": 2.9659, "step": 6000 }, { "epoch": 0.05, "learning_rate": 0.00039464646464646466, "loss": 2.8937, "step": 6100 }, { "epoch": 0.05, "learning_rate": 0.0003845454545454545, "loss": 2.8521, "step": 6200 }, { "epoch": 0.05, "learning_rate": 0.0003744444444444445, "loss": 2.8721, "step": 6300 }, { "epoch": 0.05, "learning_rate": 0.00036434343434343437, "loss": 2.8664, "step": 6400 }, { "epoch": 0.05, "learning_rate": 0.00035424242424242423, "loss": 2.8925, "step": 6500 }, { "epoch": 0.05, "learning_rate": 0.00034414141414141415, "loss": 2.8477, "step": 6600 }, { "epoch": 0.05, "learning_rate": 0.000334040404040404, "loss": 2.8804, "step": 6700 }, { "epoch": 0.05, "learning_rate": 0.00032393939393939394, "loss": 2.8661, "step": 6800 }, { "epoch": 0.05, "learning_rate": 0.00031383838383838386, "loss": 2.8934, "step": 6900 }, { "epoch": 0.05, "learning_rate": 0.0003037373737373738, "loss": 2.9107, "step": 7000 }, { "epoch": 0.05, "learning_rate": 0.00029363636363636365, "loss": 2.8775, "step": 7100 }, { "epoch": 0.06, "learning_rate": 0.0002835353535353535, "loss": 2.8849, "step": 7200 }, { "epoch": 0.06, "learning_rate": 0.00027343434343434343, "loss": 2.845, "step": 7300 }, { "epoch": 0.06, "learning_rate": 0.0002633333333333333, "loss": 2.8441, "step": 7400 }, { "epoch": 0.06, "learning_rate": 0.0002532323232323233, "loss": 2.8455, "step": 7500 }, { "epoch": 0.06, "learning_rate": 0.00024313131313131314, "loss": 2.8027, "step": 7600 }, { "epoch": 0.06, "learning_rate": 0.00023303030303030304, "loss": 2.8793, "step": 7700 }, { "epoch": 0.06, "learning_rate": 0.00022292929292929293, "loss": 2.8763, "step": 7800 }, { "epoch": 0.06, "learning_rate": 0.00021282828282828285, "loss": 2.8456, "step": 7900 }, { "epoch": 0.06, "learning_rate": 0.00020272727272727274, "loss": 2.8217, "step": 8000 }, { "epoch": 0.06, "learning_rate": 0.0001926262626262626, "loss": 2.8026, "step": 8100 }, { "epoch": 0.06, "learning_rate": 0.00018252525252525253, "loss": 2.8486, "step": 8200 }, { "epoch": 0.06, "learning_rate": 0.00017242424242424242, "loss": 2.8023, "step": 8300 }, { "epoch": 0.06, "learning_rate": 0.00016232323232323232, "loss": 2.8711, "step": 8400 }, { "epoch": 0.07, "learning_rate": 0.00015222222222222224, "loss": 2.9234, "step": 8500 }, { "epoch": 0.07, "learning_rate": 0.00014212121212121213, "loss": 2.8615, "step": 8600 }, { "epoch": 0.07, "learning_rate": 0.00013202020202020203, "loss": 2.8765, "step": 8700 }, { "epoch": 0.07, "learning_rate": 0.00012191919191919192, "loss": 2.891, "step": 8800 }, { "epoch": 0.07, "learning_rate": 0.00011181818181818181, "loss": 2.8735, "step": 8900 }, { "epoch": 0.07, "learning_rate": 0.00010171717171717172, "loss": 2.7965, "step": 9000 }, { "epoch": 0.07, "learning_rate": 9.161616161616161e-05, "loss": 2.7891, "step": 9100 }, { "epoch": 0.07, "learning_rate": 8.151515151515152e-05, "loss": 2.8796, "step": 9200 }, { "epoch": 0.07, "learning_rate": 7.141414141414141e-05, "loss": 2.8947, "step": 9300 }, { "epoch": 0.07, "learning_rate": 6.141414141414142e-05, "loss": 2.8773, "step": 9400 }, { "epoch": 0.07, "learning_rate": 5.131313131313131e-05, "loss": 2.8241, "step": 9500 } ], "max_steps": 10000, "num_train_epochs": 1, "total_flos": 3247836551823360.0, "trial_name": null, "trial_params": null }