{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9257570970799296, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.939817043813193e-05, "loss": 3.7959, "step": 100 }, { "epoch": 0.05, "learning_rate": 4.8796340876263846e-05, "loss": 2.4163, "step": 200 }, { "epoch": 0.07, "learning_rate": 4.8194511314395764e-05, "loss": 2.2765, "step": 300 }, { "epoch": 0.1, "learning_rate": 4.759268175252768e-05, "loss": 2.1919, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.699085219065961e-05, "loss": 2.1446, "step": 500 }, { "epoch": 0.14, "learning_rate": 4.6389022628791526e-05, "loss": 2.099, "step": 600 }, { "epoch": 0.17, "learning_rate": 4.578719306692345e-05, "loss": 2.0678, "step": 700 }, { "epoch": 0.19, "learning_rate": 4.518536350505537e-05, "loss": 2.0363, "step": 800 }, { "epoch": 0.22, "learning_rate": 4.458353394318729e-05, "loss": 2.0215, "step": 900 }, { "epoch": 0.24, "learning_rate": 4.3981704381319214e-05, "loss": 1.9806, "step": 1000 }, { "epoch": 0.26, "learning_rate": 4.337987481945113e-05, "loss": 1.9762, "step": 1100 }, { "epoch": 0.29, "learning_rate": 4.277804525758306e-05, "loss": 1.9529, "step": 1200 }, { "epoch": 0.31, "learning_rate": 4.2176215695714976e-05, "loss": 1.9387, "step": 1300 }, { "epoch": 0.34, "learning_rate": 4.15743861338469e-05, "loss": 1.9218, "step": 1400 }, { "epoch": 0.36, "learning_rate": 4.097255657197881e-05, "loss": 1.9309, "step": 1500 }, { "epoch": 0.39, "learning_rate": 4.037072701011074e-05, "loss": 1.9178, "step": 1600 }, { "epoch": 0.41, "learning_rate": 3.9768897448242656e-05, "loss": 1.8922, "step": 1700 }, { "epoch": 0.43, "learning_rate": 3.916706788637458e-05, "loss": 1.8956, "step": 1800 }, { "epoch": 0.46, "learning_rate": 3.85652383245065e-05, "loss": 1.8586, "step": 1900 }, { "epoch": 0.48, "learning_rate": 3.7963408762638425e-05, "loss": 1.8704, "step": 2000 }, { "epoch": 0.51, "learning_rate": 3.736157920077034e-05, "loss": 1.8613, "step": 2100 }, { "epoch": 0.53, "learning_rate": 3.675974963890226e-05, "loss": 1.8462, "step": 2200 }, { "epoch": 0.55, "learning_rate": 3.615792007703419e-05, "loss": 1.8478, "step": 2300 }, { "epoch": 0.58, "learning_rate": 3.5556090515166105e-05, "loss": 1.8343, "step": 2400 }, { "epoch": 0.6, "learning_rate": 3.495426095329803e-05, "loss": 1.8253, "step": 2500 }, { "epoch": 0.63, "learning_rate": 3.435243139142995e-05, "loss": 1.8151, "step": 2600 }, { "epoch": 0.65, "learning_rate": 3.375060182956187e-05, "loss": 1.8214, "step": 2700 }, { "epoch": 0.67, "learning_rate": 3.314877226769379e-05, "loss": 1.8282, "step": 2800 }, { "epoch": 0.7, "learning_rate": 3.254694270582571e-05, "loss": 1.8126, "step": 2900 }, { "epoch": 0.72, "learning_rate": 3.1945113143957636e-05, "loss": 1.8166, "step": 3000 }, { "epoch": 0.75, "learning_rate": 3.1343283582089554e-05, "loss": 1.7904, "step": 3100 }, { "epoch": 0.77, "learning_rate": 3.074145402022148e-05, "loss": 1.8105, "step": 3200 }, { "epoch": 0.79, "learning_rate": 3.0139624458353395e-05, "loss": 1.783, "step": 3300 }, { "epoch": 0.82, "learning_rate": 2.9537794896485316e-05, "loss": 1.79, "step": 3400 }, { "epoch": 0.84, "learning_rate": 2.8935965334617238e-05, "loss": 1.7718, "step": 3500 }, { "epoch": 0.87, "learning_rate": 2.833413577274916e-05, "loss": 1.7597, "step": 3600 }, { "epoch": 0.89, "learning_rate": 2.7732306210881082e-05, "loss": 1.7698, "step": 3700 }, { "epoch": 0.91, "learning_rate": 2.7130476649013004e-05, "loss": 1.7491, "step": 3800 }, { "epoch": 0.94, "learning_rate": 2.652864708714492e-05, "loss": 1.7619, "step": 3900 }, { "epoch": 0.96, "learning_rate": 2.592681752527684e-05, "loss": 1.7624, "step": 4000 }, { "epoch": 0.99, "learning_rate": 2.5324987963408762e-05, "loss": 1.7416, "step": 4100 }, { "epoch": 1.0, "eval_loss": 1.5835336446762085, "eval_runtime": 4.4301, "eval_samples_per_second": 225.726, "eval_steps_per_second": 28.216, "step": 4154 }, { "epoch": 1.01, "learning_rate": 2.4723158401540687e-05, "loss": 1.7607, "step": 4200 }, { "epoch": 1.04, "learning_rate": 2.4121328839672606e-05, "loss": 1.7205, "step": 4300 }, { "epoch": 1.06, "learning_rate": 2.3519499277804528e-05, "loss": 1.7291, "step": 4400 }, { "epoch": 1.08, "learning_rate": 2.291766971593645e-05, "loss": 1.7262, "step": 4500 }, { "epoch": 1.11, "learning_rate": 2.2315840154068368e-05, "loss": 1.735, "step": 4600 }, { "epoch": 1.13, "learning_rate": 2.171401059220029e-05, "loss": 1.7306, "step": 4700 }, { "epoch": 1.16, "learning_rate": 2.111218103033221e-05, "loss": 1.7141, "step": 4800 }, { "epoch": 1.18, "learning_rate": 2.051035146846413e-05, "loss": 1.7185, "step": 4900 }, { "epoch": 1.2, "learning_rate": 1.990852190659605e-05, "loss": 1.7133, "step": 5000 }, { "epoch": 1.23, "learning_rate": 1.9306692344727973e-05, "loss": 1.7126, "step": 5100 }, { "epoch": 1.25, "learning_rate": 1.8704862782859895e-05, "loss": 1.71, "step": 5200 }, { "epoch": 1.28, "learning_rate": 1.8103033220991817e-05, "loss": 1.7059, "step": 5300 }, { "epoch": 1.3, "learning_rate": 1.750120365912374e-05, "loss": 1.7092, "step": 5400 }, { "epoch": 1.32, "learning_rate": 1.6899374097255657e-05, "loss": 1.7026, "step": 5500 }, { "epoch": 1.35, "learning_rate": 1.629754453538758e-05, "loss": 1.7148, "step": 5600 }, { "epoch": 1.37, "learning_rate": 1.56957149735195e-05, "loss": 1.7137, "step": 5700 }, { "epoch": 1.4, "learning_rate": 1.5093885411651421e-05, "loss": 1.7005, "step": 5800 }, { "epoch": 1.42, "learning_rate": 1.4492055849783343e-05, "loss": 1.7074, "step": 5900 }, { "epoch": 1.44, "learning_rate": 1.3890226287915261e-05, "loss": 1.6943, "step": 6000 }, { "epoch": 1.47, "learning_rate": 1.3288396726047183e-05, "loss": 1.6917, "step": 6100 }, { "epoch": 1.49, "learning_rate": 1.2686567164179105e-05, "loss": 1.6953, "step": 6200 }, { "epoch": 1.52, "learning_rate": 1.2084737602311027e-05, "loss": 1.6869, "step": 6300 }, { "epoch": 1.54, "learning_rate": 1.1482908040442947e-05, "loss": 1.6904, "step": 6400 }, { "epoch": 1.56, "learning_rate": 1.0881078478574869e-05, "loss": 1.6988, "step": 6500 }, { "epoch": 1.59, "learning_rate": 1.0279248916706789e-05, "loss": 1.6942, "step": 6600 }, { "epoch": 1.61, "learning_rate": 9.67741935483871e-06, "loss": 1.6887, "step": 6700 }, { "epoch": 1.64, "learning_rate": 9.075589792970632e-06, "loss": 1.6858, "step": 6800 }, { "epoch": 1.66, "learning_rate": 8.473760231102552e-06, "loss": 1.6848, "step": 6900 }, { "epoch": 1.69, "learning_rate": 7.871930669234472e-06, "loss": 1.6951, "step": 7000 }, { "epoch": 1.71, "learning_rate": 7.270101107366393e-06, "loss": 1.6921, "step": 7100 }, { "epoch": 1.73, "learning_rate": 6.668271545498315e-06, "loss": 1.6801, "step": 7200 }, { "epoch": 1.76, "learning_rate": 6.066441983630236e-06, "loss": 1.6934, "step": 7300 }, { "epoch": 1.78, "learning_rate": 5.464612421762157e-06, "loss": 1.6914, "step": 7400 }, { "epoch": 1.81, "learning_rate": 4.862782859894078e-06, "loss": 1.6872, "step": 7500 }, { "epoch": 1.83, "learning_rate": 4.260953298025999e-06, "loss": 1.6799, "step": 7600 }, { "epoch": 1.85, "learning_rate": 3.6591237361579204e-06, "loss": 1.6832, "step": 7700 }, { "epoch": 1.88, "learning_rate": 3.0572941742898413e-06, "loss": 1.6902, "step": 7800 }, { "epoch": 1.9, "learning_rate": 2.4554646124217623e-06, "loss": 1.6805, "step": 7900 }, { "epoch": 1.93, "learning_rate": 1.8536350505536832e-06, "loss": 1.676, "step": 8000 } ], "max_steps": 8308, "num_train_epochs": 2, "total_flos": 5.801913298450115e+17, "trial_name": null, "trial_params": null }