{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.819185320199062, "eval_steps": 500, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.965867278325039e-05, "loss": 1.8363, "step": 500 }, { "epoch": 0.02, "learning_rate": 9.931734556650079e-05, "loss": 1.2927, "step": 1000 }, { "epoch": 0.03, "learning_rate": 9.897601834975117e-05, "loss": 1.0069, "step": 1500 }, { "epoch": 0.04, "learning_rate": 9.863469113300157e-05, "loss": 0.8362, "step": 2000 }, { "epoch": 0.05, "learning_rate": 9.829336391625195e-05, "loss": 0.7217, "step": 2500 }, { "epoch": 0.06, "learning_rate": 9.795203669950235e-05, "loss": 0.6462, "step": 3000 }, { "epoch": 0.07, "learning_rate": 9.761070948275274e-05, "loss": 0.5914, "step": 3500 }, { "epoch": 0.08, "learning_rate": 9.726938226600313e-05, "loss": 0.5497, "step": 4000 }, { "epoch": 0.09, "learning_rate": 9.692805504925352e-05, "loss": 0.5154, "step": 4500 }, { "epoch": 0.1, "learning_rate": 9.658672783250392e-05, "loss": 0.4867, "step": 5000 }, { "epoch": 0.11, "learning_rate": 9.62454006157543e-05, "loss": 0.4628, "step": 5500 }, { "epoch": 0.12, "learning_rate": 9.590407339900468e-05, "loss": 0.4393, "step": 6000 }, { "epoch": 0.13, "learning_rate": 9.556274618225508e-05, "loss": 0.4199, "step": 6500 }, { "epoch": 0.14, "learning_rate": 9.522141896550547e-05, "loss": 0.3996, "step": 7000 }, { "epoch": 0.15, "learning_rate": 9.488009174875586e-05, "loss": 0.383, "step": 7500 }, { "epoch": 0.16, "learning_rate": 9.453876453200626e-05, "loss": 0.37, "step": 8000 }, { "epoch": 0.17, "learning_rate": 9.419743731525666e-05, "loss": 0.3577, "step": 8500 }, { "epoch": 0.18, "learning_rate": 9.385611009850704e-05, "loss": 0.3468, "step": 9000 }, { "epoch": 0.19, "learning_rate": 9.351478288175744e-05, "loss": 0.3364, "step": 9500 }, { "epoch": 0.2, "learning_rate": 9.317345566500783e-05, "loss": 0.3281, "step": 10000 }, { "epoch": 0.22, "learning_rate": 9.283212844825821e-05, "loss": 0.3198, "step": 10500 }, { "epoch": 0.23, "learning_rate": 9.249080123150861e-05, "loss": 0.3124, "step": 11000 }, { "epoch": 0.24, "learning_rate": 9.214947401475899e-05, "loss": 0.3061, "step": 11500 }, { "epoch": 0.25, "learning_rate": 9.180814679800939e-05, "loss": 0.2999, "step": 12000 }, { "epoch": 0.26, "learning_rate": 9.146681958125977e-05, "loss": 0.2938, "step": 12500 }, { "epoch": 0.27, "learning_rate": 9.112549236451017e-05, "loss": 0.2886, "step": 13000 }, { "epoch": 0.28, "learning_rate": 9.078416514776056e-05, "loss": 0.2834, "step": 13500 }, { "epoch": 0.29, "learning_rate": 9.044283793101095e-05, "loss": 0.2791, "step": 14000 }, { "epoch": 0.3, "learning_rate": 9.010151071426134e-05, "loss": 0.2748, "step": 14500 }, { "epoch": 0.31, "learning_rate": 8.976018349751174e-05, "loss": 0.2708, "step": 15000 }, { "epoch": 0.32, "learning_rate": 8.941885628076212e-05, "loss": 0.2673, "step": 15500 }, { "epoch": 0.33, "learning_rate": 8.90775290640125e-05, "loss": 0.2634, "step": 16000 }, { "epoch": 0.34, "learning_rate": 8.87362018472629e-05, "loss": 0.2604, "step": 16500 }, { "epoch": 0.35, "learning_rate": 8.839487463051329e-05, "loss": 0.2575, "step": 17000 }, { "epoch": 0.36, "learning_rate": 8.805354741376368e-05, "loss": 0.2542, "step": 17500 }, { "epoch": 0.37, "learning_rate": 8.771222019701407e-05, "loss": 0.251, "step": 18000 }, { "epoch": 0.38, "learning_rate": 8.737089298026447e-05, "loss": 0.2483, "step": 18500 }, { "epoch": 0.39, "learning_rate": 8.702956576351485e-05, "loss": 0.2458, "step": 19000 }, { "epoch": 0.4, "learning_rate": 8.668823854676525e-05, "loss": 0.2429, "step": 19500 }, { "epoch": 0.41, "learning_rate": 8.634691133001563e-05, "loss": 0.2405, "step": 20000 }, { "epoch": 0.42, "learning_rate": 8.600558411326603e-05, "loss": 0.2391, "step": 20500 }, { "epoch": 0.43, "learning_rate": 8.566425689651641e-05, "loss": 0.237, "step": 21000 }, { "epoch": 0.44, "learning_rate": 8.532292967976681e-05, "loss": 0.2342, "step": 21500 }, { "epoch": 0.45, "learning_rate": 8.49816024630172e-05, "loss": 0.2326, "step": 22000 }, { "epoch": 0.46, "learning_rate": 8.464027524626758e-05, "loss": 0.2304, "step": 22500 }, { "epoch": 0.47, "learning_rate": 8.429894802951798e-05, "loss": 0.2286, "step": 23000 }, { "epoch": 0.48, "learning_rate": 8.395762081276836e-05, "loss": 0.2271, "step": 23500 }, { "epoch": 0.49, "learning_rate": 8.361629359601877e-05, "loss": 0.2252, "step": 24000 }, { "epoch": 0.5, "learning_rate": 8.327496637926916e-05, "loss": 0.2231, "step": 24500 }, { "epoch": 0.51, "learning_rate": 8.293363916251956e-05, "loss": 0.2219, "step": 25000 }, { "epoch": 0.52, "learning_rate": 8.259231194576994e-05, "loss": 0.2199, "step": 25500 }, { "epoch": 0.53, "learning_rate": 8.225098472902032e-05, "loss": 0.219, "step": 26000 }, { "epoch": 0.54, "learning_rate": 8.190965751227072e-05, "loss": 0.2169, "step": 26500 }, { "epoch": 0.55, "learning_rate": 8.15683302955211e-05, "loss": 0.2157, "step": 27000 }, { "epoch": 0.56, "learning_rate": 8.12270030787715e-05, "loss": 0.214, "step": 27500 }, { "epoch": 0.57, "learning_rate": 8.088567586202189e-05, "loss": 0.2129, "step": 28000 }, { "epoch": 0.58, "learning_rate": 8.054434864527229e-05, "loss": 0.2117, "step": 28500 }, { "epoch": 0.59, "learning_rate": 8.020302142852267e-05, "loss": 0.2111, "step": 29000 }, { "epoch": 0.6, "learning_rate": 7.986169421177307e-05, "loss": 0.2091, "step": 29500 }, { "epoch": 0.61, "learning_rate": 7.952036699502345e-05, "loss": 0.2084, "step": 30000 }, { "epoch": 0.62, "learning_rate": 7.917903977827385e-05, "loss": 0.2071, "step": 30500 }, { "epoch": 0.63, "learning_rate": 7.883771256152423e-05, "loss": 0.2057, "step": 31000 }, { "epoch": 0.65, "learning_rate": 7.849638534477462e-05, "loss": 0.2051, "step": 31500 }, { "epoch": 0.66, "learning_rate": 7.815505812802502e-05, "loss": 0.2036, "step": 32000 }, { "epoch": 0.67, "learning_rate": 7.78137309112754e-05, "loss": 0.2027, "step": 32500 }, { "epoch": 0.68, "learning_rate": 7.74724036945258e-05, "loss": 0.2017, "step": 33000 }, { "epoch": 0.69, "learning_rate": 7.713107647777618e-05, "loss": 0.2009, "step": 33500 }, { "epoch": 0.7, "learning_rate": 7.678974926102658e-05, "loss": 0.2003, "step": 34000 }, { "epoch": 0.71, "learning_rate": 7.644842204427697e-05, "loss": 0.1991, "step": 34500 }, { "epoch": 0.72, "learning_rate": 7.610709482752736e-05, "loss": 0.198, "step": 35000 }, { "epoch": 0.73, "learning_rate": 7.576576761077775e-05, "loss": 0.1974, "step": 35500 }, { "epoch": 0.74, "learning_rate": 7.542444039402814e-05, "loss": 0.1964, "step": 36000 }, { "epoch": 0.75, "learning_rate": 7.508311317727853e-05, "loss": 0.1958, "step": 36500 }, { "epoch": 0.76, "learning_rate": 7.474178596052893e-05, "loss": 0.1949, "step": 37000 }, { "epoch": 0.77, "learning_rate": 7.440045874377931e-05, "loss": 0.1937, "step": 37500 }, { "epoch": 0.78, "learning_rate": 7.40591315270297e-05, "loss": 0.1931, "step": 38000 }, { "epoch": 0.79, "learning_rate": 7.37178043102801e-05, "loss": 0.1926, "step": 38500 }, { "epoch": 0.8, "learning_rate": 7.337647709353048e-05, "loss": 0.1919, "step": 39000 }, { "epoch": 0.81, "learning_rate": 7.303514987678088e-05, "loss": 0.1909, "step": 39500 }, { "epoch": 0.82, "learning_rate": 7.269382266003127e-05, "loss": 0.1902, "step": 40000 } ], "logging_steps": 500, "max_steps": 146487, "num_train_epochs": 3, "save_steps": 10000, "total_flos": 2.527620759552e+17, "trial_name": null, "trial_params": null }