{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 44157, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.9774441198450984e-05, "loss": 2.4542, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.9547976538261206e-05, "loss": 2.4106, "step": 1000 }, { "epoch": 0.1, "learning_rate": 1.9321511878071427e-05, "loss": 2.4207, "step": 1500 }, { "epoch": 0.14, "learning_rate": 1.909595307652241e-05, "loss": 2.3664, "step": 2000 }, { "epoch": 0.17, "learning_rate": 1.8869488416332635e-05, "loss": 2.3553, "step": 2500 }, { "epoch": 0.2, "learning_rate": 1.8643023756142857e-05, "loss": 2.3541, "step": 3000 }, { "epoch": 0.24, "learning_rate": 1.841655909595308e-05, "loss": 2.3494, "step": 3500 }, { "epoch": 0.27, "learning_rate": 1.81900944357633e-05, "loss": 2.3592, "step": 4000 }, { "epoch": 0.31, "learning_rate": 1.7963629775573522e-05, "loss": 2.3452, "step": 4500 }, { "epoch": 0.34, "learning_rate": 1.7737165115383744e-05, "loss": 2.32, "step": 5000 }, { "epoch": 0.37, "learning_rate": 1.7510700455193966e-05, "loss": 2.2992, "step": 5500 }, { "epoch": 0.41, "learning_rate": 1.728423579500419e-05, "loss": 2.3109, "step": 6000 }, { "epoch": 0.44, "learning_rate": 1.7057771134814413e-05, "loss": 2.3008, "step": 6500 }, { "epoch": 0.48, "learning_rate": 1.6831306474624635e-05, "loss": 2.3091, "step": 7000 }, { "epoch": 0.51, "learning_rate": 1.6604841814434857e-05, "loss": 2.3252, "step": 7500 }, { "epoch": 0.54, "learning_rate": 1.6378377154245082e-05, "loss": 2.2909, "step": 8000 }, { "epoch": 0.58, "learning_rate": 1.6151912494055304e-05, "loss": 2.3015, "step": 8500 }, { "epoch": 0.61, "learning_rate": 1.5925900763185907e-05, "loss": 2.2997, "step": 9000 }, { "epoch": 0.65, "learning_rate": 1.569943610299613e-05, "loss": 2.283, "step": 9500 }, { "epoch": 0.68, "learning_rate": 1.547297144280635e-05, "loss": 2.2826, "step": 10000 }, { "epoch": 0.71, "learning_rate": 1.5246506782616575e-05, "loss": 2.2771, "step": 10500 }, { "epoch": 0.75, "learning_rate": 1.5020042122426796e-05, "loss": 2.2991, "step": 11000 }, { "epoch": 0.78, "learning_rate": 1.4794030391557398e-05, "loss": 2.3035, "step": 11500 }, { "epoch": 0.82, "learning_rate": 1.456756573136762e-05, "loss": 2.2715, "step": 12000 }, { "epoch": 0.85, "learning_rate": 1.4341101071177844e-05, "loss": 2.2736, "step": 12500 }, { "epoch": 0.88, "learning_rate": 1.4114636410988067e-05, "loss": 2.2883, "step": 13000 }, { "epoch": 0.92, "learning_rate": 1.388862468011867e-05, "loss": 2.2545, "step": 13500 }, { "epoch": 0.95, "learning_rate": 1.3662160019928891e-05, "loss": 2.2956, "step": 14000 }, { "epoch": 0.99, "learning_rate": 1.3435695359739113e-05, "loss": 2.2575, "step": 14500 }, { "epoch": 1.0, "eval_loss": 2.1173367500305176, "eval_runtime": 371.7904, "eval_samples_per_second": 158.353, "eval_steps_per_second": 9.898, "step": 14719 }, { "epoch": 1.02, "learning_rate": 1.3209230699549337e-05, "loss": 2.2485, "step": 15000 }, { "epoch": 1.05, "learning_rate": 1.298276603935956e-05, "loss": 2.2548, "step": 15500 }, { "epoch": 1.09, "learning_rate": 1.2756301379169782e-05, "loss": 2.242, "step": 16000 }, { "epoch": 1.12, "learning_rate": 1.2529836718980004e-05, "loss": 2.2797, "step": 16500 }, { "epoch": 1.15, "learning_rate": 1.2303824988110606e-05, "loss": 2.264, "step": 17000 }, { "epoch": 1.19, "learning_rate": 1.207781325724121e-05, "loss": 2.2747, "step": 17500 }, { "epoch": 1.22, "learning_rate": 1.1851348597051431e-05, "loss": 2.2776, "step": 18000 }, { "epoch": 1.26, "learning_rate": 1.1624883936861653e-05, "loss": 2.2696, "step": 18500 }, { "epoch": 1.29, "learning_rate": 1.1398419276671877e-05, "loss": 2.2436, "step": 19000 }, { "epoch": 1.32, "learning_rate": 1.1171954616482099e-05, "loss": 2.2563, "step": 19500 }, { "epoch": 1.36, "learning_rate": 1.094548995629232e-05, "loss": 2.266, "step": 20000 }, { "epoch": 1.39, "learning_rate": 1.0719478225422924e-05, "loss": 2.2696, "step": 20500 }, { "epoch": 1.43, "learning_rate": 1.0493013565233146e-05, "loss": 2.2423, "step": 21000 }, { "epoch": 1.46, "learning_rate": 1.026654890504337e-05, "loss": 2.2411, "step": 21500 }, { "epoch": 1.49, "learning_rate": 1.0040084244853591e-05, "loss": 2.2564, "step": 22000 }, { "epoch": 1.53, "learning_rate": 9.813619584663813e-06, "loss": 2.2632, "step": 22500 }, { "epoch": 1.56, "learning_rate": 9.587154924474037e-06, "loss": 2.2656, "step": 23000 }, { "epoch": 1.6, "learning_rate": 9.360690264284259e-06, "loss": 2.2667, "step": 23500 }, { "epoch": 1.63, "learning_rate": 9.134225604094482e-06, "loss": 2.2622, "step": 24000 }, { "epoch": 1.66, "learning_rate": 8.907760943904704e-06, "loss": 2.2398, "step": 24500 }, { "epoch": 1.7, "learning_rate": 8.681296283714928e-06, "loss": 2.2477, "step": 25000 }, { "epoch": 1.73, "learning_rate": 8.45528455284553e-06, "loss": 2.2248, "step": 25500 }, { "epoch": 1.77, "learning_rate": 8.228819892655751e-06, "loss": 2.2286, "step": 26000 }, { "epoch": 1.8, "learning_rate": 8.002355232465975e-06, "loss": 2.2268, "step": 26500 }, { "epoch": 1.83, "learning_rate": 7.775890572276197e-06, "loss": 2.239, "step": 27000 }, { "epoch": 1.87, "learning_rate": 7.5498788414067996e-06, "loss": 2.2238, "step": 27500 }, { "epoch": 1.9, "learning_rate": 7.323414181217021e-06, "loss": 2.2225, "step": 28000 }, { "epoch": 1.94, "learning_rate": 7.096949521027244e-06, "loss": 2.2272, "step": 28500 }, { "epoch": 1.97, "learning_rate": 6.870484860837466e-06, "loss": 2.2663, "step": 29000 }, { "epoch": 2.0, "eval_loss": 2.092595100402832, "eval_runtime": 372.6322, "eval_samples_per_second": 157.995, "eval_steps_per_second": 9.876, "step": 29438 }, { "epoch": 2.0, "learning_rate": 6.644473129968069e-06, "loss": 2.2417, "step": 29500 }, { "epoch": 2.04, "learning_rate": 6.4180084697782915e-06, "loss": 2.2355, "step": 30000 }, { "epoch": 2.07, "learning_rate": 6.191543809588514e-06, "loss": 2.2285, "step": 30500 }, { "epoch": 2.11, "learning_rate": 5.965079149398737e-06, "loss": 2.2392, "step": 31000 }, { "epoch": 2.14, "learning_rate": 5.738614489208959e-06, "loss": 2.23, "step": 31500 }, { "epoch": 2.17, "learning_rate": 5.512149829019182e-06, "loss": 2.2399, "step": 32000 }, { "epoch": 2.21, "learning_rate": 5.286138098149784e-06, "loss": 2.2363, "step": 32500 }, { "epoch": 2.24, "learning_rate": 5.059673437960007e-06, "loss": 2.2392, "step": 33000 }, { "epoch": 2.28, "learning_rate": 4.833208777770229e-06, "loss": 2.2302, "step": 33500 }, { "epoch": 2.31, "learning_rate": 4.6067441175804515e-06, "loss": 2.2257, "step": 34000 }, { "epoch": 2.34, "learning_rate": 4.380732386711054e-06, "loss": 2.2527, "step": 34500 }, { "epoch": 2.38, "learning_rate": 4.154267726521277e-06, "loss": 2.2505, "step": 35000 }, { "epoch": 2.41, "learning_rate": 3.927803066331499e-06, "loss": 2.2063, "step": 35500 }, { "epoch": 2.45, "learning_rate": 3.701338406141722e-06, "loss": 2.2317, "step": 36000 }, { "epoch": 2.48, "learning_rate": 3.4748737459519442e-06, "loss": 2.2432, "step": 36500 }, { "epoch": 2.51, "learning_rate": 3.248409085762167e-06, "loss": 2.2414, "step": 37000 }, { "epoch": 2.55, "learning_rate": 3.0219444255723896e-06, "loss": 2.2523, "step": 37500 }, { "epoch": 2.58, "learning_rate": 2.795479765382612e-06, "loss": 2.2264, "step": 38000 }, { "epoch": 2.62, "learning_rate": 2.5690151051928346e-06, "loss": 2.229, "step": 38500 }, { "epoch": 2.65, "learning_rate": 2.343003374323437e-06, "loss": 2.2231, "step": 39000 }, { "epoch": 2.68, "learning_rate": 2.1165387141336597e-06, "loss": 2.2375, "step": 39500 }, { "epoch": 2.72, "learning_rate": 1.8900740539438822e-06, "loss": 2.235, "step": 40000 }, { "epoch": 2.75, "learning_rate": 1.6636093937541049e-06, "loss": 2.2294, "step": 40500 }, { "epoch": 2.79, "learning_rate": 1.4371447335643274e-06, "loss": 2.2244, "step": 41000 }, { "epoch": 2.82, "learning_rate": 1.2106800733745499e-06, "loss": 2.2468, "step": 41500 }, { "epoch": 2.85, "learning_rate": 9.842154131847726e-07, "loss": 2.2278, "step": 42000 }, { "epoch": 2.89, "learning_rate": 7.577507529949953e-07, "loss": 2.222, "step": 42500 }, { "epoch": 2.92, "learning_rate": 5.317390221255973e-07, "loss": 2.2208, "step": 43000 }, { "epoch": 2.96, "learning_rate": 3.057272912561995e-07, "loss": 2.211, "step": 43500 }, { "epoch": 2.99, "learning_rate": 7.926263106642208e-08, "loss": 2.2092, "step": 44000 }, { "epoch": 3.0, "eval_loss": 2.086573362350464, "eval_runtime": 371.7326, "eval_samples_per_second": 158.377, "eval_steps_per_second": 9.9, "step": 44157 } ], "max_steps": 44157, "num_train_epochs": 3, "total_flos": 6.916093810478285e+16, "trial_name": null, "trial_params": null }