{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 40080, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 9.875249500998003e-05, "loss": 7.8671, "step": 500 }, { "epoch": 0.5, "learning_rate": 9.750499001996009e-05, "loss": 7.0941, "step": 1000 }, { "epoch": 0.75, "learning_rate": 9.625748502994012e-05, "loss": 6.7944, "step": 1500 }, { "epoch": 1.0, "learning_rate": 9.500998003992016e-05, "loss": 6.526, "step": 2000 }, { "epoch": 1.25, "learning_rate": 9.37624750499002e-05, "loss": 6.2583, "step": 2500 }, { "epoch": 1.5, "learning_rate": 9.251497005988024e-05, "loss": 6.0239, "step": 3000 }, { "epoch": 1.75, "learning_rate": 9.126746506986029e-05, "loss": 5.7969, "step": 3500 }, { "epoch": 2.0, "learning_rate": 9.001996007984033e-05, "loss": 5.5919, "step": 4000 }, { "epoch": 2.25, "learning_rate": 8.877245508982036e-05, "loss": 5.3796, "step": 4500 }, { "epoch": 2.5, "learning_rate": 8.752495009980041e-05, "loss": 5.1887, "step": 5000 }, { "epoch": 2.74, "learning_rate": 8.627744510978044e-05, "loss": 5.0026, "step": 5500 }, { "epoch": 2.99, "learning_rate": 8.502994011976048e-05, "loss": 4.843, "step": 6000 }, { "epoch": 3.24, "learning_rate": 8.378243512974053e-05, "loss": 4.6736, "step": 6500 }, { "epoch": 3.49, "learning_rate": 8.253493013972057e-05, "loss": 4.553, "step": 7000 }, { "epoch": 3.74, "learning_rate": 8.12874251497006e-05, "loss": 4.4224, "step": 7500 }, { "epoch": 3.99, "learning_rate": 8.003992015968065e-05, "loss": 4.3097, "step": 8000 }, { "epoch": 4.24, "learning_rate": 7.879241516966068e-05, "loss": 4.1975, "step": 8500 }, { "epoch": 4.49, "learning_rate": 7.754491017964072e-05, "loss": 4.0994, "step": 9000 }, { "epoch": 4.74, "learning_rate": 7.629740518962077e-05, "loss": 4.0258, "step": 9500 }, { "epoch": 4.99, "learning_rate": 7.50499001996008e-05, "loss": 3.9443, "step": 10000 }, { "epoch": 5.24, "learning_rate": 7.380239520958085e-05, "loss": 3.8616, "step": 10500 }, { "epoch": 5.49, "learning_rate": 7.255489021956088e-05, "loss": 3.8086, "step": 11000 }, { "epoch": 5.74, "learning_rate": 7.130738522954092e-05, "loss": 3.7464, "step": 11500 }, { "epoch": 5.99, "learning_rate": 7.005988023952096e-05, "loss": 3.6925, "step": 12000 }, { "epoch": 6.24, "learning_rate": 6.8812375249501e-05, "loss": 3.6216, "step": 12500 }, { "epoch": 6.49, "learning_rate": 6.756487025948103e-05, "loss": 3.585, "step": 13000 }, { "epoch": 6.74, "learning_rate": 6.631736526946109e-05, "loss": 3.5383, "step": 13500 }, { "epoch": 6.99, "learning_rate": 6.506986027944112e-05, "loss": 3.4925, "step": 14000 }, { "epoch": 7.24, "learning_rate": 6.382235528942116e-05, "loss": 3.4524, "step": 14500 }, { "epoch": 7.49, "learning_rate": 6.25748502994012e-05, "loss": 3.3985, "step": 15000 }, { "epoch": 7.73, "learning_rate": 6.132734530938125e-05, "loss": 3.3684, "step": 15500 }, { "epoch": 7.98, "learning_rate": 6.007984031936128e-05, "loss": 3.3273, "step": 16000 }, { "epoch": 8.23, "learning_rate": 5.8832335329341316e-05, "loss": 3.2898, "step": 16500 }, { "epoch": 8.48, "learning_rate": 5.758483033932136e-05, "loss": 3.2554, "step": 17000 }, { "epoch": 8.73, "learning_rate": 5.6337325349301393e-05, "loss": 3.2215, "step": 17500 }, { "epoch": 8.98, "learning_rate": 5.508982035928144e-05, "loss": 3.2006, "step": 18000 }, { "epoch": 9.23, "learning_rate": 5.384231536926148e-05, "loss": 3.1627, "step": 18500 }, { "epoch": 9.48, "learning_rate": 5.259481037924152e-05, "loss": 3.1391, "step": 19000 }, { "epoch": 9.73, "learning_rate": 5.1347305389221555e-05, "loss": 3.1167, "step": 19500 }, { "epoch": 9.98, "learning_rate": 5.0099800399201604e-05, "loss": 3.108, "step": 20000 }, { "epoch": 10.23, "learning_rate": 4.885229540918164e-05, "loss": 3.0782, "step": 20500 }, { "epoch": 10.48, "learning_rate": 4.7604790419161675e-05, "loss": 3.054, "step": 21000 }, { "epoch": 10.73, "learning_rate": 4.635728542914172e-05, "loss": 3.0302, "step": 21500 }, { "epoch": 10.98, "learning_rate": 4.510978043912176e-05, "loss": 3.0129, "step": 22000 }, { "epoch": 11.23, "learning_rate": 4.3862275449101795e-05, "loss": 2.9978, "step": 22500 }, { "epoch": 11.48, "learning_rate": 4.261477045908184e-05, "loss": 2.9658, "step": 23000 }, { "epoch": 11.73, "learning_rate": 4.136726546906188e-05, "loss": 2.9536, "step": 23500 }, { "epoch": 11.98, "learning_rate": 4.0119760479041915e-05, "loss": 2.937, "step": 24000 }, { "epoch": 12.23, "learning_rate": 3.887225548902196e-05, "loss": 2.9144, "step": 24500 }, { "epoch": 12.48, "learning_rate": 3.7624750499002e-05, "loss": 2.9045, "step": 25000 }, { "epoch": 12.72, "learning_rate": 3.637724550898204e-05, "loss": 2.8814, "step": 25500 }, { "epoch": 12.97, "learning_rate": 3.512974051896208e-05, "loss": 2.8749, "step": 26000 }, { "epoch": 13.22, "learning_rate": 3.388223552894212e-05, "loss": 2.8333, "step": 26500 }, { "epoch": 13.47, "learning_rate": 3.263473053892216e-05, "loss": 2.8402, "step": 27000 }, { "epoch": 13.72, "learning_rate": 3.13872255489022e-05, "loss": 2.8363, "step": 27500 }, { "epoch": 13.97, "learning_rate": 3.013972055888224e-05, "loss": 2.8235, "step": 28000 }, { "epoch": 14.22, "learning_rate": 2.8892215568862274e-05, "loss": 2.8038, "step": 28500 }, { "epoch": 14.47, "learning_rate": 2.7644710578842313e-05, "loss": 2.7948, "step": 29000 }, { "epoch": 14.72, "learning_rate": 2.6397205588822355e-05, "loss": 2.7762, "step": 29500 }, { "epoch": 14.97, "learning_rate": 2.5149700598802394e-05, "loss": 2.7711, "step": 30000 }, { "epoch": 15.22, "learning_rate": 2.3902195608782436e-05, "loss": 2.7616, "step": 30500 }, { "epoch": 15.47, "learning_rate": 2.2654690618762475e-05, "loss": 2.7325, "step": 31000 }, { "epoch": 15.72, "learning_rate": 2.1407185628742514e-05, "loss": 2.7418, "step": 31500 }, { "epoch": 15.97, "learning_rate": 2.0159680638722556e-05, "loss": 2.7358, "step": 32000 }, { "epoch": 16.22, "learning_rate": 1.8912175648702595e-05, "loss": 2.7215, "step": 32500 }, { "epoch": 16.47, "learning_rate": 1.7664670658682637e-05, "loss": 2.7199, "step": 33000 }, { "epoch": 16.72, "learning_rate": 1.6417165668662676e-05, "loss": 2.7016, "step": 33500 }, { "epoch": 16.97, "learning_rate": 1.5169660678642716e-05, "loss": 2.6966, "step": 34000 }, { "epoch": 17.22, "learning_rate": 1.3922155688622754e-05, "loss": 2.6957, "step": 34500 }, { "epoch": 17.47, "learning_rate": 1.2674650698602794e-05, "loss": 2.7005, "step": 35000 }, { "epoch": 17.71, "learning_rate": 1.1427145708582835e-05, "loss": 2.6819, "step": 35500 }, { "epoch": 17.96, "learning_rate": 1.0179640718562875e-05, "loss": 2.6823, "step": 36000 }, { "epoch": 18.21, "learning_rate": 8.932135728542916e-06, "loss": 2.6879, "step": 36500 }, { "epoch": 18.46, "learning_rate": 7.684630738522954e-06, "loss": 2.6588, "step": 37000 }, { "epoch": 18.71, "learning_rate": 6.437125748502994e-06, "loss": 2.6521, "step": 37500 }, { "epoch": 18.96, "learning_rate": 5.189620758483034e-06, "loss": 2.6667, "step": 38000 }, { "epoch": 19.21, "learning_rate": 3.942115768463074e-06, "loss": 2.6727, "step": 38500 }, { "epoch": 19.46, "learning_rate": 2.6946107784431138e-06, "loss": 2.6702, "step": 39000 }, { "epoch": 19.71, "learning_rate": 1.4471057884231539e-06, "loss": 2.6484, "step": 39500 }, { "epoch": 19.96, "learning_rate": 1.996007984031936e-07, "loss": 2.6556, "step": 40000 }, { "epoch": 20.0, "step": 40080, "total_flos": 2362586579435520.0, "train_loss": 3.5848853368245197, "train_runtime": 32321.6149, "train_samples_per_second": 158.707, "train_steps_per_second": 1.24 } ], "max_steps": 40080, "num_train_epochs": 20, "total_flos": 2362586579435520.0, "trial_name": null, "trial_params": null }