{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "global_step": 34848,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 4.92825987144169e-05,
      "loss": 3.4212,
      "step": 500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.85651974288338e-05,
      "loss": 3.323,
      "step": 1000
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.784779614325069e-05,
      "loss": 3.2791,
      "step": 1500
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.7130394857667584e-05,
      "loss": 3.2457,
      "step": 2000
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.6412993572084486e-05,
      "loss": 3.2237,
      "step": 2500
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.569559228650138e-05,
      "loss": 3.1983,
      "step": 3000
    },
    {
      "epoch": 0.6,
      "learning_rate": 4.4978191000918276e-05,
      "loss": 3.195,
      "step": 3500
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.426078971533517e-05,
      "loss": 3.1841,
      "step": 4000
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.354338842975207e-05,
      "loss": 3.1802,
      "step": 4500
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.282598714416897e-05,
      "loss": 3.1674,
      "step": 5000
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.210858585858586e-05,
      "loss": 3.1341,
      "step": 5500
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.139118457300275e-05,
      "loss": 3.101,
      "step": 6000
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.0673783287419655e-05,
      "loss": 3.0298,
      "step": 6500
    },
    {
      "epoch": 1.21,
      "learning_rate": 3.995638200183655e-05,
      "loss": 3.0358,
      "step": 7000
    },
    {
      "epoch": 1.29,
      "learning_rate": 3.9238980716253446e-05,
      "loss": 3.0531,
      "step": 7500
    },
    {
      "epoch": 1.38,
      "learning_rate": 3.852157943067034e-05,
      "loss": 3.0409,
      "step": 8000
    },
    {
      "epoch": 1.46,
      "learning_rate": 3.7804178145087236e-05,
      "loss": 3.0291,
      "step": 8500
    },
    {
      "epoch": 1.55,
      "learning_rate": 3.708677685950414e-05,
      "loss": 3.0346,
      "step": 9000
    },
    {
      "epoch": 1.64,
      "learning_rate": 3.6369375573921034e-05,
      "loss": 3.0356,
      "step": 9500
    },
    {
      "epoch": 1.72,
      "learning_rate": 3.565197428833792e-05,
      "loss": 3.0163,
      "step": 10000
    },
    {
      "epoch": 1.81,
      "learning_rate": 3.4934573002754824e-05,
      "loss": 3.026,
      "step": 10500
    },
    {
      "epoch": 1.89,
      "learning_rate": 3.421717171717172e-05,
      "loss": 3.0223,
      "step": 11000
    },
    {
      "epoch": 1.98,
      "learning_rate": 3.3499770431588615e-05,
      "loss": 3.0286,
      "step": 11500
    },
    {
      "epoch": 2.07,
      "learning_rate": 3.278236914600551e-05,
      "loss": 2.9479,
      "step": 12000
    },
    {
      "epoch": 2.15,
      "learning_rate": 3.2064967860422406e-05,
      "loss": 2.9327,
      "step": 12500
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.134756657483931e-05,
      "loss": 2.943,
      "step": 13000
    },
    {
      "epoch": 2.32,
      "learning_rate": 3.06301652892562e-05,
      "loss": 2.9277,
      "step": 13500
    },
    {
      "epoch": 2.41,
      "learning_rate": 2.9912764003673095e-05,
      "loss": 2.9443,
      "step": 14000
    },
    {
      "epoch": 2.5,
      "learning_rate": 2.919536271808999e-05,
      "loss": 2.9452,
      "step": 14500
    },
    {
      "epoch": 2.58,
      "learning_rate": 2.847796143250689e-05,
      "loss": 2.9482,
      "step": 15000
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.7760560146923788e-05,
      "loss": 2.9429,
      "step": 15500
    },
    {
      "epoch": 2.75,
      "learning_rate": 2.704315886134068e-05,
      "loss": 2.9463,
      "step": 16000
    },
    {
      "epoch": 2.84,
      "learning_rate": 2.6325757575757575e-05,
      "loss": 2.9292,
      "step": 16500
    },
    {
      "epoch": 2.93,
      "learning_rate": 2.5608356290174473e-05,
      "loss": 2.9286,
      "step": 17000
    },
    {
      "epoch": 3.01,
      "learning_rate": 2.489095500459137e-05,
      "loss": 2.9312,
      "step": 17500
    },
    {
      "epoch": 3.1,
      "learning_rate": 2.4173553719008264e-05,
      "loss": 2.8712,
      "step": 18000
    },
    {
      "epoch": 3.19,
      "learning_rate": 2.3456152433425163e-05,
      "loss": 2.8583,
      "step": 18500
    },
    {
      "epoch": 3.27,
      "learning_rate": 2.2738751147842058e-05,
      "loss": 2.882,
      "step": 19000
    },
    {
      "epoch": 3.36,
      "learning_rate": 2.2021349862258957e-05,
      "loss": 2.857,
      "step": 19500
    },
    {
      "epoch": 3.44,
      "learning_rate": 2.130394857667585e-05,
      "loss": 2.8815,
      "step": 20000
    },
    {
      "epoch": 3.53,
      "learning_rate": 2.0586547291092747e-05,
      "loss": 2.8775,
      "step": 20500
    },
    {
      "epoch": 3.62,
      "learning_rate": 1.9869146005509643e-05,
      "loss": 2.8606,
      "step": 21000
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.915174471992654e-05,
      "loss": 2.8845,
      "step": 21500
    },
    {
      "epoch": 3.79,
      "learning_rate": 1.8434343434343433e-05,
      "loss": 2.8692,
      "step": 22000
    },
    {
      "epoch": 3.87,
      "learning_rate": 1.7716942148760332e-05,
      "loss": 2.8829,
      "step": 22500
    },
    {
      "epoch": 3.96,
      "learning_rate": 1.6999540863177227e-05,
      "loss": 2.8782,
      "step": 23000
    },
    {
      "epoch": 4.05,
      "learning_rate": 1.6282139577594123e-05,
      "loss": 2.8401,
      "step": 23500
    },
    {
      "epoch": 4.13,
      "learning_rate": 1.5564738292011018e-05,
      "loss": 2.822,
      "step": 24000
    },
    {
      "epoch": 4.22,
      "learning_rate": 1.4847337006427917e-05,
      "loss": 2.8197,
      "step": 24500
    },
    {
      "epoch": 4.3,
      "learning_rate": 1.4129935720844812e-05,
      "loss": 2.8227,
      "step": 25000
    },
    {
      "epoch": 4.39,
      "learning_rate": 1.3412534435261709e-05,
      "loss": 2.8154,
      "step": 25500
    },
    {
      "epoch": 4.48,
      "learning_rate": 1.2695133149678604e-05,
      "loss": 2.8247,
      "step": 26000
    },
    {
      "epoch": 4.56,
      "learning_rate": 1.1977731864095501e-05,
      "loss": 2.8196,
      "step": 26500
    },
    {
      "epoch": 4.65,
      "learning_rate": 1.1260330578512397e-05,
      "loss": 2.8357,
      "step": 27000
    },
    {
      "epoch": 4.73,
      "learning_rate": 1.0542929292929294e-05,
      "loss": 2.8163,
      "step": 27500
    },
    {
      "epoch": 4.82,
      "learning_rate": 9.825528007346189e-06,
      "loss": 2.8381,
      "step": 28000
    },
    {
      "epoch": 4.91,
      "learning_rate": 9.108126721763086e-06,
      "loss": 2.8198,
      "step": 28500
    },
    {
      "epoch": 4.99,
      "learning_rate": 8.390725436179981e-06,
      "loss": 2.8302,
      "step": 29000
    },
    {
      "epoch": 5.08,
      "learning_rate": 7.673324150596878e-06,
      "loss": 2.7897,
      "step": 29500
    },
    {
      "epoch": 5.17,
      "learning_rate": 6.955922865013774e-06,
      "loss": 2.7858,
      "step": 30000
    },
    {
      "epoch": 5.25,
      "learning_rate": 6.238521579430671e-06,
      "loss": 2.7865,
      "step": 30500
    },
    {
      "epoch": 5.34,
      "learning_rate": 5.521120293847567e-06,
      "loss": 2.7889,
      "step": 31000
    },
    {
      "epoch": 5.42,
      "learning_rate": 4.803719008264463e-06,
      "loss": 2.7797,
      "step": 31500
    },
    {
      "epoch": 5.51,
      "learning_rate": 4.086317722681359e-06,
      "loss": 2.7869,
      "step": 32000
    },
    {
      "epoch": 5.6,
      "learning_rate": 3.3689164370982553e-06,
      "loss": 2.7922,
      "step": 32500
    },
    {
      "epoch": 5.68,
      "learning_rate": 2.651515151515152e-06,
      "loss": 2.8032,
      "step": 33000
    },
    {
      "epoch": 5.77,
      "learning_rate": 1.934113865932048e-06,
      "loss": 2.7963,
      "step": 33500
    },
    {
      "epoch": 5.85,
      "learning_rate": 1.216712580348944e-06,
      "loss": 2.7952,
      "step": 34000
    },
    {
      "epoch": 5.94,
      "learning_rate": 4.993112947658403e-07,
      "loss": 2.7968,
      "step": 34500
    },
    {
      "epoch": 6.0,
      "step": 34848,
      "total_flos": 1.8211009462272e+16,
      "train_loss": 2.9473999731030567,
      "train_runtime": 8951.9422,
      "train_samples_per_second": 3.893,
      "train_steps_per_second": 3.893
    }
  ],
  "max_steps": 34848,
  "num_train_epochs": 6,
  "total_flos": 1.8211009462272e+16,
  "trial_name": null,
  "trial_params": null
}