|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7621196070511306, |
|
"global_step": 12500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9951222486433756e-05, |
|
"loss": 0.0754, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.990244497286751e-05, |
|
"loss": 0.0747, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9853667459301263e-05, |
|
"loss": 0.0737, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9804889945735017e-05, |
|
"loss": 0.073, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.975611243216877e-05, |
|
"loss": 0.0723, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9707334918602525e-05, |
|
"loss": 0.072, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.965855740503628e-05, |
|
"loss": 0.0709, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9609779891470033e-05, |
|
"loss": 0.0703, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9561002377903787e-05, |
|
"loss": 0.0708, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.951222486433754e-05, |
|
"loss": 0.0711, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9463447350771295e-05, |
|
"loss": 0.0699, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.941466983720505e-05, |
|
"loss": 0.0701, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 0.055753033608198166, |
|
"eval_runtime": 1008.1775, |
|
"eval_samples_per_second": 82.199, |
|
"eval_steps_per_second": 13.7, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9365892323638806e-05, |
|
"loss": 0.071, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.931711481007256e-05, |
|
"loss": 0.0692, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9268337296506314e-05, |
|
"loss": 0.0687, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9219559782940067e-05, |
|
"loss": 0.0689, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.917078226937382e-05, |
|
"loss": 0.0681, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9122004755807575e-05, |
|
"loss": 0.0678, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.907322724224133e-05, |
|
"loss": 0.0666, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9024449728675083e-05, |
|
"loss": 0.0676, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.8975672215108837e-05, |
|
"loss": 0.0662, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.892689470154259e-05, |
|
"loss": 0.0676, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8878117187976345e-05, |
|
"loss": 0.0663, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.88293396744101e-05, |
|
"loss": 0.0661, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.8780562160843853e-05, |
|
"loss": 0.0661, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 0.05344025790691376, |
|
"eval_runtime": 1009.8843, |
|
"eval_samples_per_second": 82.06, |
|
"eval_steps_per_second": 13.677, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.8731784647277606e-05, |
|
"loss": 0.0674, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.868300713371136e-05, |
|
"loss": 0.066, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.8634229620145114e-05, |
|
"loss": 0.0642, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.8585452106578868e-05, |
|
"loss": 0.0655, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8536674593012622e-05, |
|
"loss": 0.0654, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8487897079446376e-05, |
|
"loss": 0.0641, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.843911956588013e-05, |
|
"loss": 0.0636, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8390342052313884e-05, |
|
"loss": 0.0658, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8341564538747638e-05, |
|
"loss": 0.0639, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8292787025181395e-05, |
|
"loss": 0.0642, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8244009511615145e-05, |
|
"loss": 0.0648, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8195231998048903e-05, |
|
"loss": 0.0645, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 0.05153830349445343, |
|
"eval_runtime": 1007.6883, |
|
"eval_samples_per_second": 82.239, |
|
"eval_steps_per_second": 13.707, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.8146454484482653e-05, |
|
"loss": 0.0639, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.809767697091641e-05, |
|
"loss": 0.0628, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8048899457350164e-05, |
|
"loss": 0.0647, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.800012194378392e-05, |
|
"loss": 0.0629, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.7951344430217672e-05, |
|
"loss": 0.0638, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.7902566916651426e-05, |
|
"loss": 0.0635, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.785378940308518e-05, |
|
"loss": 0.0628, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7805011889518934e-05, |
|
"loss": 0.063, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.7756234375952688e-05, |
|
"loss": 0.0636, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.7707456862386442e-05, |
|
"loss": 0.0633, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.7658679348820196e-05, |
|
"loss": 0.0624, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.760990183525395e-05, |
|
"loss": 0.0615, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.7561124321687703e-05, |
|
"loss": 0.062, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 0.05109347030520439, |
|
"eval_runtime": 1005.611, |
|
"eval_samples_per_second": 82.409, |
|
"eval_steps_per_second": 13.735, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.7512346808121457e-05, |
|
"loss": 0.0616, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.746356929455521e-05, |
|
"loss": 0.0613, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7414791780988965e-05, |
|
"loss": 0.0616, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.736601426742272e-05, |
|
"loss": 0.0623, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.7317236753856473e-05, |
|
"loss": 0.062, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7268459240290227e-05, |
|
"loss": 0.0632, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7219681726723984e-05, |
|
"loss": 0.0623, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7170904213157735e-05, |
|
"loss": 0.0608, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7122126699591492e-05, |
|
"loss": 0.061, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7073349186025242e-05, |
|
"loss": 0.061, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7024571672459e-05, |
|
"loss": 0.0605, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.697579415889275e-05, |
|
"loss": 0.0607, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 0.04968786612153053, |
|
"eval_runtime": 1006.7134, |
|
"eval_samples_per_second": 82.318, |
|
"eval_steps_per_second": 13.72, |
|
"step": 12500 |
|
} |
|
], |
|
"max_steps": 82005, |
|
"num_train_epochs": 5, |
|
"total_flos": 6.34499629056e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|