|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.5242392141022614, |
|
"global_step": 25000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9951222486433756e-05, |
|
"loss": 0.0754, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.990244497286751e-05, |
|
"loss": 0.0747, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9853667459301263e-05, |
|
"loss": 0.0737, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9804889945735017e-05, |
|
"loss": 0.073, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.975611243216877e-05, |
|
"loss": 0.0723, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9707334918602525e-05, |
|
"loss": 0.072, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.965855740503628e-05, |
|
"loss": 0.0709, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9609779891470033e-05, |
|
"loss": 0.0703, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9561002377903787e-05, |
|
"loss": 0.0708, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.951222486433754e-05, |
|
"loss": 0.0711, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9463447350771295e-05, |
|
"loss": 0.0699, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.941466983720505e-05, |
|
"loss": 0.0701, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 0.055753033608198166, |
|
"eval_runtime": 1008.1775, |
|
"eval_samples_per_second": 82.199, |
|
"eval_steps_per_second": 13.7, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9365892323638806e-05, |
|
"loss": 0.071, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.931711481007256e-05, |
|
"loss": 0.0692, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9268337296506314e-05, |
|
"loss": 0.0687, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9219559782940067e-05, |
|
"loss": 0.0689, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.917078226937382e-05, |
|
"loss": 0.0681, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9122004755807575e-05, |
|
"loss": 0.0678, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.907322724224133e-05, |
|
"loss": 0.0666, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9024449728675083e-05, |
|
"loss": 0.0676, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.8975672215108837e-05, |
|
"loss": 0.0662, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.892689470154259e-05, |
|
"loss": 0.0676, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.8878117187976345e-05, |
|
"loss": 0.0663, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.88293396744101e-05, |
|
"loss": 0.0661, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.8780562160843853e-05, |
|
"loss": 0.0661, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 0.05344025790691376, |
|
"eval_runtime": 1009.8843, |
|
"eval_samples_per_second": 82.06, |
|
"eval_steps_per_second": 13.677, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.8731784647277606e-05, |
|
"loss": 0.0674, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.868300713371136e-05, |
|
"loss": 0.066, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.8634229620145114e-05, |
|
"loss": 0.0642, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.8585452106578868e-05, |
|
"loss": 0.0655, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8536674593012622e-05, |
|
"loss": 0.0654, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8487897079446376e-05, |
|
"loss": 0.0641, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.843911956588013e-05, |
|
"loss": 0.0636, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8390342052313884e-05, |
|
"loss": 0.0658, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8341564538747638e-05, |
|
"loss": 0.0639, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8292787025181395e-05, |
|
"loss": 0.0642, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8244009511615145e-05, |
|
"loss": 0.0648, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8195231998048903e-05, |
|
"loss": 0.0645, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 0.05153830349445343, |
|
"eval_runtime": 1007.6883, |
|
"eval_samples_per_second": 82.239, |
|
"eval_steps_per_second": 13.707, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.8146454484482653e-05, |
|
"loss": 0.0639, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.809767697091641e-05, |
|
"loss": 0.0628, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8048899457350164e-05, |
|
"loss": 0.0647, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.800012194378392e-05, |
|
"loss": 0.0629, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.7951344430217672e-05, |
|
"loss": 0.0638, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.7902566916651426e-05, |
|
"loss": 0.0635, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.785378940308518e-05, |
|
"loss": 0.0628, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7805011889518934e-05, |
|
"loss": 0.063, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.7756234375952688e-05, |
|
"loss": 0.0636, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.7707456862386442e-05, |
|
"loss": 0.0633, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.7658679348820196e-05, |
|
"loss": 0.0624, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.760990183525395e-05, |
|
"loss": 0.0615, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.7561124321687703e-05, |
|
"loss": 0.062, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 0.05109347030520439, |
|
"eval_runtime": 1005.611, |
|
"eval_samples_per_second": 82.409, |
|
"eval_steps_per_second": 13.735, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.7512346808121457e-05, |
|
"loss": 0.0616, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.746356929455521e-05, |
|
"loss": 0.0613, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7414791780988965e-05, |
|
"loss": 0.0616, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.736601426742272e-05, |
|
"loss": 0.0623, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.7317236753856473e-05, |
|
"loss": 0.062, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7268459240290227e-05, |
|
"loss": 0.0632, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.7219681726723984e-05, |
|
"loss": 0.0623, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7170904213157735e-05, |
|
"loss": 0.0608, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.7122126699591492e-05, |
|
"loss": 0.061, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7073349186025242e-05, |
|
"loss": 0.061, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7024571672459e-05, |
|
"loss": 0.0605, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.697579415889275e-05, |
|
"loss": 0.0607, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 0.04968786612153053, |
|
"eval_runtime": 1006.7134, |
|
"eval_samples_per_second": 82.318, |
|
"eval_steps_per_second": 13.72, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6927016645326508e-05, |
|
"loss": 0.0616, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6878239131760258e-05, |
|
"loss": 0.0596, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.6829461618194015e-05, |
|
"loss": 0.0615, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.6780684104627766e-05, |
|
"loss": 0.0599, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6731906591061523e-05, |
|
"loss": 0.0598, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.6683129077495277e-05, |
|
"loss": 0.0606, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.663435156392903e-05, |
|
"loss": 0.0596, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6585574050362785e-05, |
|
"loss": 0.0585, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.653679653679654e-05, |
|
"loss": 0.0597, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.6488019023230293e-05, |
|
"loss": 0.0606, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6439241509664047e-05, |
|
"loss": 0.0591, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.63904639960978e-05, |
|
"loss": 0.06, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.6341686482531554e-05, |
|
"loss": 0.0586, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 0.04891153797507286, |
|
"eval_runtime": 1009.9343, |
|
"eval_samples_per_second": 82.056, |
|
"eval_steps_per_second": 13.676, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6292908968965308e-05, |
|
"loss": 0.06, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.6244131455399062e-05, |
|
"loss": 0.0587, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6195353941832816e-05, |
|
"loss": 0.0584, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.614657642826657e-05, |
|
"loss": 0.0602, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.6097798914700324e-05, |
|
"loss": 0.0587, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.604902140113408e-05, |
|
"loss": 0.0578, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.600024388756783e-05, |
|
"loss": 0.0578, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.595146637400159e-05, |
|
"loss": 0.0598, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.590268886043534e-05, |
|
"loss": 0.0583, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.5853911346869097e-05, |
|
"loss": 0.0577, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.5805133833302847e-05, |
|
"loss": 0.0571, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.5756356319736604e-05, |
|
"loss": 0.0583, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.048093125224113464, |
|
"eval_runtime": 1004.9925, |
|
"eval_samples_per_second": 82.459, |
|
"eval_steps_per_second": 13.743, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.5707578806170355e-05, |
|
"loss": 0.0588, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.5658801292604112e-05, |
|
"loss": 0.0578, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.5610023779037863e-05, |
|
"loss": 0.0584, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.556124626547162e-05, |
|
"loss": 0.0569, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.551246875190537e-05, |
|
"loss": 0.0575, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.5463691238339128e-05, |
|
"loss": 0.058, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.5414913724772882e-05, |
|
"loss": 0.0562, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.5366136211206636e-05, |
|
"loss": 0.0586, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.531735869764039e-05, |
|
"loss": 0.0576, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.5268581184074143e-05, |
|
"loss": 0.0578, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.5219803670507897e-05, |
|
"loss": 0.0571, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.5171026156941651e-05, |
|
"loss": 0.0564, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.5122248643375405e-05, |
|
"loss": 0.0578, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 0.047511521726846695, |
|
"eval_runtime": 1012.2966, |
|
"eval_samples_per_second": 81.864, |
|
"eval_steps_per_second": 13.644, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.5073471129809159e-05, |
|
"loss": 0.0573, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.5024693616242913e-05, |
|
"loss": 0.0577, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.4975916102676667e-05, |
|
"loss": 0.0571, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.492713858911042e-05, |
|
"loss": 0.0571, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.4878361075544175e-05, |
|
"loss": 0.0573, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.4829583561977929e-05, |
|
"loss": 0.0571, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.4780806048411684e-05, |
|
"loss": 0.0574, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.4732028534845438e-05, |
|
"loss": 0.0565, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.4683251021279192e-05, |
|
"loss": 0.0564, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.4634473507712946e-05, |
|
"loss": 0.0563, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.45856959941467e-05, |
|
"loss": 0.0564, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.4536918480580454e-05, |
|
"loss": 0.0557, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.04695958271622658, |
|
"eval_runtime": 1007.3223, |
|
"eval_samples_per_second": 82.269, |
|
"eval_steps_per_second": 13.712, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.4488140967014208e-05, |
|
"loss": 0.0563, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.4439363453447961e-05, |
|
"loss": 0.0555, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.4390585939881715e-05, |
|
"loss": 0.0567, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.434180842631547e-05, |
|
"loss": 0.0563, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.4293030912749223e-05, |
|
"loss": 0.0568, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.4244253399182977e-05, |
|
"loss": 0.0552, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.4195475885616733e-05, |
|
"loss": 0.0564, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.4146698372050487e-05, |
|
"loss": 0.0558, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.409792085848424e-05, |
|
"loss": 0.0553, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.4049143344917994e-05, |
|
"loss": 0.0545, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.4000365831351748e-05, |
|
"loss": 0.0551, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.3951588317785502e-05, |
|
"loss": 0.0556, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 1.3902810804219256e-05, |
|
"loss": 0.0543, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 0.04659077152609825, |
|
"eval_runtime": 1008.4315, |
|
"eval_samples_per_second": 82.178, |
|
"eval_steps_per_second": 13.697, |
|
"step": 25000 |
|
} |
|
], |
|
"max_steps": 82005, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.2689982006126182e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|