{
    "best_metric": null,
    "best_model_checkpoint": null,
    "epoch": 2.9697377269670477,
    "eval_steps": 50,
    "global_step": 276,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {
            "epoch": 0.03,
            "learning_rate": 4.2857142857142855e-06,
            "loss": 0.01,
            "step": 3
        },
        {
            "epoch": 0.06,
            "learning_rate": 8.571428571428571e-06,
            "loss": 0.0093,
            "step": 6
        },
        {
            "epoch": 0.1,
            "learning_rate": 1.2857142857142859e-05,
            "loss": 0.0089,
            "step": 9
        },
        {
            "epoch": 0.13,
            "learning_rate": 1.7142857142857142e-05,
            "loss": 0.0091,
            "step": 12
        },
        {
            "epoch": 0.16,
            "learning_rate": 1.9923664122137406e-05,
            "loss": 0.0095,
            "step": 15
        },
        {
            "epoch": 0.19,
            "learning_rate": 1.969465648854962e-05,
            "loss": 0.0081,
            "step": 18
        },
        {
            "epoch": 0.23,
            "learning_rate": 1.9465648854961833e-05,
            "loss": 0.0073,
            "step": 21
        },
        {
            "epoch": 0.26,
            "learning_rate": 1.923664122137405e-05,
            "loss": 0.0082,
            "step": 24
        },
        {
            "epoch": 0.29,
            "learning_rate": 1.900763358778626e-05,
            "loss": 0.0063,
            "step": 27
        },
        {
            "epoch": 0.32,
            "learning_rate": 1.8778625954198473e-05,
            "loss": 0.0077,
            "step": 30
        },
        {
            "epoch": 0.36,
            "learning_rate": 1.854961832061069e-05,
            "loss": 0.006,
            "step": 33
        },
        {
            "epoch": 0.39,
            "learning_rate": 1.83206106870229e-05,
            "loss": 0.0064,
            "step": 36
        },
        {
            "epoch": 0.42,
            "learning_rate": 1.8091603053435117e-05,
            "loss": 0.0056,
            "step": 39
        },
        {
            "epoch": 0.45,
            "learning_rate": 1.786259541984733e-05,
            "loss": 0.0055,
            "step": 42
        },
        {
            "epoch": 0.48,
            "learning_rate": 1.7633587786259544e-05,
            "loss": 0.0052,
            "step": 45
        },
        {
            "epoch": 0.52,
            "learning_rate": 1.7404580152671757e-05,
            "loss": 0.0064,
            "step": 48
        },
        {
            "epoch": 0.54,
            "eval_loss": 0.000551095581613481,
            "eval_mse": 0.0005510955593948523,
            "eval_runtime": 8.5427,
            "eval_samples_per_second": 36.64,
            "eval_steps_per_second": 9.248,
            "step": 50
        },
        {
            "epoch": 0.55,
            "learning_rate": 1.717557251908397e-05,
            "loss": 0.0061,
            "step": 51
        },
        {
            "epoch": 0.58,
            "learning_rate": 1.6946564885496184e-05,
            "loss": 0.0052,
            "step": 54
        },
        {
            "epoch": 0.61,
            "learning_rate": 1.6717557251908398e-05,
            "loss": 0.0053,
            "step": 57
        },
        {
            "epoch": 0.65,
            "learning_rate": 1.648854961832061e-05,
            "loss": 0.0043,
            "step": 60
        },
        {
            "epoch": 0.68,
            "learning_rate": 1.6259541984732825e-05,
            "loss": 0.0035,
            "step": 63
        },
        {
            "epoch": 0.71,
            "learning_rate": 1.6030534351145038e-05,
            "loss": 0.0041,
            "step": 66
        },
        {
            "epoch": 0.74,
            "learning_rate": 1.5801526717557255e-05,
            "loss": 0.0046,
            "step": 69
        },
        {
            "epoch": 0.77,
            "learning_rate": 1.5572519083969465e-05,
            "loss": 0.0036,
            "step": 72
        },
        {
            "epoch": 0.81,
            "learning_rate": 1.5343511450381682e-05,
            "loss": 0.0042,
            "step": 75
        },
        {
            "epoch": 0.84,
            "learning_rate": 1.5114503816793895e-05,
            "loss": 0.0037,
            "step": 78
        },
        {
            "epoch": 0.87,
            "learning_rate": 1.4885496183206107e-05,
            "loss": 0.0039,
            "step": 81
        },
        {
            "epoch": 0.9,
            "learning_rate": 1.4656488549618322e-05,
            "loss": 0.0027,
            "step": 84
        },
        {
            "epoch": 0.94,
            "learning_rate": 1.4427480916030536e-05,
            "loss": 0.003,
            "step": 87
        },
        {
            "epoch": 0.97,
            "learning_rate": 1.4198473282442749e-05,
            "loss": 0.0031,
            "step": 90
        },
        {
            "epoch": 1.0,
            "learning_rate": 1.3969465648854963e-05,
            "loss": 0.0032,
            "step": 93
        },
        {
            "epoch": 1.03,
            "learning_rate": 1.3740458015267178e-05,
            "loss": 0.0031,
            "step": 96
        },
        {
            "epoch": 1.07,
            "learning_rate": 1.351145038167939e-05,
            "loss": 0.0043,
            "step": 99
        },
        {
            "epoch": 1.08,
            "eval_loss": 0.0004679011180996895,
            "eval_mse": 0.00046790109512442456,
            "eval_runtime": 8.5901,
            "eval_samples_per_second": 36.437,
            "eval_steps_per_second": 9.197,
            "step": 100
        },
        {
            "epoch": 1.1,
            "learning_rate": 1.3282442748091605e-05,
            "loss": 0.0032,
            "step": 102
        },
        {
            "epoch": 1.13,
            "learning_rate": 1.3053435114503818e-05,
            "loss": 0.0037,
            "step": 105
        },
        {
            "epoch": 1.16,
            "learning_rate": 1.2824427480916032e-05,
            "loss": 0.0026,
            "step": 108
        },
        {
            "epoch": 1.19,
            "learning_rate": 1.2595419847328245e-05,
            "loss": 0.003,
            "step": 111
        },
        {
            "epoch": 1.23,
            "learning_rate": 1.236641221374046e-05,
            "loss": 0.0027,
            "step": 114
        },
        {
            "epoch": 1.26,
            "learning_rate": 1.2137404580152672e-05,
            "loss": 0.0034,
            "step": 117
        },
        {
            "epoch": 1.29,
            "learning_rate": 1.1908396946564887e-05,
            "loss": 0.0031,
            "step": 120
        },
        {
            "epoch": 1.32,
            "learning_rate": 1.16793893129771e-05,
            "loss": 0.0038,
            "step": 123
        },
        {
            "epoch": 1.36,
            "learning_rate": 1.1450381679389312e-05,
            "loss": 0.003,
            "step": 126
        },
        {
            "epoch": 1.39,
            "learning_rate": 1.1221374045801527e-05,
            "loss": 0.003,
            "step": 129
        },
        {
            "epoch": 1.42,
            "learning_rate": 1.0992366412213743e-05,
            "loss": 0.0032,
            "step": 132
        },
        {
            "epoch": 1.45,
            "learning_rate": 1.0763358778625954e-05,
            "loss": 0.0028,
            "step": 135
        },
        {
            "epoch": 1.48,
            "learning_rate": 1.0534351145038168e-05,
            "loss": 0.003,
            "step": 138
        },
        {
            "epoch": 1.52,
            "learning_rate": 1.0305343511450383e-05,
            "loss": 0.0025,
            "step": 141
        },
        {
            "epoch": 1.55,
            "learning_rate": 1.0076335877862595e-05,
            "loss": 0.0033,
            "step": 144
        },
        {
            "epoch": 1.58,
            "learning_rate": 9.84732824427481e-06,
            "loss": 0.0031,
            "step": 147
        },
        {
            "epoch": 1.61,
            "learning_rate": 9.618320610687025e-06,
            "loss": 0.0028,
            "step": 150
        },
        {
            "epoch": 1.61,
            "eval_loss": 0.0006046506459824741,
            "eval_mse": 0.0006046506115009975,
            "eval_runtime": 8.4067,
            "eval_samples_per_second": 37.232,
            "eval_steps_per_second": 9.397,
            "step": 150
        },
        {
            "epoch": 1.65,
            "learning_rate": 9.389312977099237e-06,
            "loss": 0.0029,
            "step": 153
        },
        {
            "epoch": 1.68,
            "learning_rate": 9.16030534351145e-06,
            "loss": 0.0029,
            "step": 156
        },
        {
            "epoch": 1.71,
            "learning_rate": 8.931297709923665e-06,
            "loss": 0.0021,
            "step": 159
        },
        {
            "epoch": 1.74,
            "learning_rate": 8.702290076335879e-06,
            "loss": 0.0029,
            "step": 162
        },
        {
            "epoch": 1.78,
            "learning_rate": 8.473282442748092e-06,
            "loss": 0.0026,
            "step": 165
        },
        {
            "epoch": 1.81,
            "learning_rate": 8.244274809160306e-06,
            "loss": 0.0028,
            "step": 168
        },
        {
            "epoch": 1.84,
            "learning_rate": 8.015267175572519e-06,
            "loss": 0.0031,
            "step": 171
        },
        {
            "epoch": 1.87,
            "learning_rate": 7.786259541984733e-06,
            "loss": 0.0024,
            "step": 174
        },
        {
            "epoch": 1.9,
            "learning_rate": 7.557251908396948e-06,
            "loss": 0.0021,
            "step": 177
        },
        {
            "epoch": 1.94,
            "learning_rate": 7.328244274809161e-06,
            "loss": 0.0024,
            "step": 180
        },
        {
            "epoch": 1.97,
            "learning_rate": 7.0992366412213746e-06,
            "loss": 0.0024,
            "step": 183
        },
        {
            "epoch": 2.0,
            "learning_rate": 6.870229007633589e-06,
            "loss": 0.0025,
            "step": 186
        },
        {
            "epoch": 2.03,
            "learning_rate": 6.641221374045802e-06,
            "loss": 0.0024,
            "step": 189
        },
        {
            "epoch": 2.07,
            "learning_rate": 6.412213740458016e-06,
            "loss": 0.0027,
            "step": 192
        },
        {
            "epoch": 2.1,
            "learning_rate": 6.18320610687023e-06,
            "loss": 0.0029,
            "step": 195
        },
        {
            "epoch": 2.13,
            "learning_rate": 5.9541984732824435e-06,
            "loss": 0.0025,
            "step": 198
        },
        {
            "epoch": 2.15,
            "eval_loss": 0.0004669851914513856,
            "eval_mse": 0.0004669852106191058,
            "eval_runtime": 8.5435,
            "eval_samples_per_second": 36.636,
            "eval_steps_per_second": 9.247,
            "step": 200
        },
        {
            "epoch": 2.16,
            "learning_rate": 5.725190839694656e-06,
            "loss": 0.002,
            "step": 201
        },
        {
            "epoch": 2.2,
            "learning_rate": 5.496183206106871e-06,
            "loss": 0.0026,
            "step": 204
        },
        {
            "epoch": 2.23,
            "learning_rate": 5.267175572519084e-06,
            "loss": 0.0024,
            "step": 207
        },
        {
            "epoch": 2.26,
            "learning_rate": 5.038167938931297e-06,
            "loss": 0.0025,
            "step": 210
        },
        {
            "epoch": 2.29,
            "learning_rate": 4.8091603053435125e-06,
            "loss": 0.0022,
            "step": 213
        },
        {
            "epoch": 2.32,
            "learning_rate": 4.580152671755725e-06,
            "loss": 0.002,
            "step": 216
        },
        {
            "epoch": 2.36,
            "learning_rate": 4.351145038167939e-06,
            "loss": 0.0024,
            "step": 219
        },
        {
            "epoch": 2.39,
            "learning_rate": 4.122137404580153e-06,
            "loss": 0.0021,
            "step": 222
        },
        {
            "epoch": 2.42,
            "learning_rate": 3.893129770992366e-06,
            "loss": 0.0019,
            "step": 225
        },
        {
            "epoch": 2.45,
            "learning_rate": 3.6641221374045806e-06,
            "loss": 0.0021,
            "step": 228
        },
        {
            "epoch": 2.49,
            "learning_rate": 3.4351145038167944e-06,
            "loss": 0.0027,
            "step": 231
        },
        {
            "epoch": 2.52,
            "learning_rate": 3.206106870229008e-06,
            "loss": 0.0024,
            "step": 234
        },
        {
            "epoch": 2.55,
            "learning_rate": 2.9770992366412218e-06,
            "loss": 0.0024,
            "step": 237
        },
        {
            "epoch": 2.58,
            "learning_rate": 2.7480916030534356e-06,
            "loss": 0.0021,
            "step": 240
        },
        {
            "epoch": 2.61,
            "learning_rate": 2.5190839694656487e-06,
            "loss": 0.0019,
            "step": 243
        },
        {
            "epoch": 2.65,
            "learning_rate": 2.2900763358778625e-06,
            "loss": 0.0019,
            "step": 246
        },
        {
            "epoch": 2.68,
            "learning_rate": 2.0610687022900764e-06,
            "loss": 0.0025,
            "step": 249
        },
        {
            "epoch": 2.69,
            "eval_loss": 0.00047420692862942815,
            "eval_mse": 0.00047420695769180793,
            "eval_runtime": 8.5496,
            "eval_samples_per_second": 36.61,
            "eval_steps_per_second": 9.24,
            "step": 250
        },
        {
            "epoch": 2.71,
            "learning_rate": 1.8320610687022903e-06,
            "loss": 0.0021,
            "step": 252
        },
        {
            "epoch": 2.74,
            "learning_rate": 1.603053435114504e-06,
            "loss": 0.0024,
            "step": 255
        },
        {
            "epoch": 2.78,
            "learning_rate": 1.3740458015267178e-06,
            "loss": 0.0024,
            "step": 258
        },
        {
            "epoch": 2.81,
            "learning_rate": 1.1450381679389313e-06,
            "loss": 0.0021,
            "step": 261
        },
        {
            "epoch": 2.84,
            "learning_rate": 9.160305343511451e-07,
            "loss": 0.0026,
            "step": 264
        },
        {
            "epoch": 2.87,
            "learning_rate": 6.870229007633589e-07,
            "loss": 0.0023,
            "step": 267
        },
        {
            "epoch": 2.91,
            "learning_rate": 4.5801526717557257e-07,
            "loss": 0.0021,
            "step": 270
        },
        {
            "epoch": 2.94,
            "learning_rate": 2.2900763358778629e-07,
            "loss": 0.0026,
            "step": 273
        },
        {
            "epoch": 2.97,
            "learning_rate": 0.0,
            "loss": 0.0024,
            "step": 276
        },
        {
            "epoch": 2.97,
            "step": 276,
            "total_flos": 4678301863342080.0,
            "train_loss": 0.0036955964937131257,
            "train_runtime": 2019.4918,
            "train_samples_per_second": 8.831,
            "train_steps_per_second": 0.137
        }
    ],
    "logging_steps": 3,
    "max_steps": 276,
    "num_input_tokens_seen": 0,
    "num_train_epochs": 3,
    "save_steps": 100,
    "total_flos": 4678301863342080.0,
    "train_batch_size": 4,
    "trial_name": null,
    "trial_params": null
}