|
{ |
|
"best_metric": 1.2128502130508423, |
|
"best_model_checkpoint": "./outputs/checkpoint-4100", |
|
"epoch": 2.987249544626594, |
|
"eval_steps": 100, |
|
"global_step": 4100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1918, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.024646282196045, |
|
"eval_runtime": 143.5031, |
|
"eval_samples_per_second": 43.72, |
|
"eval_steps_per_second": 5.47, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9739, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.9310301542282104, |
|
"eval_runtime": 143.4865, |
|
"eval_samples_per_second": 43.725, |
|
"eval_steps_per_second": 5.471, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8997, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.869322419166565, |
|
"eval_runtime": 143.5199, |
|
"eval_samples_per_second": 43.715, |
|
"eval_steps_per_second": 5.47, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8476, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.8241251707077026, |
|
"eval_runtime": 143.5226, |
|
"eval_samples_per_second": 43.714, |
|
"eval_steps_per_second": 5.47, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7962, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.7821052074432373, |
|
"eval_runtime": 143.5402, |
|
"eval_samples_per_second": 43.709, |
|
"eval_steps_per_second": 5.469, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7679, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.7543551921844482, |
|
"eval_runtime": 143.714, |
|
"eval_samples_per_second": 43.656, |
|
"eval_steps_per_second": 5.462, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7293, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.7226487398147583, |
|
"eval_runtime": 143.4831, |
|
"eval_samples_per_second": 43.726, |
|
"eval_steps_per_second": 5.471, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7081, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.6933537721633911, |
|
"eval_runtime": 143.5765, |
|
"eval_samples_per_second": 43.698, |
|
"eval_steps_per_second": 5.467, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6726, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.6686474084854126, |
|
"eval_runtime": 143.516, |
|
"eval_samples_per_second": 43.716, |
|
"eval_steps_per_second": 5.47, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6577, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.6451665163040161, |
|
"eval_runtime": 146.451, |
|
"eval_samples_per_second": 42.84, |
|
"eval_steps_per_second": 5.36, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6377, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.623993992805481, |
|
"eval_runtime": 143.5112, |
|
"eval_samples_per_second": 43.718, |
|
"eval_steps_per_second": 5.47, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6028, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.6054009199142456, |
|
"eval_runtime": 143.561, |
|
"eval_samples_per_second": 43.703, |
|
"eval_steps_per_second": 5.468, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6101, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.5822986364364624, |
|
"eval_runtime": 143.5287, |
|
"eval_samples_per_second": 43.713, |
|
"eval_steps_per_second": 5.469, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5635, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.5618135929107666, |
|
"eval_runtime": 143.5801, |
|
"eval_samples_per_second": 43.697, |
|
"eval_steps_per_second": 5.467, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5353, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.546177864074707, |
|
"eval_runtime": 143.5634, |
|
"eval_samples_per_second": 43.702, |
|
"eval_steps_per_second": 5.468, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5167, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 1.5285366773605347, |
|
"eval_runtime": 143.5631, |
|
"eval_samples_per_second": 43.702, |
|
"eval_steps_per_second": 5.468, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5143, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.511216640472412, |
|
"eval_runtime": 143.4634, |
|
"eval_samples_per_second": 43.732, |
|
"eval_steps_per_second": 5.472, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4878, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.496885061264038, |
|
"eval_runtime": 143.4809, |
|
"eval_samples_per_second": 43.727, |
|
"eval_steps_per_second": 5.471, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4773, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.4805316925048828, |
|
"eval_runtime": 143.5169, |
|
"eval_samples_per_second": 43.716, |
|
"eval_steps_per_second": 5.47, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4603, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 1.4675525426864624, |
|
"eval_runtime": 143.6108, |
|
"eval_samples_per_second": 43.688, |
|
"eval_steps_per_second": 5.466, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 1.446, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.4523862600326538, |
|
"eval_runtime": 143.5584, |
|
"eval_samples_per_second": 43.703, |
|
"eval_steps_per_second": 5.468, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4205, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.4386054277420044, |
|
"eval_runtime": 143.497, |
|
"eval_samples_per_second": 43.722, |
|
"eval_steps_per_second": 5.47, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4198, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.4249650239944458, |
|
"eval_runtime": 143.5274, |
|
"eval_samples_per_second": 43.713, |
|
"eval_steps_per_second": 5.469, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4191, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.4121719598770142, |
|
"eval_runtime": 143.5251, |
|
"eval_samples_per_second": 43.714, |
|
"eval_steps_per_second": 5.469, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3902, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.3990552425384521, |
|
"eval_runtime": 143.4243, |
|
"eval_samples_per_second": 43.744, |
|
"eval_steps_per_second": 5.473, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3802, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.3864269256591797, |
|
"eval_runtime": 144.1474, |
|
"eval_samples_per_second": 43.525, |
|
"eval_steps_per_second": 5.446, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3683, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.3723174333572388, |
|
"eval_runtime": 143.4406, |
|
"eval_samples_per_second": 43.739, |
|
"eval_steps_per_second": 5.473, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 1.34, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 1.3610557317733765, |
|
"eval_runtime": 143.5173, |
|
"eval_samples_per_second": 43.716, |
|
"eval_steps_per_second": 5.47, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3145, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.347936749458313, |
|
"eval_runtime": 143.5505, |
|
"eval_samples_per_second": 43.706, |
|
"eval_steps_per_second": 5.468, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3152, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.3374146223068237, |
|
"eval_runtime": 143.4898, |
|
"eval_samples_per_second": 43.724, |
|
"eval_steps_per_second": 5.471, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2956, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 1.325629472732544, |
|
"eval_runtime": 143.5978, |
|
"eval_samples_per_second": 43.691, |
|
"eval_steps_per_second": 5.467, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2991, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 1.3144720792770386, |
|
"eval_runtime": 143.5018, |
|
"eval_samples_per_second": 43.721, |
|
"eval_steps_per_second": 5.47, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2803, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.3016444444656372, |
|
"eval_runtime": 143.5185, |
|
"eval_samples_per_second": 43.716, |
|
"eval_steps_per_second": 5.47, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2618, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.2920358180999756, |
|
"eval_runtime": 143.6048, |
|
"eval_samples_per_second": 43.689, |
|
"eval_steps_per_second": 5.466, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2626, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.2805367708206177, |
|
"eval_runtime": 143.5377, |
|
"eval_samples_per_second": 43.71, |
|
"eval_steps_per_second": 5.469, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2507, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.267604947090149, |
|
"eval_runtime": 143.5375, |
|
"eval_samples_per_second": 43.71, |
|
"eval_steps_per_second": 5.469, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2342, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.2559330463409424, |
|
"eval_runtime": 143.5745, |
|
"eval_samples_per_second": 43.699, |
|
"eval_steps_per_second": 5.468, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2114, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 1.2479283809661865, |
|
"eval_runtime": 143.5861, |
|
"eval_samples_per_second": 43.695, |
|
"eval_steps_per_second": 5.467, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2207, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.234686255455017, |
|
"eval_runtime": 143.5291, |
|
"eval_samples_per_second": 43.712, |
|
"eval_steps_per_second": 5.469, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2032, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 1.2260726690292358, |
|
"eval_runtime": 143.5138, |
|
"eval_samples_per_second": 43.717, |
|
"eval_steps_per_second": 5.47, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2012, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.2128502130508423, |
|
"eval_runtime": 143.5068, |
|
"eval_samples_per_second": 43.719, |
|
"eval_steps_per_second": 5.47, |
|
"step": 4100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 2.4438234279579648e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|