{
  "best_metric": 6.556983947753906,
  "best_model_checkpoint": "distilbert_add_pre-training-dim-96/checkpoint-50022",
  "epoch": 15.0,
  "global_step": 53595,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 4.6756706234227496e-05,
      "loss": 14.685,
      "step": 3573
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.12398869012625627,
      "eval_loss": 9.392213821411133,
      "eval_runtime": 0.565,
      "eval_samples_per_second": 847.75,
      "eval_steps_per_second": 14.159,
      "step": 3573
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.341807645574353e-05,
      "loss": 8.0255,
      "step": 7146
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.1315243012683803,
      "eval_loss": 7.151001453399658,
      "eval_runtime": 0.5512,
      "eval_samples_per_second": 869.004,
      "eval_steps_per_second": 14.514,
      "step": 7146
    },
    {
      "epoch": 3.0,
      "learning_rate": 4.008038134405085e-05,
      "loss": 7.0152,
      "step": 10719
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.14824699404427463,
      "eval_loss": 6.786097049713135,
      "eval_runtime": 0.5553,
      "eval_samples_per_second": 862.599,
      "eval_steps_per_second": 14.407,
      "step": 10719
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.674175156556688e-05,
      "loss": 6.8127,
      "step": 14292
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.1492738182022094,
      "eval_loss": 6.705262660980225,
      "eval_runtime": 0.5553,
      "eval_samples_per_second": 862.591,
      "eval_steps_per_second": 14.407,
      "step": 14292
    },
    {
      "epoch": 5.0,
      "learning_rate": 3.3404056453874196e-05,
      "loss": 6.74,
      "step": 17865
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.1474488649134637,
      "eval_loss": 6.669492244720459,
      "eval_runtime": 0.5614,
      "eval_samples_per_second": 853.267,
      "eval_steps_per_second": 14.251,
      "step": 17865
    },
    {
      "epoch": 6.0,
      "learning_rate": 3.0065426675390224e-05,
      "loss": 6.7067,
      "step": 21438
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.1490975747320925,
      "eval_loss": 6.643059253692627,
      "eval_runtime": 0.5554,
      "eval_samples_per_second": 862.513,
      "eval_steps_per_second": 14.405,
      "step": 21438
    },
    {
      "epoch": 7.0,
      "learning_rate": 2.6726796896906253e-05,
      "loss": 6.6871,
      "step": 25011
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.14833291057185535,
      "eval_loss": 6.620449542999268,
      "eval_runtime": 0.5532,
      "eval_samples_per_second": 865.795,
      "eval_steps_per_second": 14.46,
      "step": 25011
    },
    {
      "epoch": 8.0,
      "learning_rate": 2.338910178521357e-05,
      "loss": 6.6748,
      "step": 28584
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.1472751167407656,
      "eval_loss": 6.625016212463379,
      "eval_runtime": 0.5524,
      "eval_samples_per_second": 867.158,
      "eval_steps_per_second": 14.483,
      "step": 28584
    },
    {
      "epoch": 9.0,
      "learning_rate": 2.005140667352089e-05,
      "loss": 6.6649,
      "step": 32157
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.14858899133836267,
      "eval_loss": 6.610751152038574,
      "eval_runtime": 0.5557,
      "eval_samples_per_second": 862.053,
      "eval_steps_per_second": 14.398,
      "step": 32157
    },
    {
      "epoch": 10.0,
      "learning_rate": 1.671371156182821e-05,
      "loss": 6.6596,
      "step": 35730
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.14966520045947385,
      "eval_loss": 6.613977909088135,
      "eval_runtime": 0.5538,
      "eval_samples_per_second": 864.918,
      "eval_steps_per_second": 14.445,
      "step": 35730
    },
    {
      "epoch": 11.0,
      "learning_rate": 1.3375081783344236e-05,
      "loss": 6.6536,
      "step": 39303
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.14933370739234114,
      "eval_loss": 6.60673189163208,
      "eval_runtime": 0.5553,
      "eval_samples_per_second": 862.613,
      "eval_steps_per_second": 14.407,
      "step": 39303
    },
    {
      "epoch": 12.0,
      "learning_rate": 1.0037386671651557e-05,
      "loss": 6.6483,
      "step": 42876
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.14892078326657765,
      "eval_loss": 6.613958835601807,
      "eval_runtime": 0.5595,
      "eval_samples_per_second": 856.133,
      "eval_steps_per_second": 14.299,
      "step": 42876
    },
    {
      "epoch": 13.0,
      "learning_rate": 6.699691559958874e-06,
      "loss": 6.6463,
      "step": 46449
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.14841714254033375,
      "eval_loss": 6.609643459320068,
      "eval_runtime": 0.5507,
      "eval_samples_per_second": 869.79,
      "eval_steps_per_second": 14.527,
      "step": 46449
    },
    {
      "epoch": 14.0,
      "learning_rate": 3.3619964482661936e-06,
      "loss": 6.6434,
      "step": 50022
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.15260783760924726,
      "eval_loss": 6.556983947753906,
      "eval_runtime": 0.5531,
      "eval_samples_per_second": 866.025,
      "eval_steps_per_second": 14.464,
      "step": 50022
    },
    {
      "epoch": 15.0,
      "learning_rate": 2.336666978222264e-08,
      "loss": 6.6414,
      "step": 53595
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.15261607518586057,
      "eval_loss": 6.583581924438477,
      "eval_runtime": 0.5553,
      "eval_samples_per_second": 862.526,
      "eval_steps_per_second": 14.405,
      "step": 53595
    },
    {
      "epoch": 15.0,
      "step": 53595,
      "total_flos": 3.972393508798464e+16,
      "train_loss": 7.326953015673104,
      "train_runtime": 5761.7187,
      "train_samples_per_second": 595.236,
      "train_steps_per_second": 9.302
    }
  ],
  "max_steps": 53595,
  "num_train_epochs": 15,
  "total_flos": 3.972393508798464e+16,
  "trial_name": null,
  "trial_params": null
}