|
{ |
|
"best_metric": 1.0586252212524414, |
|
"best_model_checkpoint": "distilbert_sa_GLUE_Experiment_data_aug_qnli_256/checkpoint-16604", |
|
"epoch": 6.0, |
|
"global_step": 99624, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9000240905805834e-05, |
|
"loss": 0.4745, |
|
"step": 16604 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5680029287937031, |
|
"eval_loss": 1.0586252212524414, |
|
"eval_runtime": 2.0901, |
|
"eval_samples_per_second": 2613.768, |
|
"eval_steps_per_second": 10.526, |
|
"step": 16604 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.800066249096604e-05, |
|
"loss": 0.2251, |
|
"step": 33208 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5707486728903532, |
|
"eval_loss": 1.3085120916366577, |
|
"eval_runtime": 2.0886, |
|
"eval_samples_per_second": 2615.588, |
|
"eval_steps_per_second": 10.533, |
|
"step": 33208 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.700114430257769e-05, |
|
"loss": 0.1318, |
|
"step": 49812 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5808164012447373, |
|
"eval_loss": 1.4266568422317505, |
|
"eval_runtime": 2.1547, |
|
"eval_samples_per_second": 2535.444, |
|
"eval_steps_per_second": 10.21, |
|
"step": 49812 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600144543483498e-05, |
|
"loss": 0.0838, |
|
"step": 66416 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5778876075416438, |
|
"eval_loss": 1.6666687726974487, |
|
"eval_runtime": 2.0958, |
|
"eval_samples_per_second": 2606.607, |
|
"eval_steps_per_second": 10.497, |
|
"step": 66416 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5001867019995185e-05, |
|
"loss": 0.0572, |
|
"step": 83020 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.576423210690097, |
|
"eval_loss": 1.9572008848190308, |
|
"eval_runtime": 2.0878, |
|
"eval_samples_per_second": 2616.615, |
|
"eval_steps_per_second": 10.537, |
|
"step": 83020 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.400228860515538e-05, |
|
"loss": 0.041, |
|
"step": 99624 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5744096650192202, |
|
"eval_loss": 2.0601627826690674, |
|
"eval_runtime": 2.0721, |
|
"eval_samples_per_second": 2636.402, |
|
"eval_steps_per_second": 10.617, |
|
"step": 99624 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 99624, |
|
"total_flos": 4.3517441866727424e+17, |
|
"train_loss": 0.16891067297805373, |
|
"train_runtime": 17984.6912, |
|
"train_samples_per_second": 11817.025, |
|
"train_steps_per_second": 46.161 |
|
} |
|
], |
|
"max_steps": 830200, |
|
"num_train_epochs": 50, |
|
"total_flos": 4.3517441866727424e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|