|
{ |
|
"best_metric": 0.5931658744812012, |
|
"best_model_checkpoint": "distilbert_add_GLUE_Experiment_mrpc_256/checkpoint-240", |
|
"epoch": 21.0, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.637, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6241778135299683, |
|
"eval_runtime": 0.2066, |
|
"eval_samples_per_second": 1975.28, |
|
"eval_steps_per_second": 9.683, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.629, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6240377426147461, |
|
"eval_runtime": 0.1942, |
|
"eval_samples_per_second": 2100.738, |
|
"eval_steps_per_second": 10.298, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.6302, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6248239874839783, |
|
"eval_runtime": 0.1917, |
|
"eval_samples_per_second": 2128.019, |
|
"eval_steps_per_second": 10.431, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.63, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.624061107635498, |
|
"eval_runtime": 0.1945, |
|
"eval_samples_per_second": 2097.442, |
|
"eval_steps_per_second": 10.282, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.6323, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6240324378013611, |
|
"eval_runtime": 0.1944, |
|
"eval_samples_per_second": 2098.803, |
|
"eval_steps_per_second": 10.288, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.6299, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6242926120758057, |
|
"eval_runtime": 0.2052, |
|
"eval_samples_per_second": 1988.12, |
|
"eval_steps_per_second": 9.746, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.6325, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6239174604415894, |
|
"eval_runtime": 0.2195, |
|
"eval_samples_per_second": 1859.06, |
|
"eval_steps_per_second": 9.113, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.6301, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.623868465423584, |
|
"eval_runtime": 0.1993, |
|
"eval_samples_per_second": 2047.174, |
|
"eval_steps_per_second": 10.035, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.6324, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6240455508232117, |
|
"eval_runtime": 0.1991, |
|
"eval_samples_per_second": 2049.734, |
|
"eval_steps_per_second": 10.048, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.6293, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6240090727806091, |
|
"eval_runtime": 0.1918, |
|
"eval_samples_per_second": 2127.006, |
|
"eval_steps_per_second": 10.426, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.6307, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6239336729049683, |
|
"eval_runtime": 0.1917, |
|
"eval_samples_per_second": 2128.045, |
|
"eval_steps_per_second": 10.432, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.6302, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6240288019180298, |
|
"eval_runtime": 0.2014, |
|
"eval_samples_per_second": 2025.663, |
|
"eval_steps_per_second": 9.93, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.6338, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6237308382987976, |
|
"eval_runtime": 0.196, |
|
"eval_samples_per_second": 2081.206, |
|
"eval_steps_per_second": 10.202, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.6281, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6225215792655945, |
|
"eval_runtime": 0.1949, |
|
"eval_samples_per_second": 2092.958, |
|
"eval_steps_per_second": 10.26, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.6263, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6838235294117647, |
|
"eval_combined_score": 0.7480253018237863, |
|
"eval_f1": 0.8122270742358079, |
|
"eval_loss": 0.6182957291603088, |
|
"eval_runtime": 0.2058, |
|
"eval_samples_per_second": 1982.67, |
|
"eval_steps_per_second": 9.719, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.6017, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7107843137254902, |
|
"eval_combined_score": 0.767068803569332, |
|
"eval_f1": 0.8233532934131738, |
|
"eval_loss": 0.5931658744812012, |
|
"eval_runtime": 0.1933, |
|
"eval_samples_per_second": 2110.847, |
|
"eval_steps_per_second": 10.347, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.5213, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6642156862745098, |
|
"eval_combined_score": 0.7091275917907558, |
|
"eval_f1": 0.7540394973070018, |
|
"eval_loss": 0.6146347522735596, |
|
"eval_runtime": 0.2025, |
|
"eval_samples_per_second": 2014.987, |
|
"eval_steps_per_second": 9.877, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.4383, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6911764705882353, |
|
"eval_combined_score": 0.7377115229653506, |
|
"eval_f1": 0.7842465753424658, |
|
"eval_loss": 0.6404514908790588, |
|
"eval_runtime": 0.2015, |
|
"eval_samples_per_second": 2025.167, |
|
"eval_steps_per_second": 9.927, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.3903, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6911764705882353, |
|
"eval_combined_score": 0.7391693163751987, |
|
"eval_f1": 0.7871621621621622, |
|
"eval_loss": 0.6909675002098083, |
|
"eval_runtime": 0.1929, |
|
"eval_samples_per_second": 2115.29, |
|
"eval_steps_per_second": 10.369, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.363, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6544117647058824, |
|
"eval_combined_score": 0.695920966151824, |
|
"eval_f1": 0.7374301675977655, |
|
"eval_loss": 0.7221016883850098, |
|
"eval_runtime": 0.2057, |
|
"eval_samples_per_second": 1983.587, |
|
"eval_steps_per_second": 9.723, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.3306, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6862745098039216, |
|
"eval_combined_score": 0.7335482138060704, |
|
"eval_f1": 0.7808219178082192, |
|
"eval_loss": 0.7583230137825012, |
|
"eval_runtime": 0.1959, |
|
"eval_samples_per_second": 2082.795, |
|
"eval_steps_per_second": 10.21, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"step": 315, |
|
"total_flos": 1269130346889216.0, |
|
"train_loss": 0.5765140730237204, |
|
"train_runtime": 112.7881, |
|
"train_samples_per_second": 1626.058, |
|
"train_steps_per_second": 6.65 |
|
} |
|
], |
|
"max_steps": 750, |
|
"num_train_epochs": 50, |
|
"total_flos": 1269130346889216.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|