|
{ |
|
"best_metric": 0.8897058823529411, |
|
"best_model_checkpoint": "./bert-base-uncased/fine_tuned_models/checkpoint-1725", |
|
"epoch": 20.0, |
|
"global_step": 2300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6358, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7009803921568627, |
|
"eval_combined_score": 0.7599857450398557, |
|
"eval_f1": 0.8189910979228486, |
|
"eval_loss": 0.5365363359451294, |
|
"eval_runtime": 1.8712, |
|
"eval_samples_per_second": 218.045, |
|
"eval_steps_per_second": 27.256, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4545, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8161764705882353, |
|
"eval_combined_score": 0.84750665209541, |
|
"eval_f1": 0.8788368336025848, |
|
"eval_loss": 0.41578346490859985, |
|
"eval_runtime": 1.8846, |
|
"eval_samples_per_second": 216.486, |
|
"eval_steps_per_second": 27.061, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.2835, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_combined_score": 0.8995098039215687, |
|
"eval_f1": 0.9166666666666667, |
|
"eval_loss": 0.3312576413154602, |
|
"eval_runtime": 1.8685, |
|
"eval_samples_per_second": 218.351, |
|
"eval_steps_per_second": 27.294, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.1495, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8725490196078431, |
|
"eval_combined_score": 0.891446923597025, |
|
"eval_f1": 0.9103448275862069, |
|
"eval_loss": 0.4420200288295746, |
|
"eval_runtime": 1.8756, |
|
"eval_samples_per_second": 217.526, |
|
"eval_steps_per_second": 27.191, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.103, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8676470588235294, |
|
"eval_combined_score": 0.8840080312567832, |
|
"eval_f1": 0.9003690036900369, |
|
"eval_loss": 0.5437020063400269, |
|
"eval_runtime": 1.8749, |
|
"eval_samples_per_second": 217.61, |
|
"eval_steps_per_second": 27.201, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.555555555555556e-05, |
|
"loss": 0.0683, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8627450980392157, |
|
"eval_combined_score": 0.8835909790537375, |
|
"eval_f1": 0.9044368600682594, |
|
"eval_loss": 0.6472938656806946, |
|
"eval_runtime": 1.8885, |
|
"eval_samples_per_second": 216.048, |
|
"eval_steps_per_second": 27.006, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.4444444444444446e-05, |
|
"loss": 0.053, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8676470588235294, |
|
"eval_combined_score": 0.8877484440875327, |
|
"eval_f1": 0.9078498293515359, |
|
"eval_loss": 0.5814068913459778, |
|
"eval_runtime": 1.8738, |
|
"eval_samples_per_second": 217.736, |
|
"eval_steps_per_second": 27.217, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0292, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8578431372549019, |
|
"eval_combined_score": 0.880426585349859, |
|
"eval_f1": 0.903010033444816, |
|
"eval_loss": 0.7452187538146973, |
|
"eval_runtime": 1.8877, |
|
"eval_samples_per_second": 216.139, |
|
"eval_steps_per_second": 27.017, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.2222222222222224e-05, |
|
"loss": 0.0212, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8774509803921569, |
|
"eval_combined_score": 0.8951714832274373, |
|
"eval_f1": 0.9128919860627178, |
|
"eval_loss": 0.683774471282959, |
|
"eval_runtime": 1.8947, |
|
"eval_samples_per_second": 215.341, |
|
"eval_steps_per_second": 26.918, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 0.0103, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8602941176470589, |
|
"eval_combined_score": 0.8814291101055808, |
|
"eval_f1": 0.9025641025641027, |
|
"eval_loss": 0.789932906627655, |
|
"eval_runtime": 1.8751, |
|
"eval_samples_per_second": 217.593, |
|
"eval_steps_per_second": 27.199, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0153, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8676470588235294, |
|
"eval_combined_score": 0.8859511889862328, |
|
"eval_f1": 0.9042553191489361, |
|
"eval_loss": 0.7904257774353027, |
|
"eval_runtime": 1.8798, |
|
"eval_samples_per_second": 217.048, |
|
"eval_steps_per_second": 27.131, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.0136, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8799019607843137, |
|
"eval_combined_score": 0.8980706385118149, |
|
"eval_f1": 0.9162393162393162, |
|
"eval_loss": 0.6983678340911865, |
|
"eval_runtime": 1.8896, |
|
"eval_samples_per_second": 215.921, |
|
"eval_steps_per_second": 26.99, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.0083, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_combined_score": 0.9002208391889179, |
|
"eval_f1": 0.9180887372013652, |
|
"eval_loss": 0.7833622097969055, |
|
"eval_runtime": 1.8915, |
|
"eval_samples_per_second": 215.705, |
|
"eval_steps_per_second": 26.963, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0078, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.875, |
|
"eval_combined_score": 0.8939102564102563, |
|
"eval_f1": 0.9128205128205128, |
|
"eval_loss": 0.8284361362457275, |
|
"eval_runtime": 1.8794, |
|
"eval_samples_per_second": 217.087, |
|
"eval_steps_per_second": 27.136, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.0065, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8897058823529411, |
|
"eval_combined_score": 0.9062594591867621, |
|
"eval_f1": 0.9228130360205832, |
|
"eval_loss": 0.802143931388855, |
|
"eval_runtime": 1.8792, |
|
"eval_samples_per_second": 217.111, |
|
"eval_steps_per_second": 27.139, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.0046, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8725490196078431, |
|
"eval_combined_score": 0.8922067131937521, |
|
"eval_f1": 0.9118644067796611, |
|
"eval_loss": 0.890489399433136, |
|
"eval_runtime": 1.8786, |
|
"eval_samples_per_second": 217.185, |
|
"eval_steps_per_second": 27.148, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.005, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.875, |
|
"eval_combined_score": 0.8931521739130435, |
|
"eval_f1": 0.9113043478260869, |
|
"eval_loss": 0.8655802607536316, |
|
"eval_runtime": 1.8821, |
|
"eval_samples_per_second": 216.775, |
|
"eval_steps_per_second": 27.097, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.004, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8700980392156863, |
|
"eval_combined_score": 0.889280453113888, |
|
"eval_f1": 0.9084628670120898, |
|
"eval_loss": 0.9055202007293701, |
|
"eval_runtime": 1.8752, |
|
"eval_samples_per_second": 217.58, |
|
"eval_steps_per_second": 27.198, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.0019, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8725490196078431, |
|
"eval_combined_score": 0.8911356209150327, |
|
"eval_f1": 0.9097222222222222, |
|
"eval_loss": 0.9129809141159058, |
|
"eval_runtime": 1.8883, |
|
"eval_samples_per_second": 216.063, |
|
"eval_steps_per_second": 27.008, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0008, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8725490196078431, |
|
"eval_combined_score": 0.8916009702850212, |
|
"eval_f1": 0.9106529209621993, |
|
"eval_loss": 0.9021580815315247, |
|
"eval_runtime": 1.8731, |
|
"eval_samples_per_second": 217.821, |
|
"eval_steps_per_second": 27.228, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2300, |
|
"total_flos": 4825456755302400.0, |
|
"train_loss": 0.09379986195784548, |
|
"train_runtime": 1024.4983, |
|
"train_samples_per_second": 71.606, |
|
"train_steps_per_second": 2.245 |
|
} |
|
], |
|
"max_steps": 2300, |
|
"num_train_epochs": 20, |
|
"total_flos": 4825456755302400.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|