|
{ |
|
"best_metric": 0.5935130715370178, |
|
"best_model_checkpoint": "bert_tiny_lda_20_v1_mrpc/checkpoint-45", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.7700666189193726, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6315, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6862745098039216, |
|
"eval_combined_score": 0.7492956126732218, |
|
"eval_f1": 0.812316715542522, |
|
"eval_loss": 0.6003513932228088, |
|
"eval_runtime": 0.1367, |
|
"eval_samples_per_second": 2984.452, |
|
"eval_steps_per_second": 14.63, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.322261929512024, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.6013, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6887254901960784, |
|
"eval_combined_score": 0.7462174591629542, |
|
"eval_f1": 0.8037094281298299, |
|
"eval_loss": 0.5958060622215271, |
|
"eval_runtime": 0.1396, |
|
"eval_samples_per_second": 2921.977, |
|
"eval_steps_per_second": 14.323, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.471887469291687, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.5707, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6985294117647058, |
|
"eval_combined_score": 0.7542106100554595, |
|
"eval_f1": 0.8098918083462132, |
|
"eval_loss": 0.5935130715370178, |
|
"eval_runtime": 0.1333, |
|
"eval_samples_per_second": 3060.988, |
|
"eval_steps_per_second": 15.005, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.2280375957489014, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.5415, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6985294117647058, |
|
"eval_combined_score": 0.750864705882353, |
|
"eval_f1": 0.8032000000000001, |
|
"eval_loss": 0.6068510413169861, |
|
"eval_runtime": 0.1378, |
|
"eval_samples_per_second": 2961.536, |
|
"eval_steps_per_second": 14.517, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.841067314147949, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4866, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.678921568627451, |
|
"eval_combined_score": 0.7263347048664026, |
|
"eval_f1": 0.7737478411053541, |
|
"eval_loss": 0.6273573040962219, |
|
"eval_runtime": 0.1355, |
|
"eval_samples_per_second": 3011.787, |
|
"eval_steps_per_second": 14.764, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 7.474719524383545, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.397, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6985294117647058, |
|
"eval_combined_score": 0.7495888549909429, |
|
"eval_f1": 0.8006482982171799, |
|
"eval_loss": 0.7452647089958191, |
|
"eval_runtime": 0.137, |
|
"eval_samples_per_second": 2978.759, |
|
"eval_steps_per_second": 14.602, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 6.474092483520508, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.3039, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6519607843137255, |
|
"eval_combined_score": 0.6968894830659537, |
|
"eval_f1": 0.7418181818181818, |
|
"eval_loss": 0.8151072859764099, |
|
"eval_runtime": 0.1328, |
|
"eval_samples_per_second": 3071.421, |
|
"eval_steps_per_second": 15.056, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.6470448970794678, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.2217, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6225490196078431, |
|
"eval_combined_score": 0.6614690623331044, |
|
"eval_f1": 0.7003891050583657, |
|
"eval_loss": 0.986514687538147, |
|
"eval_runtime": 0.1369, |
|
"eval_samples_per_second": 2979.22, |
|
"eval_steps_per_second": 14.604, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 120, |
|
"total_flos": 769504676118528.0, |
|
"train_loss": 0.4692632933457693, |
|
"train_runtime": 24.0661, |
|
"train_samples_per_second": 7620.676, |
|
"train_steps_per_second": 31.164 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 769504676118528.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|