|
{ |
|
"best_metric": 0.5874866247177124, |
|
"best_model_checkpoint": "bert_tiny_lda_100_v1_book_cola/checkpoint-102", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 272, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.9012271165847778, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.6112, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.6111970543861389, |
|
"eval_matthews_correlation": 0.0, |
|
"eval_runtime": 0.3494, |
|
"eval_samples_per_second": 2984.868, |
|
"eval_steps_per_second": 14.309, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.6870914697647095, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.5906, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6951102614402771, |
|
"eval_loss": 0.5991763472557068, |
|
"eval_matthews_correlation": 0.08436523968200914, |
|
"eval_runtime": 0.3261, |
|
"eval_samples_per_second": 3198.114, |
|
"eval_steps_per_second": 15.331, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.8232595920562744, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.5465, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.693192720413208, |
|
"eval_loss": 0.5874866247177124, |
|
"eval_matthews_correlation": 0.19955176173637962, |
|
"eval_runtime": 0.3617, |
|
"eval_samples_per_second": 2883.62, |
|
"eval_steps_per_second": 13.824, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.4694697856903076, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.4903, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6912751793861389, |
|
"eval_loss": 0.5886690616607666, |
|
"eval_matthews_correlation": 0.19917446639499126, |
|
"eval_runtime": 0.3515, |
|
"eval_samples_per_second": 2967.313, |
|
"eval_steps_per_second": 14.225, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.748969316482544, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4275, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7094918489456177, |
|
"eval_loss": 0.6417186260223389, |
|
"eval_matthews_correlation": 0.217400289930018, |
|
"eval_runtime": 0.3243, |
|
"eval_samples_per_second": 3215.81, |
|
"eval_steps_per_second": 15.416, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.171329975128174, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.3786, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7209970951080322, |
|
"eval_loss": 0.6495499014854431, |
|
"eval_matthews_correlation": 0.27111800500621025, |
|
"eval_runtime": 0.3294, |
|
"eval_samples_per_second": 3166.651, |
|
"eval_steps_per_second": 15.18, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 4.947795867919922, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.3295, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7027804255485535, |
|
"eval_loss": 0.6800650358200073, |
|
"eval_matthews_correlation": 0.24007288633988105, |
|
"eval_runtime": 0.336, |
|
"eval_samples_per_second": 3104.623, |
|
"eval_steps_per_second": 14.883, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 7.731811046600342, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.2891, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.715244472026825, |
|
"eval_loss": 0.7395167946815491, |
|
"eval_matthews_correlation": 0.27006989464867953, |
|
"eval_runtime": 0.3343, |
|
"eval_samples_per_second": 3120.119, |
|
"eval_steps_per_second": 14.957, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 272, |
|
"total_flos": 1793902531485696.0, |
|
"train_loss": 0.4579080904231352, |
|
"train_runtime": 48.0666, |
|
"train_samples_per_second": 8894.95, |
|
"train_steps_per_second": 35.368 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1793902531485696.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|