|
{ |
|
"best_metric": 0.3611699044704437, |
|
"best_model_checkpoint": "bert_base_lda_20_v1_qqp/checkpoint-4266", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 11376, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.913527011871338, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.4706, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7975760573831313, |
|
"eval_combined_score": 0.7412216058960324, |
|
"eval_f1": 0.6848671544089334, |
|
"eval_loss": 0.41439592838287354, |
|
"eval_runtime": 26.0568, |
|
"eval_samples_per_second": 1551.613, |
|
"eval_steps_per_second": 6.064, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.9698476791381836, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.3635, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8274301261439525, |
|
"eval_combined_score": 0.8024355964223224, |
|
"eval_f1": 0.7774410667006922, |
|
"eval_loss": 0.3808220326900482, |
|
"eval_runtime": 26.0976, |
|
"eval_samples_per_second": 1549.186, |
|
"eval_steps_per_second": 6.054, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.8180556297302246, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.2981, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.840563937670047, |
|
"eval_combined_score": 0.8157680574270204, |
|
"eval_f1": 0.7909721771839938, |
|
"eval_loss": 0.3611699044704437, |
|
"eval_runtime": 26.154, |
|
"eval_samples_per_second": 1545.844, |
|
"eval_steps_per_second": 6.041, |
|
"step": 4266 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.3878448009490967, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.2419, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8491219391540935, |
|
"eval_combined_score": 0.8199943211952585, |
|
"eval_f1": 0.7908667032364235, |
|
"eval_loss": 0.4086599349975586, |
|
"eval_runtime": 25.573, |
|
"eval_samples_per_second": 1580.967, |
|
"eval_steps_per_second": 6.178, |
|
"step": 5688 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.262328863143921, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.1933, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8506059856542172, |
|
"eval_combined_score": 0.8207251134725826, |
|
"eval_f1": 0.7908442412909481, |
|
"eval_loss": 0.44820770621299744, |
|
"eval_runtime": 26.1949, |
|
"eval_samples_per_second": 1543.43, |
|
"eval_steps_per_second": 6.032, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.035948276519775, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.1514, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8534504081127875, |
|
"eval_combined_score": 0.8276283651201168, |
|
"eval_f1": 0.8018063221274461, |
|
"eval_loss": 0.43122583627700806, |
|
"eval_runtime": 26.1244, |
|
"eval_samples_per_second": 1547.595, |
|
"eval_steps_per_second": 6.048, |
|
"step": 8532 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 4.258938312530518, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.1208, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8498392282958199, |
|
"eval_combined_score": 0.8269528560391477, |
|
"eval_f1": 0.8040664837824754, |
|
"eval_loss": 0.5434415936470032, |
|
"eval_runtime": 25.8296, |
|
"eval_samples_per_second": 1565.258, |
|
"eval_steps_per_second": 6.117, |
|
"step": 9954 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.5687382221221924, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.097, |
|
"step": 11376 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.853227801137769, |
|
"eval_combined_score": 0.8277139005688845, |
|
"eval_f1": 0.8022000000000001, |
|
"eval_loss": 0.5604811310768127, |
|
"eval_runtime": 25.8751, |
|
"eval_samples_per_second": 1562.506, |
|
"eval_steps_per_second": 6.106, |
|
"step": 11376 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 11376, |
|
"total_flos": 3.8292762019405824e+17, |
|
"train_loss": 0.2420723964728719, |
|
"train_runtime": 4923.4333, |
|
"train_samples_per_second": 3695.043, |
|
"train_steps_per_second": 14.441 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 71100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.8292762019405824e+17, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|