|
{ |
|
"best_metric": 1.6650625467300415, |
|
"best_model_checkpoint": "bert_base_lda_20_v1_stsb/checkpoint-138", |
|
"epoch": 11.0, |
|
"eval_steps": 500, |
|
"global_step": 253, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 9.738332748413086, |
|
"learning_rate": 4.9e-05, |
|
"loss": 2.9133, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_combined_score": 0.0876826822817508, |
|
"eval_loss": 2.431685209274292, |
|
"eval_pearson": 0.07911668732334376, |
|
"eval_runtime": 0.9718, |
|
"eval_samples_per_second": 1543.503, |
|
"eval_spearmanr": 0.09624867724015784, |
|
"eval_steps_per_second": 6.174, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 7.058999061584473, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.7921, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_combined_score": 0.4556578456594328, |
|
"eval_loss": 2.1686253547668457, |
|
"eval_pearson": 0.4521331798111072, |
|
"eval_runtime": 0.9704, |
|
"eval_samples_per_second": 1545.826, |
|
"eval_spearmanr": 0.45918251150775835, |
|
"eval_steps_per_second": 6.183, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 10.802772521972656, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.4021, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_combined_score": 0.4786612048961132, |
|
"eval_loss": 1.9878859519958496, |
|
"eval_pearson": 0.47632579208141346, |
|
"eval_runtime": 0.9615, |
|
"eval_samples_per_second": 1560.037, |
|
"eval_spearmanr": 0.4809966177108129, |
|
"eval_steps_per_second": 6.24, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 12.505186080932617, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.0503, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_combined_score": 0.49639717241844983, |
|
"eval_loss": 1.933651328086853, |
|
"eval_pearson": 0.49418117132384864, |
|
"eval_runtime": 0.9692, |
|
"eval_samples_per_second": 1547.742, |
|
"eval_spearmanr": 0.49861317351305107, |
|
"eval_steps_per_second": 6.191, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 22.745203018188477, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.8246, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_combined_score": 0.5342338035217759, |
|
"eval_loss": 1.713676929473877, |
|
"eval_pearson": 0.5359622180256605, |
|
"eval_runtime": 0.957, |
|
"eval_samples_per_second": 1567.479, |
|
"eval_spearmanr": 0.5325053890178912, |
|
"eval_steps_per_second": 6.27, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 8.874309539794922, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.6757, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_combined_score": 0.5438926115492484, |
|
"eval_loss": 1.6650625467300415, |
|
"eval_pearson": 0.5450617299778879, |
|
"eval_runtime": 0.9929, |
|
"eval_samples_per_second": 1510.787, |
|
"eval_spearmanr": 0.542723493120609, |
|
"eval_steps_per_second": 6.043, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 7.297491550445557, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.5303, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_combined_score": 0.5421078551362122, |
|
"eval_loss": 1.7656605243682861, |
|
"eval_pearson": 0.5455585471094383, |
|
"eval_runtime": 0.9748, |
|
"eval_samples_per_second": 1538.833, |
|
"eval_spearmanr": 0.5386571631629862, |
|
"eval_steps_per_second": 6.155, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 10.889992713928223, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.4539, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_combined_score": 0.5493515644466885, |
|
"eval_loss": 1.9937711954116821, |
|
"eval_pearson": 0.548189645149559, |
|
"eval_runtime": 1.0094, |
|
"eval_samples_per_second": 1486.068, |
|
"eval_spearmanr": 0.550513483743818, |
|
"eval_steps_per_second": 5.944, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.418324947357178, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.399, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_combined_score": 0.5356497995592364, |
|
"eval_loss": 1.7298147678375244, |
|
"eval_pearson": 0.5401914274028985, |
|
"eval_runtime": 0.9724, |
|
"eval_samples_per_second": 1542.516, |
|
"eval_spearmanr": 0.5311081717155742, |
|
"eval_steps_per_second": 6.17, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 12.156574249267578, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3663, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_combined_score": 0.5606574879091346, |
|
"eval_loss": 1.8046331405639648, |
|
"eval_pearson": 0.5614070412566732, |
|
"eval_runtime": 0.9685, |
|
"eval_samples_per_second": 1548.827, |
|
"eval_spearmanr": 0.5599079345615962, |
|
"eval_steps_per_second": 6.195, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 5.27700662612915, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.2964, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_combined_score": 0.5435187902847016, |
|
"eval_loss": 1.7390520572662354, |
|
"eval_pearson": 0.5469683812366085, |
|
"eval_runtime": 0.968, |
|
"eval_samples_per_second": 1549.517, |
|
"eval_spearmanr": 0.5400691993327947, |
|
"eval_steps_per_second": 6.198, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 253, |
|
"total_flos": 8319365318060544.0, |
|
"train_loss": 0.973101574441661, |
|
"train_runtime": 142.7213, |
|
"train_samples_per_second": 2014.066, |
|
"train_steps_per_second": 8.058 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8319365318060544.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|