{ "best_metric": 1.6650625467300415, "best_model_checkpoint": "bert_base_lda_20_v1_stsb/checkpoint-138", "epoch": 11.0, "eval_steps": 500, "global_step": 253, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 9.738332748413086, "learning_rate": 4.9e-05, "loss": 2.9133, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.0876826822817508, "eval_loss": 2.431685209274292, "eval_pearson": 0.07911668732334376, "eval_runtime": 0.9718, "eval_samples_per_second": 1543.503, "eval_spearmanr": 0.09624867724015784, "eval_steps_per_second": 6.174, "step": 23 }, { "epoch": 2.0, "grad_norm": 7.058999061584473, "learning_rate": 4.8e-05, "loss": 1.7921, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.4556578456594328, "eval_loss": 2.1686253547668457, "eval_pearson": 0.4521331798111072, "eval_runtime": 0.9704, "eval_samples_per_second": 1545.826, "eval_spearmanr": 0.45918251150775835, "eval_steps_per_second": 6.183, "step": 46 }, { "epoch": 3.0, "grad_norm": 10.802772521972656, "learning_rate": 4.7e-05, "loss": 1.4021, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.4786612048961132, "eval_loss": 1.9878859519958496, "eval_pearson": 0.47632579208141346, "eval_runtime": 0.9615, "eval_samples_per_second": 1560.037, "eval_spearmanr": 0.4809966177108129, "eval_steps_per_second": 6.24, "step": 69 }, { "epoch": 4.0, "grad_norm": 12.505186080932617, "learning_rate": 4.600000000000001e-05, "loss": 1.0503, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.49639717241844983, "eval_loss": 1.933651328086853, "eval_pearson": 0.49418117132384864, "eval_runtime": 0.9692, "eval_samples_per_second": 1547.742, "eval_spearmanr": 0.49861317351305107, "eval_steps_per_second": 6.191, "step": 92 }, { "epoch": 5.0, "grad_norm": 22.745203018188477, "learning_rate": 4.5e-05, "loss": 0.8246, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.5342338035217759, "eval_loss": 1.713676929473877, "eval_pearson": 0.5359622180256605, "eval_runtime": 0.957, "eval_samples_per_second": 1567.479, "eval_spearmanr": 0.5325053890178912, "eval_steps_per_second": 6.27, "step": 115 }, { "epoch": 6.0, "grad_norm": 8.874309539794922, "learning_rate": 4.4000000000000006e-05, "loss": 0.6757, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.5438926115492484, "eval_loss": 1.6650625467300415, "eval_pearson": 0.5450617299778879, "eval_runtime": 0.9929, "eval_samples_per_second": 1510.787, "eval_spearmanr": 0.542723493120609, "eval_steps_per_second": 6.043, "step": 138 }, { "epoch": 7.0, "grad_norm": 7.297491550445557, "learning_rate": 4.3e-05, "loss": 0.5303, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.5421078551362122, "eval_loss": 1.7656605243682861, "eval_pearson": 0.5455585471094383, "eval_runtime": 0.9748, "eval_samples_per_second": 1538.833, "eval_spearmanr": 0.5386571631629862, "eval_steps_per_second": 6.155, "step": 161 }, { "epoch": 8.0, "grad_norm": 10.889992713928223, "learning_rate": 4.2e-05, "loss": 0.4539, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.5493515644466885, "eval_loss": 1.9937711954116821, "eval_pearson": 0.548189645149559, "eval_runtime": 1.0094, "eval_samples_per_second": 1486.068, "eval_spearmanr": 0.550513483743818, "eval_steps_per_second": 5.944, "step": 184 }, { "epoch": 9.0, "grad_norm": 5.418324947357178, "learning_rate": 4.1e-05, "loss": 0.399, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.5356497995592364, "eval_loss": 1.7298147678375244, "eval_pearson": 0.5401914274028985, "eval_runtime": 0.9724, "eval_samples_per_second": 1542.516, "eval_spearmanr": 0.5311081717155742, "eval_steps_per_second": 6.17, "step": 207 }, { "epoch": 10.0, "grad_norm": 12.156574249267578, "learning_rate": 4e-05, "loss": 0.3663, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.5606574879091346, "eval_loss": 1.8046331405639648, "eval_pearson": 0.5614070412566732, "eval_runtime": 0.9685, "eval_samples_per_second": 1548.827, "eval_spearmanr": 0.5599079345615962, "eval_steps_per_second": 6.195, "step": 230 }, { "epoch": 11.0, "grad_norm": 5.27700662612915, "learning_rate": 3.9000000000000006e-05, "loss": 0.2964, "step": 253 }, { "epoch": 11.0, "eval_combined_score": 0.5435187902847016, "eval_loss": 1.7390520572662354, "eval_pearson": 0.5469683812366085, "eval_runtime": 0.968, "eval_samples_per_second": 1549.517, "eval_spearmanr": 0.5400691993327947, "eval_steps_per_second": 6.198, "step": 253 }, { "epoch": 11.0, "step": 253, "total_flos": 8319365318060544.0, "train_loss": 0.973101574441661, "train_runtime": 142.7213, "train_samples_per_second": 2014.066, "train_steps_per_second": 8.058 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8319365318060544.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }