|
{ |
|
"best_metric": 0.628333568572998, |
|
"best_model_checkpoint": "bert_uncased_L-4_H-256_A-4_stsb/checkpoint-253", |
|
"epoch": 16.0, |
|
"eval_steps": 500, |
|
"global_step": 368, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 23.512914657592773, |
|
"learning_rate": 4.9e-05, |
|
"loss": 5.5773, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_combined_score": 0.35940682306384497, |
|
"eval_loss": 2.741238594055176, |
|
"eval_pearson": 0.3844718888057109, |
|
"eval_runtime": 0.5362, |
|
"eval_samples_per_second": 2797.44, |
|
"eval_spearmanr": 0.334341757321979, |
|
"eval_steps_per_second": 11.19, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 15.94455623626709, |
|
"learning_rate": 4.8e-05, |
|
"loss": 2.5793, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_combined_score": 0.7641849595072145, |
|
"eval_loss": 1.9157546758651733, |
|
"eval_pearson": 0.7726894274450853, |
|
"eval_runtime": 0.5382, |
|
"eval_samples_per_second": 2787.176, |
|
"eval_spearmanr": 0.7556804915693437, |
|
"eval_steps_per_second": 11.149, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 10.711121559143066, |
|
"learning_rate": 4.7e-05, |
|
"loss": 1.5767, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_combined_score": 0.7589673053156418, |
|
"eval_loss": 0.9540784358978271, |
|
"eval_pearson": 0.7705953786726122, |
|
"eval_runtime": 0.537, |
|
"eval_samples_per_second": 2793.104, |
|
"eval_spearmanr": 0.7473392319586716, |
|
"eval_steps_per_second": 11.172, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 7.894664764404297, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.9474, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_combined_score": 0.8101368624163152, |
|
"eval_loss": 0.7628255486488342, |
|
"eval_pearson": 0.8132575634904194, |
|
"eval_runtime": 0.538, |
|
"eval_samples_per_second": 2787.885, |
|
"eval_spearmanr": 0.8070161613422111, |
|
"eval_steps_per_second": 11.152, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 7.180455684661865, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.7258, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_combined_score": 0.8405981737206667, |
|
"eval_loss": 0.6784510016441345, |
|
"eval_pearson": 0.8382918173715312, |
|
"eval_runtime": 0.5415, |
|
"eval_samples_per_second": 2770.208, |
|
"eval_spearmanr": 0.8429045300698023, |
|
"eval_steps_per_second": 11.081, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 8.510337829589844, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.6162, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_combined_score": 0.8437314098589771, |
|
"eval_loss": 0.6755565404891968, |
|
"eval_pearson": 0.8435713950894774, |
|
"eval_runtime": 0.5333, |
|
"eval_samples_per_second": 2812.417, |
|
"eval_spearmanr": 0.8438914246284768, |
|
"eval_steps_per_second": 11.25, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 4.887516498565674, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.5455, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_combined_score": 0.849162211616322, |
|
"eval_loss": 0.6391196250915527, |
|
"eval_pearson": 0.8479729963672471, |
|
"eval_runtime": 0.5344, |
|
"eval_samples_per_second": 2806.816, |
|
"eval_spearmanr": 0.850351426865397, |
|
"eval_steps_per_second": 11.227, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 8.618912696838379, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.4912, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_combined_score": 0.8466254837460676, |
|
"eval_loss": 0.6581634879112244, |
|
"eval_pearson": 0.8460862349631103, |
|
"eval_runtime": 0.539, |
|
"eval_samples_per_second": 2783.12, |
|
"eval_spearmanr": 0.8471647325290248, |
|
"eval_steps_per_second": 11.132, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 6.421316146850586, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.4443, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_combined_score": 0.8476912767928249, |
|
"eval_loss": 0.6560913324356079, |
|
"eval_pearson": 0.8471629476366933, |
|
"eval_runtime": 0.5436, |
|
"eval_samples_per_second": 2759.539, |
|
"eval_spearmanr": 0.8482196059489565, |
|
"eval_steps_per_second": 11.038, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.827481269836426, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3995, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_combined_score": 0.8503318094261108, |
|
"eval_loss": 0.6429010629653931, |
|
"eval_pearson": 0.850358057284851, |
|
"eval_runtime": 0.5319, |
|
"eval_samples_per_second": 2820.144, |
|
"eval_spearmanr": 0.8503055615673704, |
|
"eval_steps_per_second": 11.281, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 6.088059902191162, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.3689, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_combined_score": 0.8543458921269349, |
|
"eval_loss": 0.628333568572998, |
|
"eval_pearson": 0.8545298128890402, |
|
"eval_runtime": 0.5445, |
|
"eval_samples_per_second": 2754.594, |
|
"eval_spearmanr": 0.8541619713648296, |
|
"eval_steps_per_second": 11.018, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 4.50377893447876, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.3418, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_combined_score": 0.8520085335126549, |
|
"eval_loss": 0.6592078804969788, |
|
"eval_pearson": 0.8519775414066034, |
|
"eval_runtime": 0.5357, |
|
"eval_samples_per_second": 2800.12, |
|
"eval_spearmanr": 0.8520395256187064, |
|
"eval_steps_per_second": 11.2, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.240983486175537, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.3302, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_combined_score": 0.8527115311132238, |
|
"eval_loss": 0.650736391544342, |
|
"eval_pearson": 0.8524420075355381, |
|
"eval_runtime": 0.5456, |
|
"eval_samples_per_second": 2749.213, |
|
"eval_spearmanr": 0.8529810546909096, |
|
"eval_steps_per_second": 10.997, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 7.435579776763916, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.319, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_combined_score": 0.8527060192476493, |
|
"eval_loss": 0.648432195186615, |
|
"eval_pearson": 0.8527872374234446, |
|
"eval_runtime": 0.5301, |
|
"eval_samples_per_second": 2829.767, |
|
"eval_spearmanr": 0.852624801071854, |
|
"eval_steps_per_second": 11.319, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 5.8321027755737305, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2863, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_combined_score": 0.8526379883202206, |
|
"eval_loss": 0.639711320400238, |
|
"eval_pearson": 0.8525586066146398, |
|
"eval_runtime": 0.5532, |
|
"eval_samples_per_second": 2711.331, |
|
"eval_spearmanr": 0.8527173700258016, |
|
"eval_steps_per_second": 10.845, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 4.939488887786865, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.2774, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_combined_score": 0.855685351715838, |
|
"eval_loss": 0.6379477977752686, |
|
"eval_pearson": 0.8558522598630551, |
|
"eval_runtime": 0.5326, |
|
"eval_samples_per_second": 2816.114, |
|
"eval_spearmanr": 0.855518443568621, |
|
"eval_steps_per_second": 11.264, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 368, |
|
"total_flos": 455736856141824.0, |
|
"train_loss": 0.9891756526801897, |
|
"train_runtime": 56.9981, |
|
"train_samples_per_second": 5043.151, |
|
"train_steps_per_second": 20.176 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 455736856141824.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|