|
{ |
|
"best_metric": 0.5645565390586853, |
|
"best_model_checkpoint": "bert_uncased_L-4_H-512_A-8_stsb/checkpoint-414", |
|
"epoch": 23.0, |
|
"eval_steps": 500, |
|
"global_step": 529, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.830661773681641, |
|
"learning_rate": 4.9e-05, |
|
"loss": 2.5878, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_combined_score": 0.7701561984669311, |
|
"eval_loss": 0.9754181504249573, |
|
"eval_pearson": 0.7823353387435045, |
|
"eval_runtime": 0.6067, |
|
"eval_samples_per_second": 2472.44, |
|
"eval_spearmanr": 0.7579770581903577, |
|
"eval_steps_per_second": 9.89, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.863245487213135, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.797, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_combined_score": 0.8474045759571504, |
|
"eval_loss": 0.7765601277351379, |
|
"eval_pearson": 0.8466114574812236, |
|
"eval_runtime": 0.5993, |
|
"eval_samples_per_second": 2502.798, |
|
"eval_spearmanr": 0.8481976944330771, |
|
"eval_steps_per_second": 10.011, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 5.94612979888916, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.5786, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_combined_score": 0.8595259972012628, |
|
"eval_loss": 0.6313804984092712, |
|
"eval_pearson": 0.8603476034751536, |
|
"eval_runtime": 0.5837, |
|
"eval_samples_per_second": 2569.683, |
|
"eval_spearmanr": 0.8587043909273718, |
|
"eval_steps_per_second": 10.279, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 5.815054416656494, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.4961, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_combined_score": 0.863972207025754, |
|
"eval_loss": 0.6342185139656067, |
|
"eval_pearson": 0.864253629360168, |
|
"eval_runtime": 0.5999, |
|
"eval_samples_per_second": 2500.544, |
|
"eval_spearmanr": 0.8636907846913399, |
|
"eval_steps_per_second": 10.002, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.26311731338501, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.3944, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_combined_score": 0.8688625775742702, |
|
"eval_loss": 0.6018186807632446, |
|
"eval_pearson": 0.8694015311403887, |
|
"eval_runtime": 0.5948, |
|
"eval_samples_per_second": 2521.799, |
|
"eval_spearmanr": 0.8683236240081519, |
|
"eval_steps_per_second": 10.087, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 5.787060737609863, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.3362, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_combined_score": 0.8657998136907656, |
|
"eval_loss": 0.6101198792457581, |
|
"eval_pearson": 0.8658762558868609, |
|
"eval_runtime": 0.596, |
|
"eval_samples_per_second": 2516.574, |
|
"eval_spearmanr": 0.8657233714946704, |
|
"eval_steps_per_second": 10.066, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 11.731350898742676, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.2932, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_combined_score": 0.867202263600297, |
|
"eval_loss": 0.6055566072463989, |
|
"eval_pearson": 0.8677929726947374, |
|
"eval_runtime": 0.6269, |
|
"eval_samples_per_second": 2392.606, |
|
"eval_spearmanr": 0.8666115545058564, |
|
"eval_steps_per_second": 9.57, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 6.97740364074707, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.2495, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_combined_score": 0.8675369208706127, |
|
"eval_loss": 0.6255138516426086, |
|
"eval_pearson": 0.8678571378034932, |
|
"eval_runtime": 0.6161, |
|
"eval_samples_per_second": 2434.767, |
|
"eval_spearmanr": 0.8672167039377323, |
|
"eval_steps_per_second": 9.739, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.306568145751953, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.2268, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_combined_score": 0.8691801970155717, |
|
"eval_loss": 0.5970388054847717, |
|
"eval_pearson": 0.8699055489590755, |
|
"eval_runtime": 0.6258, |
|
"eval_samples_per_second": 2396.782, |
|
"eval_spearmanr": 0.8684548450720679, |
|
"eval_steps_per_second": 9.587, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 8.294599533081055, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2037, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_combined_score": 0.868154308441244, |
|
"eval_loss": 0.6516512632369995, |
|
"eval_pearson": 0.869129623680379, |
|
"eval_runtime": 0.6109, |
|
"eval_samples_per_second": 2455.261, |
|
"eval_spearmanr": 0.8671789932021089, |
|
"eval_steps_per_second": 9.821, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.237338542938232, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.191, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_combined_score": 0.8693045282961145, |
|
"eval_loss": 0.601725697517395, |
|
"eval_pearson": 0.8709460268802508, |
|
"eval_runtime": 0.6389, |
|
"eval_samples_per_second": 2347.804, |
|
"eval_spearmanr": 0.8676630297119781, |
|
"eval_steps_per_second": 9.391, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 3.5681674480438232, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.1678, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_combined_score": 0.8694683064851385, |
|
"eval_loss": 0.6096729636192322, |
|
"eval_pearson": 0.8704163838060811, |
|
"eval_runtime": 0.61, |
|
"eval_samples_per_second": 2458.83, |
|
"eval_spearmanr": 0.8685202291641958, |
|
"eval_steps_per_second": 9.835, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 3.010383367538452, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.1546, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_combined_score": 0.8707355017307861, |
|
"eval_loss": 0.6051801443099976, |
|
"eval_pearson": 0.8713328676887442, |
|
"eval_runtime": 0.6283, |
|
"eval_samples_per_second": 2387.444, |
|
"eval_spearmanr": 0.8701381357728278, |
|
"eval_steps_per_second": 9.55, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.628120422363281, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1486, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_combined_score": 0.8701183473370655, |
|
"eval_loss": 0.5913792252540588, |
|
"eval_pearson": 0.8713640146825573, |
|
"eval_runtime": 0.6333, |
|
"eval_samples_per_second": 2368.584, |
|
"eval_spearmanr": 0.8688726799915738, |
|
"eval_steps_per_second": 9.474, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 3.274272918701172, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1372, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_combined_score": 0.8719789615058817, |
|
"eval_loss": 0.6174795031547546, |
|
"eval_pearson": 0.8737840000829027, |
|
"eval_runtime": 0.6364, |
|
"eval_samples_per_second": 2357.008, |
|
"eval_spearmanr": 0.8701739229288608, |
|
"eval_steps_per_second": 9.428, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.2427499294281006, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.131, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_combined_score": 0.8714552917935461, |
|
"eval_loss": 0.5825785994529724, |
|
"eval_pearson": 0.8726691216431404, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 2440.092, |
|
"eval_spearmanr": 0.8702414619439519, |
|
"eval_steps_per_second": 9.76, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.5106945037841797, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.1216, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_combined_score": 0.8701714131214551, |
|
"eval_loss": 0.577867865562439, |
|
"eval_pearson": 0.8717439862070048, |
|
"eval_runtime": 0.5982, |
|
"eval_samples_per_second": 2507.56, |
|
"eval_spearmanr": 0.8685988400359054, |
|
"eval_steps_per_second": 10.03, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.543298721313477, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.1145, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_combined_score": 0.8721440455693346, |
|
"eval_loss": 0.5645565390586853, |
|
"eval_pearson": 0.873854128781376, |
|
"eval_runtime": 0.6076, |
|
"eval_samples_per_second": 2468.855, |
|
"eval_spearmanr": 0.8704339623572931, |
|
"eval_steps_per_second": 9.875, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.314229965209961, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.1158, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_combined_score": 0.8724136267007434, |
|
"eval_loss": 0.5810861587524414, |
|
"eval_pearson": 0.8737710324523099, |
|
"eval_runtime": 0.6002, |
|
"eval_samples_per_second": 2499.237, |
|
"eval_spearmanr": 0.871056220949177, |
|
"eval_steps_per_second": 9.997, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 5.715078830718994, |
|
"learning_rate": 3e-05, |
|
"loss": 0.109, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_combined_score": 0.8741776682213974, |
|
"eval_loss": 0.5896406173706055, |
|
"eval_pearson": 0.876321941342814, |
|
"eval_runtime": 0.599, |
|
"eval_samples_per_second": 2504.307, |
|
"eval_spearmanr": 0.8720333950999808, |
|
"eval_steps_per_second": 10.017, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"grad_norm": 3.1881017684936523, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.105, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_combined_score": 0.8720974137486168, |
|
"eval_loss": 0.5863229036331177, |
|
"eval_pearson": 0.8737022748828657, |
|
"eval_runtime": 0.5928, |
|
"eval_samples_per_second": 2530.409, |
|
"eval_spearmanr": 0.8704925526143681, |
|
"eval_steps_per_second": 10.122, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"grad_norm": 3.1537728309631348, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.0995, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_combined_score": 0.8721144188715132, |
|
"eval_loss": 0.5757761001586914, |
|
"eval_pearson": 0.8740875105899543, |
|
"eval_runtime": 0.6179, |
|
"eval_samples_per_second": 2427.472, |
|
"eval_spearmanr": 0.8701413271530721, |
|
"eval_steps_per_second": 9.71, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"grad_norm": 2.4976413249969482, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.0971, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_combined_score": 0.8730602670988505, |
|
"eval_loss": 0.5780627727508545, |
|
"eval_pearson": 0.8748200461236054, |
|
"eval_runtime": 0.613, |
|
"eval_samples_per_second": 2446.918, |
|
"eval_spearmanr": 0.8713004880740955, |
|
"eval_steps_per_second": 9.788, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"step": 529, |
|
"total_flos": 2614663011045888.0, |
|
"train_loss": 0.3415615337782862, |
|
"train_runtime": 113.3247, |
|
"train_samples_per_second": 2536.516, |
|
"train_steps_per_second": 10.148 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 5 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2614663011045888.0, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|