{
  "best_metric": 1.9214489459991455,
  "best_model_checkpoint": "hBERTv1_new_pretrain_48_emb_com_stsb/checkpoint-990",
  "epoch": 27.0,
  "global_step": 1215,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 2.5817,
      "step": 45
    },
    {
      "epoch": 1.0,
      "eval_combined_score": 0.1961723851082083,
      "eval_loss": 2.602813720703125,
      "eval_pearson": 0.2027356377543126,
      "eval_runtime": 6.748,
      "eval_samples_per_second": 222.289,
      "eval_spearmanr": 0.189609132462104,
      "eval_steps_per_second": 1.778,
      "step": 45
    },
    {
      "epoch": 2.0,
      "learning_rate": 3.8400000000000005e-05,
      "loss": 2.1023,
      "step": 90
    },
    {
      "epoch": 2.0,
      "eval_combined_score": 0.19863013345614586,
      "eval_loss": 2.1595816612243652,
      "eval_pearson": 0.20345395522854448,
      "eval_runtime": 6.7492,
      "eval_samples_per_second": 222.249,
      "eval_spearmanr": 0.19380631168374723,
      "eval_steps_per_second": 1.778,
      "step": 90
    },
    {
      "epoch": 3.0,
      "learning_rate": 3.76e-05,
      "loss": 1.9567,
      "step": 135
    },
    {
      "epoch": 3.0,
      "eval_combined_score": 0.18933959939433503,
      "eval_loss": 2.3409199714660645,
      "eval_pearson": 0.18554383257948298,
      "eval_runtime": 6.7607,
      "eval_samples_per_second": 221.871,
      "eval_spearmanr": 0.19313536620918711,
      "eval_steps_per_second": 1.775,
      "step": 135
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.680000000000001e-05,
      "loss": 1.7201,
      "step": 180
    },
    {
      "epoch": 4.0,
      "eval_combined_score": 0.2899464590935818,
      "eval_loss": 2.1790404319763184,
      "eval_pearson": 0.28650208786193304,
      "eval_runtime": 6.747,
      "eval_samples_per_second": 222.32,
      "eval_spearmanr": 0.29339083032523056,
      "eval_steps_per_second": 1.779,
      "step": 180
    },
    {
      "epoch": 5.0,
      "learning_rate": 3.6e-05,
      "loss": 1.5153,
      "step": 225
    },
    {
      "epoch": 5.0,
      "eval_combined_score": 0.3366713577400152,
      "eval_loss": 2.1208479404449463,
      "eval_pearson": 0.3381105093417233,
      "eval_runtime": 6.7661,
      "eval_samples_per_second": 221.694,
      "eval_spearmanr": 0.3352322061383072,
      "eval_steps_per_second": 1.774,
      "step": 225
    },
    {
      "epoch": 6.0,
      "learning_rate": 3.52e-05,
      "loss": 1.2674,
      "step": 270
    },
    {
      "epoch": 6.0,
      "eval_combined_score": 0.388996037809745,
      "eval_loss": 2.122382879257202,
      "eval_pearson": 0.3881787749232017,
      "eval_runtime": 6.7466,
      "eval_samples_per_second": 222.334,
      "eval_spearmanr": 0.3898133006962884,
      "eval_steps_per_second": 1.779,
      "step": 270
    },
    {
      "epoch": 7.0,
      "learning_rate": 3.44e-05,
      "loss": 1.0115,
      "step": 315
    },
    {
      "epoch": 7.0,
      "eval_combined_score": 0.42925164874364347,
      "eval_loss": 2.22530198097229,
      "eval_pearson": 0.43036479777857173,
      "eval_runtime": 6.7518,
      "eval_samples_per_second": 222.163,
      "eval_spearmanr": 0.4281384997087152,
      "eval_steps_per_second": 1.777,
      "step": 315
    },
    {
      "epoch": 8.0,
      "learning_rate": 3.3600000000000004e-05,
      "loss": 0.7449,
      "step": 360
    },
    {
      "epoch": 8.0,
      "eval_combined_score": 0.4279396504678673,
      "eval_loss": 2.323458433151245,
      "eval_pearson": 0.42358611153077663,
      "eval_runtime": 6.7608,
      "eval_samples_per_second": 221.867,
      "eval_spearmanr": 0.432293189404958,
      "eval_steps_per_second": 1.775,
      "step": 360
    },
    {
      "epoch": 9.0,
      "learning_rate": 3.28e-05,
      "loss": 0.66,
      "step": 405
    },
    {
      "epoch": 9.0,
      "eval_combined_score": 0.4345560407048856,
      "eval_loss": 2.361697196960449,
      "eval_pearson": 0.4340089724216108,
      "eval_runtime": 6.77,
      "eval_samples_per_second": 221.566,
      "eval_spearmanr": 0.4351031089881605,
      "eval_steps_per_second": 1.773,
      "step": 405
    },
    {
      "epoch": 10.0,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.4678,
      "step": 450
    },
    {
      "epoch": 10.0,
      "eval_combined_score": 0.4278924878236148,
      "eval_loss": 2.0740506649017334,
      "eval_pearson": 0.42995071418144987,
      "eval_runtime": 6.7876,
      "eval_samples_per_second": 220.99,
      "eval_spearmanr": 0.4258342614657797,
      "eval_steps_per_second": 1.768,
      "step": 450
    },
    {
      "epoch": 11.0,
      "learning_rate": 3.1200000000000006e-05,
      "loss": 0.4438,
      "step": 495
    },
    {
      "epoch": 11.0,
      "eval_combined_score": 0.4289254482191033,
      "eval_loss": 2.3816161155700684,
      "eval_pearson": 0.4284999172461374,
      "eval_runtime": 6.7943,
      "eval_samples_per_second": 220.772,
      "eval_spearmanr": 0.4293509791920692,
      "eval_steps_per_second": 1.766,
      "step": 495
    },
    {
      "epoch": 12.0,
      "learning_rate": 3.0400000000000004e-05,
      "loss": 0.3192,
      "step": 540
    },
    {
      "epoch": 12.0,
      "eval_combined_score": 0.4590614019452154,
      "eval_loss": 2.1672608852386475,
      "eval_pearson": 0.45795017178280906,
      "eval_runtime": 6.8031,
      "eval_samples_per_second": 220.489,
      "eval_spearmanr": 0.4601726321076217,
      "eval_steps_per_second": 1.764,
      "step": 540
    },
    {
      "epoch": 13.0,
      "learning_rate": 2.96e-05,
      "loss": 0.2481,
      "step": 585
    },
    {
      "epoch": 13.0,
      "eval_combined_score": 0.43744904640918025,
      "eval_loss": 2.1544361114501953,
      "eval_pearson": 0.4392148992975784,
      "eval_runtime": 6.7836,
      "eval_samples_per_second": 221.122,
      "eval_spearmanr": 0.43568319352078216,
      "eval_steps_per_second": 1.769,
      "step": 585
    },
    {
      "epoch": 14.0,
      "learning_rate": 2.8800000000000002e-05,
      "loss": 0.2296,
      "step": 630
    },
    {
      "epoch": 14.0,
      "eval_combined_score": 0.45925148211887024,
      "eval_loss": 2.007450580596924,
      "eval_pearson": 0.46026152385259717,
      "eval_runtime": 6.8038,
      "eval_samples_per_second": 220.465,
      "eval_spearmanr": 0.4582414403851433,
      "eval_steps_per_second": 1.764,
      "step": 630
    },
    {
      "epoch": 15.0,
      "learning_rate": 2.8e-05,
      "loss": 0.1765,
      "step": 675
    },
    {
      "epoch": 15.0,
      "eval_combined_score": 0.4620511023513117,
      "eval_loss": 2.1394569873809814,
      "eval_pearson": 0.46235490780864075,
      "eval_runtime": 6.7896,
      "eval_samples_per_second": 220.927,
      "eval_spearmanr": 0.46174729689398264,
      "eval_steps_per_second": 1.767,
      "step": 675
    },
    {
      "epoch": 16.0,
      "learning_rate": 2.7200000000000004e-05,
      "loss": 0.1533,
      "step": 720
    },
    {
      "epoch": 16.0,
      "eval_combined_score": 0.4469345688037569,
      "eval_loss": 2.2714791297912598,
      "eval_pearson": 0.45120593215851773,
      "eval_runtime": 6.8234,
      "eval_samples_per_second": 219.832,
      "eval_spearmanr": 0.4426632054489961,
      "eval_steps_per_second": 1.759,
      "step": 720
    },
    {
      "epoch": 17.0,
      "learning_rate": 2.6400000000000005e-05,
      "loss": 0.1343,
      "step": 765
    },
    {
      "epoch": 17.0,
      "eval_combined_score": 0.4428810611082898,
      "eval_loss": 2.172586679458618,
      "eval_pearson": 0.44408986762850433,
      "eval_runtime": 6.787,
      "eval_samples_per_second": 221.01,
      "eval_spearmanr": 0.44167225458807524,
      "eval_steps_per_second": 1.768,
      "step": 765
    },
    {
      "epoch": 18.0,
      "learning_rate": 2.5600000000000002e-05,
      "loss": 0.1373,
      "step": 810
    },
    {
      "epoch": 18.0,
      "eval_combined_score": 0.4478234688937688,
      "eval_loss": 2.02228045463562,
      "eval_pearson": 0.4532121986881763,
      "eval_runtime": 6.7875,
      "eval_samples_per_second": 220.996,
      "eval_spearmanr": 0.4424347390993613,
      "eval_steps_per_second": 1.768,
      "step": 810
    },
    {
      "epoch": 19.0,
      "learning_rate": 2.4800000000000003e-05,
      "loss": 0.1277,
      "step": 855
    },
    {
      "epoch": 19.0,
      "eval_combined_score": 0.4346959695013698,
      "eval_loss": 1.9992105960845947,
      "eval_pearson": 0.4395412779716168,
      "eval_runtime": 6.7974,
      "eval_samples_per_second": 220.673,
      "eval_spearmanr": 0.42985066103112274,
      "eval_steps_per_second": 1.765,
      "step": 855
    },
    {
      "epoch": 20.0,
      "learning_rate": 2.4e-05,
      "loss": 0.0968,
      "step": 900
    },
    {
      "epoch": 20.0,
      "eval_combined_score": 0.46103377995091277,
      "eval_loss": 2.1078310012817383,
      "eval_pearson": 0.4620119136399576,
      "eval_runtime": 6.7915,
      "eval_samples_per_second": 220.864,
      "eval_spearmanr": 0.460055646261868,
      "eval_steps_per_second": 1.767,
      "step": 900
    },
    {
      "epoch": 21.0,
      "learning_rate": 2.32e-05,
      "loss": 0.084,
      "step": 945
    },
    {
      "epoch": 21.0,
      "eval_combined_score": 0.460210810698569,
      "eval_loss": 2.068389415740967,
      "eval_pearson": 0.46274616538784863,
      "eval_runtime": 6.7629,
      "eval_samples_per_second": 221.8,
      "eval_spearmanr": 0.4576754560092894,
      "eval_steps_per_second": 1.774,
      "step": 945
    },
    {
      "epoch": 22.0,
      "learning_rate": 2.2400000000000002e-05,
      "loss": 0.0777,
      "step": 990
    },
    {
      "epoch": 22.0,
      "eval_combined_score": 0.46235708950698207,
      "eval_loss": 1.9214489459991455,
      "eval_pearson": 0.46475032463030774,
      "eval_runtime": 6.7758,
      "eval_samples_per_second": 221.376,
      "eval_spearmanr": 0.45996385438365645,
      "eval_steps_per_second": 1.771,
      "step": 990
    },
    {
      "epoch": 23.0,
      "learning_rate": 2.1600000000000003e-05,
      "loss": 0.0572,
      "step": 1035
    },
    {
      "epoch": 23.0,
      "eval_combined_score": 0.44643779764240843,
      "eval_loss": 2.0636143684387207,
      "eval_pearson": 0.45063900222949543,
      "eval_runtime": 6.7543,
      "eval_samples_per_second": 222.082,
      "eval_spearmanr": 0.4422365930553215,
      "eval_steps_per_second": 1.777,
      "step": 1035
    },
    {
      "epoch": 24.0,
      "learning_rate": 2.08e-05,
      "loss": 0.0615,
      "step": 1080
    },
    {
      "epoch": 24.0,
      "eval_combined_score": 0.44382684994837796,
      "eval_loss": 2.0404300689697266,
      "eval_pearson": 0.4488704472998438,
      "eval_runtime": 6.7964,
      "eval_samples_per_second": 220.705,
      "eval_spearmanr": 0.43878325259691214,
      "eval_steps_per_second": 1.766,
      "step": 1080
    },
    {
      "epoch": 25.0,
      "learning_rate": 2e-05,
      "loss": 0.0516,
      "step": 1125
    },
    {
      "epoch": 25.0,
      "eval_combined_score": 0.4475345905545639,
      "eval_loss": 2.059901714324951,
      "eval_pearson": 0.4515878825695937,
      "eval_runtime": 6.7915,
      "eval_samples_per_second": 220.866,
      "eval_spearmanr": 0.44348129853953405,
      "eval_steps_per_second": 1.767,
      "step": 1125
    },
    {
      "epoch": 26.0,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.0501,
      "step": 1170
    },
    {
      "epoch": 26.0,
      "eval_combined_score": 0.4509567766877898,
      "eval_loss": 2.0359442234039307,
      "eval_pearson": 0.4530378970256098,
      "eval_runtime": 6.8254,
      "eval_samples_per_second": 219.768,
      "eval_spearmanr": 0.44887565634996973,
      "eval_steps_per_second": 1.758,
      "step": 1170
    },
    {
      "epoch": 27.0,
      "learning_rate": 1.8400000000000003e-05,
      "loss": 0.0515,
      "step": 1215
    },
    {
      "epoch": 27.0,
      "eval_combined_score": 0.45481392235284457,
      "eval_loss": 1.9571282863616943,
      "eval_pearson": 0.4588386300973516,
      "eval_runtime": 6.8054,
      "eval_samples_per_second": 220.412,
      "eval_spearmanr": 0.4507892146083376,
      "eval_steps_per_second": 1.763,
      "step": 1215
    },
    {
      "epoch": 27.0,
      "step": 1215,
      "total_flos": 2.301115836137472e+16,
      "train_loss": 0.6121585881268536,
      "train_runtime": 2633.1293,
      "train_samples_per_second": 109.167,
      "train_steps_per_second": 0.854
    }
  ],
  "max_steps": 2250,
  "num_train_epochs": 50,
  "total_flos": 2.301115836137472e+16,
  "trial_name": null,
  "trial_params": null
}