{
  "lr_encoder": "1e-5",
  "lr_others": "1e-4",
  "num_steps": 300000,
  "warmup_ratio": 0.1,
  "train_batch_size": 1,
  "eval_every": 3000,
  "gradient_accumulation": 8,
  "eval_batch_size": 32,
  "max_width": 12,
  "model_name": "microsoft/deberta-v3-large",
  "fine_tune": true,
  "subtoken_pooling": "first",
  "hidden_size": 768,
  "scorer": "dot",
  "span_mode": "marker",
  "refine_prompt": false,
  "refine_relation": false,
  "ffn_mul": 4,
  "dropout": 0.4,
  "scheduler": "cosine_with_warmup",
  "loss_func": "binary_cross_entropy_loss",
  "dataset_name": "zero_rel",
  "root_dir": "ablation_backbone",
  "train_data": [
    "data/zero_rel_all.jsonl"
  ],
  "prev_path": "none",
  "size_sup": -1,
  "num_train_rel_types": 25,
  "num_unseen_rel_types": 15,
  "top_k": 1,
  "random_drop": false,
  "max_len": 384,
  "eval_threshold": 0.001,
  "name": "large",
  "log_dir": "logs/zero_rel/zero_rel-2024-06-19__16-34-15",
  "eval_data": "data/wiki_zsl_all.jsonl"
}