{
  "lr_encoder": "1e-5",
  "lr_others": "1e-4",
  "weight_decay_encoder": 0.01,
  "weight_decay_other": 0.01,
  "num_steps": 100000,
  "warmup_ratio": 0.1,
  "train_batch_size": 1,
  "eval_every": 3000,
  "gradient_accumulation": 8,
  "eval_batch_size": 32,
  "num_layers_freeze": null,
  "early_stopping_patience": null,
  "early_stopping_delta": 0.0,
  "save_at": [
    200,
    6000,
    12000,
    20000,
    70000
  ],
  "max_saves": 6,
  "max_width": 12,
  "model_name": "microsoft/deberta-v3-large",
  "fine_tune": true,
  "subtoken_pooling": "first",
  "hidden_size": 768,
  "scorer": "dot",
  "rel_mode": "marker",
  "span_marker_mode": "markerv1",
  "refine_prompt": false,
  "refine_relation": false,
  "ffn_mul": 4,
  "dropout": 0.4,
  "scheduler": "cosine_with_warmup",
  "loss_func": "binary_cross_entropy_loss",
  "alpha": 0.6,
  "gamma": 3,
  "label_embed_strategy": "both",
  "coref_classifier": false,
  "coref_loss_weight": 10.0,
  "dataset_name": "zero_rel",
  "root_dir": "ablation_backbone",
  "train_data": [
    "data/zero_rel_all.jsonl"
  ],
  "prev_path": "none",
  "size_sup": -1,
  "num_train_rel_types": 25,
  "num_unseen_rel_types": 15,
  "top_k": 1,
  "random_drop": true,
  "max_len": 512,
  "eval_threshold": [
    0.01,
    0.1,
    0.2,
    0.3,
    0.5
  ],
  "max_entity_pair_distance": null,
  "fixed_relation_types": true,
  "name": "large",
  "log_dir": "logs/zero_rel/zero_rel-2024-11-02__10-23-22",
  "eval_data": "data/wiki_zsl_all.jsonl",
  "coreference_label": "SELF",
  "entity_start_token": "[E]",
  "entity_end_token": "[/E]"
}