glirel-large-v0 / glirel_config.json
jackboyla's picture
Push model using huggingface_hub.
02e48d6 verified
{
"lr_encoder": "1e-5",
"lr_others": "1e-4",
"weight_decay_encoder": 0.01,
"weight_decay_other": 0.01,
"num_steps": 100000,
"warmup_ratio": 0.1,
"train_batch_size": 1,
"eval_every": 3000,
"gradient_accumulation": 8,
"eval_batch_size": 32,
"num_layers_freeze": null,
"early_stopping_patience": null,
"early_stopping_delta": 0.0,
"save_at": [
200,
6000,
12000,
20000,
70000
],
"max_saves": 6,
"max_width": 12,
"model_name": "microsoft/deberta-v3-large",
"fine_tune": true,
"subtoken_pooling": "first",
"hidden_size": 768,
"scorer": "dot",
"rel_mode": "marker",
"span_marker_mode": "markerv1",
"refine_prompt": false,
"refine_relation": false,
"ffn_mul": 4,
"dropout": 0.4,
"scheduler": "cosine_with_warmup",
"loss_func": "binary_cross_entropy_loss",
"alpha": 0.6,
"gamma": 3,
"label_embed_strategy": "both",
"coref_classifier": false,
"coref_loss_weight": 10.0,
"dataset_name": "zero_rel",
"root_dir": "ablation_backbone",
"train_data": [
"data/zero_rel_all.jsonl"
],
"prev_path": "none",
"size_sup": -1,
"num_train_rel_types": 25,
"num_unseen_rel_types": 15,
"top_k": 1,
"random_drop": true,
"max_len": 512,
"eval_threshold": [
0.01,
0.1,
0.2,
0.3,
0.5
],
"max_entity_pair_distance": null,
"fixed_relation_types": true,
"name": "large",
"log_dir": "logs/zero_rel/zero_rel-2024-11-02__10-23-22",
"eval_data": "data/wiki_zsl_all.jsonl",
"coreference_label": "SELF",
"entity_start_token": "[E]",
"entity_end_token": "[/E]"
}