{
  "datasets": {
    "fantasy": {
      "canonical_cluster_threshold": 0,
      "cluster_threshold": 0,
      "has_conll": true,
      "name": "fantasy",
      "num_dev_docs": 20,
      "num_test_docs": 20,
      "num_train_docs": 171,
      "targeted_eval": false
    },
    "litbank": {
      "canonical_cluster_threshold": 0,
      "cluster_threshold": 0,
      "cross_val_split": 0,
      "has_conll": true,
      "name": "LitBank",
      "num_dev_docs": 10,
      "num_test_docs": 10,
      "num_train_docs": 80,
      "targeted_eval": false
    }
  },
  "desc": "Major Entity Tracking",
  "device": "cuda:0",
  "infra": {
    "is_local": true,
    "work_dir": "./"
  },
  "keep_singletons": true,
  "key": "lf_sd_train_gen_4",
  "log_vals": false,
  "metrics": [
    "MUC",
    "Bcub",
    "CEAFE"
  ],
  "model": {
    "doc_encoder": {
      "add_speaker_tokens": true,
      "chunking": "independent",
      "finetune": true,
      "speaker_end": "[SPEAKER_END]",
      "speaker_start": "[SPEAKER_START]",
      "transformer": {
        "max_encoder_segment_len": 4096,
        "max_segment_len": 4096,
        "model_size": "large",
        "model_str": "allenai/longformer-large-4096",
        "name": "longformer"
      }
    },
    "memory": {
      "batch_size": 64,
      "emb_size": 20,
      "entity_rep": "wt_avg",
      "mem_type": {
        "eval_max_ents": null,
        "max_ents": null,
        "name": "unbounded"
      },
      "mlp_depth": 1,
      "mlp_size": 3000,
      "num_embeds": 10,
      "num_feats": 2,
      "pivot": false,
      "pseudo_dist": true,
      "rep_pos": "learned",
      "sim_func": "hadamard",
      "thresh": 0.0,
      "type": "hybrid"
    },
    "mention_params": {
      "emb_size": 20,
      "ext_ment": false,
      "ignore_non_gold": true,
      "max_span_width": 20,
      "ment_emb": "attn",
      "ment_emb_to_size_factor": {
        "attn": 3,
        "endpoint": 2,
        "max": 1
      },
      "mlp_depth": 1,
      "mlp_size": 3000,
      "top_span_ratio": 0.4,
      "use_gold_ments": false,
      "use_topk": false
    },
    "metadata_params": {
      "default_genre": "nw",
      "genres": [
        "bc",
        "bn",
        "mz",
        "nw",
        "pt",
        "tc",
        "wb"
      ],
      "use_genre_feature": false
    }
  },
  "optimizer": {
    "fine_tune_lr": 1e-05,
    "init_lr": 0.0003,
    "lr_decay": "linear",
    "max_gradient_norm": 1.0
  },
  "override_encoder": false,
  "override_memory": false,
  "paths": {
    "base_data_dir": "${paths.resource_dir}/raw_data",
    "base_model_dir": "${infra.work_dir}/../models",
    "best_model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best",
    "best_model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth",
    "conll_scorer": "${paths.resource_dir}/reference-coreference-scorers/scorer.pl",
    "doc_encoder_dirname": "doc_encoder",
    "model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4",
    "model_filename": "model.pth",
    "model_name": null,
    "model_name_prefix": "met_",
    "model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/model.pth",
    "resource_dir": "../data/"
  },
  "seed": 4,
  "train": true,
  "trainer": {
    "dropout_rate": 0.3,
    "eval_per_k_steps": null,
    "eval_type": "full",
    "generalise": true,
    "label_smoothing_wt": 0.1,
    "log_frequency": 500,
    "max_evals": 25,
    "max_training_segments": 1,
    "ment_loss_incl": true,
    "ment_loss_mode": "all",
    "normalize_loss": false,
    "num_training_steps": null,
    "patience": 10,
    "to_save_model": false
  },
  "use_wandb": true
}