KawshikManikantan
/

meira-h

Model card Files Files and versions Community

meira-h / config.json

KawshikManikantan's picture

KawshikManikantan

meira-h push

ac6740a about 2 months ago

history blame contribute delete

4.53 kB

	{
	"datasets": {
	"fantasy": {
	"canonical_cluster_threshold": 0,
	"cluster_threshold": 0,
	"has_conll": true,
	"name": "fantasy",
	"num_dev_docs": 20,
	"num_test_docs": 20,
	"num_train_docs": 171,
	"targeted_eval": false
	},
	"litbank": {
	"canonical_cluster_threshold": 0,
	"cluster_threshold": 0,
	"cross_val_split": 0,
	"has_conll": true,
	"name": "LitBank",
	"num_dev_docs": 10,
	"num_test_docs": 10,
	"num_train_docs": 80,
	"targeted_eval": false
	}
	},
	"desc": "Major Entity Tracking",
	"device": "cuda:0",
	"infra": {
	"is_local": true,
	"work_dir": "./"
	},
	"keep_singletons": true,
	"key": "lf_sd_train_gen_4",
	"log_vals": false,
	"metrics": [
	"MUC",
	"Bcub",
	"CEAFE"
	],
	"model": {
	"doc_encoder": {
	"add_speaker_tokens": true,
	"chunking": "independent",
	"finetune": true,
	"speaker_end": "[SPEAKER_END]",
	"speaker_start": "[SPEAKER_START]",
	"transformer": {
	"max_encoder_segment_len": 4096,
	"max_segment_len": 4096,
	"model_size": "large",
	"model_str": "allenai/longformer-large-4096",
	"name": "longformer"
	}
	},
	"memory": {
	"batch_size": 64,
	"emb_size": 20,
	"entity_rep": "wt_avg",
	"mem_type": {
	"eval_max_ents": null,
	"max_ents": null,
	"name": "unbounded"
	},
	"mlp_depth": 1,
	"mlp_size": 3000,
	"num_embeds": 10,
	"num_feats": 2,
	"pivot": false,
	"pseudo_dist": true,
	"rep_pos": "learned",
	"sim_func": "hadamard",
	"thresh": 0.0,
	"type": "hybrid"
	},
	"mention_params": {
	"emb_size": 20,
	"ext_ment": false,
	"ignore_non_gold": true,
	"max_span_width": 20,
	"ment_emb": "attn",
	"ment_emb_to_size_factor": {
	"attn": 3,
	"endpoint": 2,
	"max": 1
	},
	"mlp_depth": 1,
	"mlp_size": 3000,
	"top_span_ratio": 0.4,
	"use_gold_ments": false,
	"use_topk": false
	},
	"metadata_params": {
	"default_genre": "nw",
	"genres": [
	"bc",
	"bn",
	"mz",
	"nw",
	"pt",
	"tc",
	"wb"
	],
	"use_genre_feature": false
	}
	},
	"optimizer": {
	"fine_tune_lr": 1e-05,
	"init_lr": 0.0003,
	"lr_decay": "linear",
	"max_gradient_norm": 1.0
	},
	"override_encoder": false,
	"override_memory": false,
	"paths": {
	"base_data_dir": "${paths.resource_dir}/raw_data",
	"base_model_dir": "${infra.work_dir}/../models",
	"best_model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best",
	"best_model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/best/model.pth",
	"conll_scorer": "${paths.resource_dir}/reference-coreference-scorers/scorer.pl",
	"doc_encoder_dirname": "doc_encoder",
	"model_dir": ".//../models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4",
	"model_filename": "model.pth",
	"model_name": null,
	"model_name_prefix": "met_",
	"model_path": "/home/admin/coref_research/gpt-coref-met-1/models/met_joint_f78b0fa9c1d7718b9ed703ddcf621ec9_lf_sd_train_gen_4/model.pth",
	"resource_dir": "../data/"
	},
	"seed": 4,
	"train": true,
	"trainer": {
	"dropout_rate": 0.3,
	"eval_per_k_steps": null,
	"eval_type": "full",
	"generalise": true,
	"label_smoothing_wt": 0.1,
	"log_frequency": 500,
	"max_evals": 25,
	"max_training_segments": 1,
	"ment_loss_incl": true,
	"ment_loss_mode": "all",
	"normalize_loss": false,
	"num_training_steps": null,
	"patience": 10,
	"to_save_model": false
	},
	"use_wandb": true
	}