model-editing / config.py
Charles Lin
All algs except KE working.
8335d0c
from omegaconf import OmegaConf
from torch.cuda import is_available as use_cuda
model_config = {
"name": "google/t5-large-ssm-nq",
"class_name": "AutoModelForSeq2SeqLM",
"tokenizer_class": "AutoTokenizer",
"tokenizer_name": "google/t5-large-ssm-nq",
"inner_params": [
"encoder.block.22.layer.1.DenseReluDense.wi.weight",
"encoder.block.22.layer.1.DenseReluDense.wo.weight",
"encoder.block.23.layer.1.DenseReluDense.wi.weight",
"encoder.block.23.layer.1.DenseReluDense.wo.weight",
"decoder.block.22.layer.2.DenseReluDense.wi.weight",
"decoder.block.22.layer.2.DenseReluDense.wo.weight",
"decoder.block.23.layer.2.DenseReluDense.wi.weight",
"decoder.block.23.layer.2.DenseReluDense.wo.weight",
],
"pt": None,
"small_name": "t5-small",
}
ft_config = OmegaConf.create({
"device": "cpu",
"edit_lr": 5e-6,
"train_base": False,
"grad_clip": 100,
"ft": {
"verbose": False,
"max_edit_steps": 100,
"time_limit": None,
"locality": {
"enabled": False,
"oracle": True,
"cedit": 1e-2,
"batch_size": 1,
},
"rank": None,
"opt": "RMSprop",
"init_std": 0.01,
},
"model": model_config,
})
lu_config = OmegaConf.create({
"device": "cpu",
"lu": {
"threshold": 2.75,
"onehot_logit": 1,
},
"model": model_config,
})
ke_config = OmegaConf.create({
"device": "cpu",
"train_base": False,
"lr": 1e-5,
"model": model_config,
})
enn_config = OmegaConf.create({
"device": "cpu",
"lr": 1e-5,
"edit_lr": 1e-2,
"lr_lr": 1e-3,
"train_base": True,
"grad_clip": 100,
"dropout": 0,
"no_grad_layers": None,
"enn": {
"first_order": False,
"n_edit_steps": 1,
},
"model": model_config,
"archive": 8684705655, # "/iris/u/clin/code/efk/outputs/2022-02-09_05-48-20_8684705655/models/t5-large-ssm-nq.2022-02-09_05-48-20_8684705655",
})
mend_config = OmegaConf.create({
"device": "cpu",
"lr": 1e-6,
"edit_lr": 1e-4,
"lr_lr": 1e-4,
"train_base": True,
"grad_clip": 100,
"dropout": 0,
"no_grad_layers": None,
"gtn": {
"one_sided": False,
"n_hidden": 1,
"hidden_dim": None,
"init": "id",
"norm": True,
"combine": True,
"x_only": False,
"delta_only": False,
"act": "relu",
"rank": 1920,
"mlp_class": "IDMLP",
"shared": True,
"descent": False,
},
"model": model_config,
"archive": 5940349945, # "/iris/u/clin/code/efk/outputs/2022-02-09_11-47-28_5940349945/models/t5-large-ssm-nq.2022-02-09_11-47-28_5940349945",
})
serac_config = OmegaConf.create({
"device": "cpu", # "device": "cuda" if use_cuda() else "cpu",
"lr": 1e-5,
"edit_lr": 1e-2,
"lr_lr": 0,
"train_base": False,
"grad_clip": 100,
"dropout": 0,
"no_grad_layers": None,
"rep": {
"cls_name": "distilbert-base-cased",
"cls_class": "AutoModel",
"supervised": "true",
"cos": False,
"freeze": None,
"square": True,
"bound_embeds": False,
"use_all_negatives": False,
"freeze_cntr": False,
"dist_heads": 1,
"cross_attend": False,
"lora": None,
"soft_weighting": False,
"checkpoint_grad": False,
"cache_embeds": True,
},
"model": model_config,
"archive": 4719776130, # "/iris/u/clin/code/efk/outputs/2022-02-09_14-05-56_4719776130/models/t5-large-ssm-nq.2022-02-09_14-05-56_4719776130",
})