File size: 2,988 Bytes
8f3eda5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from omegaconf import OmegaConf
from torch.cuda import is_available as use_cuda

# Base editable model: T5-large fine-tuned for closed-book NQ ("ssm-nq").
# Editors may only touch the feed-forward (DenseReluDense) weight matrices of
# the last two blocks on each side. The FF sub-layer index differs by side
# (encoder: layer.1, decoder: layer.2 — presumably because decoder blocks
# carry an extra cross-attention sub-layer; matches T5's layout).
model_config = {
    "name": "google/t5-large-ssm-nq",
    "class_name": "AutoModelForSeq2SeqLM",
    "tokenizer_class": "AutoTokenizer",
    "tokenizer_name": "google/t5-large-ssm-nq",
    "inner_params": [
        f"{stack}.block.{block}.layer.{ff_idx}.DenseReluDense.{mat}.weight"
        for stack, ff_idx in (("encoder", 1), ("decoder", 2))
        for block in (22, 23)
        for mat in ("wi", "wo")
    ],
    "pt": None,  # no pretrained-checkpoint override by default
    "small_name": "t5-small",
}

# Direct fine-tuning ("FT") editor: gradient steps on the inner params only,
# with an optional locality (drawdown) penalty that is disabled here.
ft_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        edit_lr=5e-6,
        train_base=False,
        ft=dict(
            verbose=False,
            max_edit_steps=100,
            time_limit=None,  # no wall-clock cap on an edit
            locality=dict(
                enabled=False,
                oracle=True,
                cedit=1e-2,
                batch_size=1,
            ),
            rank=None,  # full-rank updates
            opt="RMSprop",
            init_std=0.01,
        ),
        model=model_config,
    )
)

# Lookup-cache ("LU") editor: nearest-match retrieval gated by a similarity
# threshold; no gradient training, hence no learning rates here.
lu_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lu=dict(threshold=2.75, onehot_logit=1),
        model=model_config,
    )
)

# KnowledgeEditor ("KE") hypernetwork editor: only a meta-training learning
# rate is needed; the base model itself is frozen (train_base=False).
ke_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        train_base=False,
        lr=1e-5,
        model=model_config,
    )
)

# Editable Neural Network ("ENN") editor: MAML-style training with a single
# second-order inner edit step and a learnable edit learning rate (lr_lr).
enn_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lr=1e-5,
        edit_lr=1e-2,
        lr_lr=1e-3,
        train_base=True,
        grad_clip=100,
        dropout=0,
        no_grad_layers=None,
        enn=dict(
            first_order=False,  # keep full second-order meta-gradients
            n_edit_steps=1,
        ),
        model=model_config,
    )
)

# MEND editor: a shared low-rank gradient-transform network over the inner
# params. NOTE(review): the sub-config key is "gtn" rather than "mend" —
# presumably a legacy name that consumers still look up; keep as-is.
mend_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lr=1e-6,
        edit_lr=1e-4,
        lr_lr=1e-4,
        train_base=True,
        grad_clip=100,
        dropout=0,
        no_grad_layers=None,
        gtn=dict(
            one_sided=False,
            n_hidden=1,
            hidden_dim=None,
            init="id",  # initialize the transform near the identity
            norm=True,
            combine=True,
            x_only=False,
            delta_only=False,
            act="relu",
            rank=1920,
            mlp_class="IDMLP",
            shared=True,  # one transform shared across edited params
            descent=False,
        ),
        model=model_config,
    )
)

# SERAC editor: a DistilBERT scope classifier routes in-scope queries to a
# counterfactual model while the frozen base handles everything else.
serac_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lr=1e-5,
        edit_lr=1e-2,
        lr_lr=0,  # edit LR is not meta-learned here
        train_base=False,
        grad_clip=100,
        dropout=0,
        no_grad_layers=None,
        rep=dict(
            cls_name="distilbert-base-cased",
            cls_class="AutoModel",
            # NOTE(review): the string "true", not the bool True — downstream
            # may compare against the literal; confirm before normalizing.
            supervised="true",
            cos=False,
            freeze=None,
            square=True,
            bound_embeds=False,
            use_all_negatives=False,
            freeze_cntr=False,
            dist_heads=1,
            cross_attend=False,
            lora=None,
            soft_weighting=False,
            checkpoint_grad=False,
            cache_embeds=True,
        ),
        model=model_config,
    )
)