File size: 2,988 Bytes
8f3eda5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from omegaconf import OmegaConf
from torch.cuda import is_available as use_cuda

# Base editable model: T5-large fine-tuned for closed-book NQ ("ssm-nq").
# Editors may only touch the feed-forward (DenseReluDense) weight matrices of
# the last two blocks on each side. The FF sub-layer index differs by side
# (encoder: layer.1, decoder: layer.2 — presumably because decoder blocks
# carry an extra cross-attention sub-layer; matches T5's layout).
model_config = {
    "name": "google/t5-large-ssm-nq",
    "class_name": "AutoModelForSeq2SeqLM",
    "tokenizer_class": "AutoTokenizer",
    "tokenizer_name": "google/t5-large-ssm-nq",
    "inner_params": [
        f"{stack}.block.{block}.layer.{ff_idx}.DenseReluDense.{mat}.weight"
        for stack, ff_idx in (("encoder", 1), ("decoder", 2))
        for block in (22, 23)
        for mat in ("wi", "wo")
    ],
    "pt": None,  # no pretrained-checkpoint override by default
    "small_name": "t5-small",
}

# Direct fine-tuning ("FT") editor: gradient steps on the inner params only,
# with an optional locality (drawdown) penalty that is disabled here.
ft_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        edit_lr=5e-6,
        train_base=False,
        ft=dict(
            verbose=False,
            max_edit_steps=100,
            time_limit=None,  # no wall-clock cap on an edit
            locality=dict(
                enabled=False,
                oracle=True,
                cedit=1e-2,
                batch_size=1,
            ),
            rank=None,  # full-rank updates
            opt="RMSprop",
            init_std=0.01,
        ),
        model=model_config,
    )
)

# Lookup-cache ("LU") editor: nearest-match retrieval gated by a similarity
# threshold; no gradient training, hence no learning rates here.
lu_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lu=dict(threshold=2.75, onehot_logit=1),
        model=model_config,
    )
)

# KnowledgeEditor ("KE") hypernetwork editor: only a meta-training learning
# rate is needed; the base model itself is frozen (train_base=False).
ke_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        train_base=False,
        lr=1e-5,
        model=model_config,
    )
)

# Editable Neural Network ("ENN") editor: MAML-style training with a single
# second-order inner edit step and a learnable edit learning rate (lr_lr).
enn_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lr=1e-5,
        edit_lr=1e-2,
        lr_lr=1e-3,
        train_base=True,
        grad_clip=100,
        dropout=0,
        no_grad_layers=None,
        enn=dict(
            first_order=False,  # keep full second-order meta-gradients
            n_edit_steps=1,
        ),
        model=model_config,
    )
)

# MEND editor: a shared low-rank gradient-transform network over the inner
# params. NOTE(review): the sub-config key is "gtn" rather than "mend" —
# presumably a legacy name that consumers still look up; keep as-is.
mend_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lr=1e-6,
        edit_lr=1e-4,
        lr_lr=1e-4,
        train_base=True,
        grad_clip=100,
        dropout=0,
        no_grad_layers=None,
        gtn=dict(
            one_sided=False,
            n_hidden=1,
            hidden_dim=None,
            init="id",  # initialize the transform near the identity
            norm=True,
            combine=True,
            x_only=False,
            delta_only=False,
            act="relu",
            rank=1920,
            mlp_class="IDMLP",
            shared=True,  # one transform shared across edited params
            descent=False,
        ),
        model=model_config,
    )
)

# SERAC editor: a DistilBERT scope classifier routes in-scope queries to a
# counterfactual model while the frozen base handles everything else.
serac_config = OmegaConf.create(
    dict(
        device="cuda" if use_cuda() else "cpu",
        lr=1e-5,
        edit_lr=1e-2,
        lr_lr=0,  # edit LR is not meta-learned here
        train_base=False,
        grad_clip=100,
        dropout=0,
        no_grad_layers=None,
        rep=dict(
            cls_name="distilbert-base-cased",
            cls_class="AutoModel",
            # NOTE(review): the string "true", not the bool True — downstream
            # may compare against the literal; confirm before normalizing.
            supervised="true",
            cos=False,
            freeze=None,
            square=True,
            bound_embeds=False,
            use_all_negatives=False,
            freeze_cntr=False,
            dist_heads=1,
            cross_attend=False,
            lora=None,
            soft_weighting=False,
            checkpoint_grad=False,
            cache_embeds=True,
        ),
        model=model_config,
    )
)