Charles Lin committed on
Commit
e56055d
1 Parent(s): 717a51e

Add algorithms from efk codebase

Files changed (15)
  1. algs/enn.py +114 -0
  2. algs/ft.py +121 -0
  3. algs/ke.py +312 -0
  4. algs/lu.py +90 -0
  5. algs/mend.py +297 -0
  6. algs/serac.py +452 -0
  7. app.py +1 -0
  8. editable_model.py +36 -0
  9. hooks.py +28 -0
  10. losses.py +181 -0
  11. metrics.py +135 -0
  12. models.py +196 -0
  13. nn.py +362 -0
  14. requirements.txt +6 -0
  15. utils.py +441 -0
algs/enn.py ADDED
@@ -0,0 +1,114 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import higher
4
+
5
+ from editable_model import EditableModel
6
+ from utils import _logits
7
+
8
+
9
+ def fomaml_callback(all_grads):
10
+ return [g.detach() if g is not None else None for g in all_grads]
11
+
12
+
13
+ class ENN(EditableModel):
14
+ def __init__(self, model, config, model_constructor, edit_lrs=None, edit_loss_fn=None):
15
+ super().__init__(model, config, model_constructor)
16
+
17
+ if edit_lrs is None:
18
+ edit_lrs = nn.Parameter(torch.tensor([config.edit_lr] * len(self.config.model.inner_params)))
19
+ self.edit_lrs = edit_lrs
20
+
21
+ if edit_loss_fn is not None:
22
+ self.edit_loss_fn = edit_loss_fn
23
+
24
+ self.grad_callback = fomaml_callback if config.enn.first_order else lambda x: x
25
+
26
+ def outer_parameters(self, grouped=False):
27
+ extra_params = [self.edit_lrs]
28
+ if self.config.no_grad_layers is None:
29
+ model_params = self.model.parameters() if type(self.model.parameters()) == list else list(self.model.parameters())
30
+ else:
31
+ model_params = []
32
+ for m in self.model.modules():
33
+ if isinstance(m, nn.ModuleList):
34
+ model_params.extend(list(m[self.config.no_grad_layers:].parameters()))
35
+
36
+ if grouped:
37
+ return [
38
+ dict(params=model_params, lr=self.config.lr),
39
+ dict(params=extra_params, lr=self.config.lr_lr)
40
+ ]
41
+ else:
42
+ return model_params + extra_params
43
+
44
+ def get_state_dict(self):
45
+ return self.state_dict()
46
+
47
+ def edit(self, batch, condition=None, detach_history=False):
48
+ opt = torch.optim.SGD([{"params": p, "lr": None}
49
+ for (n, p) in self.model.named_parameters() if n in self.config.model.inner_params])
50
+ with torch.enable_grad(), higher.innerloop_ctx(
51
+ self.model,
52
+ opt,
53
+ override={'lr': list(self.edit_lrs)},
54
+ copy_initial_weights=False,
55
+ track_higher_grads=self.training,
56
+ in_place=True
57
+ ) as (fmodel, diffopt):
58
+ fmodel.eval()
59
+ for edit_step in range(self.config.enn.n_edit_steps):
60
+ output = _logits(fmodel(**batch))
61
+ loss = self.edit_loss_fn(output, batch["labels"])["nll"]
62
+ diffopt.step(loss, grad_callback=self.grad_callback)
63
+
64
+ if not detach_history:
65
+ model_edited = fmodel
66
+ else:
67
+ model_edited = self.model_constructor()
68
+ model_edited.load_state_dict(fmodel.state_dict())
69
+ model_edited.train(self.training)
70
+
71
+ return ENN(model_edited, self.config, self.model_constructor, edit_lrs=self.edit_lrs, edit_loss_fn=self.edit_loss_fn), {}
72
+
73
+
74
+ def test():
75
+ import transformers
76
+ import types
77
+ import copy
78
+
79
+ model = transformers.GPT2LMHeadModel.from_pretrained("gpt2")
80
+
81
+ config = types.SimpleNamespace()
82
+ config.edit_lr = 0.1
83
+ config.model.inner_params = [
84
+ "transformer.h.9.mlp.c_fc.weight",
85
+ "transformer.h.9.mlp.c_proj.weight",
86
+ "transformer.h.10.mlp.c_fc.weight",
87
+ "transformer.h.10.mlp.c_proj.weight",
88
+ "transformer.h.11.mlp.c_fc.weight",
89
+ "transformer.h.11.mlp.c_proj.weight",
90
+ ]
91
+ config.enn = {
92
+ "n_edit_steps": 2,
93
+ "first_order": False
94
+ }
95
+
96
+ enn = ENN(model, config, lambda: copy.deepcopy(model)).cuda()
97
+
98
+ x = torch.arange(100).view(5, 20).cuda() + 1000
99
+
100
+ edited = enn.edit(x, masks=torch.ones_like(x), labels=x)
101
+
102
+ orig_param = [p for (n, p) in enn.model.named_parameters() if n == config.model.inner_params[-1]][0]
103
+ edited_param = [p for (n, p) in edited.model.named_parameters() if n == config.model.inner_params[-1]][0]
104
+
105
+ print((orig_param - edited_param).abs().max())
106
+ edited.eval()
107
+ print(enn(x, labels=x).loss, edited(x, labels=x).loss, edited.edit_loss_fn(edited(x).logits, x)["nll"])
108
+ edited.edit_loss_fn(edited(x).logits, x)["nll"].backward()
109
+ import pdb; pdb.set_trace()
110
+
111
+
112
+ if __name__ == '__main__':
113
+ with torch.autograd.set_detect_anomaly(True):
114
+ test()
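Note (illustrative, not part of the diff above): enn.py relies on a few differentiable SGD steps over selected inner parameters, taken inside higher.innerloop_ctx, so that an outer objective can backpropagate through the edit. A minimal sketch of that pattern in plain PyTorch, with toy tensors instead of a language model:

# Differentiable inner-loop edit, sketched without `higher` (toy example, not the commit's code).
import torch
import torch.nn.functional as F

torch.manual_seed(0)
W = torch.randn(4, 4, requires_grad=True)      # an "inner" parameter the edit may touch
x, y = torch.randn(8, 4), torch.randn(8, 4)

W_t = W
for _ in range(2):                             # cf. config.enn.n_edit_steps
    inner_loss = F.mse_loss(x @ W_t, y)
    (g,) = torch.autograd.grad(inner_loss, W_t, create_graph=True)
    # ENN's fomaml_callback would detach g here for a first-order approximation.
    W_t = W_t - 0.1 * g                        # differentiable update, like diffopt.step

outer_loss = F.mse_loss(x @ W_t, y)            # loss measured on the edited parameters
outer_loss.backward()                          # gradients flow back into the original W
print(W.grad.norm())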
algs/ft.py ADDED
@@ -0,0 +1,121 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import higher
4
+ from higher.patch import monkeypatch as make_functional
5
+ import time
6
+
7
+ from editable_model import EditableModel
8
+ from utils import _logits, _inner_params
9
+ from losses import kl_loc_loss
10
+
11
+
12
+ class FT(EditableModel):
13
+ """
14
+ Fine-tuning approach. Does not require training.
15
+ """
16
+
17
+ def __init__(self, model, config, model_constructor, edit_loss_fn=None):
18
+ super().__init__(model, config, model_constructor)
19
+
20
+ if edit_loss_fn is not None:
21
+ self.edit_loss_fn = edit_loss_fn
22
+
23
+ self.locality_loss_fn = kl_loc_loss
24
+ self.loc_ids = None
25
+ self.loc_masks = None
26
+ self.loc_sampler = None
27
+
28
+ def _edit_loss(self, model, p0, p_edited, edit_batch):
29
+ output = _logits(model(**edit_batch, params=p_edited))
30
+ loss_dict = self.edit_loss_fn(output, edit_batch["labels"])
31
+ l_edit, acc = loss_dict["nll"], loss_dict["acc"]
32
+ if self.config.ft.locality.enabled:
33
+ if self.config.ft.locality.oracle:
34
+ loc_batch = next(self.loc_sampler)["loc"]
35
+ else:
36
+ raise NotImplementedError
37
+
38
+ with torch.no_grad():
39
+ original_base_logits = _logits(model(**loc_batch, params=p0))
40
+ edited_base_logits = _logits(model(**loc_batch, params=p_edited))
41
+ kl_mask = loc_batch.get("decoder_attention_mask", loc_batch["attention_mask"])
42
+ l_loc = self.locality_loss_fn(original_base_logits, edited_base_logits, mask=kl_mask)
43
+ loss = l_loc + self.config.ft.locality.cedit * l_edit
44
+ else:
45
+ l_loc = torch.tensor(float('nan'))
46
+ loss = l_edit
47
+ return loss, l_edit, l_loc, acc
48
+
49
+ def accuracy(self, output, labels):
50
+ if output.shape[-1] != 1:
51
+ shifted_output = output.argmax(-1)[:, :-1]
52
+ shifted_labels = labels[:, 1:]
53
+ to_predict = (shifted_labels != -100).sum()
54
+ correct = (shifted_output == shifted_labels).sum()
55
+ acc = correct.float() / to_predict.float()
56
+ else:
57
+ acc = ((output > 0) == labels.bool()).sum().float()
58
+ return acc
59
+
60
+ def _edit_status(self, step, loss, l_edit, l_loc, acc, res_p):
61
+ return (
62
+ f"step: {step}".ljust(14) +
63
+ f"loss: {loss.item():.5f}".ljust(18) +
64
+ f"l_edit: {l_edit.item():.5f}".ljust(18) +
65
+ f"l_loc: {l_loc.item():.5f}".ljust(18) +
66
+ f"acc: {acc.item():.2f}".ljust(14) +
67
+ f"norm: {res_p.view(-1).norm().item():.5f}"
68
+ )
69
+
70
+ def edit(self, batch, condition=None, detach_history=False):
71
+ edit_model = self.model.eval()
72
+ p0 = list(edit_model.named_parameters())
73
+
74
+ if not isinstance(edit_model, higher.patch._MonkeyPatchBase):
75
+ edit_model = make_functional(self.model, track_higher_grads=False, in_place=True)
76
+
77
+ packed_residuals = {}
78
+ opt_params = []
79
+ for n, p in _inner_params(edit_model.named_parameters(), self.config.model.inner_params):
80
+ if self.config.ft.rank is not None:
81
+ u = nn.Parameter(torch.randn(p.shape[0], self.config.ft.rank, device=p.device) * self.config.ft.init_std)
82
+ v = nn.Parameter(torch.zeros(self.config.ft.rank, p.shape[1], device=p.device))
83
+ res = [u, v]
84
+ else:
85
+ res = [nn.Parameter(torch.zeros_like(p, device=p.device))]
86
+
87
+ packed_residuals[n] = res
88
+ opt_params.extend(res)
89
+
90
+ assert len(opt_params) == len(self.config.model.inner_params)
91
+ OptClass = getattr(torch.optim, self.config.ft.opt)
92
+ opt = OptClass(opt_params, lr=self.config.edit_lr)
93
+
94
+ start_time = time.time()
95
+ for edit_step in range(self.config.ft.max_edit_steps):
96
+ if self.config.ft.time_limit is not None and (time.time() - start_time > self.config.ft.time_limit):
97
+ break
98
+ residuals = {k: v[0] @ v[1] if len(v) == 2 else v[0] for k, v in packed_residuals.items()}
99
+ edited_params = [p if n not in residuals else p.detach() + residuals[n] for n, p in p0]
100
+ loss, l_edit, l_loc, acc = self._edit_loss(edit_model, [p for n, p in p0], edited_params, batch)
101
+
102
+ if self.config.ft.verbose:
103
+ residual = list(residuals.values())[-1]
104
+ print(self._edit_status(edit_step, loss, l_edit, l_loc, acc, residual), end="\r")
105
+
106
+ if acc == 1.0:
107
+ break
108
+
109
+ for p, g in zip(opt_params, torch.autograd.grad(loss, opt_params)):
110
+ p.grad = g
111
+ torch.nn.utils.clip_grad_norm_(opt_params, self.config.grad_clip)
112
+ opt.step()
113
+ opt.zero_grad()
114
+
115
+ if detach_history:
116
+ new_model = self.model_constructor()
117
+ new_model.load_state_dict(edit_model.state_dict())
118
+ edit_model = new_model
119
+ edit_model.train(self.training)
120
+
121
+ return FT(edit_model, self.config, self.model_constructor, self.edit_loss_fn), {}
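Note (illustrative, not part of the diff above): when config.ft.rank is set, ft.py optimizes a low-rank residual u @ v that is added to each frozen inner parameter rather than updating the parameter itself. A minimal sketch of that parameterization with toy shapes:

# Low-rank residual fine-tuning on a single frozen weight (toy example, not the commit's code).
import torch
import torch.nn as nn

torch.manual_seed(0)
W = torch.randn(16, 16)                         # frozen pretrained weight
x, target = torch.randn(4, 16), torch.randn(4, 16)

rank = 2
u = nn.Parameter(torch.randn(16, rank) * 0.01)  # small random init, cf. config.ft.init_std
v = nn.Parameter(torch.zeros(rank, 16))         # zero init: the edit starts as a no-op
opt = torch.optim.Adam([u, v], lr=1e-2)

for _ in range(100):
    W_edited = W + u @ v                        # residual applied to the untouched base weight
    loss = nn.functional.mse_loss(x @ W_edited, target)
    opt.zero_grad()
    loss.backward()
    opt.step()
print(loss.item())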
algs/ke.py ADDED
@@ -0,0 +1,312 @@
1
+ # Adapted from https://github.com/nicola-decao/KnowledgeEditor/blob/main/src/models/one_shot_learner.py
2
+ """
3
+ @inproceedings{decao2020editing,
4
+ title={Editing Factual Knowledge in Language Models},
5
+ author={Nicola De Cao and Wilker Aziz and Ivan Titov},
6
+ booktitle={arXiv pre-print 2104.08164},
7
+ url={https://arxiv.org/abs/2104.08164},
8
+ year={2021},
9
+ }
10
+ """
11
+
12
+ import torch
13
+ import copy
14
+ import higher
15
+ from higher.patch import monkeypatch as make_functional
16
+ from allennlp.modules.feedforward import FeedForward
17
+ from allennlp.modules.seq2vec_encoders import PytorchSeq2VecWrapper
18
+ import logging
19
+
20
+ from editable_model import EditableModel
21
+ from utils import _logits, _inner_params
22
+ from models import BertClassifier
23
+ from transformers import BartForConditionalGeneration, T5ForConditionalGeneration
24
+
25
+
26
+ LOG = logging.getLogger(__name__)
27
+
28
+
29
+ class KE(EditableModel):
30
+ def __init__(self, model, config, model_constructor, editor=None):
31
+ super().__init__(model, config, model_constructor)
32
+
33
+ if editor is None:
34
+ if isinstance(model, BertClassifier):
35
+ embedding = model.model.embeddings.word_embeddings.weight.data
36
+ elif isinstance(model, BartForConditionalGeneration):
37
+ embedding = model.model.shared.weight.data
38
+ elif isinstance(model, T5ForConditionalGeneration):
39
+ embedding = model.shared.weight.data
40
+ else:
41
+ embedding = model.transformer.wte.weight.data
42
+
43
+ editor = OneShotLearner(model, vocab_dim=model.config.vocab_size,
44
+ include_set=config.model.inner_params,
45
+ embedding_dim=embedding.shape[-1],
46
+ embedding_init=embedding.clone().to(torch.float32),
47
+ max_scale=1)
48
+ self.editor = editor
49
+
50
+ def outer_parameters(self, grouped=False):
51
+ if grouped:
52
+ return [
53
+ dict(params=self.editor.parameters(), lr=self.config.lr)
54
+ ]
55
+ else:
56
+ return list(self.editor.parameters())
57
+
58
+ def state_dict(self, destination=None, prefix="", keep_vars=False):
59
+ state_dict = super().state_dict(prefix=prefix, keep_vars=keep_vars) # Get default state dict
60
+ model_keys = self.model.state_dict(prefix=prefix, keep_vars=keep_vars).keys() # Remove model params
61
+ for k in model_keys:
62
+ del state_dict[f"model.{k}"]
63
+ state_dict["model_config"] = self.model.config # Include model config
64
+ return state_dict
65
+
66
+ def load_state_dict(self, state_dict, strict: bool = True):
67
+ config = state_dict["model_config"]
68
+ del state_dict["model_config"]
69
+ if config != self.model.config:
70
+ LOG.info("Loaded model config doesn't match current model config.")
71
+ LOG.info(f"Loaded: {config}")
72
+ LOG.info(f"Current: {self.model.config}")
73
+
74
+ res = super().load_state_dict(state_dict, False)
75
+ # We should only have missing keys for the model, and no unexpected keys
76
+ assert len([k for k in res.missing_keys if not k.startswith("model.")]) == 0, "Should only have missing keys for model."
77
+ assert len(res.unexpected_keys) == 0, "Shouldn't have any unexpected keys"
78
+ return res
79
+
80
+ def edit(self, batch, condition, detach_history=False):
81
+ outputs = _logits(self.model(**batch))
82
+ loss = self.edit_loss_fn(outputs, batch["labels"])["nll"]
83
+
84
+ names = set([n for n, p in self.model.named_parameters()])
85
+ pset = set(self.config.model.inner_params)
86
+ for p in pset:
87
+ assert p in names, f"inner param {p} not in model"
88
+
89
+ grads = torch.autograd.grad(
90
+ loss,
91
+ [p for (n, p) in _inner_params(self.model.named_parameters(), self.config.model.inner_params)]
92
+ )
93
+
94
+ params_dict = self.editor(
95
+ condition["input_ids"] if condition is not None else batch["input_ids"],
96
+ condition["attention_mask"] if condition is not None else batch["attention_mask"],
97
+ {n: g.to(torch.float32) for (n, g) in zip(self.config.model.inner_params, grads)},
98
+ )
99
+
100
+ edited_model = self.model
101
+ if not isinstance(edited_model, higher.patch._MonkeyPatchBase):
102
+ edited_model = make_functional(edited_model, in_place=True)
103
+
104
+ def new_param(n, p):
105
+ if n not in params_dict:
106
+ return p
107
+
108
+ if p.shape[0] == params_dict[n].shape[0]:
109
+ return p + params_dict[n]
110
+ else:
111
+ return p + params_dict[n].T
112
+
113
+ edited_model.update_params(
114
+ [new_param(n, p) for (n, p) in edited_model.named_parameters()]
115
+ )
116
+
117
+ if detach_history:
118
+ new_model = self.model_constructor()
119
+ new_model.load_state_dict(edited_model.state_dict())
120
+ edited_model = new_model
121
+
122
+ return KE(edited_model, self.config, self.model_constructor, editor=self.editor), {}
123
+
124
+
125
+ class ConditionedParameter(torch.nn.Module):
126
+ def __init__(self, parameter, condition_dim=1024, hidden_dim=128, max_scale=1):
127
+ super().__init__()
128
+ self.parameter_shape = parameter.shape
129
+
130
+ if len(self.parameter_shape) == 2:
131
+ self.conditioners = torch.nn.Sequential(
132
+ torch.nn.utils.weight_norm(torch.nn.Linear(condition_dim, hidden_dim)),
133
+ torch.nn.Tanh(),
134
+ torch.nn.utils.weight_norm(
135
+ torch.nn.Linear(
136
+ hidden_dim, 2 * (parameter.shape[0] + parameter.shape[1]) + 1
137
+ )
138
+ ),
139
+ )
140
+ elif len(self.parameter_shape) == 1:
141
+ self.conditioners = torch.nn.Sequential(
142
+ torch.nn.utils.weight_norm(torch.nn.Linear(condition_dim, hidden_dim)),
143
+ torch.nn.Tanh(),
144
+ torch.nn.utils.weight_norm(
145
+ torch.nn.Linear(hidden_dim, 2 * parameter.shape[0] + 1)
146
+ ),
147
+ )
148
+ else:
149
+ raise RuntimeError()
150
+
151
+ self.max_scale = max_scale
152
+
153
+ def forward(self, inputs, grad):
154
+ if inputs.shape[0] > 1:
155
+ raise RuntimeError("Can only condition on batches of size 1")
156
+
157
+ if len(self.parameter_shape) == 2:
158
+ (
159
+ conditioner_cola,
160
+ conditioner_rowa,
161
+ conditioner_colb,
162
+ conditioner_rowb,
163
+ conditioner_norm,
164
+ ) = self.conditioners(inputs).split(
165
+ [
166
+ self.parameter_shape[1],
167
+ self.parameter_shape[0],
168
+ self.parameter_shape[1],
169
+ self.parameter_shape[0],
170
+ 1,
171
+ ],
172
+ dim=-1,
173
+ )
174
+
175
+ a = conditioner_rowa.softmax(-1).T @ conditioner_cola
176
+ b = conditioner_rowb.softmax(-1).T @ conditioner_colb
177
+
178
+ elif len(self.parameter_shape) == 1:
179
+ a, b, conditioner_norm = self.conditioners(inputs).split(
180
+ [self.parameter_shape[0], self.parameter_shape[0], 1], dim=-1
181
+ )
182
+ else:
183
+ raise RuntimeError()
184
+
185
+ if a.squeeze().shape[0] != grad.shape[0]:
186
+ return self.max_scale * conditioner_norm.sigmoid().squeeze() * (grad * a.squeeze().T + b.squeeze().T)
187
+ else:
188
+ return self.max_scale * conditioner_norm.sigmoid().squeeze() * (grad * a.squeeze() + b.squeeze())
189
+
190
+
191
+ class LSTMConditioner(torch.nn.Module):
192
+ def __init__(
193
+ self,
194
+ vocab_dim=30522,
195
+ embedding_dim=768,
196
+ hidden_dim=256,
197
+ output_dim=1024,
198
+ embedding_init=None,
199
+ ):
200
+ super().__init__()
201
+ self.embedding = torch.nn.Embedding(
202
+ num_embeddings=vocab_dim,
203
+ embedding_dim=embedding_dim,
204
+ padding_idx=0,
205
+ _weight=embedding_init,
206
+ )
207
+ self.lstm = PytorchSeq2VecWrapper(
208
+ torch.nn.LSTM(
209
+ input_size=embedding_dim,
210
+ hidden_size=hidden_dim,
211
+ num_layers=1,
212
+ bidirectional=True,
213
+ batch_first=True,
214
+ )
215
+ )
216
+ self.linear = FeedForward(
217
+ input_dim=hidden_dim * 2,
218
+ num_layers=1,
219
+ hidden_dims=[output_dim],
220
+ activations=[torch.nn.Tanh()],
221
+ )
222
+
223
+ def forward(self, inputs, masks):
224
+ return self.linear(self.lstm(self.embedding(inputs), masks))
225
+
226
+
227
+ class OneShotLearner(torch.nn.Module):
228
+ def __init__(
229
+ self,
230
+ model,
231
+ vocab_dim,
232
+ embedding_dim=768,
233
+ hidden_dim=512,
234
+ condition_dim=768,
235
+ include_set={},
236
+ max_scale=1e-3,
237
+ embedding_init=None,
238
+ ):
239
+ super().__init__()
240
+
241
+ self.param2conditioner_map = {
242
+ n: "{}_conditioner".format(n).replace(".", "_")
243
+ for n, p in model.named_parameters()
244
+ if n in include_set
245
+ }
246
+
247
+ self.conditioners = torch.nn.ModuleDict(
248
+ {
249
+ self.param2conditioner_map[n]: ConditionedParameter(
250
+ p,
251
+ condition_dim,
252
+ hidden_dim,
253
+ max_scale=max_scale,
254
+ )
255
+ for n, p in model.named_parameters()
256
+ if n in include_set
257
+ }
258
+ )
259
+
260
+ self.condition = LSTMConditioner(
261
+ vocab_dim,
262
+ embedding_dim,
263
+ hidden_dim,
264
+ condition_dim,
265
+ embedding_init=embedding_init,
266
+ )
267
+
268
+ def forward(self, inputs, masks, grads=None):
269
+ condition = self.condition(inputs, masks)
270
+ return {
271
+ p: self.conditioners[self.param2conditioner_map[p]](
272
+ condition,
273
+ grad=grads[p] if grads else None,
274
+ )
275
+ for p, c in self.param2conditioner_map.items()
276
+ }
277
+
278
+
279
+ if __name__ == '__main__':
280
+ import transformers
281
+ import types
282
+
283
+ model = transformers.GPT2LMHeadModel.from_pretrained("gpt2")
284
+
285
+ config = types.SimpleNamespace()
286
+ config.model.inner_params = [
287
+ "transformer.h.9.mlp.c_fc.weight",
288
+ "transformer.h.9.mlp.c_proj.weight",
289
+ "transformer.h.10.mlp.c_fc.weight",
290
+ "transformer.h.10.mlp.c_proj.weight",
291
+ "transformer.h.11.mlp.c_fc.weight",
292
+ "transformer.h.11.mlp.c_proj.weight",
293
+ ]
294
+
295
+ efk = KE(model, config, lambda: copy.deepcopy(model)).cuda()
296
+
297
+ x = torch.arange(20).view(1, 20).cuda() + 1000
298
+ orig_logits = efk(x).logits
299
+ edited = efk.edit(x, masks=torch.ones_like(x), labels=x)
300
+ post_logits = efk(x).logits
301
+
302
+ assert torch.allclose(orig_logits, post_logits)
303
+
304
+ orig_param = [p for (n, p) in efk.model.named_parameters() if n == config.model.inner_params[-1]][0]
305
+ edited_param = [p for (n, p) in edited.model.named_parameters() if n == config.model.inner_params[-1]][0]
306
+
307
+ print((orig_param - edited_param).abs().max())
308
+ edited.eval()
309
+ print(efk(x, labels=x).loss, edited(x, labels=x).loss, edited.edit_loss_fn(edited(x).logits, x)["nll"])
310
+ edited2 = edited.edit(x, masks=torch.ones_like(x), labels=x)
311
+ print(efk(x, labels=x).loss, edited(x, labels=x).loss, edited2(x, labels=x).loss)
312
+ import pdb; pdb.set_trace()
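Note (illustrative, not part of the diff above): ConditionedParameter maps an encoding of the edit to a rank-1 "scale" and "shift" for the raw edit gradient, gated by a sigmoid. A simplified sketch for a single 2-D weight, omitting max_scale and the batching/squeeze details:

# KE-style rank-1 gradient modulation (toy example, not the commit's code).
import torch
import torch.nn as nn

rows, cols, cond_dim, hidden = 8, 6, 16, 32
hyper = nn.Sequential(nn.Linear(cond_dim, hidden), nn.Tanh(),
                      nn.Linear(hidden, 2 * (rows + cols) + 1))

condition = torch.randn(1, cond_dim)            # encoding of the edit statement
grad = torch.randn(rows, cols)                  # gradient of the edit loss w.r.t. the weight

col_a, row_a, col_b, row_b, gate = hyper(condition).split([cols, rows, cols, rows, 1], dim=-1)
a = row_a.softmax(-1).T @ col_a                 # rank-1 "scale", shape (rows, cols)
b = row_b.softmax(-1).T @ col_b                 # rank-1 "shift", shape (rows, cols)
update = gate.sigmoid() * (grad * a + b)        # predicted parameter shift
print(update.shape)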
algs/lu.py ADDED
@@ -0,0 +1,90 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torch.utils.data import Dataset
5
+ import time
6
+
7
+ from editable_model import EditableModel
8
+ from utils import _last_encoder_state, _logits
9
+
10
+ class LU(EditableModel):
11
+ """
12
+ Representation lookup approach. Does not require training.
13
+ """
14
+
15
+ def __init__(self, model, config, model_constructor, memory=None):
16
+ super().__init__(model, config, model_constructor)
17
+
18
+ self.memory = memory
19
+
20
+ def forward(self, *inputs, **kwargs):
21
+ if "bert" in self.config.model.name.lower():
22
+ output, encoder_states = self.model(*inputs, **kwargs, output_hidden_states=True)
23
+ else:
24
+ model_output = self.model(*inputs, **kwargs, output_hidden_states=True)
25
+ encoder_states = _last_encoder_state(model_output)
26
+ output = _logits(model_output)
27
+
28
+ if self.memory is not None:
29
+ for i, encoder_state in enumerate(encoder_states):
30
+ if "gpt2" in self.config.model.name.lower():
31
+ # NOTE: broken
32
+ memory_prefixes, memory_labels = self.memory
33
+ prefix_means = encoder_state.cumsum(0).detach() / torch.arange(1, encoder_state.shape[0] + 1, device=encoder_state.device).view(-1, 1)
34
+ dist_mat = (prefix_means.unsqueeze(1) - memory_prefixes.unsqueeze(0)).norm(2, dim=-1)
35
+
36
+ min_dists, min_idxs = dist_mat.min(-1)
37
+ memory_mask = (min_dists < self.config.lu.threshold)
38
+ onehot_logits = self.config.lu.onehot_logit * F.one_hot(memory_labels[min_idxs], output.shape[-1]).float()
39
+ output[i, memory_mask] = onehot_logits[memory_mask]
40
+ elif "bart" in self.config.model.name.lower() or "t5" in self.config.model.name.lower():
41
+ avg_encoder_state = encoder_state.detach().mean(0)
42
+ memory_keys, memory_labels = self.memory
43
+ dists = torch.norm(avg_encoder_state - memory_keys, dim=-1)
44
+ closest_dist = dists.min()
45
+ closest_idx = dists.argmin()
46
+ closest_v = memory_labels[closest_idx]
47
+
48
+ if closest_dist < self.config.lu.threshold:
49
+ output[i] = torch.zeros((1, kwargs['labels'].shape[1], output.shape[2]), device=output.device)
50
+ for j, idx in enumerate(closest_v):
51
+ if j >= output.shape[1]:
52
+ break
53
+ output[i, j, idx] = self.config.lu.onehot_logit
54
+ if "t5" not in self.config.model.name.lower():
55
+ # T5 does not shift targets in the loss
56
+ output[i] = output[i].roll(-1, -2)
57
+ else:
58
+ avg_encoder_state = encoder_state.detach().mean(0)
59
+ memory_keys, memory_labels = self.memory
60
+ dists = torch.norm(avg_encoder_state - memory_keys, dim=-1)
61
+ closest_dist = dists.min()
62
+ closest_idx = dists.argmin()
63
+ closest_v = memory_labels[closest_idx]
64
+
65
+ if closest_dist < self.config.lu.threshold:
66
+ output[i] = self.config.lu.onehot_logit * (2 * closest_v - 1) # Return onehot_logit or -onehot_logit
67
+
68
+ return output
69
+
70
+ def edit(self, batch, condition=None):
71
+ edit_model = self.model.eval()
72
+ if "bert" in self.config.model.name.lower():
73
+ _, encoder_states = self.model(**batch, output_hidden_states=True)
74
+ else:
75
+ encoder_states = _last_encoder_state(self.model(**batch, output_hidden_states=True))
76
+
77
+ memory_keys = []
78
+ memory_labels = []
79
+ for encoder_state, label in zip(encoder_states, batch["labels"]):
80
+ if "gpt2" in self.config.model.name.lower():
81
+ # NOTE: broken
82
+ avg_encoder_states = (encoder_state.cumsum(0).detach() / torch.arange(1, encoder_state.shape[0] + 1, device=encoder_state.device).view(-1, 1))[-10:, :]
83
+ memory = (avg_encoder_states, label[-10:])
84
+ else:
85
+ avg_encoder_state = encoder_state.detach().mean(0)
86
+ memory_keys.append(avg_encoder_state)
87
+ memory_labels.append(label)
88
+
89
+ memory = (torch.stack(memory_keys), torch.stack(memory_labels))
90
+ return LU(self.model.eval(), self.config, self.model_constructor, memory), {}
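Note (illustrative, not part of the diff above): lu.py serves edits by nearest-neighbor lookup on mean encoder states. The core decision rule, stripped of model plumbing:

# Representation lookup: match a query's mean encoder state against stored edit keys (toy example).
import torch

threshold = 1.0
memory_keys = torch.randn(5, 32)                # one stored mean encoder state per edit
memory_labels = torch.arange(5)                 # target associated with each stored edit

query = memory_keys[2] + 0.01 * torch.randn(32) # mean encoder state of an incoming input
dists = torch.norm(query - memory_keys, dim=-1)
closest_dist, closest_idx = dists.min(0)

if closest_dist < threshold:
    prediction = memory_labels[closest_idx]     # serve the memorized answer
else:
    prediction = None                           # defer to the base model
print(closest_dist.item(), prediction)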
algs/mend.py ADDED
@@ -0,0 +1,297 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import copy
5
+ import transformers
6
+ import higher
7
+ import logging
8
+ from higher.patch import monkeypatch as make_functional
9
+ from collections import defaultdict
10
+
11
+ from editable_model import EditableModel
12
+ from hooks import hook_model
13
+ import nn as local_nn
14
+ from utils import _logits, _inner_params
15
+
16
+ LOG = logging.getLogger(__name__)
17
+
18
+
19
+ def update_counter(x, m, s, k):
20
+ new_m = m + (x - m) / k
21
+ new_s = s + (x - m) * (x - new_m)
22
+
23
+ return new_m, new_s
24
+
25
+
26
+ class GradientTransform(nn.Module):
27
+ def __init__(self, x_dim: int, delta_dim: int, cfg, n_modes = None):
28
+ super().__init__()
29
+
30
+ self.x_dim = x_dim
31
+ self.delta_dim = delta_dim
32
+ self.cfg = cfg
33
+ if cfg.combine and (cfg.one_sided or cfg.x_only or cfg.delta_only):
34
+ raise ValueError("cfg.combine cannot be used with one-sided MEND variants")
35
+
36
+ self.norm_init = False
37
+ self.register_buffer("u_mean", torch.full((x_dim,), float("nan")))
38
+ self.register_buffer("v_mean", torch.full((delta_dim,), float("nan")))
39
+ self.register_buffer("u_std", torch.full((x_dim,), float("nan")))
40
+ self.register_buffer("v_std", torch.full((delta_dim,), float("nan")))
41
+ self.register_buffer("u_s", torch.full((x_dim,), float("nan")))
42
+ self.register_buffer("v_s", torch.full((delta_dim,), float("nan")))
43
+ self.register_buffer("k", torch.full((1,), float("nan")))
44
+
45
+ MlpClass = getattr(local_nn, cfg.mlp_class)
46
+ LOG.info(f"Building Gradient Transform with MLP class {MlpClass}")
47
+
48
+ def delta_net():
49
+ return MlpClass(delta_dim, delta_dim, delta_dim * 2, cfg.n_hidden, init=cfg.init, act=cfg.act, rank=cfg.rank, n_modes=n_modes)
50
+
51
+ def x_net():
52
+ return MlpClass(x_dim, x_dim, x_dim * 2, cfg.n_hidden, init=cfg.init, act=cfg.act, rank=cfg.rank, n_modes=n_modes)
53
+
54
+ def combined_net():
55
+ return MlpClass(delta_dim + x_dim, delta_dim + x_dim, (delta_dim + x_dim) * 2,
56
+ cfg.n_hidden, init=cfg.init, act=cfg.act, rank=cfg.rank, n_modes=n_modes)
57
+
58
+ def ID():
59
+ return lambda x, mode=None: x
60
+
61
+ if cfg.combine:
62
+ self.mlp = combined_net()
63
+ elif cfg.one_sided:
64
+ if x_dim > delta_dim:
65
+ self.mlp1, self.mlp2 = ID(), delta_net()
66
+ else:
67
+ self.mlp1, self.mlp2 = x_net(), ID()
68
+ elif cfg.x_only:
69
+ self.mlp1, self.mlp2 = x_net(), ID()
70
+ elif cfg.delta_only:
71
+ self.mlp1, self.mlp2 = ID(), delta_net()
72
+ else:
73
+ self.mlp1, self.mlp2 = x_net(), delta_net()
74
+
75
+ def forward(self, u, v, param_idx=None):
76
+ u, v = u.to(torch.float32), v.to(torch.float32)
77
+
78
+ u_ = u.view(-1, u.shape[-1])
79
+ v_ = v.view(-1, v.shape[-1])
80
+
81
+ nz_mask = (u_ != 0).any(-1) * (v_ != 0).any(-1) # Skip batch elements with zero grad
82
+ u_ = u_[nz_mask]
83
+ v_ = v_[nz_mask]
84
+
85
+ if self.training:
86
+ for idx in range(u_.shape[0]):
87
+ if not self.norm_init:
88
+ self.u_mean = u_[idx].clone().detach()
89
+ self.v_mean = v_[idx].clone().detach()
90
+ self.u_s.zero_()
91
+ self.v_s.zero_()
92
+ self.k[:] = 1
93
+ self.norm_init = True
94
+ else:
95
+ self.k += 1
96
+ self.u_mean, self.u_s = update_counter(u_[idx], self.u_mean, self.u_s, self.k)
97
+ self.v_mean, self.v_s = update_counter(v_[idx], self.v_mean, self.v_s, self.k)
98
+
99
+ if self.k < 2:
100
+ raise RuntimeError(f"Can't perform normalization with only {self.k} samples so far")
101
+ self.u_std = (self.u_s / (self.k - 1)) ** 0.5
102
+ self.v_std = (self.v_s / (self.k - 1)) ** 0.5
103
+
104
+ if self.cfg.norm:
105
+ u_input = (u_ - self.u_mean) / (self.u_std + 1e-7)
106
+ v_input = (v_ - self.v_mean) / (self.v_std + 1e-7)
107
+ else:
108
+ u_input = u_
109
+ v_input = v_
110
+
111
+ if self.cfg.combine:
112
+ output = self.mlp(torch.cat((u_input, v_input), -1), mode=param_idx)
113
+ out1, out2 = output.split([u.shape[-1], v.shape[-1]], -1)
114
+ return out1, out2
115
+ else:
116
+ return self.mlp1(u_input, mode=param_idx), self.mlp2(v_input, mode=param_idx)
117
+
118
+
119
+ class MEND(EditableModel):
120
+ def get_shape(self, p):
121
+ # We need to (annoyingly) flip the shapes since OpenAI gpt2 uses convs instead of linear
122
+ return p.shape if isinstance(self.model, transformers.GPT2LMHeadModel) else (p.shape[1], p.shape[0])
123
+
124
+ def __init__(self, model, config, model_constructor, gtn=None, edit_lrs=None):
125
+ super().__init__(model, config, model_constructor)
126
+
127
+ if edit_lrs is None:
128
+ edit_lrs = nn.Parameter(torch.tensor([config.edit_lr] * len(self.config.model.inner_params)))
129
+ self.edit_lrs = edit_lrs
130
+
131
+ if not hasattr(self.model, "handles"):
132
+ hook_model(self.model, self.config.model.inner_params)
133
+ LOG.info(f"Hooked {len(self.model.handles)//2} modules")
134
+
135
+ if config.gtn.shared:
136
+ shape_dict = defaultdict(list)
137
+ for n, p in _inner_params(model.named_parameters(), self.config.model.inner_params):
138
+ shape_dict[self.get_shape(p)].append(n)
139
+ self.shape_dict = shape_dict
140
+
141
+ if gtn is None:
142
+ if not config.gtn.shared:
143
+ self.gtn = nn.ModuleDict({
144
+ n.replace(".", "#"): GradientTransform(*self.get_shape(p), config.gtn)
145
+ for (n, p) in _inner_params(model.named_parameters(), self.config.model.inner_params)
146
+ })
147
+ else:
148
+ self.gtn = nn.ModuleDict({
149
+ str(tuple(s)): GradientTransform(*s, config.gtn, len(shape_dict[s]))
150
+ for s in shape_dict.keys()
151
+ })
152
+ else:
153
+ self.gtn = gtn
154
+
155
+ def state_dict(self, destination=None, prefix="", keep_vars=False):
156
+ state_dict = super().state_dict(prefix=prefix, keep_vars=keep_vars) # Get default state dict
157
+ model_keys = self.model.state_dict(prefix=prefix, keep_vars=keep_vars).keys() # Remove model params
158
+ for k in model_keys:
159
+ del state_dict[f"model.{k}"]
160
+ state_dict["model_config"] = self.model.config # Include model config
161
+ return state_dict
162
+
163
+ def load_state_dict(self, state_dict, strict: bool = True):
164
+ config = state_dict["model_config"]
165
+ del state_dict["model_config"]
166
+ if config != self.model.config:
167
+ LOG.info("Loaded model config doesn't match current model config.")
168
+ LOG.info(f"Loaded: {config}")
169
+ LOG.info(f"Current: {self.model.config}")
170
+
171
+ res = super().load_state_dict(state_dict, False)
172
+ # We should only have missing keys for the model, and no unexpected keys
173
+ assert len([k for k in res.missing_keys if not k.startswith("model.")]) == 0, "Should only have missing keys for model."
174
+ assert len(res.unexpected_keys) == 0, "Shouldn't have any unexpected keys"
175
+ return res
176
+
177
+ def outer_parameters(self, grouped=False):
178
+ if grouped:
179
+ return [
180
+ dict(params=list(self.gtn.parameters()), lr=self.config.lr),
181
+ dict(params=[self.edit_lrs], lr=self.config.lr_lr)
182
+ ]
183
+ else:
184
+ return list(self.gtn.parameters()) + [self.edit_lrs]
185
+
186
+ def edit(self, batch, condition=None, detach_history=False):
187
+ outputs = _logits(self.model(**batch))
188
+ loss = self.edit_loss_fn(outputs, batch["labels"])["nll"]
189
+
190
+ names = set([n for n, p in self.model.named_parameters()])
191
+ pset = set(self.config.model.inner_params)
192
+ for p in pset:
193
+ assert p in names, f"inner param {p} not in model"
194
+
195
+ loss.backward()
196
+
197
+ if self.config.gtn.shared:
198
+ param_idx = lambda n, p: self.shape_dict[self.get_shape(p)].index(n) if self.config.gtn.shared else None # noqa: E731
199
+ transformed_factors = {
200
+ n: self.gtn[str(tuple(self.get_shape(p)))](p.__x__, p.__delta__, param_idx(n, p))
201
+ for n, p in _inner_params(self.model.named_parameters(), self.config.model.inner_params)
202
+ }
203
+ else:
204
+ transformed_factors = {
205
+ n: self.gtn[n.replace(".", "#")](p.__x__, p.__delta__)
206
+ for n, p in _inner_params(self.model.named_parameters(), self.config.model.inner_params)
207
+ }
208
+
209
+ # Should be bi,bj->ji for nn.Linear, but (annoyingly) GPT2 uses Conv1D instead...
210
+ if isinstance(self.model, transformers.GPT2LMHeadModel):
211
+ targ = "ij"
212
+ else:
213
+ targ = "ji"
214
+ mean_grads = {
215
+ n: torch.einsum(f"bi,bj->{targ}", x, delta)
216
+ for n, (x, delta) in transformed_factors.items()
217
+ }
218
+
219
+ info_dict = {}
220
+ idx = 0
221
+ for n, p in _inner_params(self.model.named_parameters(), self.config.model.inner_params):
222
+ info_dict[f"grad/true_mag{idx}"] = p.grad.norm(2).item()
223
+ info_dict[f"grad/pseudo_mag{idx}"] = mean_grads[n].norm(2).item()
224
+ info_dict[f"grad/true_std{idx}"] = p.grad.std().item()
225
+ info_dict[f"grad/pseudo_std{idx}"] = mean_grads[n].std().item()
226
+ info_dict[f"grad/diff{idx}"] = (p.grad - mean_grads[n]).norm(2).item()
227
+ info_dict[f"grad/cos{idx}"] = F.cosine_similarity(p.grad.reshape(-1), mean_grads[n].reshape(-1), dim=0).item()
228
+ idx += 1
229
+
230
+ self.model.zero_grad()
231
+
232
+ assert len(self.edit_lrs) == len(list(mean_grads.items()))
233
+ updates = {n: lr * g for lr, (n, g) in zip(self.edit_lrs, mean_grads.items())}
234
+
235
+ edited_model = self.model
236
+ if not isinstance(edited_model, higher.patch._MonkeyPatchBase):
237
+ edited_model = make_functional(edited_model, in_place=True)
238
+
239
+ new_params = []
240
+ for n, p in edited_model.named_parameters():
241
+ if n in pset:
242
+ if self.config.gtn.descent:
243
+ new_params.append(p - updates[n])
244
+ else:
245
+ new_params.append(p + updates[n])
246
+ else:
247
+ new_params.append(p)
248
+
249
+ edited_model.update_params(new_params)
250
+
251
+ if detach_history:
252
+ new_model = self.model_constructor()
253
+ new_model.load_state_dict(edited_model.state_dict())
254
+ edited_model = new_model
255
+
256
+ return MEND(edited_model, self.config, self.model_constructor, self.gtn, edit_lrs=self.edit_lrs), info_dict
257
+
258
+
259
+ if __name__ == '__main__':
260
+ import types
261
+
262
+ model = transformers.GPT2LMHeadModel.from_pretrained("gpt2")
263
+
264
+ config = types.SimpleNamespace()
265
+ config.model.inner_params = [
266
+ "transformer.h.9.mlp.c_fc.weight",
267
+ "transformer.h.9.mlp.c_proj.weight",
268
+ "transformer.h.10.mlp.c_fc.weight",
269
+ "transformer.h.10.mlp.c_proj.weight",
270
+ "transformer.h.11.mlp.c_fc.weight",
271
+ "transformer.h.11.mlp.c_proj.weight",
272
+ ]
273
+ config.edit_lr = 0.0001
274
+
275
+ config.gtn = types.SimpleNamespace()
276
+ config.gtn.n_hidden = 1
277
+ config.gtn = config.gtn.__dict__
278
+
279
+ gtn = MEND(model, config, lambda: copy.deepcopy(model)).cuda()
280
+ # torch.save(gtn.state_dict(), "test_state.pt")
281
+ import pdb; pdb.set_trace()
282
+ gtn.load_state_dict(torch.load("test_state.pt"))
283
+ x = torch.arange(20).view(1, 20).cuda() + 1000
284
+ orig_logits = gtn(x)
285
+ edited = gtn.edit(x, masks=torch.ones_like(x), labels=x)
286
+ post_logits = gtn(x)
287
+
288
+ assert torch.allclose(orig_logits, post_logits)
289
+
290
+ orig_param = [p for (n, p) in gtn.model.named_parameters() if n == config.model.inner_params[-1]][0]
291
+ edited_param = [p for (n, p) in edited.model.named_parameters() if n == config.model.inner_params[-1]][0]
292
+
293
+ LOG.info((orig_param - edited_param).abs().max())
294
+ edited.eval()
295
+ LOG.info(gtn(x, labels=x).loss, edited(x, labels=x).loss, edited.edit_loss_fn(edited(x).logits, x)["nll"])
296
+ edited2 = edited.edit(x, masks=torch.ones_like(x), labels=x)
297
+ LOG.info(gtn(x, labels=x).loss, edited(x, labels=x).loss, edited2(x, labels=x).loss)
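Note (illustrative, not part of the diff above): MEND's GradientTransform operates on the two rank-1 factors of a linear layer's gradient, the layer input and the gradient with respect to the layer output (the quantities hooks.py caches on each inner parameter as __x__ and __delta__), instead of on the full weight gradient. A toy check of that decomposition and of how a pseudo-gradient is reassembled:

# Rank-1 decomposition of an nn.Linear gradient, as exploited by MEND (toy example, not the commit's code).
import torch
import torch.nn as nn

batch, d_in, d_out = 4, 8, 6
x = torch.randn(batch, d_in)
W = torch.randn(d_out, d_in, requires_grad=True)
target = torch.randn(batch, d_out)

out = x @ W.T
loss = nn.functional.mse_loss(out, target)
delta = torch.autograd.grad(loss, out, retain_graph=True)[0]   # grad w.r.t. the layer output

recon = torch.einsum("bi,bj->ij", delta, x)                    # sum of per-example outer products
true_grad = torch.autograd.grad(loss, W)[0]
print(torch.allclose(recon, true_grad, atol=1e-5))             # True

# A MEND-style editor transforms the two factors separately, then recombines them.
x_net, delta_net = nn.Linear(d_in, d_in), nn.Linear(d_out, d_out)
pseudo_grad = torch.einsum("bi,bj->ij", delta_net(delta), x_net(x))
print(pseudo_grad.shape)                                       # torch.Size([6, 8])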
algs/serac.py ADDED
@@ -0,0 +1,452 @@
1
+ import torch
2
+ import copy
3
+ import transformers
4
+ import logging
5
+
6
+ from utils import scr, set_dropout, _logits, add_padding, add_sep
7
+ from editable_model import EditableModel
8
+ from models import BertClassifier
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ def translate_tokens(tokens, from_tok, to_tok):
14
+ tokens = tokens.masked_fill(tokens == -100, from_tok.pad_token_id)
15
+ text = from_tok.batch_decode(tokens, skip_special_tokens=True)
16
+ return to_tok(text, return_tensors="pt")["input_ids"].to(tokens.device)
17
+
18
+
19
+ class SERAC(EditableModel):
20
+ def __init__(self, model, config, model_constructor, classifier=None, classifier_tok=None,
21
+ replacement=None, replacement_tok=None, cache_inputs=None, cache_labels=None,
22
+ cache_embeds=None, scale=None):
23
+ super().__init__(model, config, model_constructor)
24
+
25
+ if classifier is None:
26
+ if config.rep.cross_attend and not config.rep.cls_class.endswith("ForSequenceClassification"):
27
+ LOG.warn(f"Switching {config.rep.cls_class} to {config.rep.cls_class}ForSequenceClassification for cross-attend")
28
+ config.rep.cls_class += "ForSequenceClassification"
29
+ self.classifier = getattr(transformers, config.rep.cls_class).from_pretrained(config.rep.cls_name, cache_dir=scr())
30
+ if self.config.rep.checkpoint_grad:
31
+ LOG.info(f"Checking for checkpointing: {hasattr(self.classifier.config, 'gradient_checkpointing')}")
32
+ self.classifier.config.gradient_checkpointing = True
33
+ self.classifier_tok = transformers.AutoTokenizer.from_pretrained(config.rep.cls_name, cache_dir=scr())
34
+ if not self.config.rep.cross_attend and 'bert' in self.config.rep.cls_name:
35
+ self.classifier.pooler = None # we don't need the classification head
36
+ elif not self.config.rep.cross_attend and "mpnet" not in self.config.rep.cls_name:
37
+ if hasattr(self.classifier, "pooler"):
38
+ self.classifier.pooler = None # we don't need the classification head
39
+
40
+ set_dropout(self.classifier, config.dropout)
41
+ if self.config.rep.lora is not None:
42
+ self.classifier = LoraModel(self.classifier, self.config.rep.lora)
43
+ else:
44
+ assert isinstance(classifier, torch.nn.Module), f"Classifier is a {type(classifier)}!"
45
+ assert isinstance(classifier_tok, transformers.PreTrainedTokenizerBase), f"Classifier tok is {type(classifier_tok)}!"
46
+ self.classifier, self.classifier_tok = classifier, classifier_tok
47
+
48
+ if replacement is None:
49
+ # self.replacement_tok = getattr(transformers, config.model.tokenizer_class).from_pretrained(config.model.tokenizer_name,
50
+ # cache_dir=scr())
51
+ self.replacement_tok = transformers.AutoTokenizer.from_pretrained(config.model.small_name, cache_dir=scr())
52
+ # if self.replacement_tok.sep_token is None:
53
+ # self.replacement_tok.sep_token = self.replacement_tok.eos_token
54
+ if (False and self.config.rep.freeze_cntr):
55
+ self.replacement = None
56
+ else:
57
+ if config.model.class_name == "BertClassifier":
58
+ self.replacement = BertClassifier(config.model.small_name)
59
+ else:
60
+ self.replacement = getattr(transformers, config.model.class_name).from_pretrained(config.model.small_name, cache_dir=scr())
61
+ if self.replacement_tok.sep_token is None and "gpt" not in self.model.name_or_path.lower():
62
+ add_sep(self.replacement_tok, self.replacement)
63
+ if self.replacement_tok.pad_token is None:
64
+ add_padding(self.replacement_tok, self.replacement)
65
+ set_dropout(self.replacement, config.dropout)
66
+ else:
67
+ assert isinstance(replacement, torch.nn.Module), f"Rep is {type(replacement)}!"
68
+ assert isinstance(replacement_tok, transformers.PreTrainedTokenizerBase), f"Rep tok is {type(replacement_tok)}!"
69
+ self.replacement, self.replacement_tok = replacement, replacement_tok
70
+
71
+ if self.config.rep.cross_attend:
72
+ self.scale = None
73
+ else:
74
+ if scale is None:
75
+ self.register_buffer("scale", torch.tensor(1.0))
76
+ # self.scale = nn.Parameter(torch.tensor(1.0))
77
+ else:
78
+ self.scale = scale
79
+
80
+ if cache_inputs is None:
81
+ self.cache_inputs = []
82
+ self.cache_labels = []
83
+ if config.rep.cache_embeds and not config.rep.cross_attend:
84
+ self.cache_embeds = {}
85
+ else:
86
+ assert isinstance(cache_inputs, list), f"Cache inputs is {cache_inputs}"
87
+ assert isinstance(cache_labels, list), f"Cache labels is {cache_labels}"
88
+ self.cache_inputs = copy.deepcopy(cache_inputs)
89
+ self.cache_labels = copy.deepcopy(cache_labels)
90
+ if config.rep.cache_embeds and not config.rep.cross_attend:
91
+ assert isinstance(cache_embeds, dict), f"Cache embeds is {cache_embeds}"
92
+ self.cache_embeds = copy.deepcopy(cache_embeds)
93
+
94
+ def state_dict(self, destination=None, prefix="", keep_vars=False):
95
+ state_dict = super().state_dict(prefix=prefix, keep_vars=keep_vars) # Get default state dict
96
+ model_keys = self.model.state_dict(prefix=prefix, keep_vars=keep_vars).keys() # Remove model params
97
+ for k in model_keys:
98
+ del state_dict[f"model.{k}"]
99
+ if self.config.rep.freeze_cntr:
100
+ cntr_keys = self.replacement.state_dict().keys()
101
+ for k in cntr_keys:
102
+ del state_dict[f"replacement.{k}"]
103
+ state_dict["model_config"] = self.model.config # Include model config
104
+ return state_dict
105
+
106
+ def load_state_dict(self, state_dict, strict: bool = True):
107
+ config = state_dict["model_config"]
108
+ del state_dict["model_config"]
109
+ if config != self.model.config:
110
+ LOG.info("Loaded model config doesn't match current model config.")
111
+ LOG.info(f"Loaded: {config}")
112
+ LOG.info(f"Current: {self.model.config}")
113
+
114
+ if (False and self.config.rep.freeze_cntr):
115
+ rep_keys = list(state_dict.keys())
116
+ for k in rep_keys:
117
+ if k.startswith("replacement"):
118
+ del state_dict[k]
119
+ res = super().load_state_dict(state_dict, False)
120
+ else:
121
+ try:
122
+ res = super().load_state_dict(state_dict, False)
123
+ except RuntimeError:
124
+ LOG.info("Load failed; trying again without loading counterfactual model weights.")
125
+ rep_keys = list(state_dict.keys())
126
+ for k in rep_keys:
127
+ if k.startswith("replacement"):
128
+ del state_dict[k]
129
+ res = super().load_state_dict(state_dict, False)
130
+
131
+ # We should only have missing keys for the model, and no unexpected keys
132
+ def ok_to_miss(k):
133
+ return k.startswith("model.") or ((False and self.config.rep.freeze_cntr) and k.startswith("replacement."))
134
+ missing_keys = [k for k in res.missing_keys if not ok_to_miss(k)]
135
+ assert len(missing_keys) == 0, f"Should only have missing keys for model: {missing_keys}."
136
+ assert len(res.unexpected_keys) == 0, "Shouldn't have any unexpected keys"
137
+ return res
138
+
139
+ def outer_parameters(self, grouped=False):
140
+ if self.config.rep.freeze is not None:
141
+ modlist = None
142
+ for m in self.classifier.modules():
143
+ if isinstance(m, torch.nn.ModuleList):
144
+ modlist = m
145
+ break
146
+ model_params = list(modlist[-self.config.rep.freeze:].parameters())
147
+ else:
148
+ model_params = list(self.classifier.parameters())
149
+
150
+ if self.config.rep.lora is not None or self.config.rep.freeze is not None:
151
+ cls = self.classifier.base_model if self.config.rep.lora else self.classifier
152
+ if hasattr(cls, "classifier"):
153
+ model_params.extend(cls.classifier.parameters())
154
+ if hasattr(cls, "pre_classifier"):
155
+ model_params.extend(cls.pre_classifier.parameters())
156
+
157
+ if not (False and self.config.rep.freeze_cntr):
158
+ model_params.extend(list(self.replacement.parameters()))
159
+
160
+ extra_params = []
161
+ if grouped:
162
+ return [
163
+ dict(params=model_params, lr=self.config.lr),
164
+ dict(params=extra_params, lr=self.config.lr_lr)
165
+ ]
166
+ else:
167
+ return model_params + extra_params
168
+
169
+ def edit(self, batch, condition=None, detach_history=False):
170
+ def detokenize(toks, tok):
171
+ tokens = toks.masked_fill(toks == -100, tok.pad_token_id)
172
+ return tok.batch_decode(tokens, skip_special_tokens=True)
173
+
174
+ inputs = detokenize(batch["input_ids"], self.replacement_tok)
175
+ if "bert" in self.config.model.name:
176
+ labels = ["" for _ in batch["labels"]]
177
+ else:
178
+ labels = detokenize(batch["labels"], self.replacement_tok)
179
+
180
+ cache_inputs = self.cache_inputs + inputs
181
+ cache_labels = self.cache_labels + labels
182
+
183
+ if self.config.rep.cache_embeds and not self.config.rep.cross_attend:
184
+ cls_inputs = self.build_cls_cache_inputs(inputs, labels)
185
+ with torch.no_grad():
186
+ embeds = self.compute_cls_embeddings(cls_inputs)
187
+
188
+ cache_embeds = {inp: emb for inp, emb in zip(cls_inputs, embeds)}
189
+ cache_embeds.update(self.cache_embeds)
190
+ else:
191
+ cache_embeds = None
192
+
193
+ new_model = SERAC(self.model, self.config, self.model_constructor, self.classifier, self.classifier_tok,
194
+ self.replacement, self.replacement_tok, cache_inputs, cache_labels, cache_embeds, self.scale)
195
+ new_model.train(self.training)
196
+ return new_model, {}
197
+
198
+ def stats(self):
199
+ return self.last_stats
200
+
201
+ def compute_cls_embeddings(self, text):
202
+ inputs = self.classifier_tok(text, return_tensors="pt", padding=True).to(self.config.device)
203
+ if 'bert' in self.config.rep.cls_name:
204
+ embeds = self.classifier(**inputs).last_hidden_state[:, 0].unsqueeze(1)
205
+ else:
206
+ embeds = self.classifier(**inputs).pooler_output.unsqueeze(1)
207
+ embeds = embeds.view(embeds.shape[0], self.config.rep.dist_heads, -1)
208
+ if self.config.rep.bound_embeds:
209
+ embeds = embeds.tanh()
210
+ return embeds
211
+
212
+ def embedding_logsim_matrix(self, cls_ctxs, test_input_text):
213
+ if self.config.rep.cache_embeds and not self.config.rep.cross_attend and not self.training:
214
+ ctx_embeds = torch.cat([self.cache_embeds[ctx] for ctx in cls_ctxs])
215
+ else:
216
+ ctx_embeds = self.compute_cls_embeddings(cls_ctxs)
217
+ main_embeds = self.compute_cls_embeddings(test_input_text)
218
+
219
+ if self.config.rep.cos:
220
+ cos = (ctx_embeds[None] * main_embeds[:, None]).sum(-1) / (ctx_embeds[None].norm(2, -1) * main_embeds[:, None].norm(2, -1))
221
+ dists = 1 - cos
222
+ else:
223
+ dists = (ctx_embeds[None] - main_embeds[:, None]).norm(2, -1)
224
+ if self.config.rep.square:
225
+ dists = dists ** 2
226
+
227
+ dists = dists.min(-1).values # get rid of the dists head dimension
228
+
229
+ assert dists.min() >= 0, "Shouldn't have negative distances!"
230
+ cls_logsims = -dists * self.scale
231
+
232
+ return cls_logsims
233
+
234
+ def crossattend_logsim_matrix(self, cls_ctxs, test_input_texts):
235
+ batch = [ctx + self.classifier_tok.sep_token + test for test in test_input_texts for ctx in cls_ctxs]
236
+ batch_toks = self.classifier_tok(batch, return_tensors="pt", padding=True).to(self.config.device)
237
+ batch_logsims = self.classifier(**batch_toks).logits.log_softmax(-1)[:, 0]
238
+ logsim_matrix = batch_logsims.view(len(test_input_texts), len(cls_ctxs))
239
+
240
+ return logsim_matrix
241
+
242
+ def build_rep_cache_contexts(self):
243
+ sep = " "
244
+ if hasattr(self.model, "name_or_path") and "gpt" in self.model.name_or_path.lower():
245
+ # The labels are included in the inputs for autoregressive models. Cut off the label for the classifier
246
+ ctxs = [cin + sep for cin in self.cache_inputs]
247
+ else:
248
+ ctxs = [cin + sep + clab + sep for cin, clab in zip(self.cache_inputs, self.cache_labels)]
249
+ return ctxs
250
+
251
+ def build_cls_cache_inputs(self, cache_inputs=None, cache_labels=None):
252
+ sep = self.classifier_tok.sep_token
253
+ if cache_inputs is None:
254
+ cache_inputs = self.cache_inputs
255
+ if cache_labels is None:
256
+ cache_labels = self.cache_labels
257
+
258
+ if hasattr(self.model, "name_or_path") and "gpt" in self.model.name_or_path.lower():
259
+ # The labels are included in the inputs for autoregressive models. Cut off the label for the classifier
260
+ inputs = [cin.rsplit(" ", 1)[0] + sep for cin in cache_inputs]
261
+ else:
262
+ inputs = [cin + sep + clab + sep for cin, clab in zip(cache_inputs, cache_labels)]
263
+ return inputs
264
+
265
+ def build_rep_input_tokens(self, kwargs, idxs, generation=False):
266
+ assert len(idxs) == len(kwargs["input_ids"]), "Need one cache idx for each test input"
267
+ cache_contexts = self.build_rep_cache_contexts()
268
+ selected_contexts = [cache_contexts[idx.item()] for idx in idxs]
269
+ test_inputs = self.replacement_tok.batch_decode(kwargs["input_ids"], skip_special_tokens=True)
270
+ rep_texts = [ctx + inp for ctx, inp in zip(selected_contexts, test_inputs)]
271
+ rep_input_tokens = self.replacement_tok(rep_texts, return_tensors="pt", padding=True).to(self.config.device)
272
+
273
+ rep_kwargs = {
274
+ "input_ids": rep_input_tokens["input_ids"],
275
+ "attention_mask": rep_input_tokens["attention_mask"],
276
+ }
277
+
278
+ if not generation:
279
+ rep_kwargs["labels"] = kwargs["labels"]
280
+
281
+ # if self.config.task in ["fc", "fnli"]:
282
+ # del rep_kwargs["labels"]
283
+
284
+ if hasattr(self.model, "name_or_path") and "gpt" in self.model.name_or_path.lower():
285
+ # Add 'ignore' labels for the prepended cache inputs
286
+ pre = torch.full((kwargs["labels"].shape[0], rep_kwargs["input_ids"].shape[-1] - kwargs["labels"].shape[-1]), -100,
287
+ device=kwargs["labels"].device)
288
+ rep_kwargs["labels"] = torch.cat((pre, kwargs["labels"]), dim=-1)
289
+
290
+ return rep_kwargs
291
+
292
+ def run_classifier(self, *inputs, **kwargs):
293
+ cache_inputs = self.build_cls_cache_inputs()
294
+ test_inputs = self.replacement_tok.batch_decode(kwargs["input_ids"], skip_special_tokens=True)
295
+
296
+ if self.config.rep.cross_attend:
297
+ log_sim_matrix = self.crossattend_logsim_matrix(cache_inputs, test_inputs)
298
+ else:
299
+ log_sim_matrix = self.embedding_logsim_matrix(cache_inputs, test_inputs)
300
+
301
+ sims = log_sim_matrix.exp()
302
+ assert sims.max() <= 1, "Similarities shouldn't exceed 1!"
303
+
304
+ cls_sims, cls_idxs = sims.max(-1)
305
+ return cls_sims, cls_idxs, log_sim_matrix
306
+
307
+ def generate(self, *args, **kwargs):
308
+ # input_text = self.replacement_tok.batch_decode(kwargs["input_ids"], skip_special_tokens=True)
309
+ base_generate_fn = (
310
+ self.model.forward if type(self.model) == BertClassifier
311
+ else lambda *args, **kwargs: self.model.generate(*args, **kwargs, max_new_tokens=20)
312
+ )
313
+ cntr_generate_fn = (
314
+ self.replacement.forward if type(self.replacement) == BertClassifier
315
+ else lambda *args, **kwargs: self.replacement.generate(*args, **kwargs, max_new_tokens=20)
316
+ )
317
+
318
+ # assert len(args) == 0, "Should only pass named arguments to generate()"
319
+ if len(self.cache_inputs) > 0:
320
+ override = kwargs.get("override")
321
+ if override:
322
+ del kwargs["override"]
323
+
324
+ cls_sims, cls_idxs, _ = self.run_classifier(*args, **kwargs)
325
+ # assert cls_sims.numel() == 1
326
+ # print(f"Cache score: {cls_sims.item()} " + ("[MISS]" if cls_sims.item() < 0.5 else "[HIT]"))
327
+ use_cntr = (override == "cntr") if override is not None else (cls_sims.item() > 0.5)
328
+ if use_cntr:
329
+ rep_input = self.build_rep_input_tokens(kwargs, cls_idxs, generation=True)
330
+ kwargs["input_ids"] = rep_input["input_ids"]
331
+ kwargs["attention_mask"] = rep_input["attention_mask"]
332
+ # rep_input_text = self.replacement_tok.decode(rep_input["input_ids"][0])
333
+ # print(f"Returning counterfactual model output for '{rep_input_text}'")
334
+ if self.config.rep.freeze_cntr:
335
+ return base_generate_fn(*args, **kwargs)
336
+ else:
337
+ return cntr_generate_fn(*args, **kwargs)
338
+
339
+ # print(f"Returning base model output for '{input_text}'")
340
+ return base_generate_fn(*args, **kwargs)
341
+
342
+ def forward(self, *inputs, return_logits_only=True, eps=torch.finfo(torch.float32).eps, pos_pairs=None, **kwargs):
343
+ grad_enabled = torch.is_grad_enabled()
344
+ torch.set_grad_enabled(self.training)
345
+
346
+ # need to do soft mixing of logits if we're doing supervised training or we've specifically requested it
347
+ soft = (not self.config.rep.supervised) or self.config.rep.soft_weighting
348
+ with torch.no_grad():
349
+ if len(self.cache_inputs) == 0:
350
+ super_out = super().forward(*inputs, **kwargs).float()
351
+ torch.set_grad_enabled(grad_enabled)
352
+ return super_out
353
+ else:
354
+ base_logits = super().forward(*inputs, **kwargs).float()
355
+ if soft:
356
+ if base_logits.dim() == 3:
357
+ base_probs = base_logits.softmax(-1)
358
+ else:
359
+ base_probs = base_logits.sigmoid()
360
+ del base_logits
361
+
362
+ cls_sims, cls_idxs, cls_logits = self.run_classifier(*inputs, **kwargs)
363
+ rep_cls_inputs = self.build_rep_input_tokens(kwargs, cls_idxs)
364
+ if self.config.rep.freeze_cntr:
365
+ rep_cls_logits = _logits(super().forward(**rep_cls_inputs))
366
+ else:
367
+ rep_cls_logits = _logits(self.replacement(**rep_cls_inputs))
368
+
369
+ if pos_pairs is not None:
370
+ assert (pos_pairs[:, 0] == torch.arange(pos_pairs.shape[0], device=pos_pairs.device)).all()
371
+ gold_idxs = pos_pairs[:, 1]
372
+ # print("IDX acc:", (cls_idxs == gold_idxs).shape, (cls_idxs == gold_idxs).float().mean())
373
+ rep_gold_inputs = self.build_rep_input_tokens(kwargs, gold_idxs)
374
+ if (False and self.config.rep.freeze_cntr):
375
+ rep_gold_logits = _logits(super().forward(**rep_gold_inputs))
376
+ else:
377
+ rep_gold_logits = _logits(self.replacement(**rep_gold_inputs))
378
+ else:
379
+ rep_gold_logits = rep_cls_logits
380
+
381
+ cls_sims = cls_sims.view(-1, 1) # For (binary) classification, predictions are (B x 1)
382
+ if rep_cls_logits.dim() == 3:
383
+ cls_sims.unsqueeze_(-1) # For generation/seq2seq, predictions are (B x S x V)
384
+
385
+ stats = {
386
+ 'sims/mean': cls_sims.mean().item(),
387
+ 'sims/pos': (cls_sims >= 0.5).float().mean().item(),
388
+ 'sims/neg': (cls_sims < 0.5).float().mean().item(),
389
+ 'params/scale': self.scale.item() if self.scale is not None else 0.0,
390
+ }
391
+
392
+ if hasattr(self.model, "name_or_path") and "gpt" in self.model.name_or_path.lower():
393
+ rep_cls_logits = rep_cls_logits[:, -kwargs["labels"].shape[-1]:, :]
394
+
395
+ if soft:
396
+ rep_weight = cls_sims
397
+ if base_probs.dim() == 3:
398
+ mixture_logits = ((1 - rep_weight) * base_probs + rep_weight * rep_cls_logits.softmax(-1) + eps).log()
399
+ else:
400
+ mixture_logits = ((1 - rep_weight) * base_probs + rep_weight * rep_cls_logits.sigmoid() + eps).log()
401
+ else:
402
+ rep_idxs = torch.where(cls_sims > 0.5)[0]
403
+ mixture_logits = base_logits
404
+ if rep_idxs.numel() > 0:
405
+ mixture_logits[rep_idxs] = rep_cls_logits[rep_idxs]
406
+
407
+ torch.set_grad_enabled(grad_enabled)
408
+ if return_logits_only:
409
+ return mixture_logits
410
+ else:
411
+ return mixture_logits, cls_logits, rep_gold_logits, stats
412
+
413
+
414
+ if __name__ == '__main__':
415
+ import types
416
+
417
+ model = transformers.GPT2LMHeadModel.from_pretrained("gpt2")
418
+
419
+ config = types.SimpleNamespace()
420
+ config.model = types.SimpleNamespace()
+ config.model.inner_params = [
421
+ "transformer.h.9.mlp.c_fc.weight",
422
+ "transformer.h.9.mlp.c_proj.weight",
423
+ "transformer.h.10.mlp.c_fc.weight",
424
+ "transformer.h.10.mlp.c_proj.weight",
425
+ "transformer.h.11.mlp.c_fc.weight",
426
+ "transformer.h.11.mlp.c_proj.weight",
427
+ ]
428
+ config.edit_lr = 0.0001
429
+
430
+ config.gtn = types.SimpleNamespace()
431
+ config.gtn.n_hidden = 1
432
+ config.gtn = config.gtn.__dict__
433
+
434
+ gtn = SERAC(model, config, lambda: copy.deepcopy(model)).cuda()
435
+ # torch.save(gtn.state_dict(), "test_state.pt")
436
+ import pdb; pdb.set_trace()
437
+ gtn.load_state_dict(torch.load("test_state.pt"))
438
+ x = torch.arange(20).view(1, 20).cuda() + 1000
439
+ orig_logits = gtn(x)
440
+ edited = gtn.edit(x, masks=torch.ones_like(x), labels=x)
441
+ post_logits = gtn(x)
442
+
443
+ assert torch.allclose(orig_logits, post_logits)
444
+
445
+ orig_param = [p for (n, p) in gtn.model.named_parameters() if n == config.model.inner_params[-1]][0]
446
+ edited_param = [p for (n, p) in edited.model.named_parameters() if n == config.model.inner_params[-1]][0]
447
+
448
+ LOG.info((orig_param - edited_param).abs().max())
449
+ edited.eval()
450
+ LOG.info(f"{gtn(x, labels=x).loss} {edited(x, labels=x).loss} {edited.edit_loss_fn(edited(x).logits, x)['nll']}")
451
+ edited2 = edited.edit(x, masks=torch.ones_like(x), labels=x)
452
+ LOG.info(f"{gtn(x, labels=x).loss} {edited(x, labels=x).loss} {edited2(x, labels=x).loss}")
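For reference, the soft branch of `forward` above mixes the base and counterfactual models at the probability level and only then takes a log. A minimal standalone sketch of that interpolation (toy tensors; shapes and weights are hypothetical, no SERAC classes assumed):

    import torch

    base_logits = torch.randn(2, 3, 5)    # (batch, seq, vocab) from the frozen base model
    cntr_logits = torch.randn(2, 3, 5)    # same shape, from the counterfactual model
    w = torch.rand(2, 1, 1)               # classifier similarity in [0, 1], broadcast over seq/vocab
    eps = torch.finfo(torch.float32).eps

    # Mirrors the soft branch: interpolate probabilities, then return log-probabilities.
    mixture_logits = ((1 - w) * base_logits.softmax(-1) + w * cntr_logits.softmax(-1) + eps).log()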
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import time
 
4
 
5
  EDIT_ALGS = [
6
  "MEND: Model editor networks using gradient decomposition",
 
1
  import streamlit as st
2
  import pandas as pd
3
  import time
4
+ import algs
5
 
6
  EDIT_ALGS = [
7
  "MEND: Model editor networks using gradient decomposition",
editable_model.py ADDED
@@ -0,0 +1,36 @@
1
+ import torch.nn as nn
2
+
3
+ from losses import masked_log_probs
4
+ from utils import _logits, shift_targets
5
+
6
+
7
+ class EditableModel(nn.Module):
8
+ def __init__(self, model, config, model_constructor):
9
+ super().__init__()
10
+
11
+ self.model = model
12
+ self.config = config
13
+ self.model_constructor = model_constructor
14
+
15
+ def _edit_loss_fn(pred, targ, **kwargs):
16
+ return masked_log_probs(pred, targ, shift=shift_targets(self.config), **kwargs)
17
+ self.edit_loss_fn = _edit_loss_fn
18
+ self.loc_loss_fn = _edit_loss_fn
19
+
20
+ def edit(self, batch, condition=None, detach_history=False):
21
+ raise NotImplementedError
22
+
23
+ def forward(self, *inputs, **kwargs):
24
+ return _logits(self.model(*inputs, **kwargs))
25
+
26
+ def outer_parameters(self, grouped=False):
27
+ if grouped:
28
+ return [dict(params=self.parameters(), lr=self.config.lr)]
29
+ else:
30
+ return list(self.parameters())
31
+
32
+ def generate(self, *args, **kwargs):
33
+ return self.model.generate(*args, **kwargs)
34
+
35
+ def base_loss(self, input_ids, attention_masks, label_ids):
36
+ pass
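To illustrate the contract above: concrete editors subclass `EditableModel` and override `edit` to return a freshly wrapped model. A minimal sketch (the `NoOpEditor` name and its do-nothing edit are hypothetical; exact return conventions vary slightly across the algorithms in this repo):

    from editable_model import EditableModel

    class NoOpEditor(EditableModel):
        """Hypothetical editor whose 'edit' just returns an untouched copy of the model."""

        def edit(self, batch, condition=None, detach_history=False):
            edited = self.model_constructor()                # fresh model with the same architecture
            edited.load_state_dict(self.model.state_dict())  # a real editor would apply an update here
            return NoOpEditor(edited, self.config, self.model_constructor)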
hooks.py ADDED
@@ -0,0 +1,28 @@
1
+ from utils import parent_module
2
+
3
+
4
+ def linear_backward_hook(mod, grad_in, grad_out):
5
+ if not hasattr(mod, "weight"):
6
+ print(f"{mod} has no weight!")
7
+ return
8
+
9
+ if hasattr(mod.weight, "__x__"):
10
+ assert len(grad_out) == 1
11
+ # mod.weight.__bgrad__ = grad_out[0].unsqueeze(-1) * mod.__x__[0].unsqueeze(-2)
12
+ mod.weight.__delta__ = grad_out[0].detach()
13
+ else:
14
+ print(f"{mod} has no __x__")
15
+
16
+
17
+ def linear_forward_hook(mod, activations, output):
18
+ assert len(activations) == 1
19
+ mod.weight.__x__ = activations[0].detach()
20
+
21
+
22
+ def hook_model(model, pnames):
23
+ handles = []
24
+ for m in [parent_module(model, pname) for pname in pnames]:
25
+ handles.append(m.register_full_backward_hook(linear_backward_hook))
26
+ handles.append(m.register_forward_hook(linear_forward_hook))
27
+
28
+ model.handles = handles
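A small usage sketch for the hooks above, assuming the repo root is importable (the toy two-layer model is illustrative):

    import torch
    import torch.nn as nn
    from hooks import hook_model

    model = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
    hook_model(model, ["0.weight", "2.weight"])   # parent_module resolves "0.weight" -> model[0]

    out = model(torch.randn(3, 4))
    out.sum().backward()

    # Each hooked weight now carries its layer input and its output gradient.
    print(model[0].weight.__x__.shape)       # torch.Size([3, 4])
    print(model[0].weight.__delta__.shape)   # torch.Size([3, 8])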
losses.py ADDED
@@ -0,0 +1,181 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from metrics import es_sentiment
4
+ from utils import gather_log_probs, mask_hf_labels, masked_mean
5
+
6
+
7
+ def balanced_bce(log_probs, labels, eps=torch.finfo(torch.float32).eps):
8
+ assert labels.max() <= 1
9
+ assert labels.min() >= 0
10
+
11
+ pos_losses = -log_probs[labels == 1]
12
+ neg_probs = 1 - log_probs.exp()
13
+ neg_probs[neg_probs == 0] += eps # for numerical stability
14
+ neg_losses = -neg_probs.log()[labels == 0]
15
+ pos_loss = pos_losses.mean() if pos_losses.numel() > 0 else 0
16
+ neg_loss = neg_losses.mean() if neg_losses.numel() > 0 else 0
17
+
18
+ return pos_loss + neg_loss
19
+
20
+
21
+ def kl_loc_loss(pre, post, mask=None):
22
+ pre = pre.to(torch.float32)
23
+ post = post.to(torch.float32)
24
+
25
+ sequence = pre.dim() == 3
26
+ pre_ = pre.view(-1, pre.shape[-1])
27
+ post_ = post.view(pre_.shape)
28
+ assert pre_.shape[0] == post_.shape[0]
29
+
30
+ if not sequence:
31
+ if pre_.shape[-1] == 1: # No masking needed for binary classification
32
+ return (pre.sigmoid() * (F.logsigmoid(pre) - F.logsigmoid(post))).mean() + (
33
+ (-pre).sigmoid() * (F.logsigmoid(-pre) - F.logsigmoid(-post))
34
+ ).mean()
35
+ else: # We have sequences of predictions; masking needed
36
+ if pre_.shape[-1] > 1:
37
+ assert mask is not None
38
+ mask_ = mask.view(pre_.shape[0])
39
+ kl = (pre_.softmax(-1) * (pre_.log_softmax(-1) - post_.log_softmax(-1))).sum(-1)
40
+ return (kl * mask_).sum() / mask_.sum()
41
+
42
+ raise NotImplementedError
43
+
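A quick sanity check for `kl_loc_loss` (toy tensors): the masked KL between identical sequence logits is zero.

    import torch
    from losses import kl_loc_loss

    pre = torch.randn(2, 5, 10)    # (batch, seq, vocab) pre-edit logits
    post = pre.clone()             # identical post-edit logits
    mask = torch.ones(2, 5)
    assert kl_loc_loss(pre, post, mask=mask).abs() < 1e-6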
44
+
45
+ def binary_log_probs(pred, targ, should_reduce=True):
46
+ assert targ.max() <= 1
47
+ assert targ.min() >= 0
48
+ neg_mask = torch.ones_like(pred)
49
+ neg_mask[targ == 0] *= -1
50
+ pred = pred * neg_mask
51
+ log_probs = F.logsigmoid(pred)
52
+ acc = (log_probs.exp() > 0.5).float()
53
+ if should_reduce:
54
+ acc = acc.mean()
55
+ return {
56
+ "acc": acc,
57
+ "log_prob": log_probs.mean(),
58
+ "prob": log_probs.exp().mean(),
59
+ "nll": -log_probs.mean(),
60
+ "n_tokens": log_probs.shape[0]
61
+ }
62
+
63
+
64
+ def multiclass_log_probs(
65
+ pred,
66
+ raw_targets,
67
+ shift=True,
68
+ eps=torch.finfo(torch.float32).eps,
69
+ should_reduce=True,
70
+ **kwargs,
71
+ ):
72
+ NULL_TOKEN = 0 # a placeholder used for masked target locations
73
+
74
+ pred = pred.clone()
75
+ mask, targ = mask_hf_labels(raw_targets)
76
+ if shift and pred.dim() == 3: # Dealing with sequences
77
+ pred = pred[:, :-1] # Remove last prediction in sequence
78
+ targ = targ[:, 1:]  # Shift to align predictions and targets
+ mask = mask[:, 1:]  # Keep the mask aligned with the shifted targets
79
+
80
+ unmasked_log_probs = gather_log_probs(pred, targ)
81
+
82
+ pred_ids = pred.argmax(-1).masked_fill(~mask, NULL_TOKEN)
83
+ correct = pred_ids == targ
84
+ if pred.dim() == 3:
85
+ correct = (pred_ids == targ).all(-1) # We want to get the whole sequence right
86
+ acc = correct.float()
87
+ if should_reduce:
88
+ acc = acc.mean()
89
+
90
+ if "inner_sent" in kwargs:
91
+ # Only use outer samples with the same sentiment as the inner sample
92
+ same_sent_mask = torch.tensor([i == o for i, o in zip(kwargs["inner_sent"], kwargs["outer_sent"])], device=pred.device)
93
+ good_mask = mask * same_sent_mask.unsqueeze(-1)
94
+ bad_mask = mask * (~same_sent_mask.unsqueeze(-1))
95
+
96
+ good_log_prob = masked_mean(unmasked_log_probs, good_mask)
97
+ bad_log_prob = masked_mean((1 - unmasked_log_probs.exp() + eps).log(), bad_mask)
98
+
99
+ n_tokens = good_mask.float().sum()
100
+ avg_log_prob = good_log_prob
101
+
102
+ if kwargs["unlikelihood"]:
103
+ nll = -good_log_prob - bad_log_prob
104
+ else:
105
+ nll = -good_log_prob
106
+ else:
107
+ n_tokens = mask.float().sum()
108
+ avg_log_prob = (unmasked_log_probs * mask.float()).sum() / n_tokens
109
+ nll = -avg_log_prob
110
+
111
+ info_dict = {
112
+ "acc": acc,
113
+ "log_prob": avg_log_prob,
114
+ "prob": avg_log_prob.exp(),
115
+ "n_tokens": n_tokens,
116
+ "nll": nll
117
+ }
118
+
119
+ if "inner_sent" in kwargs:
120
+ info_dict.update(es_sentiment(kwargs["pre_edit_logits"],
121
+ kwargs["post_edit_logits"],
122
+ raw_targets,
123
+ same_sent_mask))
124
+
125
+ return info_dict
126
+
127
+
128
+ def masked_log_probs(pred, targ, shift=True, **kwargs):
129
+ pred = pred.to(torch.float32)
130
+
131
+ if not (pred.dim() == 2 or pred.dim() == 3):
132
+ raise RuntimeError(f"Expected pred to have 2 or 3 dimensions, got {pred.shape}")
133
+
134
+ if pred.shape[-1] == 1:
135
+ should_reduce = True
136
+ if "should_reduce" in kwargs:
137
+ should_reduce = kwargs["should_reduce"]
138
+ return binary_log_probs(pred, targ, should_reduce=should_reduce)
139
+ else:
140
+ return multiclass_log_probs(pred, targ, shift=shift, **kwargs)
141
+
142
+
143
+ def test_masked_log_probs():
144
+ print()
145
+ N = 10000
146
+ pred = torch.randn(10, 15, N)
147
+ targ = torch.randint(0, N, (10, 15))
148
+ true_pred = pred.clone()
149
+ true_pred.scatter_(2, targ.unsqueeze(-1), 5)
150
+ true_pred = true_pred.roll(-1, 1)
151
+
152
+ half_pred = true_pred.clone()
153
+ mask = torch.arange(10) % 2 == 0
154
+ half_pred[mask] = pred[mask]
155
+
156
+ pred_ = pred.clone()
157
+ true_pred_ = true_pred.clone()
158
+ half_pred_ = half_pred.clone()
159
+ targ_ = targ.clone()
160
+
161
+ print(masked_log_probs(pred, targ, return_acc=True))
162
+ print(masked_log_probs(true_pred, targ, return_acc=True))
163
+ print(masked_log_probs(half_pred, targ, return_acc=True))
164
+
165
+ assert (pred == pred_).all()
166
+ assert (targ == targ_).all()
167
+ assert (half_pred == half_pred_).all()
168
+ assert (true_pred == true_pred_).all()
169
+
170
+ import pdb; pdb.set_trace()
171
+
172
+ pred = torch.randn(1000, 15, 1)
173
+ targ = torch.randint(0, 2, (1000, 15))
174
+
175
+ print(masked_log_probs(pred, targ, return_acc=True))
176
+
177
+
178
+ if __name__ == "__main__":
179
+ torch.manual_seed(0)
180
+
181
+ test_masked_log_probs()
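The intended call pattern for `masked_log_probs` on sequence models, using the Hugging Face convention of `-100` for ignored label positions (toy vocabulary and shapes):

    import torch
    from losses import masked_log_probs

    torch.manual_seed(0)
    pred = torch.randn(2, 6, 10)         # (batch, seq, vocab) logits
    targ = torch.randint(0, 10, (2, 6))
    targ[:, :2] = -100                   # ignored positions, as in HF labels

    stats = masked_log_probs(pred, targ, shift=True)
    print(stats["acc"], stats["nll"], stats["n_tokens"])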
metrics.py ADDED
@@ -0,0 +1,135 @@
1
+ import torch
2
+ from utils import gather_log_probs, mask_hf_labels, masked_mean
3
+
4
+
5
+ def es_sentiment(pre_logits, post_logits, raw_targets, same_sent_mask, NULL_TOKEN=0):
6
+ with torch.no_grad():
7
+ mask, targ = mask_hf_labels(raw_targets)
8
+ pos_mask = same_sent_mask.unsqueeze(-1) * mask
9
+ neg_mask = (~same_sent_mask).unsqueeze(-1) * mask
10
+
11
+ # Compute log likelihoods of pos/neg samples
12
+ pre_edit_token_log_probs = gather_log_probs(pre_logits, targ)
13
+ post_edit_token_log_probs = gather_log_probs(post_logits, targ)
14
+
15
+ mean_pos_pre = masked_mean(pre_edit_token_log_probs, pos_mask)
16
+ mean_pos_post = masked_mean(post_edit_token_log_probs, pos_mask)
17
+ mean_neg_post = masked_mean(post_edit_token_log_probs, neg_mask)
18
+
19
+ z_sent = (mean_pos_post - mean_neg_post).sigmoid()
20
+ z_topic_raw = (mean_pos_post - mean_pos_pre).exp()
21
+ z_topic = min(1, z_topic_raw)
22
+
23
+ es_sent = z_sent * z_topic
24
+
25
+ return {
26
+ "acc_sent": es_sent,
27
+ "z_sent": z_sent,
28
+ "z_topic": z_topic,
29
+ "z_topic_raw": z_topic_raw,
30
+ "correct_probs": mean_pos_post,
31
+ "wrong_probs": mean_neg_post,
32
+ }
33
+
34
+
35
+ # DEPRECATED
36
+ def sent_success(pre_edit_probs, post_edit_probs, pos_mask, eps=torch.finfo(torch.float32).eps, batch_size=20):
37
+ assert False, "No longer used"
38
+ # content_score = post_edit_probs[pos_mask].prod() ** (1/pos_mask.sum()) / (pre_edit_probs[pos_mask]. + eps)
39
+ post_pos_avg = post_edit_probs[pos_mask].prod() ** (1 / pos_mask.sum())
40
+ pre_pos_avg = pre_edit_probs[pos_mask].prod() ** (1 / pos_mask.sum())
41
+ content_score = post_pos_avg / (pre_pos_avg + eps)
42
+ z_content = min(1., content_score)
43
+
44
+ # compute z_sent through a weighting objective
45
+ # normalized_probs = post_edit_probs / (post_edit_probs.sum() + eps)
46
+ # balancing_factor = 0.5 * ((~pos_mask).float().sum() / pos_mask.float().sum() + 1)
47
+ # z_sent_weight = balancing_factor * normalized_probs.dot(pos_mask.float())
48
+ post_neg_avg = post_edit_probs[~pos_mask].prod() ** (1 / (~pos_mask).sum())
49
+ neg_over_pos = post_neg_avg / (eps + post_pos_avg)
50
+ z_sent_weight = 1 / (1 + neg_over_pos)
51
+
52
+ # compute z_sent through a ranking objective
53
+ batch_mask = pos_mask.view(-1, batch_size).long()
54
+ sort_idxs = post_edit_probs.view(-1, batch_size).sort(-1, descending=True).indices
55
+ ranked_mask = batch_mask.gather(1, sort_idxs)
56
+ true_mask = batch_mask.sort(-1, descending=True).values
57
+ z_sent_rank = (ranked_mask == true_mask).float().mean()
58
+
59
+ # compute the final success scores
60
+ weight_success = (z_content * z_sent_weight) ** 0.5
61
+ rank_success = (z_content * z_sent_rank) ** 0.5
62
+
63
+ correct_probs = post_edit_probs[pos_mask].mean()
64
+ wrong_probs = post_edit_probs[~pos_mask].mean()
65
+
66
+ return {
67
+ "acc_weight": weight_success,
68
+ "acc_rank": rank_success,
69
+ "rank_score": z_sent_rank,
70
+ "weight_score": z_sent_weight,
71
+ "content_score": content_score,
72
+ "post_edit_probs": post_edit_probs.sum(),
73
+ "pre_edit_probs": pre_edit_probs.sum(),
74
+ "correct_probs": correct_probs,
75
+ "wrong_probs": wrong_probs
76
+ }
77
+
78
+
79
+ # def sent_retain(pre_logits, post_logits, sent_mask, batch_size=20, eps=torch.finfo(torch.float32).eps):
80
+ # pre_log_probs = pre_logits.log_softmax(-1).gather(-1, all_targ.unsqueeze(-1)).squeeze(-1)
81
+ # post_log_probs = post_logits.log_softmax(-1).gather(-1, all_targ.unsqueeze(-1)).squeeze(-1)
82
+
83
+ # pre_batch = pre_probs.view(-1, batch_size)
84
+ # post_batch = post_probs.view(-1, batch_size)
85
+ # mask_batch = sent_mask.view(-1, batch_size)
86
+
87
+ # stats = []
88
+ # for pre, post, mask in zip(pre_batch, post_batch, mask_batch):
89
+ # avg_pre = pre.prod() ** (1 / pre.numel())
90
+ # avg_post = post.prod() ** (1 / post.numel())
91
+ # z_avg = min(avg_pre / avg_post, avg_post / avg_pre)
92
+
93
+ # post_neg_avg = post[~mask].prod() ** (1 / (~mask).sum())
94
+ # post_pos_avg = post[mask].prod() ** (1 / mask.sum())
95
+
96
+ # pre_neg_avg = pre[~mask].prod() ** (1 / (~mask).sum())
97
+ # pre_pos_avg = pre[mask].prod() ** (1 / mask.sum())
98
+
99
+ # post_neg_over_pos = post_neg_avg / (eps + post_pos_avg)
100
+ # pre_neg_over_pos = pre_neg_avg / (eps + pre_pos_avg)
101
+ # z_post = 1 / (1 + post_neg_over_pos)
102
+ # z_pre = 1 / (1 + pre_neg_over_pos)
103
+
104
+ # z_sent = min(z_post / z_pre, z_pre / z_post)
105
+
106
+ # stats.append((z_avg * z_sent) ** 0.5)
107
+
108
+ # return sum(stats) / len(stats)
109
+
110
+
111
+ # For zsRE and F-NLI
112
+ def retain_rate(pre_logits, post_logits, mask=None):
113
+ if pre_logits.shape[-1] == 1:
114
+ pre_logits = pre_logits.squeeze(-1)
115
+ if post_logits.shape[-1] == 1:
116
+ post_logits = post_logits.squeeze(-1)
117
+
118
+ assert pre_logits.shape == post_logits.shape
119
+ assert pre_logits.shape[0] == mask.shape[0]
120
+
121
+ if pre_logits.dim() == 1:
122
+ # binary classification
123
+ pre_preds = pre_logits > 0
124
+ post_preds = post_logits > 0
125
+ retain = (pre_preds == post_preds).float().mean()
126
+ elif pre_logits.dim() == 3:
127
+ # sequence modeling
128
+ pre_preds = pre_logits.argmax(-1)
129
+ post_preds = post_logits.argmax(-1)
130
+ match = (pre_preds == post_preds) * mask
131
+ retain = (match.sum(-1) == mask.sum(-1)).float().mean()
132
+ else:
133
+ raise NotImplementedError
134
+
135
+ return retain.item()
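`retain_rate` compares pre- and post-edit predictions; for sequences, an example only counts as retained if every masked position keeps the same argmax. A toy call (shapes are illustrative):

    import torch
    from metrics import retain_rate

    pre = torch.randn(4, 7, 50)       # (batch, seq, vocab) pre-edit logits
    post = pre.clone()
    post[0] += torch.randn(7, 50)     # perturb one example
    mask = torch.ones(4, 7)

    print(retain_rate(pre, post, mask=mask))   # fraction of examples with unchanged masked argmax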
models.py ADDED
@@ -0,0 +1,196 @@
1
+ import transformers
2
+ import torch
3
+ import torch.nn as nn
4
+ import re
5
+ import logging
6
+ from nn import FixableDropout
7
+ from utils import scr
8
+
9
+
10
+ LOG = logging.getLogger(__name__)
11
+
12
+
13
+ class CastModule(nn.Module):
14
+ def __init__(self, module: nn.Module, in_cast: torch.dtype = torch.float32, out_cast: torch.dtype = None):
15
+ super().__init__()
16
+
17
+ self.underlying = module
18
+ self.in_cast = in_cast
19
+ self.out_cast = out_cast
20
+
21
+ def cast(self, obj, dtype):
22
+ if dtype is None:
23
+ return obj
24
+
25
+ if isinstance(obj, torch.Tensor):
26
+ return obj.to(dtype)
27
+ else:
28
+ return obj
29
+
30
+ def forward(self, *args, **kwargs):
31
+ args = tuple(self.cast(a, self.in_cast) for a in args)
32
+ kwargs = {k: self.cast(v, self.in_cast) for k, v in kwargs.items()}
33
+ outputs = self.underlying(*args, **kwargs)
34
+ if isinstance(outputs, torch.Tensor):
35
+ outputs = self.cast(outputs, self.out_cast)
36
+ elif isinstance(outputs, tuple):
37
+ outputs = tuple(self.cast(o, self.out_cast) for o in outputs)
38
+ else:
39
+ raise RuntimeError(f"Not sure how to cast type {type(outputs)}")
40
+ return outputs
41
+
42
+ def extra_repr(self):
43
+ return f"in_cast: {self.in_cast}\nout_cast: {self.out_cast}"
44
+
45
+
46
+ class BertClassifier(torch.nn.Module):
47
+ def __init__(self, model_name, hidden_dim=768):
48
+ super().__init__()
49
+ if model_name.startswith("bert"):
50
+ self.model = transformers.BertModel.from_pretrained(model_name, cache_dir=scr())
51
+ else:
52
+ self.model = transformers.AutoModel.from_pretrained(model_name, cache_dir=scr())
53
+ self.classifier = torch.nn.Linear(hidden_dim, 1)
54
+
55
+ @property
56
+ def config(self):
57
+ return self.model.config
58
+
59
+ def forward(self, *args, **kwargs):
60
+ filtered_kwargs = {k: v for k, v in kwargs.items() if k != "labels"}
61
+ model_output = self.model(*args, **filtered_kwargs)
62
+ if "pooler_output" in model_output.keys():
63
+ pred = self.classifier(model_output.pooler_output)
64
+ else:
65
+ pred = self.classifier(model_output.last_hidden_state[:, 0])
66
+
67
+ if "output_hidden_states" in kwargs and kwargs["output_hidden_states"]:
68
+ last_hidden_state = model_output.last_hidden_state
69
+ return pred, last_hidden_state
70
+ else:
71
+ return pred
72
+
73
+
74
+ def replace_dropout(model):
75
+ for m in model.modules():
76
+ for n, c in m.named_children():
77
+ if isinstance(c, nn.Dropout):
78
+ setattr(m, n, FixableDropout(c.p))
79
+
80
+ def resample(m, seed=None):
81
+ for c in m.children():
82
+ if hasattr(c, "resample"):
83
+ c.resample(seed)
84
+ else:
85
+ resample(c, seed)
86
+
87
+ model.resample_dropout = resample.__get__(model)
88
+
89
+
90
+ def get_model(config):
91
+ if config.model.class_name == "BertClassifier":
92
+ model = BertClassifier(config.model.name)
93
+ else:
94
+ ModelClass = getattr(transformers, config.model.class_name)
95
+ LOG.info(f"Loading model class {ModelClass} with name {config.model.name} from cache dir {scr()}")
96
+ model = ModelClass.from_pretrained(config.model.name, cache_dir=scr())
97
+
98
+ if config.model.pt is not None:
99
+ LOG.info(f"Loading model initialization from {config.model.pt}")
100
+ state_dict = torch.load(config.model.pt, map_location="cpu")
101
+
102
+ try:
103
+ model.load_state_dict(state_dict)
104
+ except RuntimeError:
105
+ LOG.info("Default load failed; stripping prefix and trying again.")
106
+ state_dict = {re.sub("^model.", "", k): v for k, v in state_dict.items()}
107
+
108
+ model.load_state_dict(state_dict)
109
+
110
+ LOG.info("Loaded model initialization")
111
+
112
+ if config.dropout is not None:
113
+ n_reset = 0
114
+ for m in model.modules():
115
+ if isinstance(m, nn.Dropout):
116
+ m.p = config.dropout
117
+ n_reset += 1
118
+
119
+ if hasattr(m, "dropout"):  # Required for BART, which uses F.dropout
120
+ if isinstance(m.dropout, float):
121
+ m.dropout = config.dropout
122
+ n_reset += 1
123
+
124
+ if hasattr(m, "activation_dropout"):  # Required for BART, which uses F.dropout
125
+ if isinstance(m.activation_dropout, float):
126
+ m.activation_dropout = config.dropout
127
+ n_reset += 1
128
+
129
+ LOG.info(f"Set {n_reset} dropout modules to p={config.dropout}")
130
+
131
+ param_names = [n for n, _ in model.named_parameters()]
132
+ bad_inner_params = [p for p in config.model.inner_params if p not in param_names]
133
+ if len(bad_inner_params) != 0:
134
+ raise ValueError(f"Params {bad_inner_params} do not exist in model of type {type(model)}.")
135
+
136
+ if config.no_grad_layers is not None:
137
+ if config.half:
138
+ model.bfloat16()
139
+
140
+ def upcast(mod):
141
+ modlist = None
142
+ for child in mod.children():
143
+ if isinstance(child, nn.ModuleList):
144
+ assert modlist is None, f"Found multiple modlists for {mod}"
145
+ modlist = child
146
+ if modlist is None:
147
+ raise RuntimeError("Couldn't find a ModuleList child")
148
+
149
+ LOG.info(f"Setting {len(modlist) - config.no_grad_layers} modules to full precision, with autocasting")
150
+ modlist[config.no_grad_layers:].to(torch.float32)
151
+ modlist[config.no_grad_layers] = CastModule(modlist[config.no_grad_layers])
152
+ modlist[-1] = CastModule(modlist[-1], in_cast=torch.float32, out_cast=torch.bfloat16)
153
+
154
+ parents = []
155
+ if hasattr(model, "transformer"):
156
+ parents.append(model.transformer)
157
+ if hasattr(model, "encoder"):
158
+ parents.append(model.encoder)
159
+ if hasattr(model, "decoder"):
160
+ parents.append(model.decoder)
161
+ if hasattr(model, "model"):
162
+ parents.extend([model.model.encoder, model.model.decoder])
163
+
164
+ for t in parents:
165
+ t.no_grad_layers = config.no_grad_layers
166
+ if config.half and config.alg != "rep":
167
+ upcast(t)
168
+
169
+ if config.half and config.alg != "rep":
170
+ idxs = []
171
+ for p in config.model.inner_params:
172
+ for comp in p.split('.'):
173
+ if comp.isdigit():
174
+ idxs.append(int(comp))
175
+ max_idx, min_idx = str(max(idxs)), str(config.no_grad_layers)
176
+ for pidx, p in enumerate(config.model.inner_params):
177
+ comps = p.split('.')
178
+ if max_idx in comps or min_idx in comps:
179
+ index = comps.index(max_idx) if max_idx in comps else comps.index(min_idx)
180
+ comps.insert(index + 1, 'underlying')
181
+ new_p = '.'.join(comps)
182
+ LOG.info(f"Replacing config.model.inner_params[{pidx}] '{p}' -> '{new_p}'")
183
+ config.model.inner_params[pidx] = new_p
184
+
185
+ return model
186
+
187
+
188
+ def get_tokenizer(config):
189
+ tok_name = config.model.tokenizer_name if config.model.tokenizer_name is not None else config.model.name
190
+ return getattr(transformers, config.model.tokenizer_class).from_pretrained(tok_name, cache_dir=scr())
191
+
192
+
193
+ if __name__ == '__main__':
194
+ m = BertClassifier("bert-base-uncased")
195
+ m(torch.arange(5)[None, :])
196
+ import pdb; pdb.set_trace()
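`get_model`/`get_tokenizer` read a handful of config fields; a minimal namespace-style sketch of those fields (the GPT-2 names and layer choice are just examples, and note that `scr()` assumes this repo's cluster scratch layout):

    import types
    from models import get_model, get_tokenizer

    config = types.SimpleNamespace()
    config.model = types.SimpleNamespace(
        class_name="GPT2LMHeadModel",
        name="gpt2",
        pt=None,                         # optional path to a fine-tuned state dict
        tokenizer_class="GPT2Tokenizer",
        tokenizer_name=None,             # falls back to config.model.name
        inner_params=["transformer.h.11.mlp.c_fc.weight"],
    )
    config.dropout = None
    config.no_grad_layers = None

    model = get_model(config)
    tokenizer = get_tokenizer(config)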
nn.py ADDED
@@ -0,0 +1,362 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ import logging
5
+ import time
6
+
7
+ from utils import factorization
8
+
9
+ LOG = logging.getLogger(__name__)
10
+
11
+
12
+ class FixableDropout(nn.Module):
13
+ def __init__(self, p: float):
14
+ super().__init__()
15
+
16
+ self.p = p
17
+ self.mask_cache = {}
18
+ self.seed = 0
19
+
20
+ def resample(self, seed=None):
21
+ if seed is None:
22
+ seed = int(time.time() * 1e6)
23
+ self.mask_cache = {}
24
+ self.seed = seed
25
+
26
+ def forward(self, x):
27
+ if self.training:
28
+ if x.shape not in self.mask_cache:
29
+ generator = torch.Generator(x.device).manual_seed(self.seed)
30
+ self.mask_cache[x.shape] = torch.bernoulli(
31
+ torch.full_like(x, 1 - self.p), generator=generator
32
+ ).bool()
33
+ self.should_resample = False
34
+
35
+ x = (self.mask_cache[x.shape] * x) / (1 - self.p)
36
+
37
+ return x
38
+
39
+ def extra_repr(self) -> str:
40
+ return f"p={self.p}"
41
+
42
+
43
+ class ActMLP(nn.Module):
44
+ def __init__(self, hidden_dim, n_hidden):
45
+ super().__init__()
46
+
47
+ self.mlp = MLP(1, 1, hidden_dim, n_hidden, init="id")
48
+
49
+ def forward(self, x):
50
+ return self.mlp(x.view(-1, 1)).view(x.shape)
51
+
52
+
53
+ class LightIDMLP(nn.Module):
54
+ def __init__(
55
+ self,
56
+ indim: int,
57
+ outdim: int,
58
+ hidden_dim: int,
59
+ n_hidden: int,
60
+ init: str = None,
61
+ act: str = None,
62
+ rank: int = None,
63
+ ):
64
+ super().__init__()
65
+ LOG.info(f"Building LightIDMLP {[indim] + [rank] + [indim]}")
66
+ self.layer1 = nn.Linear(indim, rank)
67
+ self.layer2 = nn.Linear(rank, indim)
68
+ self.layer2.weight.data[:] = 0
69
+ self.layer2.bias = None
70
+
71
+ def forward(self, x):
72
+ h = self.layer1(x).relu()
73
+ return x + self.layer2(h)
74
+
75
+
76
+ class IDMLP(nn.Module):
77
+ def __init__(
78
+ self,
79
+ indim: int,
80
+ outdim: int,
81
+ hidden_dim: int,
82
+ n_hidden: int,
83
+ init: str = None,
84
+ act: str = None,
85
+ rank: int = None,
86
+ n_modes: int = None
87
+ ):
88
+ super().__init__()
89
+ LOG.info(f"Building IDMLP ({init}) {[indim] * (n_hidden + 2)}")
90
+ self.layers = nn.ModuleList(
91
+ [
92
+ LRLinear(indim, indim, rank=rank, relu=idx < n_hidden, init=init, n_modes=n_modes)
93
+ for idx in range(n_hidden + 1)
94
+ ]
95
+ )
96
+
97
+ def forward(self, x, mode=None):
98
+ for layer in self.layers:
99
+ x = layer(x, mode=mode)
100
+
101
+ return x
102
+
103
+
104
+ class LatentIDMLP(nn.Module):
105
+ def __init__(
106
+ self,
107
+ indim: int,
108
+ outdim: int,
109
+ hidden_dim: int,
110
+ n_hidden: int,
111
+ init: str = None,
112
+ act: str = None,
113
+ rank: int = None,
114
+ ):
115
+ super().__init__()
116
+ LOG.info(f"Building Latent IDMLP ({init}) {[indim] * (n_hidden + 2)}")
117
+
118
+ self.layers = nn.ModuleList()
119
+ self.layers.append(nn.Linear(indim, rank))
120
+ for _ in range(n_hidden - 1):
121
+ self.layers.append(nn.Linear(rank, rank))
122
+ self.layers.append(nn.Linear(rank, outdim))
123
+
124
+ for layer in self.layers[:-1]:
125
+ nn.init.xavier_normal_(layer.weight.data)
126
+
127
+ if init == "id":
128
+ self.layers[-1].weight.data.zero_()
129
+ self.layers[-1].bias.data.zero_()
130
+
131
+ self.init = init
132
+
133
+ def forward(self, x):
134
+ out = x
135
+ for layer in self.layers[:-1]:
136
+ out = layer(out).relu()
137
+
138
+ out = self.layers[-1](out)
139
+ if self.init == "id":
140
+ return out + x
141
+ else:
142
+ return out
143
+
144
+
145
+ class KLinear(nn.Module):
146
+ def __init__(self, inf, outf, pfrac=0.05, symmetric=True, zero_init: bool = True):
147
+ super().__init__()
148
+
149
+ self.inf = inf
150
+
151
+ in_fact = factorization(inf)
152
+ out_fact = factorization(outf)
153
+
154
+ total_params = 0
155
+ self.a, self.b = nn.ParameterList(), nn.ParameterList()
156
+ for (i1, i2), (o1, o2) in zip(reversed(in_fact), reversed(out_fact)):
157
+ new_params = (o1 * i1 + o2 * i2) * (2 if symmetric else 1)
158
+ if (total_params + new_params) / (inf * outf) > pfrac and len(self.a) > 0:
159
+ break
160
+ total_params += new_params
161
+
162
+ self.a.append(nn.Parameter(torch.empty(o1, i1)))
163
+ if symmetric:
164
+ self.a.append(nn.Parameter(torch.empty(o2, i2)))
165
+
166
+ self.b.append(nn.Parameter(torch.empty(o2, i2)))
167
+ if symmetric:
168
+ self.b.append(nn.Parameter(torch.empty(o1, i1)))
169
+
170
+ assert self.a[-1].kron(self.b[-1]).shape == (outf, inf)
171
+
172
+ for factor in self.a:
173
+ nn.init.kaiming_normal_(factor.data)
174
+ for factor in self.b:
175
+ if zero_init:
176
+ factor.data.zero_()
177
+ else:
178
+ nn.init.kaiming_normal_(factor.data)
179
+
180
+ print(f"Created ({symmetric}) k-layer using {total_params/(outf*inf):.3f} params, {len(self.a)} comps")
181
+ self.bias = nn.Parameter(torch.zeros(outf))
182
+
183
+ def forward(self, x):
184
+ assert x.shape[-1] == self.inf, f"Expected input with {self.inf} dimensions, got {x.shape}"
185
+ w = sum([a.kron(b) for a, b in zip(self.a, self.b)]) / (2 * len(self.a) ** 0.5)
186
+ y = w @ x.T
187
+ if self.bias is not None:
188
+ y = y + self.bias
189
+ return y
190
+
191
+
192
+ class LRLinear(nn.Module):
193
+ def __init__(self, inf, outf, rank: int = None, relu=False, init="id", n_modes=None):
194
+ super().__init__()
195
+
196
+ mid_dim = min(rank, inf)
197
+ if init == "id":
198
+ self.u = nn.Parameter(torch.zeros(outf, mid_dim))
199
+ self.v = nn.Parameter(torch.randn(mid_dim, inf))
200
+ elif init == "xavier":
201
+ self.u = nn.Parameter(torch.empty(outf, mid_dim))
202
+ self.v = nn.Parameter(torch.empty(mid_dim, inf))
203
+ nn.init.xavier_uniform_(self.u.data, gain=nn.init.calculate_gain("relu"))
204
+ nn.init.xavier_uniform_(self.v.data, gain=1.0)
205
+ else:
206
+ raise ValueError(f"Unrecognized initialization {init}")
207
+
208
+ if n_modes is not None:
209
+ self.mode_shift = nn.Embedding(n_modes, outf)
210
+ self.mode_shift.weight.data.zero_()
211
+ self.mode_scale = nn.Embedding(n_modes, outf)
212
+ self.mode_scale.weight.data.fill_(1)
213
+
214
+ self.n_modes = n_modes
215
+ self.bias = nn.Parameter(torch.zeros(outf))
216
+ self.inf = inf
217
+ self.init = init
218
+
219
+ def forward(self, x, mode=None):
220
+ if mode is not None:
221
+ assert self.n_modes is not None, "Linear got a mode but wasn't initialized for it"
222
+ assert mode < self.n_modes, f"Input mode {mode} outside of range {self.n_modes}"
223
+ assert x.shape[-1] == self.inf, f"Input wrong dim ({x.shape}, {self.inf})"
224
+
225
+ pre_act = (self.u @ (self.v @ x.T)).T
226
+ if self.bias is not None:
227
+ pre_act += self.bias
228
+
229
+ if mode is not None:
230
+ if not isinstance(mode, torch.Tensor):
231
+ mode = torch.tensor(mode).to(x.device)
232
+ scale, shift = self.mode_scale(mode), self.mode_shift(mode)
233
+ pre_act = pre_act * scale + shift
234
+
235
+ # need clamp instead of relu so gradient at 0 isn't 0
236
+ acts = pre_act.clamp(min=0)
237
+ if self.init == "id":
238
+ return acts + x
239
+ else:
240
+ return acts
241
+
242
+
243
+ class MLP(nn.Module):
244
+ def __init__(
245
+ self,
246
+ indim: int,
247
+ outdim: int,
248
+ hidden_dim: int,
249
+ n_hidden: int,
250
+ init: str = "xavier_uniform",
251
+ act: str = "relu",
252
+ rank: int = None,
253
+ ):
254
+ super().__init__()
255
+
256
+ self.init = init
257
+
258
+ if act == "relu":
259
+ self.act = nn.ReLU()
260
+ elif act == "learned":
261
+ self.act = ActMLP(10, 1)
262
+ else:
263
+ raise ValueError(f"Unrecognized activation function '{act}'")
264
+
265
+ if hidden_dim is None:
266
+ hidden_dim = outdim * 2
267
+
268
+ if init.startswith("id") and outdim != indim:
269
+ LOG.info(f"Overwriting outdim ({outdim}) to be indim ({indim})")
270
+ outdim = indim
271
+
272
+ if init == "id":
273
+ old_hidden_dim = hidden_dim
274
+ if hidden_dim < indim * 2:
275
+ hidden_dim = indim * 2
276
+
277
+ if hidden_dim % indim != 0:
278
+ hidden_dim += hidden_dim % indim
279
+
280
+ if old_hidden_dim != hidden_dim:
281
+ LOG.info(
282
+ f"Overwriting hidden dim ({old_hidden_dim}) to be {hidden_dim}"
283
+ )
284
+
285
+ if init == "id_alpha":
286
+ self.alpha = nn.Parameter(torch.zeros(1, outdim))
287
+
288
+ dims = [indim] + [hidden_dim] * n_hidden + [outdim]
289
+ LOG.info(f"Building ({init}) MLP: {dims} (rank {rank})")
290
+
291
+ layers = []
292
+ for idx, (ind, outd) in enumerate(zip(dims[:-1], dims[1:])):
293
+ if rank is None:
294
+ layers.append(nn.Linear(ind, outd))
295
+ else:
296
+ layers.append(LRLinear(ind, outd, rank=rank))
297
+ if idx < n_hidden:
298
+ layers.append(self.act)
299
+
300
+ if rank is None:
301
+ if init == "id":
302
+ if n_hidden > 0:
303
+ layers[0].weight.data = torch.eye(indim).repeat(
304
+ hidden_dim // indim, 1
305
+ )
306
+ layers[0].weight.data[hidden_dim // 2:] *= -1
307
+ layers[-1].weight.data = torch.eye(outdim).repeat(
308
+ 1, hidden_dim // outdim
309
+ )
310
+ layers[-1].weight.data[:, hidden_dim // 2:] *= -1
311
+ layers[-1].weight.data /= (hidden_dim // indim) / 2.0
312
+
313
+ for layer in layers:
314
+ if isinstance(layer, nn.Linear):
315
+ if init == "ortho":
316
+ nn.init.orthogonal_(layer.weight)
317
+ elif init == "id":
318
+ if layer.weight.shape[0] == layer.weight.shape[1]:
319
+ layer.weight.data = torch.eye(hidden_dim)
320
+ else:
321
+ gain = 3 ** 0.5 if (layer is layers[-1]) else 1.0
322
+ nn.init.xavier_uniform_(layer.weight, gain=gain)
323
+
324
+ layer.bias.data[:] = 0
325
+
326
+ layers[-1].bias = None
327
+ self.mlp = nn.Sequential(*layers)
328
+
329
+ def forward(self, x):
330
+ if self.init == "id_alpha":
331
+ return x + self.alpha * self.mlp(x)
332
+ else:
333
+ return self.mlp(x)
334
+
335
+
336
+ if __name__ == "__main__":
337
+ logging.basicConfig(
338
+ format="%(asctime)s - %(levelname)s [%(filename)s:%(lineno)d] %(message)s",
339
+ level=logging.INFO,
340
+ )
341
+ m0 = MLP(1000, 1000, 1500, 3)
342
+ m1 = MLP(1000, 1000, 1500, 3, init="id")
343
+ m2 = MLP(1000, 1000, 1500, 3, init="id_alpha")
344
+ m3 = MLP(1000, 1000, 1500, 3, init="ortho", act="learned")
345
+
346
+ x = 0.01 * torch.randn(999, 1000)
347
+
348
+ y0 = m0(x)
349
+ y1 = m1(x)
350
+ y2 = m2(x)
351
+ y3 = m3(x)
352
+
353
+ print("y0", (y0 - x).abs().max())
354
+ print("y1", (y1 - x).abs().max())
355
+ print("y2", (y2 - x).abs().max())
356
+ print("y3", (y3 - x).abs().max())
357
+
358
+ assert not torch.allclose(y0, x)
359
+ assert torch.allclose(y1, x)
360
+ assert torch.allclose(y2, x)
361
+ assert not torch.allclose(y3, x)
362
+ import pdb; pdb.set_trace() # fmt: skip
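The `init="id"` low-rank layers above start out as exact identity maps (zero-initialized `u` plus the residual connection); a quick check with hypothetical dimensions, assuming the repo root is importable:

    import torch
    from nn import IDMLP

    net = IDMLP(indim=64, outdim=64, hidden_dim=None, n_hidden=1, init="id", rank=4)
    x = torch.randn(8, 64)
    assert torch.allclose(net(x), x)   # each LRLinear contributes zero at initialization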
requirements.txt ADDED
@@ -0,0 +1,6 @@
1
+ allennlp
2
+ git+https://github.com/eric-mitchell/higher@master # For in-place functional models
3
+ pandas
4
+ streamlit
5
+ torch
6
+ transformers
utils.py ADDED
@@ -0,0 +1,441 @@
1
+ import datetime
2
+ import typing
3
+ import numpy as np
4
+ import struct
5
+ import os
6
+ import getpass
7
+ import logging
8
+ import torch
9
+ import torch.nn as nn
10
+ from collections import defaultdict
11
+ import math
12
+
13
+
14
+ LOG = logging.getLogger(__name__)
15
+
16
+ def masked_mean(values, mask):
17
+ assert mask.dtype == torch.bool
18
+ assert values.shape == mask.shape
19
+ return (values * mask.float()).sum() / mask.sum().float()
20
+
21
+
22
+ def mask_hf_labels(labels, null_token=0):
23
+ valid_mask = labels != -100
24
+ valid_labels = labels.masked_fill(~valid_mask, null_token)
25
+ return valid_mask, valid_labels
26
+
27
+
28
+ def gather_log_probs(logits, labels):
29
+ assert labels.dim() == logits.dim() - 1
30
+ assert labels.shape == logits.shape[:-1]
31
+ return logits.log_softmax(-1).gather(-1, labels.unsqueeze(-1)).squeeze(-1)
32
+
33
+
34
+ def off_diagonal(mat):
35
+ assert mat.dim() == 2
36
+ # assert mat.shape[0] == mat.shape[1]
37
+
38
+ mask = ~torch.eye(max(mat.shape), dtype=torch.bool)
39
+ mask = mask[:mat.shape[0], :mat.shape[1]]
40
+ off_d = mat[mask]
41
+
42
+ assert off_d.numel() == mat.shape[0] * mat.shape[1] - min(mat.shape)
43
+
44
+ return off_d
45
+
46
+
47
+ def set_dropout(model, p):
48
+ if p is not None:
49
+ n_reset = 0
50
+ for m in model.modules():
51
+ if isinstance(m, nn.Dropout):
52
+ m.p = p
53
+ n_reset += 1
54
+
55
+ if hasattr(m, "dropout"):  # Required for BART, which uses F.dropout
56
+ if isinstance(m.dropout, float):
57
+ m.dropout = p
58
+ n_reset += 1
59
+
60
+ if hasattr(m, "activation_dropout"):  # Required for BART, which uses F.dropout
61
+ if isinstance(m.activation_dropout, float):
62
+ m.activation_dropout = p
63
+ n_reset += 1
64
+
65
+ LOG.info(f"Set {n_reset} dropout modules to p={p}")
66
+
67
+
68
+ def _inner_params(named_parameters, inner_names):
69
+ param_dict = dict(named_parameters)
70
+ return [(n, param_dict[n]) for n in inner_names]
71
+
72
+
73
+ def shift_targets(config):
74
+ return "t5" not in config.model.name.lower() and "blender" not in config.model.name.lower()
75
+
76
+
77
+ # https://stackoverflow.com/questions/32871539/integer-factorization-in-python
78
+ def factorization(n):
79
+ return [(i, n // i) for i in range(1, int(n**0.5) + 1) if n % i == 0]
80
+
81
+
82
+ def scr():
83
+ if os.path.exists("/scr-ssd"):
84
+ scr_dir = "/scr-ssd/" + getpass.getuser()
85
+ else:
86
+ scr_dir = "/scr/" + getpass.getuser()
87
+
88
+ if not os.path.exists(scr_dir):
89
+ os.makedirs(scr_dir)
90
+
91
+ return scr_dir
92
+
93
+
94
+ def uuid(digits=4):
95
+ if not hasattr(uuid, "uuid_value"):
96
+ uuid.uuid_value = struct.unpack('I', os.urandom(4))[0] % int(10**digits)
97
+
98
+ return uuid.uuid_value
99
+
100
+
101
+ def formatted_timestamp(time=None):
102
+ if time is None:
103
+ time = datetime.datetime.now()
104
+ return time.strftime("%d/%m/%Y-%H:%M:%S/%f")
105
+
106
+
107
+ def time_delta_seconds(start, finish=None):
108
+ assert type(start) == str
109
+
110
+ t1 = datetime.datetime.strptime(start, "%d/%m/%Y-%H:%M:%S/%f")
111
+ if finish is not None:
112
+ assert type(finish) == str
113
+ t2 = datetime.datetime.strptime(finish, "%d/%m/%Y-%H:%M:%S/%f")
114
+ else:
115
+ t2 = datetime.datetime.now()
116
+
117
+ return (t2 - t1).total_seconds()
118
+
119
+
120
+ def dict_to(d, device):
121
+ new_dict = {}
122
+ for k, v in d.items():
123
+ if isinstance(v, torch.Tensor):
124
+ new_dict[k] = v.to(device)
125
+ elif isinstance(v, dict):
126
+ new_dict[k] = dict_to(v, device)
127
+ else:
128
+ new_dict[k] = v
129
+
130
+ return new_dict
131
+
132
+
133
+ def safe_backward(loss, parameters, accumulate=1, allow_unused=False, backward=False):
134
+ if backward:
135
+ (loss / accumulate).backward()
136
+ else:
137
+ parameters = list(parameters) # Capture the generator output
138
+ grads = torch.autograd.grad(loss, parameters, allow_unused=allow_unused)
139
+ nan, inf = False, False
140
+ for g in grads:
141
+ if g is not None:
142
+ nan |= g.isnan().any().item()
143
+ inf |= g.isinf().any().item()
144
+
145
+ if not (nan or inf):
146
+ for p, g in zip(parameters, grads):
147
+ if g is None:
148
+ continue
149
+
150
+ if p.grad is None:
151
+ p.grad = g / accumulate
152
+ else:
153
+ p.grad += g / accumulate
154
+ else:
155
+ LOG.info(f"Skipping grad accumulation because inf: {inf} nan: {nan}")
156
+
157
+
158
+ def _logits(x):
159
+ return x if not hasattr(x, "logits") else x.logits
160
+
161
+
162
+ def _last_encoder_state(x):
163
+ if hasattr(x, "encoder_last_hidden_state"):
164
+ return x.encoder_last_hidden_state
165
+ else:
166
+ return x.hidden_states[-1]
167
+
168
+
169
+ def load_archive(path):
170
+ import torch
171
+
172
+ if not os.path.exists(path):
173
+ # We've not passed an explicit path, but a part of the filename
174
+ wd = '/iris/u/clin/code/efk/'
175
+ directories = ["outputs", "multirun"]
176
+ matches = []
177
+ for d in directories:
178
+ search = os.path.join(wd, d)
179
+ for run_dir in os.listdir(search):
180
+ if path in run_dir:
181
+ matches.append(os.path.join(search, run_dir))
182
+ assert len(matches) == 1, f">1 matches for search {path}; specify exact path"
183
+
184
+ full_run_dir = matches[0]
185
+ if "0" in os.listdir(full_run_dir):
186
+ full_run_dir = os.path.join(full_run_dir, "0")
187
+ models_dir = os.path.join(full_run_dir, "models")
188
+ models = os.listdir(models_dir)
189
+ non_bk = [m for m in models if not m.endswith(".bk")]
190
+ assert (
191
+ len(non_bk) == 1
192
+ ), f"Expected a single model in {models_dir}, got {len(non_bk)}"
193
+ path = os.path.join(models_dir, non_bk[0])
194
+
195
+ LOG.info(f"Loading checkpoint from {path}")
196
+ archive = torch.load(path, map_location="cpu")
197
+ LOG.info("Load complete.")
198
+
199
+ return archive, path
200
+
201
+
202
+ def flatten_dict(d):
203
+ to_process = list(d.items())
204
+ output = {}
205
+ while len(to_process):
206
+ k, v = to_process.pop()
207
+ if isinstance(v, typing.MutableMapping):
208
+ to_process.extend([(f"{k}.{k_}", v_) for (k_, v_) in v.items()])
209
+ else:
210
+ assert k not in output.keys(), "Somehow ended up with duplicate keys"
211
+ output[k] = v
212
+
213
+ return output
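For example, `flatten_dict` joins nested keys with dots (toy values; iteration order may differ):

    flatten_dict({"model": {"name": "gpt2", "pt": None}, "lr": 1e-5})
    # -> {"lr": 1e-05, "model.pt": None, "model.name": "gpt2"}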
214
+
215
+
216
+ def add_padding(tokenizer, model):
217
+ tokenizer.add_special_tokens({'pad_token': '[PAD]'})
218
+ model.resize_token_embeddings(len(tokenizer))
219
+ model.transformer.wte.weight.data[-1] = model.transformer.wte.weight.data.mean(0)
220
+
221
+
222
+ def add_sep(tokenizer, model):
223
+ tokenizer.add_special_tokens({'sep_token': '[SEP]'})
224
+ # model.resize_token_embeddings(len(tokenizer))
225
+ # model.lm_head.weight.data[-1, :] = model.lm_head.weight.data.mean(0)
226
+
227
+
228
+ class EarlyStopper:
229
+ def __init__(self, patience: int, key: str, minimize: bool = False):
230
+ self.best_value = 1e9 if minimize else -1e9
231
+ self.best_iter = 0
232
+ self.current_iter = 0
233
+ self.key = key
234
+ self.patience = patience
235
+ self.minimize = minimize
236
+ self._stop = False
237
+
238
+ def update(self, idx, stats):
239
+ assert self.key in stats, f"'{self.key}' not in stats dict"
240
+ value = stats[self.key]
241
+ new_best = value < self.best_value if self.minimize else value > self.best_value
242
+ if new_best:
243
+ self.best_value = value
244
+ self.best_iter = idx
245
+
246
+ self.current_iter = idx
247
+ return new_best
248
+
249
+ def should_stop(self):
250
+ self._stop |= self.current_iter - self.best_iter >= self.patience
251
+ return self._stop
252
+
253
+
254
+ class RunningStatAverager:
255
+ def __init__(self, suffix="", exclude=["grad/"], compute_ppl: bool = True):
256
+ self.underlying = None
257
+ self.suffix = suffix
258
+ self.exclude = exclude
259
+ self.compute_ppl = compute_ppl
260
+
261
+ self.reset()
262
+
263
+ def add(self, d: dict):
264
+ for k, v in d.items():
265
+ if not any([k.startswith(prefix) for prefix in self.exclude]):
266
+ if len(self.suffix):
267
+ self.underlying[f"{k}_{self.suffix}"].append(v)
268
+ else:
269
+ self.underlying[k].append(v)
270
+
271
+ def average(self):
272
+ average = {}
273
+ for k, v in self.underlying.items():
274
+ if not k.startswith("nll/"):
275
+ average[k] = sum(v) / len(v)
276
+ else:
277
+ assert len(k.split("/")) == 2, f"Invalid key {k}"
278
+ name = k.split("/")[1]
279
+ token_counts = self.underlying[f"n_tokens/{name}"]
280
+ total_nll = sum([nll * c for nll, c in zip(v, token_counts)])
281
+ average[k] = total_nll / sum(token_counts)
282
+ if self.compute_ppl:
283
+ average[f"perplexity/{name}"] = math.e ** average[k]
284
+
285
+ return {k: v if not isinstance(v, torch.Tensor) else v.item() for k, v in average.items()}
286
+
287
+ def reset(self):
288
+ self.underlying = defaultdict(list)
289
+
290
+
291
+ class EditBatchSampler:
292
+ def __init__(
293
+ self,
294
+ n,
295
+ memorize_mode=False,
296
+ loc_disjoint=True,
297
+ seed=0,
298
+ hard_neg=False,
299
+ hard_neg_prob=1.0,
300
+ loc_distr_matrix=None,
301
+ loc_idx_matrix=None,
302
+ keep_probs=None,
303
+ mutex=None
304
+ ):
305
+ self.memorize_mode = memorize_mode
306
+ self.n = n
307
+ self.loc_disjoint = loc_disjoint
308
+ self.rng = np.random.default_rng(seed)
309
+ self.hard_neg = hard_neg
310
+ self.hard_neg_prob = hard_neg_prob
311
+ self.loc_probs = loc_distr_matrix
312
+ self.loc_idxs = loc_idx_matrix
313
+ self.keep_probs = np.array(keep_probs)[:self.n] if keep_probs is not None else None
314
+ self.mutex = mutex[:self.n] if mutex is not None else None
315
+ self._init()
316
+
317
+ def _init(self):
318
+ idxs = np.arange(self.n)
319
+ if self.keep_probs is not None:
320
+ sample = self.rng.binomial(1, self.keep_probs).astype(np.bool)
321
+ idxs = idxs[sample]
322
+
323
+ self.perm = self.rng.permutation(idxs)
324
+ self.edit_position = 0
325
+
326
+ def get_edit_idxs(self, batch_size):
327
+ if self.mutex is None:
328
+ idxs = set([int(idx) for idx in self.perm[self.edit_position: self.edit_position + batch_size]])
329
+ self.edit_position += batch_size
330
+ else:
331
+ mutexes = []
332
+ idxs = []
333
+
334
+ def notin(x, mutexes):
335
+ for m in mutexes:
336
+ if x in m or m in x:
337
+ return False
338
+ return True
339
+ while len(idxs) < batch_size:
340
+ new_idx = self.perm[self.edit_position]
341
+ if notin(self.mutex[new_idx], mutexes):
342
+ mutexes.append(self.mutex[new_idx])
343
+ idxs.append(int(new_idx))
344
+ self.edit_position += 1
345
+ if self.edit_position == self.perm.shape[0]:
346
+ return None
347
+
348
+ idxs = set(idxs)
349
+
350
+ return idxs
351
+
352
+ def sample(self, batch_size, return_hard_flag=False):
353
+ if self.memorize_mode:
354
+ return list(range(batch_size)), list(range(batch_size, batch_size * 2))
355
+
356
+ if self.edit_position + batch_size >= self.perm.shape[0]:
357
+ self._init() # Re-start if we end with a partially-sized batch
358
+
359
+ edit_idxs = self.get_edit_idxs(batch_size)
360
+ if edit_idxs is None:
361
+ self._init()
362
+ edit_idxs = self.get_edit_idxs(batch_size)
363
+ if edit_idxs is None:
364
+ raise RuntimeError(f"No valid batches of size {batch_size} exist!")
365
+
366
+ if self.hard_neg:
367
+ assert self.loc_probs is not None, "hard_neg is on, but don't have distance matrix!"
368
+
369
+ def get_loc_idxs():
370
+ if self.hard_neg and self.rng.uniform() < self.hard_neg_prob:
371
+ return [int(self.rng.choice(self.loc_idxs[idx], p=self.loc_probs[idx])) for idx in edit_idxs], True
372
+ else:
373
+ # Use deterministic implementation in case edit batches are large
374
+ non_edit_idxs = list(set(range(self.n)) - set(edit_idxs))
375
+ return [int(idx) for idx in self.rng.choice(non_edit_idxs, batch_size)], False
376
+
377
+ loc_idxs, hard = get_loc_idxs()
378
+ if self.loc_disjoint:
379
+ steps = 0
380
+ while len(edit_idxs.intersection(set(loc_idxs))) > 0:
381
+ loc_idxs, hard = get_loc_idxs()
382
+ steps += 1
383
+ if steps > 100:
384
+ raise RuntimeError("Can't find disjoint loc_idxs and edit_idxs!")
385
+
386
+ if return_hard_flag:
387
+ return list(edit_idxs), loc_idxs, hard
388
+ else:
389
+ return list(edit_idxs), loc_idxs
390
+
391
+
392
+ def parent_module(model, pname):
393
+ comps = pname.split('.')
394
+ parent = model
395
+ for comp in comps[:-1]:
396
+ if hasattr(parent, comp):
397
+ parent = getattr(parent, comp)
398
+ elif comp.isdigit():
399
+ parent = parent[int(comp)]
400
+ else:
401
+ raise RuntimeError(f"Couldn't find child module {comp}")
402
+ assert hasattr(parent, comps[-1])
403
+ return parent
404
+
405
+
406
+ def build_distr_matrix(edit_qs, config, loc_qs=None, slice_size=1000):
407
+ n = len(edit_qs)
408
+ device = "cuda" if torch.cuda.is_available() else "cpu"
409
+
410
+ num_neighbors = config.data.hard_neg_neighbors
411
+ num_exclude = config.data.hard_neg_exclude
412
+ temp = config.data.hard_neg_temp
413
+
414
+ from sentence_transformers import SentenceTransformer
415
+ from sentence_transformers.util import pytorch_cos_sim
416
+ embedding_model = SentenceTransformer('all-MiniLM-L6-v2', cache_folder=scr()).to(device)
417
+
418
+ ind_matrix = torch.zeros((n, num_neighbors - num_exclude), dtype=torch.long)
419
+ distr_matrix = torch.full((n, num_neighbors - num_exclude), float('nan'))
420
+ edit_encodings = torch.FloatTensor(embedding_model.encode(edit_qs, batch_size=256)).to(device)
421
+
422
+ # If loc_qs is None then build the similarity matrix between edit_qs and itself
423
+ loc_encodings = edit_encodings if loc_qs is None else embedding_model.encode(loc_qs, batch_size=256)
424
+ if isinstance(loc_encodings, np.ndarray):
425
+ loc_encodings = torch.FloatTensor(loc_encodings).to(device)
426
+
427
+ for idx in range(0, n, slice_size):
428
+ end_idx = idx + slice_size if idx + slice_size <= n else n
429
+ slice_encodings = edit_encodings[idx:end_idx]
430
+ sim_rows = pytorch_cos_sim(slice_encodings, loc_encodings)
431
+ indices = sim_rows.topk(num_neighbors, -1).indices[:, num_exclude:]
432
+ ind_matrix[idx:end_idx] = indices.cpu()
433
+ distr_matrix[idx:end_idx] = sim_rows.gather(-1, indices).mul(temp).exp().cpu()
434
+
435
+ assert not torch.isnan(distr_matrix).any()
436
+
437
+ LOG.info(f"Built hard negative distribution matrix of size {distr_matrix.shape}")
438
+ distr_matrix = distr_matrix.numpy()
439
+ distr_matrix = distr_matrix / distr_matrix.sum(-1, keepdims=True)
440
+ return distr_matrix, ind_matrix.numpy()
441
+