not-found committed on
Commit
bf24ef5
1 Parent(s): 3061fd9

Add TT-compressed model with rank 128

Files changed (8)
  1. README.md +33 -0
  2. config.json +89 -0
  3. configuration_bart.py +20 -0
  4. linalg.py +45 -0
  5. modeling_bart.py +61 -0
  6. modules.py +143 -0
  7. pytorch_model.bin +3 -0
  8. util.py +193 -0
README.md ADDED
@@ -0,0 +1,33 @@
+ ---
+ language:
+ - en
+ tags:
+ - detoxification
+ licenses:
+ - cc-by-nc-sa
+ pipeline_tag: text2text-generation
+ ---
+
+ **Model Overview**
+
+ This is a TT-compressed version of the original BART-based detoxification
+ model [s-nlp/bart-base-detox][1].
+
+ **How to use**
+
+ ```python
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+ model = AutoModelForSeq2SeqLM \
+     .from_pretrained('s-nlp/bart-base-detox-ttd', trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained('facebook/bart-base')
+
+ toxics = ['that sick fuck is going to be out in 54 years.']
+ tokens = tokenizer(toxics, return_tensors='pt')
+ tokens = model.generate(**tokens, num_return_sequences=1, do_sample=False,
+                         temperature=1.0, repetition_penalty=10.0,
+                         max_length=128, num_beams=5)
+ neutrals = tokenizer.decode(tokens[0, ...], skip_special_tokens=True)
+ print(neutrals)  # stdout: She is going to be out in 54 years.
+ ```
+
+ [1]: https://huggingface.co/s-nlp/bart-base-detox
config.json ADDED
@@ -0,0 +1,89 @@
+ {
+   "_name_or_path": "facebook/bart-base",
+   "activation_dropout": 0.1,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "TTCompressedBartForConditionGeneration"
+   ],
+   "attention_dropout": 0.1,
+   "auto_map": {
+     "AutoConfig": "configuration_bart.TTCompressedBartConfig",
+     "AutoModelForSeq2SeqLM": "modeling_bart.TTCompressedBartForConditionGeneration"
+   },
+   "bos_token_id": 0,
+   "classif_dropout": 0.1,
+   "classifier_dropout": 0.0,
+   "d_model": 768,
+   "decoder_attention_heads": 12,
+   "decoder_ffn_dim": 3072,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 12,
+   "encoder_ffn_dim": 3072,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 2,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_beams": 4,
+   "num_hidden_layers": 6,
+   "pad_token_id": 1,
+   "rank": 128,
+   "scale_embedding": false,
+   "shape_in": [
+     8,
+     8,
+     12
+   ],
+   "shape_out": [
+     16,
+     16,
+     12
+   ],
+   "task_specific_params": {
+     "summarization": {
+       "length_penalty": 1.0,
+       "max_length": 128,
+       "min_length": 12,
+       "num_beams": 4
+     },
+     "summarization_cnn": {
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "num_beams": 4
+     },
+     "summarization_xsum": {
+       "length_penalty": 1.0,
+       "max_length": 62,
+       "min_length": 11,
+       "num_beams": 6
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.25.1",
+   "use_cache": true,
+   "vocab_size": 50266
+ }
configuration_bart.py ADDED
@@ -0,0 +1,20 @@
+ from typing import Tuple
+
+ from transformers import BartConfig
+
+
+ class TTCompressedBartConfig(BartConfig):
+     """Class TTCompressedBartConfig defines a configuration for TT-compressed
+     BART. Here, we split the shape into input and output shapes in order to
+     serialize them to separate fields in JSON.
+     """
+
+     def __init__(self, *args, shape_in: Tuple[int] = (),
+                  shape_out: Tuple[int] = (), rank: int = 128, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.shape_in = shape_in
+         self.shape_out = shape_out
+         self.rank = rank
+
+
+ TTCompressedBartConfig.register_for_auto_class()
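For illustration, a minimal sketch of constructing this configuration directly (the plain import path is an assumption; on the Hub the class is resolved via `auto_map` with `trust_remote_code=True`). The values follow config.json above: `shape_in` factorizes `d_model` (8 * 8 * 12 = 768) and `shape_out` factorizes the feed-forward width (16 * 16 * 12 = 3072).

```python
from configuration_bart import TTCompressedBartConfig  # assumed local import

config = TTCompressedBartConfig(shape_in=(8, 8, 12),     # 8 * 8 * 12 = 768
                                shape_out=(16, 16, 12),  # 16 * 16 * 12 = 3072
                                rank=128)
print(config.shape_in, config.shape_out, config.rank)
```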
linalg.py ADDED
@@ -0,0 +1,45 @@
+ from functools import partial
+ from typing import Sequence
+
+ import torch as T
+
+
+ def svd_truncated(mat: T.Tensor, rank: int):
+     lvecs, svals, rvecs = T.linalg.svd(mat, full_matrices=False)
+     return lvecs[:, :rank], svals[:rank], rvecs[:rank, :].T
+
+
+ def ttd(ten: T.Tensor, rank: Sequence[int], noiters: int = 1000,
+         method: str = 'tsvd') -> Sequence[T.Tensor]:
+     """Function ttd implements the tensor-train decomposition (TTD).
+     """
+     if ten.ndim + 1 != len(rank):
+         raise ValueError(f'Expected number of ranks is {ten.ndim + 1} '
+                          f'but given {len(rank)}.')
+     if rank[0] != 1 or rank[-1] != 1:
+         raise ValueError('The first and the last TT-ranks must be 1.')
+
+     if method == 'svd':
+         factorize = svd_truncated
+     elif method == 'tsvd':
+         factorize = partial(T.svd_lowrank, niter=noiters)
+     else:
+         raise ValueError(f'Unknown method: {method}.')
+
+     cores = []
+     shape = ten.shape
+
+     # Iterate over core shapes and split a core off the tensor at each step.
+     for core_shape in zip(rank, shape, rank[1:]):
+         # Matricization of the tensor over the first two axes.
+         mat = ten.reshape(core_shape[0] * core_shape[1], -1)
+         # Truncated Singular Value Decomposition (SVD).
+         lvecs, svals, rvecs = factorize(mat, core_shape[2])
+         # Absorb singular values into the left factor and shape the core.
+         core = lvecs * svals[None, :]
+         core = core.reshape(core_shape)
+         cores.append(core)
+         # Use the right factors as the tensor for the next step.
+         ten = rvecs.T
+
+     return cores
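A small sanity check for `ttd`, assuming `linalg.py` is importable as a plain module: when the requested ranks do not truncate anything (here `(1, 8, 10, 1)` is full rank for an 8x9x10 tensor), contracting the cores back together reproduces the input up to floating-point error.

```python
import torch as T

from linalg import ttd  # assumed plain import of the module above

ten = T.randn(8, 9, 10)
cores = ttd(ten, rank=(1, 8, 10, 1), method='svd')

# Contract G_1 G_2 G_3 back into a single tensor.
rec = cores[0]
for core in cores[1:]:
    rec = T.tensordot(rec, core, dims=([rec.ndim - 1], [0]))
rec = rec.squeeze(0).squeeze(-1)  # drop the unit boundary ranks
print(T.dist(rec, ten))  # ~0 up to numerical precision
```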
modeling_bart.py ADDED
@@ -0,0 +1,61 @@
+ """This module uses parts of rut5compressed. It shares the same module
+ structure as the models used in neural network compression experiments
+ with rut5compressed.
+ """
+
+ from functools import partial
+ from typing import Optional, Tuple
+
+ import torch as T
+ from transformers import BartForConditionalGeneration
+
+ from .configuration_bart import TTCompressedBartConfig
+ from .linalg import ttd  # noqa: F401 We need this import for HF.
+ from .modules import TTCompressedLinear
+ from .util import compress_linear_tt, map_module
+
+
+ class TTCompressedBartForConditionGeneration(BartForConditionalGeneration):
+     """Class TTCompressedBartForConditionGeneration defines a BART-based
+     model with TT-compressed linear layers.
+     """
+
+     LAYERS = r'/(de|en)coder/layers/\d+/fc[12]'
+
+     config_class = TTCompressedBartConfig
+
+     def __init__(self, config: TTCompressedBartConfig,
+                  shape: Optional[Tuple[Tuple[int], Tuple[int]]] = None,
+                  rank: Optional[int] = None,
+                  compress: bool = False):
+         super().__init__(config)
+
+         self.rank = rank or config.rank
+         self.shape = shape
+         if self.shape is None:
+             self.shape = (tuple(self.config.shape_in),
+                           tuple(self.config.shape_out))
+
+         compress_fn = partial(compress_linear_tt, shape=self.shape,
+                               rank=self.rank)
+         if not compress:
+             compress_fn = self.convert
+         self.model = map_module(self.model, compress_fn, self.LAYERS)
+
+     def convert(self, module: T.nn.Module, path: str) -> T.nn.Module:
+         if isinstance(module, T.nn.Linear):
+             # If in_features < out_features of the original linear module,
+             # then this is an extension mapping; otherwise, it is an
+             # embedding mapping and we need to swap input and output shapes.
+             in_shape, out_shape = self.shape
+             if module.in_features > module.out_features:
+                 out_shape, in_shape = self.shape
+
+             shape = (in_shape, out_shape)
+             bias = module.bias is not None
+             return TTCompressedLinear.from_random(shape, self.rank, bias)
+         return module
+
+
+ TTCompressedBartForConditionGeneration \
+     .register_for_auto_class('AutoModelForSeq2SeqLM')
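Since `map_module` is applied to `self.model`, the `LAYERS` pattern selects exactly the two feed-forward projections of every encoder and decoder block. A short illustration (the example paths follow BART's module naming, which is an assumption here):

```python
import re

patt = re.compile(r'/(de|en)coder/layers/\d+/fc[12]')
print(bool(patt.match('/encoder/layers/3/fc1')))               # True
print(bool(patt.match('/decoder/layers/0/fc2')))               # True
print(bool(patt.match('/encoder/layers/3/self_attn/q_proj')))  # False
```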
modules.py ADDED
@@ -0,0 +1,143 @@
+ # Copied from rut5compressed/nn/modules.py of the original repository.
+
+ from typing import Optional, Sequence, Tuple
+
+ import numpy as np
+ import torch as T
+ from opt_einsum import contract_expression
+ from opt_einsum.contract import ContractExpression
+
+ from .linalg import ttd
+
+
+ def make_contraction(shape, rank, batch_size=32,
+                      seqlen=512) -> ContractExpression:
+     ndim = len(rank) - 1
+     row_shape, col_shape = shape
+
+     # Generate all contraction indexes.
+     row_ix, col_ix = np.arange(2 * ndim).reshape(2, ndim)
+     rank_ix = 2 * ndim + np.arange(ndim + 1)
+     batch_ix = 4 * ndim  # Zero-based index.
+
+     # Order indexes of cores.
+     cores_ix = np.column_stack([rank_ix[:-1], row_ix, col_ix, rank_ix[1:]])
+     cores_shape = zip(rank[:-1], row_shape, col_shape, rank[1:])
+
+     # Order indexes of input (contraction by columns: X G_1 G_2 ... G_d).
+     input_ix = np.insert(row_ix, 0, batch_ix)
+     input_shape = (batch_size * seqlen, ) + row_shape
+
+     # Order indexes of output (append rank indexes as well).
+     output_ix = np.insert(col_ix, 0, batch_ix)
+     output_ix = np.append(output_ix, (rank_ix[0], rank_ix[-1]))
+
+     # Prepare contraction operands.
+     ops = [input_shape, input_ix]
+     for core_ix, core_shape in zip(cores_ix, cores_shape):
+         ops.append(core_shape)
+         ops.append(core_ix)
+     ops.append(output_ix)
+     ops = [tuple(op) for op in ops]
+
+     return contract_expression(*ops)
+
+
+ class TTCompressedLinear(T.nn.Module):
+     """Class TTCompressedLinear is a layer which represents the weight matrix
+     of a linear layer in factorized form as a tensor-train matrix.
+
+     >>> linear_layer = T.nn.Linear(6, 6)
+     >>> tt_layer = TTCompressedLinear \
+     ...     .from_linear(linear_layer, rank=2, shape=((2, 3), (3, 2)))
+     """
+
+     def __init__(self, cores: Sequence[T.Tensor],
+                  bias: Optional[T.Tensor] = None):
+         super().__init__()
+
+         for i, core in enumerate(cores):
+             if core.ndim != 4:
+                 raise ValueError('Expected number of dimensions of the '
+                                  f'{i}-th core is 4 but given {core.ndim}.')
+
+         # Prepare contraction expression.
+         self.rank = (1, ) + tuple(core.shape[3] for core in cores)
+         self.shape = (tuple(core.shape[1] for core in cores),
+                       tuple(core.shape[2] for core in cores))
+         self.contraction = make_contraction(self.shape, self.rank)
+
+         # The TT-matrix is applied on the left, so this defines the number
+         # of input and output features.
+         self.in_features = np.prod(self.shape[0])
+         self.out_features = np.prod(self.shape[1])
+
+         # Create trainable variables.
+         self.cores = T.nn.ParameterList(T.nn.Parameter(core) for core in cores)
+         self.bias = None
+         if bias is not None:
+             if bias.numel() != self.out_features:
+                 raise ValueError(f'Expected bias size is {self.out_features} '
+                                  f'but its shape is {bias.shape}.')
+             self.bias = T.nn.Parameter(bias)
+
+     def forward(self, input: T.Tensor) -> T.Tensor:
+         # We need to replace the feature dimension with multiple dimensions
+         # in order to contract with the TT-matrix.
+         input_shape = input.shape
+         input = input.reshape(-1, *self.shape[0])
+
+         # Contract input with weights and replace the multiple dimensions
+         # back with the feature dimension.
+         output = self.contraction(input, *self.cores)
+         output = output.reshape(*input_shape[:-1], self.out_features)
+
+         if self.bias is not None:
+             output += self.bias
+         return output
+
+     @classmethod
+     def from_linear(cls, linear: T.nn.Linear,
+                     shape: Tuple[Tuple[int], Tuple[int]], rank: int, **kwargs):
+         ndim = len(shape[0])
+
+         # Prepare information about shape and rank of TT (not TTM).
+         tt_rank = (1, ) + (rank, ) * (ndim - 1) + (1, )
+         tt_shape = tuple(n * m for n, m in zip(*shape))
+
+         # Reshape the weight matrix to a tensor indexed like a TT-matrix.
+         matrix = linear.weight.data.T
+         tensor = matrix.reshape(shape[0] + shape[1])
+         for i in range(ndim - 1):
+             tensor = tensor.moveaxis(ndim + i, 2 * i + 1)
+
+         # Reshape the TT-matrix to a plain TT and apply the decomposition.
+         tensor = tensor.reshape(tt_shape)
+         cores = ttd(tensor, tt_rank, **kwargs)
+
+         # Reshape TT-cores back to TT-matrix cores (TTM-cores).
+         core_shapes = zip(tt_rank, *shape, tt_rank[1:])
+         cores = [core.reshape(core_shape)
+                  for core, core_shape in zip(cores, core_shapes)]
+
+         # Make a copy of the bias if it exists.
+         bias = None
+         if linear.bias is not None:
+             bias = T.clone(linear.bias.data)
+
+         return TTCompressedLinear(cores, bias)
+
+     @classmethod
+     def from_random(cls, shape: Tuple[Tuple[int], Tuple[int]], rank: int,
+                     bias: bool = True):
+         tt_ndim = len(shape[0])
+         tt_rank = (1, ) + (rank, ) * (tt_ndim - 1) + (1, )
+         core_shapes = zip(tt_rank, *shape, tt_rank[1:])
+         cores = [T.randn(core_shape) for core_shape in core_shapes]
+
+         bias_term = None
+         if bias:
+             out_features = np.prod(shape[1])
+             bias_term = T.randn(out_features)
+
+         return TTCompressedLinear(cores, bias_term)
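A usage sketch for `TTCompressedLinear.from_linear` with the shapes this commit uses for BART's feed-forward layers; `method='svd'` is forwarded through `**kwargs` to `ttd`. The layer keeps the calling convention of `torch.nn.Linear`, so leading batch and sequence dimensions pass through unchanged.

```python
import torch as T

linear = T.nn.Linear(768, 3072)
tt = TTCompressedLinear.from_linear(linear, shape=((8, 8, 12), (16, 16, 12)),
                                    rank=128, method='svd')
print(tt.in_features, tt.out_features)  # 768 3072

x = T.randn(2, 16, 768)  # (batch, sequence, features)
print(tt(x).shape)       # torch.Size([2, 16, 3072])
```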
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a97ccb2e65c441bf9d23d4e4f48e9e88efe407c54874034acce71c6706c4562e
+ size 536167389
util.py ADDED
@@ -0,0 +1,193 @@
1
+ # Copied from rut5compressed/util.py of rut5compressed repository.
2
+
3
+ import logging
4
+ import re
5
+ from functools import wraps
6
+ from re import Pattern
7
+ from typing import Callable, Dict, Optional, Tuple
8
+
9
+ import numpy as np
10
+ import torch as T
11
+
12
+ from .modules import TTCompressedLinear
13
+
14
+
15
+ def map_module(root: T.nn.Module,
16
+ func: Callable[[T.nn.Module, str], T.nn.Module],
17
+ patt: Optional[str] = None) -> T.nn.Module:
18
+ """Function ``map_module`` applies a function to each leaf of module tree
19
+ which matches to a specified pattern.
20
+
21
+ Parameters
22
+ ----------
23
+ root : torch.nn.Module
24
+ Module to modify.
25
+ func : callable
26
+ Function to be applied to every module (or matched to pattern) in
27
+ module tree.
28
+ patt : str, optional
29
+ Pattern to filter modules by path in module tree.
30
+
31
+ Returns
32
+ -------
33
+ torch.nn.Module
34
+ Module modified in-place.
35
+ """
36
+ @wraps(func)
37
+ def func_safe(*args, **kwargs):
38
+ node = func(*args, **kwargs)
39
+ if not isinstance(node, T.nn.Module):
40
+ raise ValueError('Mapped result must be toch.nn.Module type '
41
+ f'but given {type(node)}.')
42
+ return node
43
+
44
+ return _map_module(root, func_safe, re.compile(patt or r'.*'), '')
45
+
46
+
47
+ def _map_module(root: T.nn.Module,
48
+ func: Callable[[T.nn.Module, str], T.nn.Module], patt: Pattern,
49
+ path: str) -> T.nn.Module:
50
+ for name, child in root.named_children():
51
+ node = _map_module(child, func, patt, f'{path}/{name}')
52
+ if node != child:
53
+ setattr(root, name, node)
54
+ if patt.match(path or '/'):
55
+ root = func(root, path or '/')
56
+ return root
57
+
58
+
59
+ def convert_linear(module: T.nn.Linear, ctor, **kwargs) -> T.nn.Module:
60
+ """Function convert_linear takes module and returns linear module with
61
+ approximate matmul. Non-linear modules are returned intact.
62
+ """
63
+ if not isinstance(module, T.nn.Linear):
64
+ return module
65
+ raise NotImplementedError
66
+
67
+
68
+ def numel(module: T.nn.Module):
69
+ value = sum(x.numel() for x in module.parameters()) + \
70
+ sum(x.numel() for x in module.buffers())
71
+
72
+ def account_prunned(module: T.nn.Module, path: str):
73
+ nonlocal value
74
+ for name, attr in vars(module).items():
75
+ if not name.endswith('_mask') or not isinstance(attr, T.Tensor):
76
+ continue
77
+
78
+ weight_name = name[:-5]
79
+ if not hasattr(module, weight_name):
80
+ continue
81
+
82
+ weight = getattr(module, weight_name)
83
+ value -= weight.numel() - attr.sum()
84
+ value += attr.numel()
85
+ return module
86
+
87
+ def account_quantized(module: T.nn.Module, path: str):
88
+ nonlocal value
89
+ if isinstance(module, T.nn.quantized.Linear):
90
+ value += module.weight().numel()
91
+ if module.bias() is not None:
92
+ value += module.bias().numel()
93
+ return module
94
+
95
+ def account_rest(module: T.nn.Module, path: str):
96
+ account_prunned(module, path)
97
+ account_quantized(module, path)
98
+ return module
99
+
100
+ map_module(module, account_rest)
101
+ return value
102
+
103
+
104
+ def sizeof(module: T.nn.Module):
105
+ value = sum(x.numel() * x.element_size() for x in module.parameters()) + \
106
+ sum(x.numel() * x.element_size() for x in module.buffers())
107
+
108
+ def account_prunned(module: T.nn.Module, path: str):
109
+ nonlocal value
110
+ for name, attr in vars(module).items():
111
+ if not name.endswith('_mask') or not isinstance(attr, T.Tensor):
112
+ continue
113
+
114
+ weight_name = name[:-5]
115
+ if not hasattr(module, weight_name):
116
+ continue
117
+
118
+ weight = getattr(module, weight_name)
119
+ value -= (weight.numel() - attr.sum()) * weight.element_size()
120
+ value += attr.numel() * attr.element_size()
121
+ return module
122
+
123
+ def account_quantized(module: T.nn.Module, path: str):
124
+ nonlocal value
125
+ if isinstance(module, T.nn.quantized.Linear):
126
+ value += module.weight().numel() * module.weight().element_size()
127
+ if (bias := module.bias()) is not None:
128
+ value += bias.numel() * bias.element_size()
129
+ return module
130
+
131
+ def account_rest(module: T.nn.Module, path: str):
132
+ account_prunned(module, path)
133
+ account_quantized(module, path)
134
+ return module
135
+
136
+ map_module(module, account_rest)
137
+ return value
138
+
139
+
140
+ def flatten_module(module: T.nn.Module, regexp=None) -> Dict[str, T.nn.Module]:
141
+ modules = {}
142
+ map_module(module, lambda x, y: modules.update(**{y: x}) or x, regexp)
143
+ return modules
144
+
145
+
146
+ def print_flatten(module: T.nn.Module):
147
+ paths = []
148
+ path_len = 0
149
+ names = []
150
+ name_len = 0
151
+ indx_len = 0
152
+
153
+ def func(module, path):
154
+ nonlocal path_len, name_len, indx_len
155
+ paths.append(path)
156
+ path_len = max(path_len, len(path))
157
+ name = module.__class__.__name__
158
+ names.append(name)
159
+ name_len = max(name_len, len(name))
160
+ indx_len += 1
161
+ return module
162
+
163
+ map_module(module, func)
164
+
165
+ indx_len = int(np.ceil(np.log10(indx_len)))
166
+ fmt = f'{{indx:>{indx_len}s}} {{path:{path_len}s}} {{name:{name_len}s}}'
167
+ print(fmt.format(indx='#', path='Path', name='Layer'))
168
+ print('-' * (indx_len + path_len + name_len + 2))
169
+ for i, (path, name) in enumerate(zip(paths, names)):
170
+ print(fmt.format(indx=str(i), path=path, name=name))
171
+
172
+
173
+ def compress_linear_tt(module: T.nn.Module, path: str,
174
+ shape: Tuple[Tuple[int], Tuple[int]],
175
+ rank: int) -> T.nn.Module:
176
+ if not isinstance(module, T.nn.Linear):
177
+ return module
178
+
179
+ # TODO(@not-found): We need propper compression config.
180
+ inp_size = np.prod(shape[0])
181
+ out_size = np.prod(shape[1])
182
+ if inp_size == module.in_features and out_size == module.out_features:
183
+ pass
184
+ elif inp_size == module.out_features and out_size == module.in_features:
185
+ shape = (shape[1], shape[0])
186
+ else:
187
+ raise ValueError(
188
+ 'Input and output features does not match to compression shape: '
189
+ f'{shape[0]} vs {module.in_features} and {shape[1]} vs '
190
+ f'{module.out_features}.')
191
+
192
+ logging.info('apply tt compression to layer %s', path)
193
+ return TTCompressedLinear.from_linear(module, shape, rank) # noqa: F821