Add files using upload-large-folder tool
- .gitattributes +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.cpython-311-x86_64-linux-gnu.so +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Debugger/__pycache__/libpython.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/console.cpython-311.pyc +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_tensor_str.py +697 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/modules/fused.py +160 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__init__.py +14 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__pycache__/linear_relu.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/conv_relu.py +175 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/linear_relu.py +177 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/linear.py +25 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__init__.py +14 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/activation.py +465 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__pycache__/functional.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/rnn.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/batchnorm.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/conv.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/embedding_ops.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/linear.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/rnn.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/utils.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/activation.py +302 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/conv.py +945 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/dropout.py +27 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/functional_modules.py +249 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__init__.py +21 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/linear.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/sparse.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/rnn.py +614 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/utils.py +323 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__pycache__/utils.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/dynamic/linear.py +139 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_scheduler/__init__.py +5 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/__pycache__/quantization_utils.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/lightning/__init__.py +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/quantization_utils.py +130 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__pycache__/FPGM_pruner.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/saliency_pruner.py +29 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/scheduler/__init__.py +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/scheduler/__pycache__/base_scheduler.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/scheduler/lambda_scheduler.py +47 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/__pycache__/base_sparsifier.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/__pycache__/weight_norm_sparsifier.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/_equalize.py +182 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__init__.py +23 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/__init__.cpython-311.pyc +0 -0
.gitattributes
CHANGED
@@ -66,3 +66,6 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycach
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/tests/__pycache__/test_fp.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/mpmath/__pycache__/function_docs.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/console.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Debugger/__pycache__/libpython.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text

tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.cpython-311-x86_64-linux-gnu.so
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2c7b91ab0731f5672d976d4408f3525891b8c4e1d4ed4d403f56d1c141c7f94
+size 688080

tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Debugger/__pycache__/libpython.cpython-311.pyc
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e828bf211daa379b740684868a31081921397805bfc7ef4b41a8572d794eaafb
+size 137864

tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/console.cpython-311.pyc
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a7da30d6865deaf94e2814884970e99b253843c23d4aa93b1107a23e61de6c1
+size 123664

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_tensor_str.py
ADDED
@@ -0,0 +1,697 @@
import contextlib
import dataclasses
import math
import textwrap
from typing import Any, Dict, Optional

import torch
from torch import inf


@dataclasses.dataclass
class __PrinterOptions:
    precision: int = 4
    threshold: float = 1000
    edgeitems: int = 3
    linewidth: int = 80
    sci_mode: Optional[bool] = None


PRINT_OPTS = __PrinterOptions()


# We could use **kwargs, but this will give better docs
def set_printoptions(
    precision=None,
    threshold=None,
    edgeitems=None,
    linewidth=None,
    profile=None,
    sci_mode=None,
):
    r"""Set options for printing. Items shamelessly taken from NumPy

    Args:
        precision: Number of digits of precision for floating point output
            (default = 4).
        threshold: Total number of array elements which trigger summarization
            rather than full `repr` (default = 1000).
        edgeitems: Number of array items in summary at beginning and end of
            each dimension (default = 3).
        linewidth: The number of characters per line for the purpose of
            inserting line breaks (default = 80). Thresholded matrices will
            ignore this parameter.
        profile: Sane defaults for pretty printing. Can override with any of
            the above options. (any one of `default`, `short`, `full`)
        sci_mode: Enable (True) or disable (False) scientific notation. If
            None (default) is specified, the value is defined by
            `torch._tensor_str._Formatter`. This value is automatically chosen
            by the framework.

    Example::

        >>> # Limit the precision of elements
        >>> torch.set_printoptions(precision=2)
        >>> torch.tensor([1.12345])
        tensor([1.12])
        >>> # Limit the number of elements shown
        >>> torch.set_printoptions(threshold=5)
        >>> torch.arange(10)
        tensor([0, 1, 2, ..., 7, 8, 9])
        >>> # Restore defaults
        >>> torch.set_printoptions(profile='default')
        >>> torch.tensor([1.12345])
        tensor([1.1235])
        >>> torch.arange(10)
        tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    if profile is not None:
        if profile == "default":
            PRINT_OPTS.precision = 4
            PRINT_OPTS.threshold = 1000
            PRINT_OPTS.edgeitems = 3
            PRINT_OPTS.linewidth = 80
        elif profile == "short":
            PRINT_OPTS.precision = 2
            PRINT_OPTS.threshold = 1000
            PRINT_OPTS.edgeitems = 2
            PRINT_OPTS.linewidth = 80
        elif profile == "full":
            PRINT_OPTS.precision = 4
            PRINT_OPTS.threshold = inf
            PRINT_OPTS.edgeitems = 3
            PRINT_OPTS.linewidth = 80

    if precision is not None:
        PRINT_OPTS.precision = precision
    if threshold is not None:
        PRINT_OPTS.threshold = threshold
    if edgeitems is not None:
        PRINT_OPTS.edgeitems = edgeitems
    if linewidth is not None:
        PRINT_OPTS.linewidth = linewidth
    PRINT_OPTS.sci_mode = sci_mode


def get_printoptions() -> Dict[str, Any]:
    r"""Gets the current options for printing, as a dictionary that
    can be passed as ``**kwargs`` to set_printoptions().
    """
    return dataclasses.asdict(PRINT_OPTS)


@contextlib.contextmanager
def printoptions(**kwargs):
    r"""Context manager that temporarily changes the print options. Accepted
    arguments are same as :func:`set_printoptions`."""
    old_kwargs = get_printoptions()
    set_printoptions(**kwargs)
    try:
        yield
    finally:
        set_printoptions(**old_kwargs)


def tensor_totype(t):
    dtype = torch.float if t.is_mps else torch.double
    return t.to(dtype=dtype)


class _Formatter:
    def __init__(self, tensor):
        self.floating_dtype = tensor.dtype.is_floating_point
        self.int_mode = True
        self.sci_mode = False
        self.max_width = 1

        with torch.no_grad():
            tensor_view = tensor.reshape(-1)

        if not self.floating_dtype:
            for value in tensor_view:
                value_str = f"{value}"
                self.max_width = max(self.max_width, len(value_str))

        else:
            nonzero_finite_vals = torch.masked_select(
                tensor_view, torch.isfinite(tensor_view) & tensor_view.ne(0)
            )

            if nonzero_finite_vals.numel() == 0:
                # no valid number, do nothing
                return

            # Convert to double for easy calculation. HalfTensor overflows with 1e8, and there's no div() on CPU.
            nonzero_finite_abs = tensor_totype(nonzero_finite_vals.abs())
            nonzero_finite_min = tensor_totype(nonzero_finite_abs.min())
            nonzero_finite_max = tensor_totype(nonzero_finite_abs.max())

            for value in nonzero_finite_vals:
                if value != torch.ceil(value):
                    self.int_mode = False
                    break

            if self.int_mode:
                # in int_mode for floats, all numbers are integers, and we append a decimal to nonfinites
                # to indicate that the tensor is of floating type. add 1 to the len to account for this.
                if (
                    nonzero_finite_max / nonzero_finite_min > 1000.0
                    or nonzero_finite_max > 1.0e8
                ):
                    self.sci_mode = True
                    for value in nonzero_finite_vals:
                        value_str = f"{{:.{PRINT_OPTS.precision}e}}".format(value)
                        self.max_width = max(self.max_width, len(value_str))
                else:
                    for value in nonzero_finite_vals:
                        value_str = f"{value:.0f}"
                        self.max_width = max(self.max_width, len(value_str) + 1)
            else:
                # Check if scientific representation should be used.
                if (
                    nonzero_finite_max / nonzero_finite_min > 1000.0
                    or nonzero_finite_max > 1.0e8
                    or nonzero_finite_min < 1.0e-4
                ):
                    self.sci_mode = True
                    for value in nonzero_finite_vals:
                        value_str = f"{{:.{PRINT_OPTS.precision}e}}".format(value)
                        self.max_width = max(self.max_width, len(value_str))
                else:
                    for value in nonzero_finite_vals:
                        value_str = f"{{:.{PRINT_OPTS.precision}f}}".format(value)
                        self.max_width = max(self.max_width, len(value_str))

        if PRINT_OPTS.sci_mode is not None:
            self.sci_mode = PRINT_OPTS.sci_mode

    def width(self):
        return self.max_width

    def format(self, value):
        if self.floating_dtype:
            if self.sci_mode:
                ret = f"{{:{self.max_width}.{PRINT_OPTS.precision}e}}".format(value)
            elif self.int_mode:
                ret = f"{value:.0f}"
                if not (math.isinf(value) or math.isnan(value)):
                    ret += "."
            else:
                ret = f"{{:.{PRINT_OPTS.precision}f}}".format(value)
        else:
            ret = f"{value}"
        return (self.max_width - len(ret)) * " " + ret


def _scalar_str(self, formatter1, formatter2=None):
    if formatter2 is not None:
        real_str = _scalar_str(self.real, formatter1)
        imag_str = (_scalar_str(self.imag, formatter2) + "j").lstrip()
        # handles negative numbers, +0.0, -0.0
        if imag_str[0] == "+" or imag_str[0] == "-":
            return real_str + imag_str
        else:
            return real_str + "+" + imag_str
    else:
        return formatter1.format(self.item())


def _vector_str(self, indent, summarize, formatter1, formatter2=None):
    # length includes spaces and comma between elements
    element_length = formatter1.width() + 2
    if formatter2 is not None:
        # width for imag_formatter + an extra j for complex
        element_length += formatter2.width() + 1

    elements_per_line = max(
        1, int(math.floor((PRINT_OPTS.linewidth - indent) / (element_length)))
    )

    def _val_formatter(val, formatter1=formatter1, formatter2=formatter2):
        if formatter2 is not None:
            real_str = formatter1.format(val.real)
            imag_str = (formatter2.format(val.imag) + "j").lstrip()
            # handles negative numbers, +0.0, -0.0
            if imag_str[0] == "+" or imag_str[0] == "-":
                return real_str + imag_str
            else:
                return real_str + "+" + imag_str
        else:
            return formatter1.format(val)

    if summarize and not PRINT_OPTS.edgeitems:
        # Deal with edge case that negative zero is zero
        data = ["..."]
    elif summarize and self.size(0) > 2 * PRINT_OPTS.edgeitems:
        data = (
            [_val_formatter(val) for val in self[: PRINT_OPTS.edgeitems].tolist()]
            + [" ..."]
            + [_val_formatter(val) for val in self[-PRINT_OPTS.edgeitems :].tolist()]
        )
    else:
        data = [_val_formatter(val) for val in self.tolist()]

    data_lines = [
        data[i : i + elements_per_line] for i in range(0, len(data), elements_per_line)
    ]
    lines = [", ".join(line) for line in data_lines]
    return "[" + ("," + "\n" + " " * (indent + 1)).join(lines) + "]"


# formatter2 is only used for printing complex tensors.
# For complex tensors, formatter1 and formatter2 are the formatters for tensor.real
# and tensor.imag respesectively
def _tensor_str_with_formatter(self, indent, summarize, formatter1, formatter2=None):
    dim = self.dim()

    if dim == 0:
        return _scalar_str(self, formatter1, formatter2)

    if dim == 1:
        return _vector_str(self, indent, summarize, formatter1, formatter2)

    if summarize and self.size(0) > 2 * PRINT_OPTS.edgeitems:
        slices = (
            [
                _tensor_str_with_formatter(
                    self[i], indent + 1, summarize, formatter1, formatter2
                )
                for i in range(0, PRINT_OPTS.edgeitems)
            ]
            + ["..."]
            + [
                _tensor_str_with_formatter(
                    self[i], indent + 1, summarize, formatter1, formatter2
                )
                for i in range(len(self) - PRINT_OPTS.edgeitems, len(self))
            ]
        )
    else:
        slices = [
            _tensor_str_with_formatter(
                self[i], indent + 1, summarize, formatter1, formatter2
            )
            for i in range(0, self.size(0))
        ]

    tensor_str = ("," + "\n" * (dim - 1) + " " * (indent + 1)).join(slices)
    return "[" + tensor_str + "]"


def _tensor_str(self, indent):
    if self.numel() == 0:
        return "[]"

    if self.has_names():
        # There are two main codepaths (possibly more) that tensor printing goes through:
        # - tensor data can fit comfortably on screen
        # - tensor data needs to be summarized
        # Some of the codepaths don't fully support named tensors, so we send in
        # an unnamed tensor to the formatting code as a workaround.
        self = self.rename(None)

    summarize = self.numel() > PRINT_OPTS.threshold

    if self._is_zerotensor():
        self = self.clone()

    # handle the negative bit
    if self.is_neg():
        self = self.resolve_neg()

    if self.dtype in [
        torch.float16,
        torch.bfloat16,
        torch.float8_e5m2,
        torch.float8_e5m2fnuz,
        torch.float8_e4m3fn,
        torch.float8_e4m3fnuz,
    ]:
        self = self.float()

    if self.dtype is torch.complex32:
        self = self.cfloat()

    if self.dtype.is_complex:
        # handle the conjugate bit
        self = self.resolve_conj()
        real_formatter = _Formatter(
            get_summarized_data(self.real) if summarize else self.real
        )
        imag_formatter = _Formatter(
            get_summarized_data(self.imag) if summarize else self.imag
        )
        return _tensor_str_with_formatter(
            self, indent, summarize, real_formatter, imag_formatter
        )
    else:
        formatter = _Formatter(get_summarized_data(self) if summarize else self)
        return _tensor_str_with_formatter(self, indent, summarize, formatter)


def _add_suffixes(tensor_str, suffixes, indent, force_newline):
    tensor_strs = [tensor_str]
    last_line_len = len(tensor_str) - tensor_str.rfind("\n") + 1
    for suffix in suffixes:
        suffix_len = len(suffix)
        if force_newline or last_line_len + suffix_len + 2 > PRINT_OPTS.linewidth:
            tensor_strs.append(",\n" + " " * indent + suffix)
            last_line_len = indent + suffix_len
            force_newline = False
        else:
            tensor_strs.append(", " + suffix)
            last_line_len += suffix_len + 2
    tensor_strs.append(")")
    return "".join(tensor_strs)


def get_summarized_data(self):
    dim = self.dim()
    if dim == 0:
        return self
    if dim == 1:
        if self.size(0) > 2 * PRINT_OPTS.edgeitems:
            return torch.cat(
                (self[: PRINT_OPTS.edgeitems], self[-PRINT_OPTS.edgeitems :])
            )
        else:
            return self
    if not PRINT_OPTS.edgeitems:
        return self.new_empty([0] * self.dim())
    elif self.size(0) > 2 * PRINT_OPTS.edgeitems:
        start = [self[i] for i in range(0, PRINT_OPTS.edgeitems)]
        end = [self[i] for i in range(len(self) - PRINT_OPTS.edgeitems, len(self))]
        return torch.stack([get_summarized_data(x) for x in (start + end)])
    else:
        return torch.stack([get_summarized_data(x) for x in self])


def _str_intern(inp, *, tensor_contents=None):
    if torch._C._functorch.is_functorch_wrapped_tensor(inp):
        return _functorch_wrapper_str_intern(inp, tensor_contents=tensor_contents)
    is_plain_tensor = type(inp) is torch.Tensor or type(inp) is torch.nn.Parameter
    if inp.is_nested:
        prefix = "nested_tensor("
    elif is_plain_tensor:
        prefix = "tensor("
    else:
        prefix = f"{type(inp).__name__}("
    indent = len(prefix)
    suffixes = []
    custom_contents_provided = tensor_contents is not None
    if custom_contents_provided:
        tensor_str = tensor_contents

    # This is used to extract the primal value and thus disable the forward AD
    # within this function.
    # TODO(albanD) This needs to be updated when more than one level is supported
    self, tangent = torch.autograd.forward_ad.unpack_dual(inp)

    # Note [Print tensor device]:
    # A general logic here is we only print device when it doesn't match
    # the device specified in default tensor type.
    # Currently torch.set_default_tensor_type() only supports CPU/CUDA, thus
    # torch._C._get_default_device() only returns either cpu or cuda.
    # In other cases, we don't have a way to set them as default yet,
    # and we should always print out device for them.
    if (
        self.device.type != torch._C._get_default_device()
        or (
            self.device.type == "cuda"
            and torch.cuda.current_device() != self.device.index
        )
        or (self.device.type == "mps")
    ):
        suffixes.append("device='" + str(self.device) + "'")

    # Tensor printing performs tensor operations like slice, indexing, etc to make it in a
    # representable format. These operations on ipu/xla/lazy/mtia tensor results in compilations. Hence,
    # to avoid compilations, copying the tensor to cpu before printing.
    if self.device.type in ["xla", "lazy", "ipu", "mtia"]:
        self = self.to("cpu")

    # TODO: add an API to map real -> complex dtypes
    _default_complex_dtype = (
        torch.cdouble if torch.get_default_dtype() == torch.double else torch.cfloat
    )
    has_default_dtype = self.dtype in (
        torch.get_default_dtype(),
        _default_complex_dtype,
        torch.int64,
        torch.bool,
    )
    if self.is_sparse:
        suffixes.append("size=" + str(tuple(self.shape)))
        from torch._subclasses.fake_tensor import FakeTensor

        is_meta = self.is_meta or isinstance(self, FakeTensor)
        if not is_meta:
            suffixes.append("nnz=" + str(self._nnz()))
        if not has_default_dtype:
            suffixes.append("dtype=" + str(self.dtype))
        if not custom_contents_provided:
            indices_prefix = "indices=tensor("
            indices = self._indices().detach()
            if is_meta:
                indices_str = "..."
            else:
                indices_str = _tensor_str(indices, indent + len(indices_prefix))
            if indices.numel() == 0 or is_meta:
                indices_str += ", size=" + str(tuple(indices.shape))
            values_prefix = "values=tensor("
            values = self._values().detach()
            if is_meta:
                values_str = "..."
            else:
                values_str = _tensor_str(values, indent + len(values_prefix))
            if values.numel() == 0 or is_meta:
                values_str += ", size=" + str(tuple(values.shape))
            tensor_str = (
                indices_prefix
                + indices_str
                + "),\n"
                + " " * indent
                + values_prefix
                + values_str
                + ")"
            )
    elif self.layout in {
        torch.sparse_csr,
        torch.sparse_csc,
        torch.sparse_bsr,
        torch.sparse_bsc,
    }:
        from torch._subclasses.fake_tensor import FakeTensor

        suffixes.append("size=" + str(tuple(self.shape)))
        is_meta = self.is_meta or isinstance(self, FakeTensor)
        if not is_meta:
            suffixes.append("nnz=" + str(self._nnz()))
        if not has_default_dtype:
            suffixes.append("dtype=" + str(self.dtype))
        if not custom_contents_provided:
            compressed_indices_method, plain_indices_method = {
                torch.sparse_csr: (torch.Tensor.crow_indices, torch.Tensor.col_indices),
                torch.sparse_csc: (torch.Tensor.ccol_indices, torch.Tensor.row_indices),
                torch.sparse_bsr: (torch.Tensor.crow_indices, torch.Tensor.col_indices),
                torch.sparse_bsc: (torch.Tensor.ccol_indices, torch.Tensor.row_indices),
            }[self.layout]
            if self.layout in {torch.sparse_csr, torch.sparse_bsr}:
                cdimname, pdimname = "row", "column"
            else:
                cdimname, pdimname = "column", "row"
            compressed_indices_prefix = f"c{cdimname[:3]}_indices=tensor("
            compressed_indices = compressed_indices_method(self).detach()
            if is_meta:
                compressed_indices_str = "..."
            else:
                compressed_indices_str = _tensor_str(
                    compressed_indices, indent + len(compressed_indices_prefix)
                )
            if compressed_indices.numel() == 0 or is_meta:
                compressed_indices_str += ", size=" + str(
                    tuple(compressed_indices.shape)
                )
            plain_indices_prefix = f"{pdimname[:3]}_indices=tensor("
            plain_indices = plain_indices_method(self).detach()
            if is_meta:
                plain_indices_str = "..."
            else:
                plain_indices_str = _tensor_str(
                    plain_indices, indent + len(plain_indices_prefix)
                )
            if plain_indices.numel() == 0 or is_meta:
                plain_indices_str += ", size=" + str(tuple(plain_indices.shape))
            values_prefix = "values=tensor("
            values = self.values().detach()
            if is_meta:
                values_str = "..."
            else:
                values_str = _tensor_str(values, indent + len(values_prefix))
            if values.numel() == 0 or is_meta:
                values_str += ", size=" + str(tuple(values.shape))
            tensor_str = (
                compressed_indices_prefix
                + compressed_indices_str
                + "),\n"
                + " " * indent
                + plain_indices_prefix
                + plain_indices_str
                + "),\n"
                + " " * indent
                + values_prefix
                + values_str
                + ")"
            )
    elif self.is_quantized:
        suffixes.append("size=" + str(tuple(self.shape)))
        if not has_default_dtype:
            suffixes.append("dtype=" + str(self.dtype))
        suffixes.append("quantization_scheme=" + str(self.qscheme()))
        if (
            self.qscheme() == torch.per_tensor_affine
            or self.qscheme() == torch.per_tensor_symmetric
        ):
            suffixes.append("scale=" + str(self.q_scale()))
            suffixes.append("zero_point=" + str(self.q_zero_point()))
        elif (
            self.qscheme() == torch.per_channel_affine
            or self.qscheme() == torch.per_channel_symmetric
            or self.qscheme() == torch.per_channel_affine_float_qparams
        ):
            suffixes.append("scale=" + str(self.q_per_channel_scales()))
            suffixes.append("zero_point=" + str(self.q_per_channel_zero_points()))
            suffixes.append("axis=" + str(self.q_per_channel_axis()))
        if not custom_contents_provided:
            tensor_str = _tensor_str(self.dequantize(), indent)
    elif self.is_nested:
        if not custom_contents_provided:

            def indented_str(s, indent):
                return "\n".join(f"  {line}" for line in s.split("\n"))

            strs = ",\n".join(
                indented_str(str(t), indent + 1)
                for t in torch.ops.aten.unbind.int(self, 0)
            )
            tensor_str = f"[\n{strs}\n]"
    elif torch._is_functional_tensor(self):
        prefix = "_to_functional_tensor("
        tensor_str = repr(torch._from_functional_tensor(self))
    else:
        # Circular import problem, so we import it here
        from torch._subclasses.fake_tensor import FakeTensor

        if self.is_meta or isinstance(self, FakeTensor):
            suffixes.append("size=" + str(tuple(self.shape)))
            if self.dtype != torch.get_default_dtype():
                suffixes.append("dtype=" + str(self.dtype))
            # TODO: This implies that ellipses is valid syntax for allocating
            # a meta tensor or FakeTensor, which it could be, but it isn't right now
            if not custom_contents_provided:
                tensor_str = "..."
        else:
            if self.numel() == 0 and not self.is_sparse:
                # Explicitly print the shape if it is not (0,), to match NumPy behavior
                if self.dim() != 1:
                    suffixes.append("size=" + str(tuple(self.shape)))

                # In an empty tensor, there are no elements to infer if the dtype
                # should be int64, so it must be shown explicitly.
                if self.dtype != torch.get_default_dtype():
                    suffixes.append("dtype=" + str(self.dtype))
                if not custom_contents_provided:
                    tensor_str = "[]"
            else:
                if not PRINT_OPTS.edgeitems:
                    suffixes.append("size=" + str(tuple(self.shape)))

                if not has_default_dtype:
                    suffixes.append("dtype=" + str(self.dtype))

                if not custom_contents_provided:
                    if self.layout != torch.strided:
                        tensor_str = _tensor_str(self.to_dense(), indent)
                    else:
                        tensor_str = _tensor_str(self, indent)

    if self.layout != torch.strided:
        suffixes.append("layout=" + str(self.layout))

    # Use inp here to get the original grad_fn and not the one generated by the forward grad
    # unpacking.
    grad_fn_name = None
    try:
        grad_fn = inp.grad_fn
    except RuntimeError:
        # Accessing the grad_fn calls rebasing logic which would cause an error
        # if that tensor is a view created in no-grad mode modified in-place in
        # no-grad mode. See: https://github.com/pytorch/pytorch/issues/99968
        grad_fn_name = "Invalid"

    if grad_fn_name is None and grad_fn is not None:  # type: ignore[possibly-undefined]
        grad_fn_name = type(grad_fn).__name__
        if grad_fn_name == "CppFunction":
            grad_fn_name = grad_fn.name().rsplit("::", 1)[-1]

    if grad_fn_name is not None:
        suffixes.append(f"grad_fn=<{grad_fn_name}>")
    elif inp.requires_grad:
        suffixes.append("requires_grad=True")

    if self.has_names():
        suffixes.append(f"names={self.names}")

    if tangent is not None:
        suffixes.append(f"tangent={tangent}")

    string_repr = _add_suffixes(
        prefix + tensor_str, suffixes, indent, force_newline=self.is_sparse  # type: ignore[possibly-undefined]
    )

    # Check if this instance is flagged as a parameter and change the repr accordingly.
    # Unfortunately, this function has to be aware of this detail.
    # NB: This is currently skipped for plain tensor parameters to maintain BC. In the future,
    # this should be done for those as well to produce a valid repr.
    if isinstance(self, torch.nn.Parameter) and not is_plain_tensor:
        string_repr = f"Parameter({string_repr})"

    return string_repr


def _functorch_wrapper_str_intern(tensor, *, tensor_contents=None):
    level = torch._C._functorch.maybe_get_level(tensor)
    assert level != -1

    if torch._C._functorch.is_functionaltensor(tensor):
        # Since we're unwrapping the FunctionalTensorWrapper, we need to make sure
        # that it's up to date first
        torch._sync(tensor)

    value = torch._C._functorch.get_unwrapped(tensor)
    value_repr = repr(value)

    indented_value_repr = textwrap.indent(value_repr, " " * 4)
    if torch._C._functorch.is_batchedtensor(tensor):
        bdim = torch._C._functorch.maybe_get_bdim(tensor)
        assert bdim != -1
        return (
            f"BatchedTensor(lvl={level}, bdim={bdim}, value=\n"
            f"{indented_value_repr}\n"
            f")"
        )
    if torch._C._functorch.is_gradtrackingtensor(tensor):
        return (
            f"GradTrackingTensor(lvl={level}, value=\n" f"{indented_value_repr}\n" f")"
        )
    if torch._C._functorch.is_functionaltensor(tensor):
        return f"FunctionalTensor(lvl={level}, value=\\\n{value_repr})"

    raise ValueError("We don't know how to print this, please file us an issue")


def _str(self, *, tensor_contents=None):
    with torch.no_grad(), torch.utils._python_dispatch._disable_current_modes():
        guard = torch._C._DisableFuncTorch()
        return _str_intern(self, tensor_contents=tensor_contents)

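The options defined above are driven through the public torch.set_printoptions API, as the docstring's own doctest shows. A minimal usage sketch (standard PyTorch install assumed; the printed values in the comment are approximate):

import torch

x = torch.arange(12, dtype=torch.float32) / 7

# Two decimal places, and summarize anything longer than 5 elements
# (edgeitems=3 keeps the first and last three entries).
torch.set_printoptions(precision=2, threshold=5)
print(x)  # e.g. tensor([0.00, 0.14, 0.29,  ..., 1.29, 1.43, 1.57])

# Restore the defaults described under profile='default' above.
torch.set_printoptions(profile="default")
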
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/modules/fused.py
ADDED
@@ -0,0 +1,160 @@
import torch
from torch.nn import Conv1d, Conv2d, Conv3d, ReLU, Linear, BatchNorm1d, BatchNorm2d, BatchNorm3d
from torch.nn.utils.parametrize import type_before_parametrizations

__all__ = ['ConvReLU1d', 'ConvReLU2d', 'ConvReLU3d', 'LinearReLU', 'ConvBn1d', 'ConvBn2d',
           'ConvBnReLU1d', 'ConvBnReLU2d', 'ConvBn3d', 'ConvBnReLU3d', 'BNReLU2d', 'BNReLU3d',
           'LinearBn1d', 'LinearLeakyReLU', 'LinearTanh', 'ConvAdd2d', 'ConvAddReLU2d']

# Used for identifying intrinsic modules used in quantization
class _FusedModule(torch.nn.Sequential):
    pass

class ConvReLU1d(_FusedModule):
    r"""This is a sequential container which calls the Conv1d and ReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, relu):
        assert type_before_parametrizations(conv) == Conv1d and type_before_parametrizations(relu) == ReLU, \
            f'Incorrect types for input modules{type_before_parametrizations(conv)}{type_before_parametrizations(relu)}'
        super().__init__(conv, relu)

class ConvReLU2d(_FusedModule):
    r"""This is a sequential container which calls the Conv2d and ReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, relu):
        assert type_before_parametrizations(conv) == Conv2d and type_before_parametrizations(relu) == ReLU, \
            f'Incorrect types for input modules{type_before_parametrizations(conv)}{type_before_parametrizations(relu)}'
        super().__init__(conv, relu)

class ConvReLU3d(_FusedModule):
    r"""This is a sequential container which calls the Conv3d and ReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, relu):
        assert type_before_parametrizations(conv) == Conv3d and type_before_parametrizations(relu) == ReLU, \
            f'Incorrect types for input modules{type_before_parametrizations(conv)}{type_before_parametrizations(relu)}'
        super().__init__(conv, relu)

class LinearReLU(_FusedModule):
    r"""This is a sequential container which calls the Linear and ReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, linear, relu):
        assert type_before_parametrizations(linear) == Linear and type_before_parametrizations(relu) == ReLU, \
            'Incorrect types for input modules{}{}'.format(
                type_before_parametrizations(linear), type_before_parametrizations(relu))
        super().__init__(linear, relu)

class ConvBn1d(_FusedModule):
    r"""This is a sequential container which calls the Conv 1d and Batch Norm 1d modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, bn):
        assert type_before_parametrizations(conv) == Conv1d and type_before_parametrizations(bn) == BatchNorm1d, \
            f'Incorrect types for input modules{type_before_parametrizations(conv)}{type_before_parametrizations(bn)}'
        super().__init__(conv, bn)

class ConvBn2d(_FusedModule):
    r"""This is a sequential container which calls the Conv 2d and Batch Norm 2d modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, bn):
        assert type_before_parametrizations(conv) == Conv2d and type_before_parametrizations(bn) == BatchNorm2d, \
            f'Incorrect types for input modules{type_before_parametrizations(conv)}{type_before_parametrizations(bn)}'
        super().__init__(conv, bn)

class ConvBnReLU1d(_FusedModule):
    r"""This is a sequential container which calls the Conv 1d, Batch Norm 1d, and ReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, bn, relu):
        assert type_before_parametrizations(conv) == Conv1d and type_before_parametrizations(bn) == BatchNorm1d and \
            type_before_parametrizations(relu) == ReLU, 'Incorrect types for input modules{}{}{}' \
            .format(type_before_parametrizations(conv), type_before_parametrizations(bn), type_before_parametrizations(relu))
        super().__init__(conv, bn, relu)

class ConvBnReLU2d(_FusedModule):
    r"""This is a sequential container which calls the Conv 2d, Batch Norm 2d, and ReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, bn, relu):
        assert type_before_parametrizations(conv) == Conv2d and type_before_parametrizations(bn) == BatchNorm2d and \
            type_before_parametrizations(relu) == ReLU, 'Incorrect types for input modules{}{}{}' \
            .format(type_before_parametrizations(conv), type_before_parametrizations(bn), type_before_parametrizations(relu))
        super().__init__(conv, bn, relu)

class ConvBn3d(_FusedModule):
    r"""This is a sequential container which calls the Conv 3d and Batch Norm 3d modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, bn):
        assert type_before_parametrizations(conv) == Conv3d and type_before_parametrizations(bn) == BatchNorm3d, \
            f'Incorrect types for input modules{type_before_parametrizations(conv)}{type_before_parametrizations(bn)}'
        super().__init__(conv, bn)

class ConvBnReLU3d(_FusedModule):
    r"""This is a sequential container which calls the Conv 3d, Batch Norm 3d, and ReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, bn, relu):
        assert type_before_parametrizations(conv) == Conv3d and type_before_parametrizations(bn) == BatchNorm3d and \
            type_before_parametrizations(relu) == ReLU, 'Incorrect types for input modules{}{}{}' \
            .format(type_before_parametrizations(conv), type_before_parametrizations(bn), type_before_parametrizations(relu))
        super().__init__(conv, bn, relu)


class BNReLU2d(_FusedModule):
    r"""This is a sequential container which calls the BatchNorm 2d and ReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, batch_norm, relu):
        assert type_before_parametrizations(batch_norm) == BatchNorm2d and type_before_parametrizations(relu) == ReLU, \
            'Incorrect types for input modules{}{}'.format(
                type_before_parametrizations(batch_norm), type_before_parametrizations(relu))
        super().__init__(batch_norm, relu)

class BNReLU3d(_FusedModule):
    r"""This is a sequential container which calls the BatchNorm 3d and ReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, batch_norm, relu):
        assert type_before_parametrizations(batch_norm) == BatchNorm3d and type_before_parametrizations(relu) == ReLU, \
            'Incorrect types for input modules{}{}'.format(
                type_before_parametrizations(batch_norm), type_before_parametrizations(relu))
        super().__init__(batch_norm, relu)


class LinearBn1d(_FusedModule):
    r"""This is a sequential container which calls the Linear and BatchNorm1d modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, linear, bn):
        assert type_before_parametrizations(linear) == Linear and type_before_parametrizations(bn) == BatchNorm1d, \
            f'Incorrect types for input modules{type_before_parametrizations(linear)}{type_before_parametrizations(bn)}'
        super().__init__(linear, bn)

class LinearLeakyReLU(_FusedModule):
    r"""This is a sequential container which calls the Linear and LeakyReLU modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, linear, leaky_relu):
        assert type(linear) == Linear and type(leaky_relu) == torch.nn.LeakyReLU, \
            f'Incorrect types for input modules{type(linear)}{type(leaky_relu)}'
        super().__init__(linear, leaky_relu)

class LinearTanh(_FusedModule):
    r"""This is a sequential container which calls the Linear and Tanh modules.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, linear, tanh):
        assert type(linear) == Linear and type(tanh) == torch.nn.Tanh, \
            f'Incorrect types for input modules{type(linear)}{type(tanh)}'
        super().__init__(linear, tanh)

class ConvAdd2d(_FusedModule):
    r"""This is a sequential container which calls the Conv2d modules with extra Add.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, add):
        super().__init__(conv)
        self.add = add

    def forward(self, x1, x2):
        return self.add(self[0](x1), x2)

class ConvAddReLU2d(_FusedModule):
    r"""This is a sequential container which calls the Conv2d, add, Relu.
    During quantization this will be replaced with the corresponding fused module."""
    def __init__(self, conv, add, relu):
        super().__init__(conv)
        self.add = add
        self.relu = relu

    def forward(self, x1, x2):
        return self.relu(self.add(self[0](x1), x2))

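These containers are plain torch.nn.Sequential wrappers, so they can be constructed and run eagerly before any quantization pass rewrites them. A minimal sketch using the LinearReLU container defined above (the layer sizes are illustrative):

import torch
from torch import nn
from torch.ao.nn.intrinsic.modules.fused import LinearReLU

# The constructor asserts that the children are exactly nn.Linear and nn.ReLU.
fused = LinearReLU(nn.Linear(16, 8), nn.ReLU())

x = torch.randn(4, 16)
y = fused(x)      # runs Linear then ReLU, like any nn.Sequential
print(y.shape)    # torch.Size([4, 8])
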
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/__init__.py
ADDED
@@ -0,0 +1,14 @@
from .modules import *  # noqa: F403

__all__ = [
    'BNReLU2d',
    'BNReLU3d',
    'ConvReLU1d',
    'ConvReLU2d',
    'ConvReLU3d',
    'LinearReLU',
    'LinearLeakyReLU',
    'LinearTanh',
    'ConvAdd2d',
    'ConvAddReLU2d',
]

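Since this __init__.py simply re-exports everything from .modules, the fused quantized wrappers can be referenced through the package namespace. A small sanity-check sketch (assuming the surrounding PyTorch installation is importable):

import torch.ao.nn.intrinsic.quantized as nniq

# Names listed in __all__ above should resolve directly on the package.
print(nniq.ConvReLU2d)
print(nniq.LinearReLU)
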
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__pycache__/linear_relu.cpython-311.pyc
ADDED
Binary file (3.56 kB)

tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/conv_relu.py
ADDED
@@ -0,0 +1,175 @@

import torch
import torch.ao.nn.intrinsic
import torch.ao.nn.intrinsic.qat
import torch.nn.functional as F
import torch.ao.nn.quantized as nnq

from torch.nn.utils import fuse_conv_bn_weights

__all__ = [
    "ConvReLU1d",
    "ConvReLU2d",
    "ConvReLU3d",
]

_reverse_repeat_padding = nnq.modules.conv._reverse_repeat_padding

# TODO: factor out the common parts to ConvNd
class ConvReLU1d(nnq.Conv1d):
    r"""
    A ConvReLU1d module is a fused module of Conv1d and ReLU

    We adopt the same interface as :class:`torch.ao.nn.quantized.Conv1d`.

    Attributes:
        Same as torch.ao.nn.quantized.Conv1d

    """
    _FLOAT_MODULE = torch.ao.nn.intrinsic.ConvReLU1d  # type: ignore[assignment]

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros', device=None, dtype=None):
        super().__init__(
            in_channels, out_channels, kernel_size, stride=stride,
            padding=padding, dilation=dilation, groups=groups, bias=bias,
            padding_mode=padding_mode, device=device, dtype=dtype)

    def forward(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 3:
            raise ValueError("Input shape must be `(N, C, L)`!")
        if self.padding_mode != 'zeros':
            # Padding in Conv1d is stored as (p, p), need to get (p,)
            _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding[:1])
            input = F.pad(input, _reversed_padding_repeated_twice,
                          mode=self.padding_mode)
        return torch.ops.quantized.conv1d_relu(
            input, self._packed_params, self.scale, self.zero_point)

    def _get_name(self):
        return 'QuantizedConvReLU1d'

    @classmethod
    def from_float(cls, mod):
        if type(mod) == torch.ao.nn.intrinsic.qat.ConvBnReLU1d:
            assert mod.bn.running_var is not None and mod.bn.running_mean is not None
            mod.weight, mod.bias = fuse_conv_bn_weights(
                mod.weight, mod.bias, mod.bn.running_mean, mod.bn.running_var,
                mod.bn.eps, mod.bn.weight, mod.bn.bias)
        return super().from_float(mod)

    @classmethod
    def from_reference(cls, ref_qconv, output_scale, output_zero_point):
        assert type(ref_qconv) != torch.ao.nn.intrinsic.ConvBnReLU1d, \
            "BatchNorm1d should be fused into Conv1d before converting to reference module"
        return super().from_reference(ref_qconv[0], output_scale, output_zero_point)

class ConvReLU2d(nnq.Conv2d):
    r"""
    A ConvReLU2d module is a fused module of Conv2d and ReLU

    We adopt the same interface as :class:`torch.ao.nn.quantized.Conv2d`.

    Attributes:
        Same as torch.ao.nn.quantized.Conv2d

    """
    _FLOAT_MODULE = torch.ao.nn.intrinsic.ConvReLU2d  # type: ignore[assignment]

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros', device=None, dtype=None):
        super().__init__(
            in_channels, out_channels, kernel_size, stride=stride,
            padding=padding, dilation=dilation, groups=groups, bias=bias,
            padding_mode=padding_mode, device=device, dtype=dtype)

    def forward(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 4:
            raise ValueError("Input shape must be `(N, C, H, W)`!")
        if self.padding_mode != 'zeros':
            _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding)
            input = F.pad(input, _reversed_padding_repeated_twice,
                          mode=self.padding_mode)
        return torch.ops.quantized.conv2d_relu(
+
input, self._packed_params, self.scale, self.zero_point)
|
| 101 |
+
|
| 102 |
+
def _get_name(self):
|
| 103 |
+
return 'QuantizedConvReLU2d'
|
| 104 |
+
|
| 105 |
+
@classmethod
|
| 106 |
+
def from_float(cls, mod):
|
| 107 |
+
if type(mod) == torch.ao.nn.intrinsic.qat.ConvBnReLU2d:
|
| 108 |
+
assert mod.bn.running_var is not None and mod.bn.running_mean is not None
|
| 109 |
+
mod.weight, mod.bias = fuse_conv_bn_weights(
|
| 110 |
+
mod.weight, mod.bias, mod.bn.running_mean, mod.bn.running_var,
|
| 111 |
+
mod.bn.eps, mod.bn.weight, mod.bn.bias)
|
| 112 |
+
return super().from_float(mod)
|
| 113 |
+
|
| 114 |
+
@classmethod
|
| 115 |
+
def from_reference(cls, ref_qconv, output_scale, output_zero_point):
|
| 116 |
+
assert type(ref_qconv) != torch.ao.nn.intrinsic.ConvBnReLU2d, \
|
| 117 |
+
"BatchNorm2d should be fused into Conv2d before converting to reference module"
|
| 118 |
+
return super().from_reference(ref_qconv[0], output_scale, output_zero_point)
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
class ConvReLU3d(nnq.Conv3d):
|
| 122 |
+
r"""
|
| 123 |
+
A ConvReLU3d module is a fused module of Conv3d and ReLU
|
| 124 |
+
|
| 125 |
+
We adopt the same interface as :class:`torch.ao.nn.quantized.Conv3d`.
|
| 126 |
+
|
| 127 |
+
Attributes: Same as torch.ao.nn.quantized.Conv3d
|
| 128 |
+
|
| 129 |
+
"""
|
| 130 |
+
_FLOAT_MODULE = torch.ao.nn.intrinsic.ConvReLU3d # type: ignore[assignment]
|
| 131 |
+
|
| 132 |
+
def __init__(self, in_channels, out_channels, kernel_size, stride=1,
|
| 133 |
+
padding=0, dilation=1, groups=1, bias=True,
|
| 134 |
+
padding_mode='zeros', device=None, dtype=None):
|
| 135 |
+
assert padding_mode != 'reflect', "Conv3d does not support reflection padding"
|
| 136 |
+
super().__init__(
|
| 137 |
+
in_channels, out_channels, kernel_size, stride=stride,
|
| 138 |
+
padding=padding, dilation=dilation, groups=groups, bias=bias,
|
| 139 |
+
padding_mode=padding_mode, device=device, dtype=dtype)
|
| 140 |
+
|
| 141 |
+
def forward(self, input):
|
| 142 |
+
# Temporarily using len(shape) instead of ndim due to JIT issue
|
| 143 |
+
# https://github.com/pytorch/pytorch/issues/23890
|
| 144 |
+
if len(input.shape) != 5:
|
| 145 |
+
raise ValueError("Input shape must be `(N, C, D, H, W)`!")
|
| 146 |
+
if self.padding_mode != 'zeros':
|
| 147 |
+
_reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding)
|
| 148 |
+
input = F.pad(input, _reversed_padding_repeated_twice,
|
| 149 |
+
mode=self.padding_mode)
|
| 150 |
+
return torch.ops.quantized.conv3d_relu(
|
| 151 |
+
input, self._packed_params, self.scale, self.zero_point)
|
| 152 |
+
|
| 153 |
+
def _get_name(self):
|
| 154 |
+
return 'QuantizedConvReLU3d'
|
| 155 |
+
|
| 156 |
+
@classmethod
|
| 157 |
+
def from_float(cls, mod):
|
| 158 |
+
if type(mod) == torch.ao.nn.intrinsic.qat.ConvBnReLU3d:
|
| 159 |
+
assert mod.bn.running_var is not None and mod.bn.running_mean is not None
|
| 160 |
+
mod.weight, mod.bias = fuse_conv_bn_weights(
|
| 161 |
+
mod.weight,
|
| 162 |
+
mod.bias,
|
| 163 |
+
mod.bn.running_mean,
|
| 164 |
+
mod.bn.running_var,
|
| 165 |
+
mod.bn.eps,
|
| 166 |
+
mod.bn.weight,
|
| 167 |
+
mod.bn.bias,
|
| 168 |
+
)
|
| 169 |
+
return super().from_float(mod)
|
| 170 |
+
|
| 171 |
+
@classmethod
|
| 172 |
+
def from_reference(cls, ref_qconv, output_scale, output_zero_point):
|
| 173 |
+
assert type(ref_qconv) != torch.ao.nn.intrinsic.ConvBnReLU3d, \
|
| 174 |
+
"BatchNorm3d should be fused into Conv3d before converting to reference module"
|
| 175 |
+
return super().from_reference(ref_qconv[0], output_scale, output_zero_point)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/modules/linear_relu.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.ao.nn.quantized as nnq
|
| 3 |
+
import torch.ao.nn.intrinsic as nni
|
| 4 |
+
from torch.ao.nn.quantized.modules.utils import _quantize_weight
|
| 5 |
+
|
| 6 |
+
__all__ = [
|
| 7 |
+
"LinearReLU",
|
| 8 |
+
"LinearLeakyReLU",
|
| 9 |
+
"LinearTanh",
|
| 10 |
+
]
|
| 11 |
+
|
| 12 |
+
class LinearReLU(nnq.Linear):
|
| 13 |
+
r"""
|
| 14 |
+
A LinearReLU module fused from Linear and ReLU modules
|
| 15 |
+
|
| 16 |
+
We adopt the same interface as :class:`torch.ao.nn.quantized.Linear`.
|
| 17 |
+
|
| 18 |
+
Attributes:
|
| 19 |
+
Same as torch.ao.nn.quantized.Linear
|
| 20 |
+
|
| 21 |
+
Examples::
|
| 22 |
+
|
| 23 |
+
>>> # xdoctest: +SKIP
|
| 24 |
+
>>> m = nn.intrinsic.LinearReLU(20, 30)
|
| 25 |
+
>>> input = torch.randn(128, 20)
|
| 26 |
+
>>> output = m(input)
|
| 27 |
+
>>> print(output.size())
|
| 28 |
+
torch.Size([128, 30])
|
| 29 |
+
"""
|
| 30 |
+
_FLOAT_MODULE = nni.LinearReLU # type: ignore[assignment]
|
| 31 |
+
|
| 32 |
+
def __init__(self, in_features, out_features, bias=True, dtype=torch.qint8):
|
| 33 |
+
super().__init__(in_features, out_features, bias, dtype)
|
| 34 |
+
|
| 35 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 36 |
+
return torch.ops.quantized.linear_relu(
|
| 37 |
+
x, self._packed_params._packed_params, self.scale, self.zero_point)
|
| 38 |
+
|
| 39 |
+
def _get_name(self):
|
| 40 |
+
return 'QuantizedLinearReLU'
|
| 41 |
+
|
| 42 |
+
@classmethod
|
| 43 |
+
def from_float(cls, mod):
|
| 44 |
+
return super().from_float(mod)
|
| 45 |
+
|
| 46 |
+
@classmethod
|
| 47 |
+
def from_reference(cls, ref_linear_relu, output_scale, output_zero_point):
|
| 48 |
+
return super().from_reference(ref_linear_relu[0], output_scale, output_zero_point)
|
| 49 |
+
|
| 50 |
+
class LinearLeakyReLU(nnq.Linear):
|
| 51 |
+
r"""
|
| 52 |
+
For onednn backend only
|
| 53 |
+
A LinearLeakyReLU module fused from Linear and LeakyReLU modules
|
| 54 |
+
We adopt the same interface as :class:`torch.ao.nn.quantized.Linear`.
|
| 55 |
+
Attributes:
|
| 56 |
+
Same as torch.ao.nn.quantized.Linear
|
| 57 |
+
+ negative_slope
|
| 58 |
+
Examples::
|
| 59 |
+
>>> # xdoctest: +SKIP
|
| 60 |
+
>>> m = nn.intrinsic.LinearLeakyReLU(20, 30, 0.01)
|
| 61 |
+
>>> input = torch.randn(128, 20)
|
| 62 |
+
>>> output = m(input)
|
| 63 |
+
>>> print(output.size())
|
| 64 |
+
torch.Size([128, 30])
|
| 65 |
+
"""
|
| 66 |
+
_FLOAT_MODULE = nni.LinearLeakyReLU # type: ignore[assignment]
|
| 67 |
+
|
| 68 |
+
def __init__(self, in_features, out_features, negative_slope, bias=True, dtype=torch.qint8):
|
| 69 |
+
super().__init__(in_features, out_features, bias, dtype)
|
| 70 |
+
self.negative_slope = negative_slope
|
| 71 |
+
|
| 72 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 73 |
+
return torch.ops.quantized.linear_leaky_relu(
|
| 74 |
+
x, self._packed_params._packed_params, self.scale, self.zero_point, self.negative_slope)
|
| 75 |
+
|
| 76 |
+
def _get_name(self):
|
| 77 |
+
return 'QuantizedLinearLeakyReLU'
|
| 78 |
+
|
| 79 |
+
@classmethod
|
| 80 |
+
def from_float(cls, mod):
|
| 81 |
+
assert type(mod) == nni.LinearLeakyReLU, 'Input float module should be LinearLeakyReLU'
|
| 82 |
+
assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
|
| 83 |
+
activation_post_process = mod.activation_post_process
|
| 84 |
+
leaky_relu = mod[1]
|
| 85 |
+
mod = mod[0]
|
| 86 |
+
weight_post_process = mod.qconfig.weight()
|
| 87 |
+
weight_post_process(mod.weight)
|
| 88 |
+
dtype = weight_post_process.dtype
|
| 89 |
+
act_scale, act_zp = activation_post_process.calculate_qparams() # type: ignore[union-attr,operator]
|
| 90 |
+
assert dtype == torch.qint8, 'Weight observer must have dtype torch.qint8'
|
| 91 |
+
qweight = _quantize_weight(mod.weight.float(), weight_post_process)
|
| 92 |
+
qlinear_leaky_relu = cls(
|
| 93 |
+
mod.in_features,
|
| 94 |
+
mod.out_features,
|
| 95 |
+
leaky_relu.negative_slope,
|
| 96 |
+
dtype=dtype)
|
| 97 |
+
qlinear_leaky_relu.set_weight_bias(qweight, mod.bias)
|
| 98 |
+
qlinear_leaky_relu.scale = float(act_scale)
|
| 99 |
+
qlinear_leaky_relu.zero_point = int(act_zp)
|
| 100 |
+
return qlinear_leaky_relu
|
| 101 |
+
|
| 102 |
+
@classmethod
|
| 103 |
+
def from_reference(cls, ref_mod, output_scale, output_zero_point):
|
| 104 |
+
linear = ref_mod[0]
|
| 105 |
+
leaky_relu = ref_mod[1]
|
| 106 |
+
qlinear_leaky_relu = cls(
|
| 107 |
+
linear.in_features,
|
| 108 |
+
linear.out_features,
|
| 109 |
+
leaky_relu.negative_slope)
|
| 110 |
+
qweight = linear.get_quantized_weight()
|
| 111 |
+
qlinear_leaky_relu.set_weight_bias(qweight, linear.bias)
|
| 112 |
+
qlinear_leaky_relu.scale = float(output_scale)
|
| 113 |
+
qlinear_leaky_relu.zero_point = int(output_zero_point)
|
| 114 |
+
return qlinear_leaky_relu
|
| 115 |
+
|
| 116 |
+
class LinearTanh(nnq.Linear):
|
| 117 |
+
r"""
|
| 118 |
+
A LinearTanh module fused from Linear and Tanh modules
|
| 119 |
+
|
| 120 |
+
We adopt the same interface as :class:`torch.ao.nn.quantized.Linear`.
|
| 121 |
+
|
| 122 |
+
Attributes:
|
| 123 |
+
Same as torch.ao.nn.quantized.Linear
|
| 124 |
+
|
| 125 |
+
Examples::
|
| 126 |
+
|
| 127 |
+
>>> # xdoctest: +SKIP
|
| 128 |
+
>>> m = nn.intrinsic.LinearTanh(20, 30)
|
| 129 |
+
>>> input = torch.randn(128, 20)
|
| 130 |
+
>>> output = m(input)
|
| 131 |
+
>>> print(output.size())
|
| 132 |
+
torch.Size([128, 30])
|
| 133 |
+
"""
|
| 134 |
+
_FLOAT_MODULE = nni.LinearTanh # type: ignore[assignment]
|
| 135 |
+
|
| 136 |
+
def __init__(self, in_features, out_features, bias=True, dtype=torch.qint8):
|
| 137 |
+
super().__init__(in_features, out_features, bias, dtype)
|
| 138 |
+
|
| 139 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 140 |
+
return torch.ops.quantized.linear_tanh(
|
| 141 |
+
x, self._packed_params._packed_params, self.scale, self.zero_point)
|
| 142 |
+
|
| 143 |
+
def _get_name(self):
|
| 144 |
+
return 'QuantizedLinearTanh'
|
| 145 |
+
|
| 146 |
+
@classmethod
|
| 147 |
+
def from_float(cls, mod):
|
| 148 |
+
assert type(mod) == nni.LinearTanh, 'Input float module should be LinearTanh'
|
| 149 |
+
assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
|
| 150 |
+
activation_post_process = mod.activation_post_process
|
| 151 |
+
mod = mod[0]
|
| 152 |
+
weight_post_process = mod.qconfig.weight()
|
| 153 |
+
weight_post_process(mod.weight)
|
| 154 |
+
dtype = weight_post_process.dtype
|
| 155 |
+
act_scale, act_zp = activation_post_process.calculate_qparams() # type: ignore[union-attr,operator]
|
| 156 |
+
assert dtype == torch.qint8, 'Weight observer must have dtype torch.qint8'
|
| 157 |
+
qweight = _quantize_weight(mod.weight.float(), weight_post_process)
|
| 158 |
+
qlinear_tanh = cls(
|
| 159 |
+
mod.in_features,
|
| 160 |
+
mod.out_features,
|
| 161 |
+
dtype=dtype)
|
| 162 |
+
qlinear_tanh.set_weight_bias(qweight, mod.bias)
|
| 163 |
+
qlinear_tanh.scale = float(act_scale)
|
| 164 |
+
qlinear_tanh.zero_point = int(act_zp)
|
| 165 |
+
return qlinear_tanh
|
| 166 |
+
|
| 167 |
+
@classmethod
|
| 168 |
+
def from_reference(cls, ref_mod, output_scale, output_zero_point):
|
| 169 |
+
linear = ref_mod[0]
|
| 170 |
+
qlinear_tanh = cls(
|
| 171 |
+
linear.in_features,
|
| 172 |
+
linear.out_features)
|
| 173 |
+
qweight = linear.get_quantized_weight()
|
| 174 |
+
qlinear_tanh.set_weight_bias(qweight, linear.bias)
|
| 175 |
+
qlinear_tanh.scale = float(output_scale)
|
| 176 |
+
qlinear_tanh.zero_point = int(output_zero_point)
|
| 177 |
+
return qlinear_tanh
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/dynamic/modules/linear.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
|
| 3 |
+
__all__ = ["Linear"]
|
| 4 |
+
|
| 5 |
+
class Linear(torch.ao.nn.qat.Linear):
|
| 6 |
+
r"""
|
| 7 |
+
A linear module attached with FakeQuantize modules for weight,
|
| 8 |
+
used for dynamic quantization aware training.
|
| 9 |
+
|
| 10 |
+
We adopt the same interface as `torch.nn.Linear`, please see
|
| 11 |
+
https://pytorch.org/docs/stable/nn.html#torch.nn.Linear
|
| 12 |
+
for documentation.
|
| 13 |
+
|
| 14 |
+
Similar to `torch.nn.Linear`, with FakeQuantize modules initialized to
|
| 15 |
+
default.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
def __init__(self, in_features, out_features, bias=True,
|
| 19 |
+
qconfig=None, device=None, dtype=None) -> None:
|
| 20 |
+
super().__init__(in_features, out_features, bias, qconfig, device, dtype)
|
| 21 |
+
if not torch.ao.quantization.qconfig._activation_is_memoryless(qconfig):
|
| 22 |
+
raise ValueError(
|
| 23 |
+
"Dynamic QAT requires a memoryless observer." +
|
| 24 |
+
"This means a MovingAverage observer with averaging constant equal to 1"
|
| 25 |
+
)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/modules/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .linear import Linear
|
| 2 |
+
from .conv import Conv1d
|
| 3 |
+
from .conv import Conv2d
|
| 4 |
+
from .conv import Conv3d
|
| 5 |
+
from .embedding_ops import EmbeddingBag, Embedding
|
| 6 |
+
|
| 7 |
+
__all__ = [
|
| 8 |
+
"Linear",
|
| 9 |
+
"Conv1d",
|
| 10 |
+
"Conv2d",
|
| 11 |
+
"Conv3d",
|
| 12 |
+
"Embedding",
|
| 13 |
+
"EmbeddingBag",
|
| 14 |
+
]
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (256 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/activation.py
ADDED
|
@@ -0,0 +1,465 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.jit # this is needed to avoid a circular import
|
| 3 |
+
from torch import nn
|
| 4 |
+
import torch.nn.functional as nnF
|
| 5 |
+
|
| 6 |
+
from torch import Tensor
|
| 7 |
+
from typing import Optional, Tuple
|
| 8 |
+
|
| 9 |
+
import warnings
|
| 10 |
+
|
| 11 |
+
__all__ = [
|
| 12 |
+
"MultiheadAttention"
|
| 13 |
+
]
|
| 14 |
+
|
| 15 |
+
class MultiheadAttention(nn.MultiheadAttention):
|
| 16 |
+
_FLOAT_MODULE = nn.MultiheadAttention
|
| 17 |
+
|
| 18 |
+
r"""Quantizable implementation of the MultiheadAttention.
|
| 19 |
+
|
| 20 |
+
Note::
|
| 21 |
+
Please, refer to :class:`~torch.nn.MultiheadAttention` for more
|
| 22 |
+
information
|
| 23 |
+
|
| 24 |
+
Allows the model to jointly attend to information from different
|
| 25 |
+
representation subspaces.
|
| 26 |
+
See reference: Attention Is All You Need
|
| 27 |
+
|
| 28 |
+
The original MHA module is not quantizable.
|
| 29 |
+
This reimplements it by explicitly instantiating the linear layers.
|
| 30 |
+
|
| 31 |
+
.. math::
|
| 32 |
+
\text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O
|
| 33 |
+
\text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V)
|
| 34 |
+
|
| 35 |
+
Args:
|
| 36 |
+
embed_dim: total dimension of the model.
|
| 37 |
+
num_heads: parallel attention heads.
|
| 38 |
+
dropout: a Dropout layer on attn_output_weights. Default: 0.0.
|
| 39 |
+
bias: add bias as module parameter. Default: True.
|
| 40 |
+
add_bias_kv: add bias to the key and value sequences at dim=0.
|
| 41 |
+
add_zero_attn: add a new batch of zeros to the key and
|
| 42 |
+
value sequences at dim=1.
|
| 43 |
+
kdim: total number of features in key. Default: None.
|
| 44 |
+
vdim: total number of features in value. Default: None.
|
| 45 |
+
batch_first: If ``True``, then the input and output tensors are provided
|
| 46 |
+
as (batch, seq, feature). Default: ``False`` (seq, batch, feature).
|
| 47 |
+
|
| 48 |
+
Note that if :attr:`kdim` and :attr:`vdim` are None, they will be set
|
| 49 |
+
to :attr:`embed_dim` such that query, key, and value have the same
|
| 50 |
+
number of features.
|
| 51 |
+
|
| 52 |
+
Examples::
|
| 53 |
+
|
| 54 |
+
>>> import torch.ao.nn.quantizable as nnqa
|
| 55 |
+
>>> multihead_attn = nnqa.MultiheadAttention(embed_dim, num_heads)
|
| 56 |
+
>>> attn_output, attn_output_weights = multihead_attn(query, key, value)
|
| 57 |
+
|
| 58 |
+
Note::
|
| 59 |
+
Please, follow the quantization flow to convert the quantizable MHA.
|
| 60 |
+
"""
|
| 61 |
+
__constants__ = ['batch_first']
|
| 62 |
+
|
| 63 |
+
def __init__(self, embed_dim: int, num_heads: int,
|
| 64 |
+
dropout: float = 0., bias: bool = True,
|
| 65 |
+
add_bias_kv: bool = False, add_zero_attn: bool = False,
|
| 66 |
+
kdim: Optional[int] = None, vdim: Optional[int] = None, batch_first: bool = False,
|
| 67 |
+
device=None, dtype=None) -> None:
|
| 68 |
+
factory_kwargs = {'device': device, 'dtype': dtype}
|
| 69 |
+
super().__init__(embed_dim, num_heads, dropout,
|
| 70 |
+
bias, add_bias_kv,
|
| 71 |
+
add_zero_attn, kdim, vdim, batch_first,
|
| 72 |
+
**factory_kwargs)
|
| 73 |
+
self.linear_Q = nn.Linear(self.embed_dim, self.embed_dim, bias=bias, **factory_kwargs)
|
| 74 |
+
self.linear_K = nn.Linear(self.kdim, self.embed_dim, bias=bias, **factory_kwargs)
|
| 75 |
+
self.linear_V = nn.Linear(self.vdim, self.embed_dim, bias=bias, **factory_kwargs)
|
| 76 |
+
# for the type: ignore, see https://github.com/pytorch/pytorch/issues/58969
|
| 77 |
+
self.out_proj = nn.Linear(self.embed_dim, self.embed_dim, bias=bias, **factory_kwargs) # type: ignore[assignment]
|
| 78 |
+
|
| 79 |
+
# Functionals
|
| 80 |
+
self.q_scaling_product = torch.ao.nn.quantized.FloatFunctional()
|
| 81 |
+
# note: importing torch.ao.nn.quantized at top creates a circular import
|
| 82 |
+
|
| 83 |
+
# Quant/Dequant
|
| 84 |
+
self.quant_attn_output = torch.ao.quantization.QuantStub()
|
| 85 |
+
self.quant_attn_output_weights = torch.ao.quantization.QuantStub()
|
| 86 |
+
self.dequant_q = torch.ao.quantization.DeQuantStub()
|
| 87 |
+
self.dequant_k = torch.ao.quantization.DeQuantStub()
|
| 88 |
+
self.dequant_v = torch.ao.quantization.DeQuantStub()
|
| 89 |
+
|
| 90 |
+
def _get_name(self):
|
| 91 |
+
return 'QuantizableMultiheadAttention'
|
| 92 |
+
|
| 93 |
+
@classmethod
|
| 94 |
+
def from_float(cls, other):
|
| 95 |
+
assert type(other) == cls._FLOAT_MODULE
|
| 96 |
+
assert hasattr(other, 'qconfig'), "The float module must have 'qconfig'"
|
| 97 |
+
# Setting the dropout to 0.0!
|
| 98 |
+
observed = cls(other.embed_dim, other.num_heads, other.dropout,
|
| 99 |
+
(other.in_proj_bias is not None),
|
| 100 |
+
(other.bias_k is not None),
|
| 101 |
+
other.add_zero_attn, other.kdim, other.vdim,
|
| 102 |
+
other.batch_first)
|
| 103 |
+
observed.bias_k = other.bias_k
|
| 104 |
+
observed.bias_v = other.bias_v
|
| 105 |
+
observed.qconfig = other.qconfig
|
| 106 |
+
|
| 107 |
+
# Set the linear weights
|
| 108 |
+
# for the type: ignores, see https://github.com/pytorch/pytorch/issues/58969
|
| 109 |
+
observed.out_proj.weight = other.out_proj.weight # type: ignore[has-type]
|
| 110 |
+
observed.out_proj.bias = other.out_proj.bias # type: ignore[has-type]
|
| 111 |
+
if other._qkv_same_embed_dim:
|
| 112 |
+
# Use separate params
|
| 113 |
+
bias = other.in_proj_bias
|
| 114 |
+
_start = 0
|
| 115 |
+
_end = _start + other.embed_dim
|
| 116 |
+
weight = other.in_proj_weight[_start:_end, :]
|
| 117 |
+
if bias is not None:
|
| 118 |
+
bias = torch.nn.Parameter(bias[_start:_end], bias.requires_grad)
|
| 119 |
+
observed.linear_Q.weight = torch.nn.Parameter(weight,
|
| 120 |
+
weight.requires_grad)
|
| 121 |
+
observed.linear_Q.bias = bias
|
| 122 |
+
|
| 123 |
+
bias = other.in_proj_bias
|
| 124 |
+
_start = _end
|
| 125 |
+
_end = _start + other.embed_dim
|
| 126 |
+
weight = other.in_proj_weight[_start:_end, :]
|
| 127 |
+
if bias is not None:
|
| 128 |
+
bias = torch.nn.Parameter(bias[_start:_end], bias.requires_grad)
|
| 129 |
+
observed.linear_K.weight = torch.nn.Parameter(weight,
|
| 130 |
+
weight.requires_grad)
|
| 131 |
+
observed.linear_K.bias = bias
|
| 132 |
+
|
| 133 |
+
bias = other.in_proj_bias
|
| 134 |
+
_start = _end
|
| 135 |
+
weight = other.in_proj_weight[_start:, :]
|
| 136 |
+
if bias is not None:
|
| 137 |
+
bias = torch.nn.Parameter(bias[_start:], bias.requires_grad)
|
| 138 |
+
observed.linear_V.weight = torch.nn.Parameter(weight,
|
| 139 |
+
weight.requires_grad)
|
| 140 |
+
observed.linear_V.bias = bias
|
| 141 |
+
else:
|
| 142 |
+
observed.linear_Q.weight = nn.Parameter(other.q_proj_weight)
|
| 143 |
+
observed.linear_K.weight = nn.Parameter(other.k_proj_weight)
|
| 144 |
+
observed.linear_V.weight = nn.Parameter(other.v_proj_weight)
|
| 145 |
+
if other.in_proj_bias is None:
|
| 146 |
+
observed.linear_Q.bias = None # type: ignore[assignment]
|
| 147 |
+
observed.linear_K.bias = None # type: ignore[assignment]
|
| 148 |
+
observed.linear_V.bias = None # type: ignore[assignment]
|
| 149 |
+
else:
|
| 150 |
+
observed.linear_Q.bias = nn.Parameter(other.in_proj_bias[0:other.embed_dim])
|
| 151 |
+
observed.linear_K.bias = nn.Parameter(other.in_proj_bias[other.embed_dim:(other.embed_dim * 2)])
|
| 152 |
+
observed.linear_V.bias = nn.Parameter(other.in_proj_bias[(other.embed_dim * 2):])
|
| 153 |
+
observed.eval()
|
| 154 |
+
# Explicit prepare
|
| 155 |
+
observed = torch.ao.quantization.prepare(observed, inplace=True)
|
| 156 |
+
return observed
|
| 157 |
+
|
| 158 |
+
@torch.jit.unused
|
| 159 |
+
def dequantize(self):
|
| 160 |
+
r"""Utility to convert the quantized MHA back to float.
|
| 161 |
+
|
| 162 |
+
The motivation for this is that it is not trivial to conver the weights
|
| 163 |
+
from the format that is used in the quantized version back to the
|
| 164 |
+
float.
|
| 165 |
+
"""
|
| 166 |
+
fp = self._FLOAT_MODULE(self.embed_dim, self.num_heads, self.dropout,
|
| 167 |
+
(self.linear_Q._weight_bias()[1] is not None),
|
| 168 |
+
(self.bias_k is not None),
|
| 169 |
+
self.add_zero_attn, self.kdim, self.vdim, self.batch_first)
|
| 170 |
+
assert fp._qkv_same_embed_dim == self._qkv_same_embed_dim
|
| 171 |
+
if self.bias_k is not None:
|
| 172 |
+
fp.bias_k = nn.Parameter(self.bias_k.dequantize())
|
| 173 |
+
if self.bias_v is not None:
|
| 174 |
+
fp.bias_v = nn.Parameter(self.bias_v.dequantize())
|
| 175 |
+
|
| 176 |
+
# Set the linear weights
|
| 177 |
+
# Note: Because the linear layers are quantized, mypy does not nkow how
|
| 178 |
+
# to deal with them -- might need to ignore the typing checks.
|
| 179 |
+
# for the type: ignore[has-type], see https://github.com/pytorch/pytorch/issues/58969
|
| 180 |
+
w, b = self.out_proj._weight_bias() # type: ignore[operator, has-type]
|
| 181 |
+
fp.out_proj.weight = nn.Parameter(w.dequantize())
|
| 182 |
+
if b is not None:
|
| 183 |
+
fp.out_proj.bias = nn.Parameter(b)
|
| 184 |
+
|
| 185 |
+
wQ, bQ = self.linear_Q._weight_bias() # type: ignore[operator]
|
| 186 |
+
wQ = wQ.dequantize()
|
| 187 |
+
wK, bK = self.linear_K._weight_bias() # type: ignore[operator]
|
| 188 |
+
wK = wK.dequantize()
|
| 189 |
+
wV, bV = self.linear_V._weight_bias() # type: ignore[operator]
|
| 190 |
+
wV = wV.dequantize()
|
| 191 |
+
if fp._qkv_same_embed_dim:
|
| 192 |
+
# Use separate params
|
| 193 |
+
_start = 0
|
| 194 |
+
_end = _start + fp.embed_dim
|
| 195 |
+
fp.in_proj_weight[_start:_end, :] = wQ
|
| 196 |
+
if fp.in_proj_bias is not None:
|
| 197 |
+
assert all(bQ == 0)
|
| 198 |
+
fp.in_proj_bias[_start:_end] = bQ
|
| 199 |
+
|
| 200 |
+
_start = _end
|
| 201 |
+
_end = _start + fp.embed_dim
|
| 202 |
+
fp.in_proj_weight[_start:_end, :] = wK
|
| 203 |
+
if fp.in_proj_bias is not None:
|
| 204 |
+
assert all(bK == 0)
|
| 205 |
+
fp.in_proj_bias[_start:_end] = bK
|
| 206 |
+
|
| 207 |
+
_start = _end
|
| 208 |
+
fp.in_proj_weight[_start:, :] = wV
|
| 209 |
+
if fp.in_proj_bias is not None:
|
| 210 |
+
assert all(bV == 0)
|
| 211 |
+
fp.in_proj_bias[_start:] = bV
|
| 212 |
+
else:
|
| 213 |
+
fp.q_proj_weight = nn.Parameter(wQ)
|
| 214 |
+
fp.k_proj_weight = nn.Parameter(wK)
|
| 215 |
+
fp.v_proj_weight = nn.Parameter(wV)
|
| 216 |
+
if fp.in_proj_bias is None:
|
| 217 |
+
self.linear_Q.bias = None
|
| 218 |
+
self.linear_K.bias = None
|
| 219 |
+
self.linear_V.bias = None
|
| 220 |
+
else:
|
| 221 |
+
fp.in_proj_bias[0:fp.embed_dim] = bQ
|
| 222 |
+
fp.in_proj_bias[fp.embed_dim:(fp.embed_dim * 2)] = bK
|
| 223 |
+
fp.in_proj_bias[(fp.embed_dim * 2):] = bV
|
| 224 |
+
|
| 225 |
+
return fp
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
@classmethod
|
| 229 |
+
def from_observed(cls, other):
|
| 230 |
+
# The whole flow is float -> observed -> quantized
|
| 231 |
+
# This class does float -> observed only
|
| 232 |
+
# See nn.quantized.MultiheadAttention
|
| 233 |
+
raise NotImplementedError("It looks like you are trying to prepare an "
|
| 234 |
+
"MHA module. Please, see "
|
| 235 |
+
"the examples on quantizable MHAs.")
|
| 236 |
+
|
| 237 |
+
def forward(self,
|
| 238 |
+
query: Tensor,
|
| 239 |
+
key: Tensor,
|
| 240 |
+
value: Tensor,
|
| 241 |
+
key_padding_mask: Optional[Tensor] = None,
|
| 242 |
+
need_weights: bool = True,
|
| 243 |
+
attn_mask: Optional[Tensor] = None,
|
| 244 |
+
average_attn_weights: bool = True,
|
| 245 |
+
is_causal: bool = False) -> Tuple[Tensor, Optional[Tensor]]:
|
| 246 |
+
r"""
|
| 247 |
+
Note::
|
| 248 |
+
Please, refer to :func:`~torch.nn.MultiheadAttention.forward` for more
|
| 249 |
+
information
|
| 250 |
+
|
| 251 |
+
Args:
|
| 252 |
+
query, key, value: map a query and a set of key-value pairs to an output.
|
| 253 |
+
See "Attention Is All You Need" for more details.
|
| 254 |
+
key_padding_mask: if provided, specified padding elements in the key will
|
| 255 |
+
be ignored by the attention. When given a binary mask and a value is True,
|
| 256 |
+
the corresponding value on the attention layer will be ignored.
|
| 257 |
+
need_weights: output attn_output_weights.
|
| 258 |
+
attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all
|
| 259 |
+
the batches while a 3D mask allows to specify a different mask for the entries of each batch.
|
| 260 |
+
|
| 261 |
+
Shape:
|
| 262 |
+
- Inputs:
|
| 263 |
+
- query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is
|
| 264 |
+
the embedding dimension. :math:`(N, L, E)` if ``batch_first`` is ``True``.
|
| 265 |
+
- key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is
|
| 266 |
+
the embedding dimension. :math:`(N, S, E)` if ``batch_first`` is ``True``.
|
| 267 |
+
- value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is
|
| 268 |
+
the embedding dimension. :math:`(N, S, E)` if ``batch_first`` is ``True``.
|
| 269 |
+
- key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length.
|
| 270 |
+
If a BoolTensor is provided, the positions with the
|
| 271 |
+
value of ``True`` will be ignored while the position with the value of ``False`` will be unchanged.
|
| 272 |
+
- attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length.
|
| 273 |
+
3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length,
|
| 274 |
+
S is the source sequence length. attn_mask ensure that position i is allowed to attend the unmasked
|
| 275 |
+
positions. If a BoolTensor is provided, positions with ``True``
|
| 276 |
+
is not allowed to attend while ``False`` values will be unchanged. If a FloatTensor
|
| 277 |
+
is provided, it will be added to the attention weight.
|
| 278 |
+
- is_causal: If specified, applies a causal mask as attention mask. Mutually exclusive with providing attn_mask.
|
| 279 |
+
Default: ``False``.
|
| 280 |
+
- average_attn_weights: If true, indicates that the returned ``attn_weights`` should be averaged across
|
| 281 |
+
heads. Otherwise, ``attn_weights`` are provided separately per head. Note that this flag only has an
|
| 282 |
+
effect when ``need_weights=True.``. Default: True (i.e. average weights across heads)
|
| 283 |
+
|
| 284 |
+
- Outputs:
|
| 285 |
+
- attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,
|
| 286 |
+
E is the embedding dimension. :math:`(N, L, E)` if ``batch_first`` is ``True``.
|
| 287 |
+
- attn_output_weights: If ``average_attn_weights=True``, returns attention weights averaged
|
| 288 |
+
across heads of shape :math:`(N, L, S)`, where N is the batch size, L is the target sequence length,
|
| 289 |
+
S is the source sequence length. If ``average_attn_weights=False``, returns attention weights per
|
| 290 |
+
head of shape :math:`(N, num_heads, L, S)`.
|
| 291 |
+
"""
|
| 292 |
+
return self._forward_impl(query, key, value, key_padding_mask,
|
| 293 |
+
need_weights, attn_mask, average_attn_weights,
|
| 294 |
+
is_causal)
|
| 295 |
+
|
| 296 |
+
def _forward_impl(self,
|
| 297 |
+
query: Tensor,
|
| 298 |
+
key: Tensor,
|
| 299 |
+
value: Tensor,
|
| 300 |
+
key_padding_mask: Optional[Tensor] = None,
|
| 301 |
+
need_weights: bool = True,
|
| 302 |
+
attn_mask: Optional[Tensor] = None,
|
| 303 |
+
average_attn_weights: bool = True,
|
| 304 |
+
is_causal: bool = False) -> Tuple[Tensor, Optional[Tensor]]:
|
| 305 |
+
# This version will not deal with the static key/value pairs.
|
| 306 |
+
# Keeping it here for future changes.
|
| 307 |
+
#
|
| 308 |
+
# TODO: This method has some duplicate lines with the
|
| 309 |
+
# `torch.nn.functional.multi_head_attention`. Will need to refactor.
|
| 310 |
+
static_k = None
|
| 311 |
+
static_v = None
|
| 312 |
+
|
| 313 |
+
if attn_mask is not None and is_causal:
|
| 314 |
+
raise AssertionError("Only allow causal mask or attn_mask")
|
| 315 |
+
|
| 316 |
+
if is_causal:
|
| 317 |
+
raise AssertionError("causal mask not supported by AO MHA module")
|
| 318 |
+
|
| 319 |
+
if self.batch_first:
|
| 320 |
+
query, key, value = (x.transpose(0, 1) for x in (query, key, value))
|
| 321 |
+
|
| 322 |
+
tgt_len, bsz, embed_dim_to_check = query.size()
|
| 323 |
+
assert self.embed_dim == embed_dim_to_check
|
| 324 |
+
# allow MHA to have different sizes for the feature dimension
|
| 325 |
+
assert key.size(0) == value.size(0) and key.size(1) == value.size(1)
|
| 326 |
+
|
| 327 |
+
head_dim = self.embed_dim // self.num_heads
|
| 328 |
+
assert head_dim * self.num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
|
| 329 |
+
scaling = float(head_dim) ** -0.5
|
| 330 |
+
|
| 331 |
+
q = self.linear_Q(query)
|
| 332 |
+
k = self.linear_K(key)
|
| 333 |
+
v = self.linear_V(value)
|
| 334 |
+
|
| 335 |
+
q = self.q_scaling_product.mul_scalar(q, scaling)
|
| 336 |
+
|
| 337 |
+
if attn_mask is not None:
|
| 338 |
+
if attn_mask.dtype == torch.uint8:
|
| 339 |
+
warnings.warn("Byte tensor for attn_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.")
|
| 340 |
+
attn_mask = attn_mask.to(torch.bool)
|
| 341 |
+
assert attn_mask.is_floating_point() or attn_mask.dtype == torch.bool, \
|
| 342 |
+
f'Only float and bool types are supported for attn_mask, not {attn_mask.dtype}'
|
| 343 |
+
|
| 344 |
+
if attn_mask.dim() == 2:
|
| 345 |
+
attn_mask = attn_mask.unsqueeze(0)
|
| 346 |
+
if list(attn_mask.size()) != [1, query.size(0), key.size(0)]:
|
| 347 |
+
raise RuntimeError('The size of the 2D attn_mask is not correct.')
|
| 348 |
+
elif attn_mask.dim() == 3:
|
| 349 |
+
if list(attn_mask.size()) != [bsz * self.num_heads, query.size(0), key.size(0)]:
|
| 350 |
+
raise RuntimeError('The size of the 3D attn_mask is not correct.')
|
| 351 |
+
else:
|
| 352 |
+
raise RuntimeError(f"attn_mask's dimension {attn_mask.dim()} is not supported")
|
| 353 |
+
# attn_mask's dim is 3 now.
|
| 354 |
+
|
| 355 |
+
# convert ByteTensor key_padding_mask to bool
|
| 356 |
+
if key_padding_mask is not None and key_padding_mask.dtype == torch.uint8:
|
| 357 |
+
warnings.warn("Byte tensor for key_padding_mask in nn.MultiheadAttention is deprecated. Use bool tensor instead.")
|
| 358 |
+
key_padding_mask = key_padding_mask.to(torch.bool)
|
| 359 |
+
if self.bias_k is not None and self.bias_v is not None:
|
| 360 |
+
if static_k is None and static_v is None:
|
| 361 |
+
|
| 362 |
+
# Explicitly assert that bias_k and bias_v are not None
|
| 363 |
+
# in a way that TorchScript can understand.
|
| 364 |
+
bias_k = self.bias_k
|
| 365 |
+
assert bias_k is not None
|
| 366 |
+
bias_v = self.bias_v
|
| 367 |
+
assert bias_v is not None
|
| 368 |
+
|
| 369 |
+
k = torch.cat([k, bias_k.repeat(1, bsz, 1)])
|
| 370 |
+
v = torch.cat([v, bias_v.repeat(1, bsz, 1)])
|
| 371 |
+
if attn_mask is not None:
|
| 372 |
+
attn_mask = nnF.pad(attn_mask, (0, 1))
|
| 373 |
+
if key_padding_mask is not None:
|
| 374 |
+
key_padding_mask = nnF.pad(key_padding_mask, (0, 1))
|
| 375 |
+
else:
|
| 376 |
+
assert static_k is None, "bias cannot be added to static key."
|
| 377 |
+
assert static_v is None, "bias cannot be added to static value."
|
| 378 |
+
else:
|
| 379 |
+
assert self.bias_k is None
|
| 380 |
+
assert self.bias_v is None
|
| 381 |
+
|
| 382 |
+
q = q.contiguous().view(tgt_len, bsz * self.num_heads, head_dim).transpose(0, 1)
|
| 383 |
+
if k is not None:
|
| 384 |
+
k = k.contiguous().view(-1, bsz * self.num_heads, head_dim).transpose(0, 1)
|
| 385 |
+
if v is not None:
|
| 386 |
+
v = v.contiguous().view(-1, bsz * self.num_heads, head_dim).transpose(0, 1)
|
| 387 |
+
|
| 388 |
+
if static_k is not None:
|
| 389 |
+
assert static_k.size(0) == bsz * self.num_heads
|
| 390 |
+
assert static_k.size(2) == head_dim
|
| 391 |
+
k = static_k
|
| 392 |
+
|
| 393 |
+
if static_v is not None:
|
| 394 |
+
assert static_v.size(0) == bsz * self.num_heads
|
| 395 |
+
assert static_v.size(2) == head_dim
|
| 396 |
+
v = static_v
|
| 397 |
+
|
| 398 |
+
src_len = k.size(1)
|
| 399 |
+
|
| 400 |
+
if key_padding_mask is not None:
|
| 401 |
+
assert key_padding_mask.size(0) == bsz
|
| 402 |
+
assert key_padding_mask.size(1) == src_len
|
| 403 |
+
|
| 404 |
+
if self.add_zero_attn:
|
| 405 |
+
src_len += 1
|
| 406 |
+
k_zeros = torch.zeros((k.size(0), 1) + k.size()[2:])
|
| 407 |
+
if k.is_quantized:
|
| 408 |
+
k_zeros = torch.quantize_per_tensor(k_zeros, k.q_scale(), k.q_zero_point(), k.dtype)
|
| 409 |
+
k = torch.cat([k, k_zeros], dim=1)
|
| 410 |
+
v_zeros = torch.zeros((v.size(0), 1) + k.size()[2:])
|
| 411 |
+
if v.is_quantized:
|
| 412 |
+
v_zeros = torch.quantize_per_tensor(v_zeros, v.q_scale(), v.q_zero_point(), v.dtype)
|
| 413 |
+
v = torch.cat([v, v_zeros], dim=1)
|
| 414 |
+
|
| 415 |
+
if attn_mask is not None:
|
| 416 |
+
attn_mask = nnF.pad(attn_mask, (0, 1))
|
| 417 |
+
if key_padding_mask is not None:
|
| 418 |
+
key_padding_mask = nnF.pad(key_padding_mask, (0, 1))
|
| 419 |
+
|
| 420 |
+
# Leaving the quantized zone here
|
| 421 |
+
q = self.dequant_q(q)
|
| 422 |
+
k = self.dequant_k(k)
|
| 423 |
+
v = self.dequant_v(v)
|
| 424 |
+
attn_output_weights = torch.bmm(q, k.transpose(1, 2))
|
| 425 |
+
assert list(attn_output_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]
|
| 426 |
+
|
| 427 |
+
if attn_mask is not None:
|
| 428 |
+
if attn_mask.dtype == torch.bool:
|
| 429 |
+
attn_output_weights.masked_fill_(attn_mask, float('-inf'))
|
| 430 |
+
else:
|
| 431 |
+
attn_output_weights += attn_mask
|
| 432 |
+
|
| 433 |
+
if key_padding_mask is not None:
|
| 434 |
+
attn_output_weights = attn_output_weights.view(bsz, self.num_heads, tgt_len, src_len)
|
| 435 |
+
attn_output_weights = attn_output_weights.masked_fill(
|
| 436 |
+
key_padding_mask.unsqueeze(1).unsqueeze(2),
|
| 437 |
+
float('-inf'),
|
| 438 |
+
)
|
| 439 |
+
attn_output_weights = attn_output_weights.view(bsz * self.num_heads, tgt_len, src_len)
|
| 440 |
+
|
| 441 |
+
attn_output_weights = nnF.softmax(
|
| 442 |
+
attn_output_weights, dim=-1)
|
| 443 |
+
attn_output_weights = nnF.dropout(attn_output_weights, p=self.dropout, training=self.training)
|
| 444 |
+
|
| 445 |
+
attn_output = torch.bmm(attn_output_weights, v)
|
| 446 |
+
assert list(attn_output.size()) == [bsz * self.num_heads, tgt_len, head_dim]
|
| 447 |
+
if self.batch_first:
|
| 448 |
+
attn_output = attn_output.view(bsz, tgt_len, self.embed_dim)
|
| 449 |
+
else:
|
| 450 |
+
attn_output = attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, self.embed_dim)
|
| 451 |
+
|
| 452 |
+
# Reentering the quantized zone
|
| 453 |
+
attn_output = self.quant_attn_output(attn_output)
|
| 454 |
+
# for the type: ignore[has-type], see https://github.com/pytorch/pytorch/issues/58969
|
| 455 |
+
attn_output = self.out_proj(attn_output) # type: ignore[has-type]
|
| 456 |
+
attn_output_weights = self.quant_attn_output_weights(attn_output_weights)
|
| 457 |
+
|
| 458 |
+
if need_weights:
|
| 459 |
+
# average attention weights over heads
|
| 460 |
+
attn_output_weights = attn_output_weights.view(bsz, self.num_heads, tgt_len, src_len)
|
| 461 |
+
if average_attn_weights:
|
| 462 |
+
attn_output_weights = attn_output_weights.mean(dim=1)
|
| 463 |
+
return attn_output, attn_output_weights
|
| 464 |
+
else:
|
| 465 |
+
return attn_output, None
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/__pycache__/functional.cpython-311.pyc
ADDED
|
Binary file (32.6 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/dynamic/modules/__pycache__/rnn.cpython-311.pyc
ADDED
|
Binary file (59 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/batchnorm.cpython-311.pyc
ADDED
|
Binary file (6.44 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/conv.cpython-311.pyc
ADDED
|
Binary file (49.1 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/embedding_ops.cpython-311.pyc
ADDED
|
Binary file (18.1 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/linear.cpython-311.pyc
ADDED
|
Binary file (16.3 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/rnn.cpython-311.pyc
ADDED
|
Binary file (2.6 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (7.19 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/activation.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from warnings import warn
|
| 3 |
+
__all__ = [
|
| 4 |
+
"ReLU6",
|
| 5 |
+
"Hardswish",
|
| 6 |
+
"ELU",
|
| 7 |
+
"LeakyReLU",
|
| 8 |
+
"Sigmoid",
|
| 9 |
+
"Softmax",
|
| 10 |
+
"MultiheadAttention",
|
| 11 |
+
"PReLU"
|
| 12 |
+
]
|
| 13 |
+
|
| 14 |
+
class ReLU6(torch.nn.ReLU):
|
| 15 |
+
r"""Applies the element-wise function:
|
| 16 |
+
|
| 17 |
+
:math:`\text{ReLU6}(x) = \min(\max(x_0, x), q(6))`, where :math:`x_0` is the
|
| 18 |
+
zero_point, and :math:`q(6)` is the quantized representation of number 6.
|
| 19 |
+
|
| 20 |
+
Args:
|
| 21 |
+
inplace: can optionally do the operation in-place. Default: ``False``
|
| 22 |
+
|
| 23 |
+
Shape:
|
| 24 |
+
- Input: :math:`(N, *)` where `*` means, any number of additional
|
| 25 |
+
dimensions
|
| 26 |
+
- Output: :math:`(N, *)`, same shape as the input
|
| 27 |
+
|
| 28 |
+
.. image:: ../scripts/activation_images/ReLU6.png
|
| 29 |
+
|
| 30 |
+
Examples::
|
| 31 |
+
|
| 32 |
+
>>> m = nn.quantized.ReLU6()
|
| 33 |
+
>>> input = torch.randn(2)
|
| 34 |
+
>>> # xdoctest: +SKIP
|
| 35 |
+
>>> input = torch.quantize_per_tensor(input, 1.0, 0, dtype=torch.qint32)
|
| 36 |
+
>>> output = m(input)
|
| 37 |
+
"""
|
| 38 |
+
def __init__(self, inplace=False):
|
| 39 |
+
super().__init__(inplace)
|
| 40 |
+
self.inplace = inplace
|
| 41 |
+
|
| 42 |
+
def forward(self, input):
|
| 43 |
+
return torch.ops.quantized.relu6(input, self.inplace)
|
| 44 |
+
|
| 45 |
+
def _get_name(self):
|
| 46 |
+
return 'QuantizedReLU6'
|
| 47 |
+
|
| 48 |
+
@staticmethod
|
| 49 |
+
def from_float(mod):
|
| 50 |
+
return ReLU6(mod.inplace)
|
| 51 |
+
|
| 52 |
+
class Hardswish(torch.nn.Hardswish):
|
| 53 |
+
r"""This is the quantized version of :class:`~torch.nn.Hardswish`.
|
| 54 |
+
|
| 55 |
+
Args:
|
| 56 |
+
scale: quantization scale of the output tensor
|
| 57 |
+
zero_point: quantization zero point of the output tensor
|
| 58 |
+
"""
|
| 59 |
+
def __init__(self, scale, zero_point, device=None, dtype=None):
|
| 60 |
+
factory_kwargs = {'device': device, 'dtype': dtype}
|
| 61 |
+
super().__init__()
|
| 62 |
+
self.register_buffer('scale', torch.tensor(scale, **factory_kwargs))
|
| 63 |
+
self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs))
|
| 64 |
+
|
| 65 |
+
def forward(self, input):
|
| 66 |
+
return torch.ops.quantized.hardswish(input, self.scale, self.zero_point)
|
| 67 |
+
|
| 68 |
+
def _get_name(self):
|
| 69 |
+
return 'QuantizedHardswish'
|
| 70 |
+
|
| 71 |
+
@staticmethod
|
| 72 |
+
def from_float(mod):
|
| 73 |
+
scale, zero_point = mod.activation_post_process.calculate_qparams()
|
| 74 |
+
return Hardswish(float(scale), int(zero_point))
|
| 75 |
+
|
| 76 |
+
@classmethod
|
| 77 |
+
def from_reference(cls, mod, scale, zero_point):
|
| 78 |
+
return cls(float(scale), int(zero_point))
|
| 79 |
+
|
| 80 |
+
class ELU(torch.nn.ELU):
|
| 81 |
+
r"""This is the quantized equivalent of :class:`~torch.nn.ELU`.
|
| 82 |
+
|
| 83 |
+
Args:
|
| 84 |
+
scale: quantization scale of the output tensor
|
| 85 |
+
zero_point: quantization zero point of the output tensor
|
| 86 |
+
alpha: the alpha constant
|
| 87 |
+
"""
|
| 88 |
+
def __init__(self, scale, zero_point, alpha=1.):
|
| 89 |
+
super().__init__(alpha)
|
| 90 |
+
self.scale = scale
|
| 91 |
+
self.zero_point = zero_point
|
| 92 |
+
|
| 93 |
+
def forward(self, input):
|
| 94 |
+
return torch.ao.nn.quantized.functional.elu(
|
| 95 |
+
input, self.scale, self.zero_point, self.alpha)
|
| 96 |
+
|
| 97 |
+
def _get_name(self):
|
| 98 |
+
return 'QuantizedELU'
|
| 99 |
+
|
| 100 |
+
@staticmethod
|
| 101 |
+
def from_float(mod):
|
| 102 |
+
scale, zero_point = mod.activation_post_process.calculate_qparams()
|
| 103 |
+
return ELU(float(scale), int(zero_point), mod.alpha)
|
| 104 |
+
|
| 105 |
+
@classmethod
|
| 106 |
+
def from_reference(cls, mod, scale, zero_point):
|
| 107 |
+
return cls(float(scale), int(zero_point), mod.alpha)
|
| 108 |
+
|
| 109 |
+
class LeakyReLU(torch.nn.LeakyReLU):
|
| 110 |
+
r"""This is the quantized equivalent of :class:`~torch.nn.LeakyReLU`.
|
| 111 |
+
|
| 112 |
+
Args:
|
| 113 |
+
scale: quantization scale of the output tensor
|
| 114 |
+
zero_point: quantization zero point of the output tensor
|
| 115 |
+
negative_slope: Controls the angle of the negative slope. Default: 1e-2
|
| 116 |
+
"""
|
| 117 |
+
def __init__(self, scale: float, zero_point: int, negative_slope: float = 1e-2,
|
| 118 |
+
inplace: bool = False, device=None, dtype=None) -> None:
|
| 119 |
+
factory_kwargs = {'device': device, 'dtype': dtype}
|
| 120 |
+
super().__init__(negative_slope, inplace)
|
| 121 |
+
self.register_buffer('scale', torch.tensor(scale, **factory_kwargs))
|
| 122 |
+
self.register_buffer('zero_point', torch.tensor(zero_point, **factory_kwargs))
|
| 123 |
+
|
| 124 |
+
def forward(self, input):
|
| 125 |
+
return torch.ops.quantized.leaky_relu(
|
| 126 |
+
input, self.negative_slope, self.inplace, self.scale, self.zero_point)
|
| 127 |
+
|
| 128 |
+
def _get_name(self):
|
| 129 |
+
return 'QuantizedLeakyReLU'
|
| 130 |
+
|
| 131 |
+
@classmethod
|
| 132 |
+
def from_float(cls, mod):
|
| 133 |
+
scale, zero_point = mod.activation_post_process.calculate_qparams()
|
| 134 |
+
return cls(float(scale), int(zero_point), mod.negative_slope, mod.inplace)
|
| 135 |
+
|
| 136 |
+
@classmethod
|
| 137 |
+
def from_reference(cls, mod, scale, zero_point):
|
| 138 |
+
return cls(float(scale), int(zero_point), mod.negative_slope, mod.inplace)
|
| 139 |
+
|
| 140 |
+
class Sigmoid(torch.nn.Sigmoid):
|
| 141 |
+
r"""This is the quantized equivalent of :class:`~torch.nn.Sigmoid`.
|
| 142 |
+
|
| 143 |
+
Args:
|
| 144 |
+
scale: quantization scale of the output tensor
|
| 145 |
+
zero_point: quantization zero point of the output tensor
|
| 146 |
+
"""
|
| 147 |
+
|
| 148 |
+
    def __init__(self, output_scale: float, output_zero_point: int):
        super().__init__()
        self.output_scale = output_scale
        self.output_zero_point = output_zero_point

    def forward(self, input):
        return torch.ops.quantized.sigmoid(input, self.output_scale, self.output_zero_point)

    @classmethod
    def from_float(cls, mod):
        output_scale, output_zero_point = mod.activation_post_process.calculate_qparams()
        return cls(float(output_scale), int(output_zero_point))


class Softmax(torch.nn.Softmax):
    r"""This is the quantized version of :class:`~torch.nn.Softmax`.

    Args:
        dim: A dimension along which Softmax will be computed (so every slice along dim will sum to 1).
        scale: quantization scale of the output tensor
        zero_point: quantization zero point of the output tensor
    """
    def __init__(self, dim=None, scale=1.0, zero_point=0):
        super().__init__()
        self.dim = dim
        self.scale = scale
        self.zero_point = zero_point

    def forward(self, input):
        dim = self.dim
        if dim is None:
            stacklevel = 3
            # Note: adding the mypy ignore on _get_softmax_dim seems less bad
            # than making `_get_softmax_dim` an official API.
            dim = torch.nn.functional._get_softmax_dim(  # type: ignore[attr-defined]
                "softmax", input.dim(), stacklevel)
        return torch.ops.quantized.softmax(
            input, dim, self.scale, self.zero_point)

    def _get_name(self):
        return 'QuantizedSoftmax'

    @staticmethod
    def from_float(mod):
        scale, zero_point = mod.activation_post_process.calculate_qparams()
        return Softmax(mod.dim, float(scale), int(zero_point))

    @classmethod
    def from_reference(cls, mod, scale, zero_point):
        return cls(mod.dim, float(scale), int(zero_point))


class MultiheadAttention(torch.ao.nn.quantizable.MultiheadAttention):
    _FLOAT_MODULE = torch.ao.nn.quantizable.MultiheadAttention

    def _get_name(self):
        return "QuantizedMultiheadAttention"

    @classmethod
    def from_float(cls, other):
        # The whole flow is float -> observed -> quantized
        # This class does observed -> quantized only
        raise NotImplementedError("It looks like you are trying to convert a "
                                  "non-observed MHA module. Please, see "
                                  "the examples on quantizable MHAs.")

    @classmethod
    def from_observed(cls, other):
        converted = torch.ao.quantization.convert(other, mapping=None,
                                                  inplace=False,
                                                  remove_qconfig=True,
                                                  convert_custom_config_dict=None)
        converted.__class__ = cls
        # Remove the parameters for the bias_k and bias_v to quantize them
        # TODO: This is a potential source of accuracy drop.
        #       quantized cat takes the scale and zp of the first
        #       element, which might lose the precision in the bias_k
        #       and the bias_v (which are cat'ed with k/v being first).
        if converted.bias_k is not None:
            bias_k = converted._parameters.pop('bias_k')
            sc, zp = torch._choose_qparams_per_tensor(bias_k,
                                                      reduce_range=False)
            bias_k = torch.quantize_per_tensor(bias_k, sc, zp, torch.quint8)
            setattr(converted, 'bias_k', bias_k)  # noqa: B010

        if converted.bias_v is not None:
            bias_v = converted._parameters.pop('bias_v')
            sc, zp = torch._choose_qparams_per_tensor(bias_k,  # type: ignore[possibly-undefined]
                                                      reduce_range=False)
            bias_v = torch.quantize_per_tensor(bias_v, sc, zp, torch.quint8)
            setattr(converted, 'bias_v', bias_v)  # noqa: B010

        del converted.in_proj_weight
        del converted.in_proj_bias

        return converted

class PReLU(torch.nn.Module):
    r"""This is the quantized equivalent of :class:`~torch.nn.PReLU`.

    Args:
        scale: quantization scale of the output tensor
        zero_point: quantization zero point of the output tensor
        num_parameters: number of parameters: 1, or the number of channels at input. Default: 1
    """
    def __init__(self, output_scale: float, output_zero_point: int,
                 num_parameters: int = 1) -> None:
        super().__init__()
        self.num_parameters = num_parameters
        self.scale = output_scale
        self.zero_point = output_zero_point
        w = torch.randn(num_parameters, dtype=torch.float)
        qw = torch.quantize_per_tensor(w, scale=1.0, zero_point=0, dtype=torch.quint8)
        self.set_weight(qw)

    def set_weight(self, w: torch.Tensor) -> None:
        self.weight = w

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        return torch.ops.quantized.prelu(input, self.weight, self.scale, self.zero_point)

    def _get_name(self):
        return 'QuantizedPReLU'

    @classmethod
    def from_float(cls, mod):
        scale, zero_point = mod.activation_post_process.calculate_qparams()
        qprelu = cls(float(scale), int(zero_point), mod.num_parameters)
        float_wt = mod.weight.float()
        observer = mod.qconfig.weight()
        observer(float_wt)
        if observer.dtype != torch.quint8:
            warn(
                f"PReLU's weight observer should have dtype quint8 but got {observer.dtype}"
            )
        wt_scale, wt_zp = observer.calculate_qparams()
        qweight = torch.quantize_per_tensor(
            float_wt, float(wt_scale), int(wt_zp), torch.quint8)
        qprelu.set_weight(qweight)
        return qprelu

    @classmethod
    def from_reference(cls, mod, scale, zero_point):
        qprelu = cls(float(scale), int(zero_point), mod.num_parameters)
        float_wt = mod.weight.float()
        observer = mod.qconfig.weight()
        observer(float_wt)
        if observer.dtype != torch.quint8:
            warn(
                f"PReLU's weight observer should have dtype quint8 but got {observer.dtype}"
            )
        wt_scale, wt_zp = observer.calculate_qparams()
        qweight = torch.quantize_per_tensor(
            float_wt, float(wt_scale), int(wt_zp), torch.quint8)
        qprelu.set_weight(qweight)
        return qprelu

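A minimal usage sketch (not part of the diff above) for the quantized activation modules defined in this file; the scale/zero_point values and the `torch.ao.nn.quantized` import path are illustrative assumptions, and running it needs a quantized engine such as fbgemm or qnnpack:

    import torch
    import torch.ao.nn.quantized as nnq

    x = torch.randn(2, 5)
    qx = torch.quantize_per_tensor(x, scale=0.1, zero_point=0, dtype=torch.quint8)
    softmax = nnq.Softmax(dim=1, scale=1.0 / 256.0, zero_point=0)  # output qparams chosen by hand here
    out = softmax(qx)  # quantized tensor carrying the configured output scale/zero_point
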
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/conv.py
ADDED
@@ -0,0 +1,945 @@
r"""Quantized convolution modules."""

from typing import Optional, List, TypeVar

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.ao.nn.intrinsic as nni
import torch.ao.nn.intrinsic.qat as nniqat

from torch._ops import ops
from torch.nn.common_types import _size_1_t
from torch.nn.modules.utils import _single, _pair, _triple
from torch.nn.utils import fuse_conv_bn_weights

from .utils import _quantize_weight, WeightedQuantizedModule

__all__ = ['Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d', 'ConvTranspose2d', 'ConvTranspose3d']

_SUPPORTED_PADDING = {
    'zeros',
    'reflect'
}


def _reverse_repeat_padding(padding: List[int]) -> List[int]:
    _reversed_padding_repeated_twice: List[int] = []
    N = len(padding)
    for idx in range(N):
        for _ in range(2):
            _reversed_padding_repeated_twice.append(padding[N - idx - 1])
    return _reversed_padding_repeated_twice


class _ConvNd(WeightedQuantizedModule):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros', device=None, dtype=None):
        # All subclasses have this signature - See PR #49702s
        raise NotImplementedError

    def _init(self, in_channels, out_channels, kernel_size, stride,
              padding, dilation,
              transposed, output_padding,
              groups, bias,
              padding_mode='zeros',
              device=None,
              dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()

        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.transposed = transposed
        self.output_padding = output_padding
        self.groups = groups
        if padding_mode not in _SUPPORTED_PADDING:
            raise ValueError(f"'padding_mode' {padding_mode} is not supported by quantized convolution")
        self.padding_mode = padding_mode
        # Initialize as NCHW. set_weight will internally transpose to NHWC.
        if self.transposed:
            weight_shape = [in_channels, out_channels // self.groups]
        else:
            weight_shape = [out_channels, in_channels // self.groups]
        qweight = torch._empty_affine_quantized(
            weight_shape + list(kernel_size),
            scale=1, zero_point=0, dtype=torch.qint8,
            **{k: v for k, v in factory_kwargs.items() if k != 'dtype'})
        bias_float = (
            torch.zeros(out_channels, dtype=torch.float,
                        **{k: v for k, v in factory_kwargs.items() if k != 'dtype'}) if bias else None)

        self.set_weight_bias(qweight, bias_float)
        self.scale = 1.0
        self.zero_point = 0

    def set_weight_bias(self, qweight, bias_float):
        raise NotImplementedError

    def bias(self):
        raise NotImplementedError

    def _weight_bias(self):
        raise NotImplementedError

    def extra_repr(self):
        s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
             ', stride={stride}, scale={scale}, zero_point={zero_point}')
        if self.padding != (0,) * len(self.padding):
            s += ', padding={padding}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.output_padding != (0,) * len(self.output_padding):
            s += ', output_padding={output_padding}'
        if self.groups != 1:
            s += ', groups={groups}'
        if self.bias() is None:
            s += ', bias=False'
        return s.format(**self.__dict__)

    # ===== Serialization methods =====
    # The special consideration here is that we have to unpack the weights into
    # their regular QTensor form for serialization. Packed weights should not
    # live outside the process in which they were created, rather they should be
    # derived from the QTensor weight.
    #   self
    #   |--- weight : Tensor
    #   |--- bias : Tensor
    #
    # TODO: maybe change to this when https://github.com/pytorch/pytorch/pull/32958 is landed
    #   self
    #   |--- _packed_params : Conv2dPackedParamsBase or Conv3dPackedParamsBase
    def _save_to_state_dict(self, destination, prefix, keep_vars):
        super()._save_to_state_dict(destination, prefix, keep_vars)
        (w, b) = self._weight_bias()
        destination[prefix + 'weight'] = w
        destination[prefix + 'bias'] = b
        destination[prefix + 'scale'] = torch.tensor(self.scale)
        destination[prefix + 'zero_point'] = torch.tensor(self.zero_point)

    @torch.jit.export
    def __getstate__(self):
        (w, b) = self._weight_bias()
        return (
            self.in_channels,
            self.out_channels,
            self.kernel_size,
            self.stride,
            self.padding,
            self.dilation,
            self.transposed,
            self.output_padding,
            self.groups,
            self.padding_mode,
            w,
            b,
            self.scale,
            self.zero_point,
            self.training
        )

    # ===== Deserialization methods =====
    # Counterpart to the serialization methods, we must pack the serialized
    # QTensor weight into its packed format for use by the FBGEMM ops.
    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        self.set_weight_bias(
            state_dict[prefix + 'weight'], state_dict[prefix + 'bias'])
        state_dict.pop(prefix + 'weight')
        state_dict.pop(prefix + 'bias')
        self.scale = float(state_dict[prefix + 'scale'])
        state_dict.pop(prefix + 'scale')
        self.zero_point = int(state_dict[prefix + 'zero_point'])
        state_dict.pop(prefix + 'zero_point')
        super()._load_from_state_dict(
            state_dict, prefix, local_metadata, False, missing_keys,
            unexpected_keys, error_msgs)

    @torch.jit.export
    def __setstate__(self, state):
        self.in_channels = state[0]
        self.out_channels = state[1]
        self.kernel_size = state[2]
        self.stride = state[3]
        self.padding = state[4]
        self.dilation = state[5]
        self.transposed = state[6]
        self.output_padding = state[7]
        self.groups = state[8]
        self.padding_mode = state[9]
        self.set_weight_bias(state[10], state[11])
        self.scale = state[12]
        self.zero_point = state[13]
        self.training = state[14]

    def __deepcopy__(self, memo):
        new_instance = type(self).__new__(type(self))
        torch.nn.Module.__init__(new_instance)
        state = self.__getstate__()
        new_instance.__setstate__(state)
        return new_instance

    def __copy__(self):
        return self.__deepcopy__({})

    @classmethod
    def get_qconv(cls, mod, activation_post_process, weight_post_process=None):
        r"""Creates a qconv object and returns it.
        """
        if weight_post_process is None:
            weight_post_process = mod.qconfig.weight()
        weight_post_process(mod.weight)
        assert weight_post_process.dtype == torch.qint8, \
            'Weight observer must have a dtype of qint8'
        qweight = _quantize_weight(mod.weight.float(), weight_post_process)
        # the __init__ call used is the one from derived classes and not the one from _ConvNd
        qconv = cls(mod.in_channels, mod.out_channels, mod.kernel_size,
                    mod.stride, mod.padding, mod.dilation, mod.groups,
                    mod.bias is not None, mod.padding_mode)
        qconv.set_weight_bias(qweight, mod.bias)
        if activation_post_process is None or activation_post_process.dtype == torch.float:
            return qconv  # dynamic quantization doesn't need scale/zero_point
        else:
            act_scale, act_zp = activation_post_process.calculate_qparams()
            qconv.scale = float(act_scale)
            qconv.zero_point = int(act_zp)
            return qconv

    @staticmethod
    def from_float(cls, mod):
        if hasattr(mod, "weight_fake_quant"):
            # assert type(mod) == cls.__QAT_MODULE, " nnq." + cls.__name__ + \
            #     ".from_float only works for " + cls.__QAT_MODULE.__name__
            if type(mod) == cls._NNIQAT_CONV_BN_MODULE:
                mod.weight, mod.bias = fuse_conv_bn_weights(
                    mod.weight, mod.bias, mod.bn.running_mean, mod.bn.running_var,
                    mod.bn.eps, mod.bn.weight, mod.bn.bias)
            assert hasattr(mod, "activation_post_process"), \
                "Input QAT module must have observer attached"
            weight_post_process = mod.weight_fake_quant
            activation_post_process = mod.activation_post_process
        else:
            assert type(mod) == cls._FLOAT_MODULE, \
                " nnq." + cls.__name__ + ".from_float only works for " + \
                cls._FLOAT_MODULE.__name__ + " but got:" + str(type(mod))
            assert hasattr(mod, "qconfig"), \
                "Input float module must have qconfig defined."
            activation_post_process = None if not hasattr(
                mod, "activation_post_process") else mod.activation_post_process
            if type(mod) in [cls._NNI_CONV_RELU_MODULE, cls._NNI_CONV_ADD_MODULE, cls._NNI_CONV_ADD_RELU_MODULE]:
                mod = mod[0]
            weight_post_process = mod.qconfig.weight()
        return cls.get_qconv(mod, activation_post_process, weight_post_process)

    @classmethod
    def from_reference(cls, ref_qconv, output_scale, output_zero_point):
        r"""Create a (fbgemm/qnnpack) quantized module from a reference quantized module
        Args:
            ref_qconv (Module): a reference quantized module, either produced by torch.ao.quantization
                                utilities or provided by the user
            output_scale (float): scale for output Tensor
            output_zero_point (int): zero point for output Tensor
        """
        qconv = cls(
            ref_qconv.in_channels,
            ref_qconv.out_channels,
            ref_qconv.kernel_size,  # type: ignore[arg-type]
            ref_qconv.stride,  # type: ignore[arg-type]
            ref_qconv.padding,  # type: ignore[arg-type]
            ref_qconv.dilation,  # type: ignore[arg-type]
            ref_qconv.groups,
            ref_qconv.bias is not None,  # type: ignore[arg-type]
            ref_qconv.padding_mode,
            device=ref_qconv.weight.device,
            dtype=ref_qconv.weight.dtype)
        qweight = ref_qconv.get_quantized_weight()
        qconv.set_weight_bias(qweight, ref_qconv.bias)
        qconv.scale = float(output_scale)
        qconv.zero_point = int(output_zero_point)
        return qconv


class Conv1d(_ConvNd):
    r"""Applies a 1D convolution over a quantized input signal composed of
    several quantized input planes.

    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.Conv1d`.

    .. note::
        Only `zeros` is supported for the :attr:`padding_mode` argument.

    .. note::
        Only `torch.quint8` is supported for the input data type.


    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
                         parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point

    See :class:`~torch.nn.Conv1d` for other attributes.

    Examples::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
        >>> m = nn.quantized.Conv1d(16, 33, 3, stride=2)
        >>> input = torch.randn(20, 16, 100)
        >>> # quantize input to quint8
        >>> # xdoctest: +SKIP
        >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0,
        ...                                     dtype=torch.quint8)
        >>> output = m(q_input)

    """

    _FLOAT_MODULE = nn.Conv1d
    _NNIQAT_CONV_BN_MODULE = nniqat.ConvBn1d
    _NNI_CONV_RELU_MODULE = nni.ConvReLU1d
    _NNI_CONV_ADD_MODULE: None = None
    _NNI_CONV_ADD_RELU_MODULE: None = None

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: _size_1_t,
                 stride: _size_1_t = 1,
                 padding: _size_1_t = 0,
                 dilation: _size_1_t = 1,
                 groups: int = 1,
                 bias: bool = True,
                 padding_mode: str = 'zeros',
                 device=None,
                 dtype=None):
        factory_kwargs = {'device': device, 'dtype': dtype}
        kernel_size = _single(kernel_size)
        stride = _single(stride)
        padding = padding if isinstance(padding, str) else _single(padding)
        dilation = _single(dilation)

        # Subclasses of _ConvNd needs to call _init rather than __init__. See
        # discussion on PR #49702
        super()._init(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _single(0), groups, bias, padding_mode, **factory_kwargs)

    def _get_name(self):
        return 'QuantizedConv1d'

    def set_weight_bias(self, w: torch.Tensor, b: Optional[torch.Tensor]) -> None:
        if self.padding_mode == 'zeros':
            self._packed_params = torch.ops.quantized.conv1d_prepack(
                w, b, self.stride, self.padding, self.dilation, self.groups)
        else:
            self._packed_params = torch.ops.quantized.conv1d_prepack(
                w, b, self.stride, _pair(0), self.dilation,
                self.groups)

    def _weight_bias(self):
        w, b = torch.ops.quantized.conv1d_unpack(self._packed_params)
        return w, b

    def weight(self):
        return self._weight_bias()[0]

    def bias(self):
        return self._weight_bias()[1]

    def forward(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 3:
            raise ValueError("Input shape must be `(N, C, L)`!")
        if self.padding_mode != 'zeros':
            # Padding in Conv1d is stored as (p, p), need to get (p,)
            _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding[:1])
            input = F.pad(input, _reversed_padding_repeated_twice,
                          mode=self.padding_mode)
        return ops.quantized.conv1d(input, self._packed_params, self.scale, self.zero_point)

    @classmethod
    def from_float(cls, mod):
        r"""Creates a quantized module from a float module or qparams_dict.

        Args:
            mod (Module): a float module, either produced by torch.ao.quantization
                          utilities or provided by the user
        """
        return _ConvNd.from_float(cls, mod)


class Conv2d(_ConvNd):
    r"""Applies a 2D convolution over a quantized input signal composed of
    several quantized input planes.

    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.Conv2d`.

    .. note::
        Only `zeros` is supported for the :attr:`padding_mode` argument.

    .. note::
        Only `torch.quint8` is supported for the input data type.


    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
                         parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point

    See :class:`~torch.nn.Conv2d` for other attributes.

    Examples::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
        >>> # With square kernels and equal stride
        >>> m = nn.quantized.Conv2d(16, 33, 3, stride=2)
        >>> # non-square kernels and unequal stride and with padding
        >>> m = nn.quantized.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
        >>> # non-square kernels and unequal stride and with padding and dilation
        >>> m = nn.quantized.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
        >>> input = torch.randn(20, 16, 50, 100)
        >>> # quantize input to quint8
        >>> # xdoctest: +SKIP
        >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0, dtype=torch.quint8)
        >>> output = m(q_input)

    """
    _FLOAT_MODULE = nn.Conv2d
    _NNIQAT_CONV_BN_MODULE = nniqat.ConvBn2d
    _NNI_CONV_RELU_MODULE = nni.ConvReLU2d
    _NNI_CONV_ADD_MODULE = nni.ConvAdd2d
    _NNI_CONV_ADD_RELU_MODULE = nni.ConvAddReLU2d

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros', device=None, dtype=None):
        factory_kwargs = {'device': device, 'dtype': dtype}
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        # Subclasses of _ConvNd need to call _init rather than __init__. See
        # discussion on PR #49702
        super()._init(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _pair(0), groups, bias, padding_mode, **factory_kwargs)

    def _get_name(self):
        return 'QuantizedConv2d'

    def set_weight_bias(self, w: torch.Tensor, b: Optional[torch.Tensor]) -> None:
        if self.padding_mode == 'zeros':
            self._packed_params = torch.ops.quantized.conv2d_prepack(
                w, b, self.stride, self.padding, self.dilation, self.groups)
        else:
            self._packed_params = torch.ops.quantized.conv2d_prepack(
                w, b, self.stride, _pair(0), self.dilation, self.groups)

    def _weight_bias(self):
        return self._packed_params.unpack()

    def weight(self):
        return self._weight_bias()[0]

    def bias(self):
        return self._weight_bias()[1]

    def forward(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 4:
            raise ValueError("Input shape must be `(N, C, H, W)`!")
        if self.padding_mode != 'zeros':
            _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding)
            input = F.pad(input, _reversed_padding_repeated_twice,
                          mode=self.padding_mode)
        return ops.quantized.conv2d(
            input, self._packed_params, self.scale, self.zero_point)

    @classmethod
    def from_float(cls, mod):
        r"""Creates a quantized module from a float module or qparams_dict.

        Args:
            mod (Module): a float module, either produced by torch.ao.quantization
                          utilities or provided by the user
        """
        return _ConvNd.from_float(cls, mod)


class Conv3d(_ConvNd):
    r"""Applies a 3D convolution over a quantized input signal composed of
    several quantized input planes.

    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.Conv3d`.

    .. note::
        Only `zeros` is supported for the :attr:`padding_mode` argument.

    .. note::
        Only `torch.quint8` is supported for the input data type.


    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
                         parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point

    See :class:`~torch.nn.Conv3d` for other attributes.

    Examples::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
        >>> # With square kernels and equal stride
        >>> m = nn.quantized.Conv3d(16, 33, 3, stride=2)
        >>> # non-square kernels and unequal stride and with padding
        >>> m = nn.quantized.Conv3d(16, 33, (3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2))
        >>> # non-square kernels and unequal stride and with padding and dilation
        >>> m = nn.quantized.Conv3d(16, 33, (3, 5, 5), stride=(1, 2, 2), padding=(1, 2, 2), dilation=(1, 2, 2))
        >>> input = torch.randn(20, 16, 56, 56, 56)
        >>> # quantize input to quint8
        >>> # xdoctest: +SKIP
        >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0, dtype=torch.quint8)
        >>> output = m(q_input)

    """
    _FLOAT_MODULE = nn.Conv3d
    _NNIQAT_CONV_BN_MODULE = nniqat.ConvBn3d
    _NNI_CONV_RELU_MODULE = nni.ConvReLU3d
    _NNI_CONV_ADD_MODULE: None = None
    _NNI_CONV_ADD_RELU_MODULE: None = None

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros', device=None, dtype=None):
        assert padding_mode != 'reflect', "Conv3d does not support reflection padding"
        factory_kwargs = {'device': device, 'dtype': dtype}
        kernel_size = _triple(kernel_size)
        stride = _triple(stride)
        padding = _triple(padding)
        dilation = _triple(dilation)
        # Subclasses of _ConvNd need to call _init rather than __init__. See
        # discussion on PR #49702
        super()._init(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            False, _triple(0), groups, bias, padding_mode, **factory_kwargs)

    def _get_name(self):
        return 'QuantizedConv3d'

    def set_weight_bias(self, w: torch.Tensor, b: Optional[torch.Tensor]) -> None:
        if self.padding_mode == 'zeros':
            self._packed_params = torch.ops.quantized.conv3d_prepack(
                w, b, self.stride, self.padding, self.dilation, self.groups)
        else:
            self._packed_params = torch.ops.quantized.conv3d_prepack(
                w, b, self.stride, _triple(0), self.dilation, self.groups)

    def _weight_bias(self):
        return self._packed_params.unpack()

    def weight(self):
        return self._weight_bias()[0]

    def bias(self):
        return self._weight_bias()[1]

    def forward(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 5:
            raise ValueError("Input shape must be `(N, C, D, H, W)`!")
        if self.padding_mode != 'zeros':
            _reversed_padding_repeated_twice = _reverse_repeat_padding(self.padding)
            input = F.pad(input, _reversed_padding_repeated_twice,
                          mode=self.padding_mode)
        return ops.quantized.conv3d(
            input, self._packed_params, self.scale, self.zero_point)

    @classmethod
    def from_float(cls, mod):
        r"""Creates a quantized module from a float module or qparams_dict.

        Args:
            mod (Module): a float module, either produced by torch.ao.quantization
                          utilities or provided by the user
        """
        return _ConvNd.from_float(cls, mod)

# === Transposed Convolutions ===
MOD = TypeVar('MOD', bound=nn.modules.conv._ConvNd)


class _ConvTransposeNd(_ConvNd):

    _FLOAT_MODULE = MOD

    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 padding, dilation, transposed, output_padding,
                 groups, bias, padding_mode, device=None, dtype=None):
        if padding_mode != 'zeros':
            raise ValueError(f'Only "zeros" padding mode is supported for {self.__class__.__name__}')
        factory_kwargs = {'device': device, 'dtype': dtype}
        # Subclasses of _ConvNd need to call _init rather than __init__. See
        # discussion on PR #49702
        super()._init(
            in_channels, out_channels, kernel_size, stride,
            padding, dilation, transposed, output_padding,
            groups, bias, padding_mode, **factory_kwargs)

    def _input_padding(self, kernel_size: List[int], dilation: List[int], padding: List[int]) -> List[int]:
        res = torch.jit.annotate(List[int], [])
        for kdx in range(len(kernel_size)):
            pad = (dilation[kdx] * (kernel_size[kdx] - 1) - padding[kdx])
            res.append(pad)
        return res

    @classmethod
    def from_float(cls, mod):
        r"""Creates a quantized module from a float module or qparams_dict.
        Args:
            mod (Module): a float module, either produced by torch.ao.quantization
                          utilities or provided by the user
        """
        # derived classes override cls._FLOAT_MODULE attribute
        msg = ' nnq.' + cls.__name__ + '.from_float only works for ' + \
              cls._FLOAT_MODULE.__name__  # type: ignore[attr-defined]
        assert type(mod) == cls._FLOAT_MODULE, msg
        assert hasattr(mod, 'qconfig'), \
            'Input float module must have qconfig defined.'
        weight_post_process = mod.qconfig.weight()
        weight_post_process(mod.weight)
        assert weight_post_process.dtype == torch.qint8, \
            'Weight observer must have a dtype of qint8'
        qweight = _quantize_weight(mod.weight.float(), weight_post_process)
        # the __init__ call used is the one from derived classes and not the one from _ConvTransposeNd
        qconv = cls(mod.in_channels, mod.out_channels, mod.kernel_size,  # type: ignore[call-arg]
                    mod.stride, mod.padding, mod.output_padding, mod.groups,
                    mod.bias is not None, mod.dilation, mod.padding_mode)
        qconv.set_weight_bias(qweight, mod.bias)
        if not hasattr(mod, "activation_post_process") or mod.activation_post_process.dtype == torch.float:
            return qconv  # dynamic quantization doesn't need scale/zero_point
        else:
            act_scale, act_zp = mod.activation_post_process.calculate_qparams()
            qconv.scale = float(act_scale)
            qconv.zero_point = int(act_zp)
            return qconv

    @staticmethod
    def from_reference(cls, ref_qconvt, output_scale, output_zero_point):
        r"""Create a (fbgemm/qnnpack) quantized module from a reference quantized module
        Args:
            ref_qconvt (Module): a reference quantized module, either produced by torch.ao.quantization
                                 utilities or provided by the user
            output_scale (float): scale for output Tensor
            output_zero_point (int): zero point for output Tensor
        """
        qconv = cls(
            ref_qconvt.in_channels,
            ref_qconvt.out_channels,
            ref_qconvt.kernel_size,  # type: ignore[arg-type]
            ref_qconvt.stride,  # type: ignore[arg-type]
            ref_qconvt.padding,  # type: ignore[arg-type]
            ref_qconvt.output_padding,  # type: ignore[arg-type]
            ref_qconvt.groups,
            ref_qconvt.bias is not None,  # type: ignore[arg-type]
            ref_qconvt.dilation,  # type: ignore[arg-type]
            ref_qconvt.padding_mode,
            device=ref_qconvt.weight.device,
            dtype=ref_qconvt.weight.dtype)
        qweight = ref_qconvt.get_quantized_weight()
        qconv.set_weight_bias(qweight, ref_qconvt.bias)
        qconv.scale = float(output_scale)
        qconv.zero_point = int(output_zero_point)
        return qconv


class ConvTranspose1d(_ConvTransposeNd):
    r"""Applies a 1D transposed convolution operator over an input image
    composed of several input planes.
    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.ConvTranspose1d`.

    .. note:: Currently only the QNNPACK engine is implemented.
        Please, set the `torch.backends.quantized.engine = 'qnnpack'`

    For special notes, please, see :class:`~torch.ao.nn.quantized.Conv1d`

    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
                         parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point
    See :class:`~torch.nn.ConvTranspose2d` for other attributes.

    Examples::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
        >>> torch.backends.quantized.engine = 'qnnpack'
        >>> from torch.ao.nn import quantized as nnq
        >>> # With square kernels and equal stride
        >>> m = nnq.ConvTranspose1d(16, 33, 3, stride=2)
        >>> # non-square kernels and unequal stride and with padding
        >>> m = nnq.ConvTranspose1d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
        >>> input = torch.randn(20, 16, 50)
        >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0, dtype=torch.quint8)
        >>> output = m(q_input)
        >>> # exact output size can be also specified as an argument
        >>> input = torch.randn(1, 16, 12)
        >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0, dtype=torch.quint8)
        >>> downsample = nnq.Conv1d(16, 16, 3, stride=2, padding=1)
        >>> upsample = nnq.ConvTranspose1d(16, 16, 3, stride=2, padding=1)
        >>> h = downsample(q_input)
        >>> h.size()
        torch.Size([1, 16, 6])
        >>> # xdoctest: +SKIP("FIXME: output_size is not a parameter)
        >>> output = upsample(h, output_size=input.size())
        >>> output.size()
        torch.Size([1, 16, 12])
    """

    _FLOAT_MODULE = nn.ConvTranspose1d

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, output_padding=0, groups=1, bias=True,
                 dilation=1, padding_mode='zeros', device=None, dtype=None):
        factory_kwargs = {'device': device, 'dtype': dtype}
        kernel_size = _single(kernel_size)
        stride = _single(stride)
        padding = _single(padding)
        dilation = _single(dilation)
        output_padding = _single(output_padding)

        super().__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            True, output_padding, groups, bias, padding_mode, **factory_kwargs)

    def _get_name(self):
        return 'QuantizedConvTranspose1d'

    def set_weight_bias(self, w: torch.Tensor, b: Optional[torch.Tensor]) -> None:
        self._packed_params = torch.ops.quantized.conv_transpose1d_prepack(
            w, b, self.stride, self.padding, self.output_padding, self.dilation,
            self.groups)

    def _weight_bias(self):
        w, b = torch.ops.quantized.conv_transpose1d_unpack(self._packed_params)
        return w, b

    def weight(self):
        (w, _) = self._weight_bias()
        return w

    def bias(self):
        (_, b) = self._weight_bias()
        return b

    def forward(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 3:
            raise ValueError("Input shape must be `(N, C, L)`!")
        return torch.ops.quantized.conv_transpose1d(
            input, self._packed_params, self.scale, self.zero_point)

    @classmethod
    def from_reference(cls, ref_qconvt, output_scale, output_zero_point):
        return _ConvTransposeNd.from_reference(cls, ref_qconvt, output_scale, output_zero_point)


class ConvTranspose2d(_ConvTransposeNd):
    r"""Applies a 2D transposed convolution operator over an input image
    composed of several input planes.
    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.ConvTranspose2d`.

    For special notes, please, see :class:`~torch.ao.nn.quantized.Conv2d`

    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
                         parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point
    See :class:`~torch.nn.ConvTranspose2d` for other attributes.

    Examples::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
        >>> # QNNPACK or FBGEMM as backend
        >>> torch.backends.quantized.engine = 'qnnpack'
        >>> # With square kernels and equal stride
        >>> import torch.ao.nn.quantized as nnq
        >>> m = nnq.ConvTranspose2d(16, 33, 3, stride=2)
        >>> # non-square kernels and unequal stride and with padding
        >>> m = nnq.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
        >>> input = torch.randn(20, 16, 50, 100)
        >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0, dtype=torch.quint8)
        >>> output = m(q_input)
        >>> # exact output size can be also specified as an argument
        >>> input = torch.randn(1, 16, 12, 12)
        >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0, dtype=torch.quint8)
        >>> downsample = nnq.Conv2d(16, 16, 3, stride=2, padding=1)
        >>> upsample = nnq.ConvTranspose2d(16, 16, 3, stride=2, padding=1)
        >>> h = downsample(q_input)
        >>> h.size()
        torch.Size([1, 16, 6, 6])
        >>> # xdoctest: +SKIP("FIXME: output_size is not a parameter)
        >>> output = upsample(h, output_size=input.size())
        >>> output.size()
        torch.Size([1, 16, 12, 12])
    """

    _FLOAT_MODULE = nn.ConvTranspose2d

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, output_padding=0, groups=1, bias=True,
                 dilation=1, padding_mode='zeros', device=None, dtype=None):
        factory_kwargs = {'device': device, 'dtype': dtype}
        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)
        output_padding = _pair(output_padding)

        super().__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            True, output_padding, groups, bias, padding_mode, **factory_kwargs)

    def _get_name(self):
        return 'QuantizedConvTranspose2d'

    def set_weight_bias(self, w: torch.Tensor, b: Optional[torch.Tensor]) -> None:
        self._packed_params = torch.ops.quantized.conv_transpose2d_prepack(
            w, b, self.stride, self.padding, self.output_padding, self.dilation,
            self.groups)

    def _weight_bias(self):
        w, b = torch.ops.quantized.conv2d_unpack(self._packed_params)
        return w, b

    def weight(self):
        (w, _) = self._weight_bias()
        return w

    def bias(self):
        (_, b) = self._weight_bias()
        return b

    def forward(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 4:
            raise ValueError("Input shape must be `(N, C, H, W)`!")
        return ops.quantized.conv_transpose2d(
            input, self._packed_params, self.scale, self.zero_point)

    @classmethod
    def from_reference(cls, ref_qconvt, output_scale, output_zero_point):
        return _ConvTransposeNd.from_reference(cls, ref_qconvt, output_scale, output_zero_point)


class ConvTranspose3d(_ConvTransposeNd):
    r"""Applies a 3D transposed convolution operator over an input image
    composed of several input planes.
    For details on input arguments, parameters, and implementation see
    :class:`~torch.nn.ConvTranspose3d`.

    .. note:: Currently only the FBGEMM engine is implemented.
        Please, set the `torch.backends.quantized.engine = 'fbgemm'`

    For special notes, please, see :class:`~torch.ao.nn.quantized.Conv3d`

    Attributes:
        weight (Tensor): packed tensor derived from the learnable weight
                         parameter.
        scale (Tensor): scalar for the output scale
        zero_point (Tensor): scalar for the output zero point
    See :class:`~torch.nn.ConvTranspose3d` for other attributes.

    Examples::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
        >>> torch.backends.quantized.engine = 'fbgemm'
        >>> from torch.ao.nn import quantized as nnq
        >>> # With cubic kernels and equal stride
        >>> m = nnq.ConvTranspose3d(16, 33, 3, stride=2)
        >>> # non-cubic kernels and unequal stride and with padding
        >>> m = nnq.ConvTranspose3d(16, 33, (3, 3, 5), stride=(2, 1, 1), padding=(4, 2, 2))
        >>> input = torch.randn(20, 16, 50, 100, 100)
        >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0, dtype=torch.quint8)
        >>> output = m(q_input)
        >>> # exact output size can be also specified as an argument
        >>> input = torch.randn(1, 16, 12, 12, 12)
        >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0, dtype=torch.quint8)
        >>> downsample = nnq.Conv3d(16, 16, 3, stride=2, padding=1)
        >>> upsample = nnq.ConvTranspose3d(16, 16, 3, stride=2, padding=1)
        >>> h = downsample(q_input)
        >>> h.size()
        torch.Size([1, 16, 6, 6, 6])
        >>> # xdoctest: +SKIP("FIXME: output_size is not a parameter)
        >>> output = upsample(h, output_size=input.size())
        >>> output.size()
        torch.Size([1, 16, 12, 12, 12])
    """

    _FLOAT_MODULE = nn.ConvTranspose3d

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, output_padding=0, groups=1, bias=True,
                 dilation=1, padding_mode='zeros', device=None, dtype=None):
        factory_kwargs = {'device': device, 'dtype': dtype}
        kernel_size = _triple(kernel_size)
        stride = _triple(stride)
        padding = _triple(padding)
        dilation = _triple(dilation)
        output_padding = _triple(output_padding)

        super().__init__(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            True, output_padding, groups, bias, padding_mode, **factory_kwargs)

    def _get_name(self):
        return 'QuantizedConvTranspose3d'

    def set_weight_bias(self, w: torch.Tensor, b: Optional[torch.Tensor]) -> None:
        self._packed_params = torch.ops.quantized.conv_transpose3d_prepack(
            w, b, self.stride, self.padding, self.output_padding, self.dilation,
            self.groups)

    def _weight_bias(self):
        w, b = torch.ops.quantized.conv3d_unpack(self._packed_params)
        return w, b

    def weight(self):
        (w, _) = self._weight_bias()
        return w

    def bias(self):
        (_, b) = self._weight_bias()
        return b

    def forward(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 5:
            raise ValueError("Input shape must be `(N, C, T, H, W)`!")
        return ops.quantized.conv_transpose3d(
            input, self._packed_params, self.scale, self.zero_point)

    @classmethod
    def from_reference(cls, ref_qconvt, output_scale, output_zero_point):
        return _ConvTransposeNd.from_reference(cls, ref_qconvt, output_scale, output_zero_point)

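The `from_float`/`get_qconv` paths above are normally driven by the eager-mode prepare/convert workflow rather than called directly. A rough sketch of that flow follows; the qconfig choice and the tiny wrapper model are assumptions for illustration only:

    import torch
    import torch.ao.quantization as tq

    model = torch.nn.Sequential(tq.QuantStub(), torch.nn.Conv2d(3, 8, 3), tq.DeQuantStub())
    model.eval()
    model.qconfig = tq.get_default_qconfig('fbgemm')  # assumed backend
    prepared = tq.prepare(model)                      # attaches observers (activation_post_process)
    prepared(torch.randn(1, 3, 32, 32))               # calibration pass
    quantized = tq.convert(prepared)                  # invokes Conv2d.from_float for the conv layer
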
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/dropout.py
ADDED
@@ -0,0 +1,27 @@
import torch

__all__ = ['Dropout']

class Dropout(torch.nn.Dropout):
    r"""This is the quantized equivalent of :class:`~torch.nn.Dropout`.
    And this is a placeholder to enable models where fp32 tensors
    had dropout to work with quantized tensors in train and eval mode.

    Args:
        p: probability of an element to be zeroed
        inplace: can optionally do the operation in-place. Default: ``False``
    """

    def forward(self, input):
        return input

    def _get_name(self):
        return 'QuantizedDropout'

    @classmethod
    def from_float(cls, mod):
        return cls(mod.p, mod.inplace)

    @classmethod
    def from_reference(cls, mod, scale, zero_point):
        return cls(mod.p, mod.inplace)

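Since this quantized Dropout simply returns its input, a converted model keeps its dropout layers without touching the quantized tensors. A tiny illustrative check, assuming the module is reachable as torch.ao.nn.quantized.Dropout:

    import torch
    import torch.ao.nn.quantized as nnq

    qdrop = nnq.Dropout(p=0.5)
    qx = torch.quantize_per_tensor(torch.randn(4), scale=0.1, zero_point=0, dtype=torch.quint8)
    assert qdrop(qx) is qx  # forward is an identity pass-through
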
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/functional_modules.py
ADDED
@@ -0,0 +1,249 @@
from typing import List

import torch
from torch import Tensor
from torch._ops import ops

__all__ = ['FloatFunctional', 'FXFloatFunctional', 'QFunctional']

class FloatFunctional(torch.nn.Module):
    r"""State collector class for float operations.

    The instance of this class can be used instead of the ``torch.`` prefix for
    some operations. See example usage below.

    .. note::

        This class does not provide a ``forward`` hook. Instead, you must use
        one of the underlying functions (e.g. ``add``).

    Examples::

        >>> f_add = FloatFunctional()
        >>> a = torch.tensor(3.0)
        >>> b = torch.tensor(4.0)
        >>> f_add.add(a, b)  # Equivalent to ``torch.add(a, b)``

    Valid operation names:
        - add
        - cat
        - mul
        - add_relu
        - add_scalar
        - mul_scalar
    """
    def __init__(self):
        super().__init__()
        self.activation_post_process = torch.nn.Identity()

    def forward(self, x):
        raise RuntimeError("FloatFunctional is not intended to use the " +
                           "'forward'. Please use the underlying operation")

    r"""Operation equivalent to ``torch.add(Tensor, Tensor)``"""
    def add(self, x: Tensor, y: Tensor) -> Tensor:
        r = torch.add(x, y)
        r = self.activation_post_process(r)
        return r

    r"""Operation equivalent to ``torch.add(Tensor, float)``"""
    def add_scalar(self, x: Tensor, y: float) -> Tensor:
        r = torch.add(x, y)
        # Note: this operation is not observed because the observation is not
        # needed for the quantized op.
        return r

    r"""Operation equivalent to ``torch.mul(Tensor, Tensor)``"""
    def mul(self, x: Tensor, y: Tensor) -> Tensor:
        r = torch.mul(x, y)
        r = self.activation_post_process(r)
        return r

    r"""Operation equivalent to ``torch.mul(Tensor, float)``"""
    def mul_scalar(self, x: Tensor, y: float) -> Tensor:
        r = torch.mul(x, y)
        # Note: this operation is not observed because the observation is not
        # needed for the quantized op.
        return r

    r"""Operation equivalent to ``torch.cat``"""
    def cat(self, x: List[Tensor], dim: int = 0) -> Tensor:
        r = torch.cat(x, dim=dim)
        r = self.activation_post_process(r)
        return r

    r"""Operation equivalent to ``relu(torch.add(x,y))``"""
    def add_relu(self, x: Tensor, y: Tensor) -> Tensor:
        r = torch.add(x, y)
        r = torch.nn.functional.relu(r)
        r = self.activation_post_process(r)
        return r

    r"""Operation equivalent to ``torch.matmul(Tensor, Tensor)``"""
    def matmul(self, x: Tensor, y: Tensor) -> Tensor:
        r = torch.matmul(x, y)
        r = self.activation_post_process(r)
        return r

class FXFloatFunctional(torch.nn.Module):
    r""" module to replace FloatFunctional module before FX graph mode quantization,
    since activation_post_process will be inserted in top level module directly

    Valid operation names:
        - add
        - cat
        - mul
        - add_relu
        - add_scalar
        - mul_scalar
    """
    def forward(self, x):
        raise RuntimeError("FloatFunctional is not intended to use the " +
                           "'forward'. Please use the underlying operation")

    r"""Operation equivalent to ``torch.add(Tensor, Tensor)``"""
    def add(self, x: Tensor, y: Tensor) -> Tensor:
        r = torch.add(x, y)
        return r

    r"""Operation equivalent to ``torch.add(Tensor, float)``"""
    def add_scalar(self, x: Tensor, y: float) -> Tensor:
        r = torch.add(x, y)
        return r

    r"""Operation equivalent to ``torch.mul(Tensor, Tensor)``"""
    def mul(self, x: Tensor, y: Tensor) -> Tensor:
        r = torch.mul(x, y)
        return r

    r"""Operation equivalent to ``torch.mul(Tensor, float)``"""
    def mul_scalar(self, x: Tensor, y: float) -> Tensor:
        r = torch.mul(x, y)
        return r

    r"""Operation equivalent to ``torch.cat``"""
    def cat(self, x: List[Tensor], dim: int = 0) -> Tensor:
|
| 126 |
+
r = torch.cat(x, dim=dim)
|
| 127 |
+
return r
|
| 128 |
+
|
| 129 |
+
r"""Operation equivalent to ``relu(torch.add(x,y))``"""
|
| 130 |
+
def add_relu(self, x: Tensor, y: Tensor) -> Tensor:
|
| 131 |
+
r = torch.add(x, y)
|
| 132 |
+
r = torch.nn.functional.relu(r)
|
| 133 |
+
return r
|
| 134 |
+
|
| 135 |
+
r"""Operation equivalent to ``torch.matmul(Tensor, Tensor)``"""
|
| 136 |
+
def matmul(self, x: Tensor, y: Tensor) -> Tensor:
|
| 137 |
+
r = torch.matmul(x, y)
|
| 138 |
+
return r
|
| 139 |
+
|
| 140 |
+
class QFunctional(torch.nn.Module):
|
| 141 |
+
r"""Wrapper class for quantized operations.
|
| 142 |
+
|
| 143 |
+
The instance of this class can be used instead of the
|
| 144 |
+
``torch.ops.quantized`` prefix. See example usage below.
|
| 145 |
+
|
| 146 |
+
.. note::
|
| 147 |
+
|
| 148 |
+
This class does not provide a ``forward`` hook. Instead, you must use
|
| 149 |
+
one of the underlying functions (e.g. ``add``).
|
| 150 |
+
|
| 151 |
+
Examples::
|
| 152 |
+
|
| 153 |
+
>>> q_add = QFunctional()
|
| 154 |
+
>>> # xdoctest: +SKIP
|
| 155 |
+
>>> a = torch.quantize_per_tensor(torch.tensor(3.0), 1.0, 0, torch.qint32)
|
| 156 |
+
>>> b = torch.quantize_per_tensor(torch.tensor(4.0), 1.0, 0, torch.qint32)
|
| 157 |
+
>>> q_add.add(a, b) # Equivalent to ``torch.ops.quantized.add(a, b, 1.0, 0)``
|
| 158 |
+
|
| 159 |
+
Valid operation names:
|
| 160 |
+
- add
|
| 161 |
+
- cat
|
| 162 |
+
- mul
|
| 163 |
+
- add_relu
|
| 164 |
+
- add_scalar
|
| 165 |
+
- mul_scalar
|
| 166 |
+
"""
|
| 167 |
+
def __init__(self):
|
| 168 |
+
super().__init__()
|
| 169 |
+
self.scale = 1.0
|
| 170 |
+
self.zero_point = 0
|
| 171 |
+
self.activation_post_process = torch.nn.Identity()
|
| 172 |
+
|
| 173 |
+
def _save_to_state_dict(self, destination, prefix, keep_vars):
|
| 174 |
+
super()._save_to_state_dict(destination, prefix, keep_vars)
|
| 175 |
+
destination[prefix + 'scale'] = torch.tensor(self.scale)
|
| 176 |
+
destination[prefix + 'zero_point'] = torch.tensor(self.zero_point)
|
| 177 |
+
|
| 178 |
+
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
|
| 179 |
+
missing_keys, unexpected_keys, error_msgs):
|
| 180 |
+
|
| 181 |
+
self.scale = float(state_dict.pop(prefix + 'scale'))
|
| 182 |
+
self.zero_point = int(state_dict.pop(prefix + 'zero_point'))
|
| 183 |
+
super()._load_from_state_dict(state_dict, prefix, local_metadata, False,
|
| 184 |
+
missing_keys, unexpected_keys, error_msgs)
|
| 185 |
+
|
| 186 |
+
def _get_name(self):
|
| 187 |
+
return 'QFunctional'
|
| 188 |
+
|
| 189 |
+
def extra_repr(self):
|
| 190 |
+
return f'scale={self.scale}, zero_point={self.zero_point}'
|
| 191 |
+
|
| 192 |
+
def forward(self, x):
|
| 193 |
+
raise RuntimeError("Functional is not intended to use the " +
|
| 194 |
+
"'forward'. Please use the underlying operation")
|
| 195 |
+
|
| 196 |
+
r"""Operation equivalent to ``torch.ops.quantized.add``"""
|
| 197 |
+
def add(self, x: Tensor, y: Tensor) -> Tensor:
|
| 198 |
+
r = ops.quantized.add(x, y, scale=self.scale, zero_point=self.zero_point)
|
| 199 |
+
r = self.activation_post_process(r)
|
| 200 |
+
return r
|
| 201 |
+
|
| 202 |
+
r"""Operation equivalent to ``torch.ops.quantized.add(Tensor, float)``"""
|
| 203 |
+
def add_scalar(self, x: Tensor, y: float) -> Tensor:
|
| 204 |
+
r = ops.quantized.add_scalar(x, y)
|
| 205 |
+
# Note: this operation is not observed because the observation is not
|
| 206 |
+
# needed for the quantized op.
|
| 207 |
+
return r
|
| 208 |
+
|
| 209 |
+
r"""Operation equivalent to ``torch.ops.quantized.mul(Tensor, Tensor)``"""
|
| 210 |
+
def mul(self, x: Tensor, y: Tensor) -> Tensor:
|
| 211 |
+
r = ops.quantized.mul(x, y, scale=self.scale, zero_point=self.zero_point)
|
| 212 |
+
r = self.activation_post_process(r)
|
| 213 |
+
return r
|
| 214 |
+
|
| 215 |
+
r"""Operation equivalent to ``torch.ops.quantized.mul(Tensor, float)``"""
|
| 216 |
+
def mul_scalar(self, x: Tensor, y: float) -> Tensor:
|
| 217 |
+
r = ops.quantized.mul_scalar(x, y)
|
| 218 |
+
# Note: this operation is not observed because the observation is not
|
| 219 |
+
# needed for the quantized op.
|
| 220 |
+
return r
|
| 221 |
+
|
| 222 |
+
r"""Operation equivalent to ``torch.ops.quantized.cat``"""
|
| 223 |
+
def cat(self, x: List[Tensor], dim: int = 0) -> Tensor:
|
| 224 |
+
r = ops.quantized.cat(x, scale=self.scale, zero_point=self.zero_point, dim=dim)
|
| 225 |
+
r = self.activation_post_process(r)
|
| 226 |
+
return r
|
| 227 |
+
|
| 228 |
+
r"""Operation equivalent to ``torch.ops.quantized.add_relu``"""
|
| 229 |
+
def add_relu(self, x: Tensor, y: Tensor) -> Tensor:
|
| 230 |
+
r = ops.quantized.add_relu(x, y, scale=self.scale, zero_point=self.zero_point)
|
| 231 |
+
r = self.activation_post_process(r)
|
| 232 |
+
return r
|
| 233 |
+
|
| 234 |
+
r"""Operation equivalent to ``torch.ops.quantized.matmul(Tensor, Tensor)``"""
|
| 235 |
+
def matmul(self, x: Tensor, y: Tensor) -> Tensor:
|
| 236 |
+
r = ops.quantized.matmul(x, y, scale=self.scale, zero_point=self.zero_point)
|
| 237 |
+
# Note: this operation is not observed because the observation is not
|
| 238 |
+
# needed for the quantized op.
|
| 239 |
+
return r
|
| 240 |
+
|
| 241 |
+
@classmethod
|
| 242 |
+
def from_float(cls, mod):
|
| 243 |
+
assert type(mod) == FloatFunctional, \
|
| 244 |
+
"QFunctional.from_float expects an instance of FloatFunctional"
|
| 245 |
+
scale, zero_point = mod.activation_post_process.calculate_qparams() # type: ignore[operator]
|
| 246 |
+
new_mod = QFunctional()
|
| 247 |
+
new_mod.scale = float(scale)
|
| 248 |
+
new_mod.zero_point = int(zero_point)
|
| 249 |
+
return new_mod
|
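A short sketch of how FloatFunctional and QFunctional from the file above fit into eager-mode quantization (the scale/zero_point values below are placeholders and a CPU quantized engine such as fbgemm is assumed): FloatFunctional sits in the float model so a functional op like add can be observed through activation_post_process, and after convert it is swapped for a QFunctional that calls the corresponding torch.ops.quantized kernel with the collected output qparams.

import torch
from torch.ao.nn.quantized import FloatFunctional, QFunctional

f_add = FloatFunctional()  # used in the float/observed model
out = f_add.add(torch.tensor(3.0), torch.tensor(4.0))  # routed through activation_post_process

q_add = QFunctional()               # what convert swaps in
q_add.scale, q_add.zero_point = 0.1, 0   # placeholder output qparams
a = torch.quantize_per_tensor(torch.tensor(3.0), 0.1, 0, torch.quint8)
b = torch.quantize_per_tensor(torch.tensor(4.0), 0.1, 0, torch.quint8)
qout = q_add.add(a, b)              # dispatches to torch.ops.quantized.add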
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__init__.py
ADDED
@@ -0,0 +1,21 @@
+from .linear import Linear
+from .conv import Conv1d, Conv2d, Conv3d, ConvTranspose1d, ConvTranspose2d, ConvTranspose3d
+from .rnn import RNNCell, LSTMCell, GRUCell, LSTM, GRU
+from .sparse import Embedding, EmbeddingBag
+
+__all__ = [
+    'Linear',
+    'Conv1d',
+    'Conv2d',
+    'Conv3d',
+    'ConvTranspose1d',
+    'ConvTranspose2d',
+    'ConvTranspose3d',
+    'RNNCell',
+    'LSTMCell',
+    'GRUCell',
+    'LSTM',
+    'GRU',
+    'Embedding',
+    'EmbeddingBag',
+]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/linear.cpython-311.pyc
ADDED
Binary file (3.71 kB).
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/sparse.cpython-311.pyc
ADDED
Binary file (6.01 kB).
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/rnn.py
ADDED
@@ -0,0 +1,614 @@
+import torch
+import torch.nn as nn
+from torch import Tensor
+from .utils import _quantize_and_dequantize_weight
+from .utils import _quantize_weight
+from typing import Optional, Dict, Any, Tuple
+from torch import _VF
+from torch.nn.utils.rnn import PackedSequence
+
+__all__ = ['RNNCellBase', 'RNNCell', 'LSTMCell', 'GRUCell', 'RNNBase', 'LSTM', 'GRU', 'get_quantized_weight']
+
+def _apply_permutation(tensor: Tensor, permutation: Tensor, dim: int = 1) -> Tensor:
+    return tensor.index_select(dim, permutation)
+
+def _get_weight_and_quantization_params(module, wn):
+    weight = getattr(module, wn)
+    params = [weight]
+    for param_name in [wn + n for n in ["_qscheme", "_dtype", "_scale", "_zero_point", "_axis_int"]]:
+        if hasattr(module, param_name):
+            param = getattr(module, param_name)
+        else:
+            param = None
+        params.append(param)
+    return params
+
+def get_quantized_weight(module, wn):
+    if not hasattr(module, wn):
+        return None
+    params = _get_weight_and_quantization_params(module, wn)
+    weight = _quantize_weight(*params)
+    return weight
+
+def _get_quantize_and_dequantized_weight(module, wn):
+    if not hasattr(module, wn):
+        return None
+    params = _get_weight_and_quantization_params(module, wn)
+    weight = _quantize_and_dequantize_weight(*params)
+    return weight
+
+class RNNCellBase(nn.RNNCellBase):
+    def __init__(self, input_size: int, hidden_size: int, bias: bool, num_chunks: int,
+                 device=None, dtype=None, weight_qparams_dict=None) -> None:
+        super().__init__(input_size, hidden_size, bias, num_chunks, device=device, dtype=dtype)
+        # TODO(jerryzh168): maybe make this arg a required arg
+        if weight_qparams_dict is None:
+            weight_qparams = {
+                "qscheme": torch.per_tensor_affine,
+                "dtype": torch.quint8,
+                "scale": 1.0,
+                "zero_point": 0
+            }
+            weight_qparams_dict = {
+                "weight_ih": weight_qparams,
+                "weight_hh": weight_qparams,
+                "is_decomposed": False,
+            }
+        assert len(weight_qparams_dict) == 3, "Expected length for weight_qparams_dict to be 3 for QuantizedRNNCellBase(Reference)"
+        self._init_weight_qparams_dict(weight_qparams_dict, device)
+
+    def _init_weight_qparams_dict(self, weight_qparams_dict, device):
+        assert weight_qparams_dict is not None
+        self.is_decomposed = weight_qparams_dict["is_decomposed"]
+        for key, weight_qparams in weight_qparams_dict.items():
+            if key == "is_decomposed":
+                continue
+            # TODO: refactor the duplicated code to utils.py
+            weight_qscheme = weight_qparams["qscheme"]
+            weight_dtype = weight_qparams["dtype"]
+            setattr(self, key + "_qscheme", weight_qscheme)
+            setattr(self, key + "_dtype", weight_dtype)
+            assert weight_qscheme in [None, torch.per_tensor_affine, torch.per_channel_affine], \
+                Exception(f"qscheme: {weight_qscheme} is not support in {self._get_name()}")
+            if weight_qscheme is not None:
+                scale = weight_qparams["scale"]
+                scale_tensor = scale.clone().detach() \
+                    if isinstance(scale, torch.Tensor) else \
+                    torch.tensor(scale, dtype=torch.float, device=device)
+                self.register_buffer(key + "_scale", scale_tensor)
+                zp = weight_qparams["zero_point"]
+                zp_tensor = zp.clone().detach() \
+                    if isinstance(zp, torch.Tensor) else \
+                    torch.tensor(zp, dtype=torch.int, device=device)
+                self.register_buffer(key + "_zero_point", zp_tensor)
+                if weight_qscheme == torch.per_channel_affine:
+                    axis = weight_qparams["axis"]
+                    axis_tensor = axis.clone().detach() \
+                        if isinstance(axis, torch.Tensor) else \
+                        torch.tensor(axis, dtype=torch.int, device=device)
+                    self.register_buffer(key + "_axis", axis_tensor)
+                else:
+                    # added for TorchScriptability, not used
+                    self.register_buffer(
+                        key + "_axis", torch.tensor(0, dtype=torch.int, device=device))
+                setattr(self, key + "_axis_int", getattr(self, key + "_axis").item())
+
+    def _get_name(self):
+        return "QuantizedRNNCellBase(Reference)"
+
+    def get_quantized_weight_ih(self):
+        return get_quantized_weight(self, "weight_ih")
+
+    def get_quantized_weight_hh(self):
+        return get_quantized_weight(self, "weight_hh")
+
+    def get_weight_ih(self):
+        return _get_quantize_and_dequantized_weight(self, "weight_ih")
+
+    def get_weight_hh(self):
+        return _get_quantize_and_dequantized_weight(self, "weight_hh")
+
+class RNNCell(RNNCellBase):
+    """
+    We'll store weight_qparams for all the weights (weight_ih and weight_hh),
+    we need to pass in a `weight_qparams_dict` that maps from weight name,
+    e.g. weight_ih, to the weight_qparams for that weight
+    """
+    def __init__(self, input_size: int, hidden_size: int, bias: bool = True, nonlinearity: str = "tanh",
+                 device=None, dtype=None, weight_qparams_dict: Optional[Dict[str, Any]] = None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype, 'weight_qparams_dict': weight_qparams_dict}
+        super().__init__(input_size, hidden_size, bias, num_chunks=1, **factory_kwargs)
+        self.nonlinearity = nonlinearity
+
+    def _get_name(self):
+        return "QuantizedRNNCell(Reference)"
+
+    # TODO: refactor nn.RNNCell to have a _forward that takes weight_ih and weight_hh as input
+    # and remove duplicated code, same for the other two Cell modules
+    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
+        assert input.dim() in (1, 2), \
+            f"RNNCell: Expected input to be 1-D or 2-D but received {input.dim()}-D tensor"
+        is_batched = input.dim() == 2
+        if not is_batched:
+            input = input.unsqueeze(0)
+
+        if hx is None:
+            hx = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)
+        else:
+            hx = hx.unsqueeze(0) if not is_batched else hx
+
+        if self.nonlinearity == "tanh":
+            ret = _VF.rnn_tanh_cell(
+                input, hx,
+                self.get_weight_ih(), self.get_weight_hh(),
+                self.bias_ih, self.bias_hh,
+            )
+        elif self.nonlinearity == "relu":
+            ret = _VF.rnn_relu_cell(
+                input, hx,
+                self.get_weight_ih(), self.get_weight_hh(),
+                self.bias_ih, self.bias_hh,
+            )
+        else:
+            ret = input  # TODO: remove when jit supports exception flow
+            raise RuntimeError(
+                f"Unknown nonlinearity: {self.nonlinearity}")
+
+        if not is_batched:
+            ret = ret.squeeze(0)
+
+        return ret
+
+    @classmethod
+    def from_float(cls, mod, weight_qparams_dict):
+        ref_mod = cls(
+            mod.input_size,
+            mod.hidden_size,
+            mod.bias,
+            mod.nonlinearity,
+            mod.weight_ih.device,
+            mod.weight_ih.dtype,
+            weight_qparams_dict)
+        ref_mod.weight_ih = mod.weight_ih
+        ref_mod.weight_hh = mod.weight_hh
+        ref_mod.bias_ih = mod.bias_ih
+        ref_mod.bias_hh = mod.bias_hh
+        return ref_mod
+
+class LSTMCell(RNNCellBase):
+    """
+    We'll store weight_qparams for all the weights (weight_ih and weight_hh),
+    we need to pass in a `weight_qparams_dict` that maps from weight name,
+    e.g. weight_ih, to the weight_qparams for that weight
+    """
+    def __init__(self, input_size: int, hidden_size: int, bias: bool = True,
+                 device=None, dtype=None, weight_qparams_dict: Optional[Dict[str, Any]] = None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype, 'weight_qparams_dict': weight_qparams_dict}
+        super().__init__(input_size, hidden_size, bias, num_chunks=4, **factory_kwargs)
+
+    def _get_name(self):
+        return "QuantizedLSTMCell(Reference)"
+
+    def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
+        assert input.dim() in (1, 2), \
+            f"LSTMCell: Expected input to be 1-D or 2-D but received {input.dim()}-D tensor"
+        is_batched = input.dim() == 2
+        if not is_batched:
+            input = input.unsqueeze(0)
+
+        if hx is None:
+            zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)
+            hx = (zeros, zeros)
+        else:
+            hx = (hx[0].unsqueeze(0), hx[1].unsqueeze(0)) if not is_batched else hx
+
+        ret = _VF.lstm_cell(
+            input, hx,
+            self.get_weight_ih(), self.get_weight_hh(),
+            self.bias_ih, self.bias_hh,
+        )
+
+        if not is_batched:
+            ret = (ret[0].squeeze(0), ret[1].squeeze(0))
+        return ret
+
+    @classmethod
+    def from_float(cls, mod, weight_qparams_dict):
+        ref_mod = cls(
+            mod.input_size,
+            mod.hidden_size,
+            mod.bias,
+            mod.weight_ih.device,
+            mod.weight_ih.dtype,
+            weight_qparams_dict)
+        ref_mod.weight_ih = mod.weight_ih
+        ref_mod.weight_hh = mod.weight_hh
+        ref_mod.bias_ih = mod.bias_ih
+        ref_mod.bias_hh = mod.bias_hh
+        return ref_mod
+
+class GRUCell(RNNCellBase):
+    """
+    We'll store weight_qparams for all the weights (weight_ih and weight_hh),
+    we need to pass in a `weight_qparams_dict` that maps from weight name,
+    e.g. weight_ih, to the weight_qparams for that weight
+    """
+    def __init__(self, input_size: int, hidden_size: int, bias: bool = True,
+                 device=None, dtype=None, weight_qparams_dict: Optional[Dict[str, Any]] = None) -> None:
+        factory_kwargs = {'device': device, 'dtype': dtype, 'weight_qparams_dict': weight_qparams_dict}
+        super().__init__(input_size, hidden_size, bias, num_chunks=3, **factory_kwargs)
+
+    def _get_name(self):
+        return "QuantizedGRUCell(Reference)"
+
+    def forward(self, input: Tensor, hx: Optional[Tensor] = None) -> Tensor:
+        assert input.dim() in (1, 2), \
+            f"GRUCell: Expected input to be 1-D or 2-D but received {input.dim()}-D tensor"
+        is_batched = input.dim() == 2
+        if not is_batched:
+            input = input.unsqueeze(0)
+
+        if hx is None:
+            hx = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)
+        else:
+            hx = hx.unsqueeze(0) if not is_batched else hx
+
+        ret = _VF.gru_cell(
+            input, hx,
+            self.get_weight_ih(), self.get_weight_hh(),
+            self.bias_ih, self.bias_hh,
+        )
+
+        if not is_batched:
+            ret = ret.squeeze(0)
+
+        return ret
+
+    @classmethod
+    def from_float(cls, mod, weight_qparams_dict):
+        ref_mod = cls(
+            mod.input_size,
+            mod.hidden_size,
+            mod.bias,
+            mod.weight_ih.device,
+            mod.weight_ih.dtype,
+            weight_qparams_dict)
+        ref_mod.weight_ih = mod.weight_ih
+        ref_mod.weight_hh = mod.weight_hh
+        ref_mod.bias_ih = mod.bias_ih
+        ref_mod.bias_hh = mod.bias_hh
+        return ref_mod
+
+class RNNBase(nn.RNNBase):
+    def __init__(self, mode: str, input_size: int, hidden_size: int,
+                 num_layers: int = 1, bias: bool = True, batch_first: bool = False,
+                 dropout: float = 0., bidirectional: bool = False, proj_size: int = 0,
+                 device=None, dtype=None,
+                 weight_qparams_dict: Optional[Dict[str, Any]] = None) -> None:
+        super().__init__(
+            mode, input_size, hidden_size, num_layers, bias, batch_first, dropout,
+            bidirectional, proj_size, device, dtype
+        )
+        # TODO(jerryzh168): maybe make this arg a required arg
+        if weight_qparams_dict is None:
+            weight_qparams = {
+                'qscheme': torch.per_tensor_affine,
+                'dtype': torch.quint8,
+                'scale': 1.0,
+                'zero_point': 0
+            }
+            weight_qparams_dict = {"is_decomposed": False}  # type: ignore[dict-item]
+            for wn in self._flat_weights_names:
+                if wn.startswith("weight"):
+                    weight_qparams_dict[wn] = weight_qparams
+        self._init_weight_qparams_dict(weight_qparams_dict, device)
+
+    def _init_weight_qparams_dict(self, weight_qparams_dict, device):
+        self.is_decomposed = weight_qparams_dict["is_decomposed"]
+        for key, weight_qparams in weight_qparams_dict.items():
+            if key == "is_decomposed":
+                continue
+            weight_qscheme = weight_qparams["qscheme"]
+            weight_dtype = weight_qparams["dtype"]
+            setattr(self, key + "_qscheme", weight_qscheme)
+            setattr(self, key + "_dtype", weight_dtype)
+            assert weight_qscheme in [None, torch.per_tensor_affine, torch.per_channel_affine], \
+                Exception(f"qscheme: {weight_qscheme} is not support in {self._get_name()}")
+            if weight_qscheme is not None:
+                self.register_buffer(
+                    key + "_scale",
+                    torch.tensor(weight_qparams["scale"], dtype=torch.float, device=device))
+                self.register_buffer(
+                    key + "_zero_point",
+                    torch.tensor(weight_qparams["zero_point"], dtype=torch.int, device=device))
+                if weight_qscheme == torch.per_channel_affine:
+                    self.register_buffer(
+                        key + "_axis",
+                        torch.tensor(weight_qparams["axis"], dtype=torch.int, device=device))
+                else:
+                    # added for TorchScriptability, not used
+                    self.register_buffer(
+                        key + "_axis", torch.tensor(0, dtype=torch.int, device=device))
+                setattr(self, key + "_axis_int", getattr(self, key + "_axis").item())
+
+class LSTM(RNNBase):
+    """ Reference Quantized LSTM Module
+    We'll store weight_qparams for all the weights in _flat_weights, we need to pass in
+    a `weight_qparams_dict` that maps from weight name, e.g. weight_ih_l0,
+    to the weight_qparams for that weight
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__('LSTM', *args, **kwargs)
+
+    # Same as above, see torch/nn/modules/module.py::_forward_unimplemented
+    def permute_hidden(self,  # type: ignore[override]
+                       hx: Tuple[Tensor, Tensor],
+                       permutation: Optional[Tensor]
+                       ) -> Tuple[Tensor, Tensor]:
+        if permutation is None:
+            return hx
+        return _apply_permutation(hx[0], permutation), _apply_permutation(hx[1], permutation)
+
+    def get_expected_cell_size(self, input: Tensor, batch_sizes: Optional[Tensor]) -> Tuple[int, int, int]:
+        if batch_sizes is not None:
+            mini_batch = int(batch_sizes[0])
+        else:
+            mini_batch = input.size(0) if self.batch_first else input.size(1)
+        num_directions = 2 if self.bidirectional else 1
+        expected_hidden_size = (self.num_layers * num_directions,
+                                mini_batch, self.hidden_size)
+        return expected_hidden_size
+
+    # In the future, we should prevent mypy from applying contravariance rules here.
+    # See torch/nn/modules/module.py::_forward_unimplemented
+    def check_forward_args(self,  # type: ignore[override]
+                           input: Tensor,
+                           hidden: Tuple[Tensor, Tensor],
+                           batch_sizes: Optional[Tensor],
+                           ):
+        self.check_input(input, batch_sizes)
+        self.check_hidden_size(hidden[0], self.get_expected_hidden_size(input, batch_sizes),
+                               'Expected hidden[0] size {}, got {}')
+        self.check_hidden_size(hidden[1], self.get_expected_cell_size(input, batch_sizes),
+                               'Expected hidden[1] size {}, got {}')
+
+    def get_quantized_weight_bias_dict(self):
+        """ dictionary from flat_weight_name to quantized weight or (unquantized) bias
+        e.g.
+        {
+            "weight_ih_l0": quantized_weight,
+            "bias_ih_l0": unquantized_bias,
+            ...
+        }
+        """
+        quantized_weight_bias_dict = {}
+        for wn in self._flat_weights_names:
+            if hasattr(self, wn):
+                if wn.startswith("weight"):
+                    weight_or_bias = get_quantized_weight(self, wn)
+                else:
+                    weight_or_bias = getattr(self, wn)
+            else:
+                weight_or_bias = None
+            quantized_weight_bias_dict[wn] = weight_or_bias
+        return quantized_weight_bias_dict
+
+    def get_flat_weights(self):
+        flat_weights = []
+        for wn in self._flat_weights_names:
+            if hasattr(self, wn):
+                weight = getattr(self, wn)
+                if wn.startswith("weight"):
+                    params = _get_weight_and_quantization_params(self, wn)
+                    weight = _quantize_and_dequantize_weight(*params)
+            else:
+                weight = None
+            flat_weights.append(weight)
+        return flat_weights
+
+    def forward(self, input, hx=None):  # noqa: F811
+        orig_input = input
+        # xxx: isinstance check needs to be in conditional for TorchScript to compile
+        batch_sizes = None
+        if isinstance(orig_input, PackedSequence):
+            input, batch_sizes, sorted_indices, unsorted_indices = input
+            max_batch_size = int(batch_sizes[0])
+        else:
+            batch_sizes = None
+            is_batched = input.dim() == 3
+            batch_dim = 0 if self.batch_first else 1
+            if not is_batched:
+                input = input.unsqueeze(batch_dim)
+            max_batch_size = input.size(0) if self.batch_first else input.size(1)
+            sorted_indices = None
+            unsorted_indices = None
+
+        if hx is None:
+            num_directions = 2 if self.bidirectional else 1
+            real_hidden_size = self.proj_size if self.proj_size > 0 else self.hidden_size
+            h_zeros = torch.zeros(self.num_layers * num_directions,
+                                  max_batch_size, real_hidden_size,
+                                  dtype=input.dtype, device=input.device)
+            c_zeros = torch.zeros(self.num_layers * num_directions,
+                                  max_batch_size, self.hidden_size,
+                                  dtype=input.dtype, device=input.device)
+            hx = (h_zeros, c_zeros)
+        else:
+            if batch_sizes is None:  # If not PackedSequence input.
+                if is_batched:  # type: ignore[possibly-undefined]
+                    if (hx[0].dim() != 3 or hx[1].dim() != 3):
+                        msg = ("For batched 3-D input, hx and cx should "
+                               f"also be 3-D but got ({hx[0].dim()}-D, {hx[1].dim()}-D) tensors")
+                        raise RuntimeError(msg)
+                else:
+                    if hx[0].dim() != 2 or hx[1].dim() != 2:
+                        msg = ("For unbatched 2-D input, hx and cx should "
+                               f"also be 2-D but got ({hx[0].dim()}-D, {hx[1].dim()}-D) tensors")
+                        raise RuntimeError(msg)
+                    hx = (hx[0].unsqueeze(1), hx[1].unsqueeze(1))
+
+            # Each batch of the hidden state should match the input sequence that
+            # the user believes he/she is passing in.
+            hx = self.permute_hidden(hx, sorted_indices)
+
+        self.check_forward_args(input, hx, batch_sizes)
+        if batch_sizes is None:
+            result = _VF.lstm(input, hx, self.get_flat_weights(), self.bias, self.num_layers,
+                              self.dropout, self.training, self.bidirectional, self.batch_first)
+        else:
+            result = _VF.lstm(input, batch_sizes, hx, self.get_flat_weights(), self.bias,
+                              self.num_layers, self.dropout, self.training, self.bidirectional)
+        output = result[0]
+        hidden = result[1:]
+        # xxx: isinstance check needs to be in conditional for TorchScript to compile
+        if isinstance(orig_input, PackedSequence):
+            output_packed = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
+            return output_packed, self.permute_hidden(hidden, unsorted_indices)
+        else:
+            if not is_batched:  # type: ignore[possibly-undefined]
+                output = output.squeeze(batch_dim)  # type: ignore[possibly-undefined]
+                hidden = (hidden[0].squeeze(1), hidden[1].squeeze(1))
+            return output, self.permute_hidden(hidden, unsorted_indices)
+
+    def _get_name(self):
+        return "QuantizedLSTM(Reference)"
+
+    @classmethod
+    def from_float(cls, mod, weight_qparams_dict):
+        ref_mod = cls(
+            mod.input_size,
+            mod.hidden_size,
+            mod.num_layers,
+            mod.bias,
+            mod.batch_first,
+            mod.dropout,
+            mod.bidirectional,
+            weight_qparams_dict=weight_qparams_dict)
+        for wn in mod._flat_weights_names:
+            setattr(ref_mod, wn, getattr(mod, wn))
+        return ref_mod
+
+class GRU(RNNBase):
+    """ Reference Quantized GRU Module
+    We'll store weight_qparams for all the weights in _flat_weights, we need to pass in
+    a `weight_qparams_dict` that maps from weight name, e.g. weight_ih_l0,
+    to the weight_qparams for that weight
+    """
+    def __init__(self, *args, **kwargs):
+        if 'proj_size' in kwargs:
+            raise ValueError("proj_size argument is only supported for LSTM, not RNN or GRU")
+        super().__init__('GRU', *args, **kwargs)
+
+    def get_quantized_weight_bias_dict(self):
+        """ dictionary from flat_weight_name to quantized weight or (unquantized) bias
+        e.g.
+        {
+            "weight_ih_l0": quantized_weight,
+            "bias_ih_l0": unquantized_bias,
+            ...
+        }
+        """
+        quantized_weight_bias_dict = {}
+        for wn in self._flat_weights_names:
+            if hasattr(self, wn):
+                if wn.startswith("weight"):
+                    weight_or_bias = get_quantized_weight(self, wn)
+                else:
+                    weight_or_bias = getattr(self, wn)
+            else:
+                weight_or_bias = None
+            quantized_weight_bias_dict[wn] = weight_or_bias
+        return quantized_weight_bias_dict
+
+    def get_flat_weights(self):
+        flat_weights = []
+        for wn in self._flat_weights_names:
+            if hasattr(self, wn):
+                weight = getattr(self, wn)
+                if wn.startswith("weight"):
+                    params = _get_weight_and_quantization_params(self, wn)
+                    weight = _quantize_and_dequantize_weight(*params)
+            else:
+                weight = None
+            flat_weights.append(weight)
+        return flat_weights
+
+    def forward(self, input, hx=None):  # noqa: F811
+        # Note: this is copied from the forward of GRU in https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/rnn.py
+        # only changed self._flat_weights to self.get_flat_weights()
+        # TODO: maybe we can try inheriting from that class and define get_flat_weights
+        # as a @property? this might interfere with TorchScript, if we remove that
+        # requirement in the future we should be able to do this
+        orig_input = input
+        # xxx: isinstance check needs to be in conditional for TorchScript to compile
+        if isinstance(orig_input, PackedSequence):
+            input, batch_sizes, sorted_indices, unsorted_indices = input
+            max_batch_size = int(batch_sizes[0])
+        else:
+            batch_sizes = None
+            assert (input.dim() in (2, 3)), f"GRU: Expected input to be 2-D or 3-D but received {input.dim()}-D tensor"
+            is_batched = input.dim() == 3
+            batch_dim = 0 if self.batch_first else 1
+            if not is_batched:
+                input = input.unsqueeze(batch_dim)
+                if hx is not None:
+                    if hx.dim() != 2:
+                        raise RuntimeError(
+                            f"For unbatched 2-D input, hx should also be 2-D but got {hx.dim()}-D tensor")
+                    hx = hx.unsqueeze(1)
+            else:
+                if hx is not None and hx.dim() != 3:
+                    raise RuntimeError(
+                        f"For batched 3-D input, hx should also be 3-D but got {hx.dim()}-D tensor")
+            max_batch_size = input.size(0) if self.batch_first else input.size(1)
+            sorted_indices = None
+            unsorted_indices = None
+
+        if hx is None:
+            num_directions = 2 if self.bidirectional else 1
+            hx = torch.zeros(self.num_layers * num_directions,
+                             max_batch_size, self.hidden_size,
+                             dtype=input.dtype, device=input.device)
+        else:
+            # Each batch of the hidden state should match the input sequence that
+            # the user believes he/she is passing in.
+            hx = self.permute_hidden(hx, sorted_indices)
+
+        self.check_forward_args(input, hx, batch_sizes)
+        if batch_sizes is None:
+            result = _VF.gru(input, hx, self.get_flat_weights(), self.bias, self.num_layers,
+                             self.dropout, self.training, self.bidirectional, self.batch_first)
+        else:
+            result = _VF.gru(input, batch_sizes, hx, self.get_flat_weights(), self.bias,
+                             self.num_layers, self.dropout, self.training, self.bidirectional)
+        output = result[0]
+        hidden = result[1]
+
+        # xxx: isinstance check needs to be in conditional for TorchScript to compile
+        if isinstance(orig_input, PackedSequence):
+            output_packed = PackedSequence(output, batch_sizes, sorted_indices, unsorted_indices)
+            return output_packed, self.permute_hidden(hidden, unsorted_indices)
+        else:
+            if not is_batched:  # type: ignore[possibly-undefined]
+                output = output.squeeze(batch_dim)  # type: ignore[possibly-undefined]
+                hidden = hidden.squeeze(1)
+
+            return output, self.permute_hidden(hidden, unsorted_indices)
+
+    def _get_name(self):
+        return "QuantizedGRU(Reference)"
+
+    @classmethod
+    def from_float(cls, mod, weight_qparams_dict):
+        ref_mod = cls(
+            mod.input_size,
+            mod.hidden_size,
+            mod.num_layers,
+            mod.bias,
+            mod.batch_first,
+            mod.dropout,
+            mod.bidirectional,
+            weight_qparams_dict=weight_qparams_dict)
+        for wn in mod._flat_weights_names:
+            setattr(ref_mod, wn, getattr(mod, wn))
+        return ref_mod
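A sketch of building the reference quantized LSTM defined above from a float LSTM (the qparams values are placeholders; in a real convert flow they come from weight observers). The reference module keeps fp32 weights plus per-weight quantization parameters and fake-quantizes the weights inside forward via get_flat_weights().

import torch
from torch.ao.nn.quantized.reference.modules.rnn import LSTM as RefLSTM

float_lstm = torch.nn.LSTM(input_size=8, hidden_size=16, num_layers=1)
weight_qparams = {"qscheme": torch.per_tensor_affine, "dtype": torch.quint8,
                  "scale": 0.02, "zero_point": 0}  # placeholder qparams
weight_qparams_dict = {"is_decomposed": False}
for name in float_lstm._flat_weights_names:
    if name.startswith("weight"):
        weight_qparams_dict[name] = weight_qparams

ref_lstm = RefLSTM.from_float(float_lstm, weight_qparams_dict)
x = torch.randn(5, 3, 8)          # (seq_len, batch, input_size)
out, (h, c) = ref_lstm(x)         # runs with fake-quantized copies of the weights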
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/utils.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import typing
|
| 3 |
+
|
| 4 |
+
__all__ = [
|
| 5 |
+
"ReferenceQuantizedModule",
|
| 6 |
+
]
|
| 7 |
+
|
| 8 |
+
class ReferenceQuantizedModule(torch.nn.Module):
|
| 9 |
+
def _init_weight_qparams(self, weight_qparams, device):
|
| 10 |
+
if weight_qparams is None:
|
| 11 |
+
weight_qparams = {
|
| 12 |
+
"qscheme": torch.per_tensor_affine,
|
| 13 |
+
"dtype": torch.quint8,
|
| 14 |
+
"scale": 1.0,
|
| 15 |
+
"zero_point": 0
|
| 16 |
+
}
|
| 17 |
+
self.weight_qscheme: torch.qscheme = weight_qparams["qscheme"]
|
| 18 |
+
self.weight_dtype = weight_qparams["dtype"]
|
| 19 |
+
assert self.weight_qscheme in [
|
| 20 |
+
None, torch.per_tensor_affine, torch.per_channel_affine,
|
| 21 |
+
torch.per_channel_affine_float_qparams], \
|
| 22 |
+
Exception(f"qscheme: {self.weight_qscheme} is not support in reference quantized {self._get_name()}")
|
| 23 |
+
if self.weight_dtype in [torch.quint8, torch.qint8, torch.quint4x2, torch.qint32]:
|
| 24 |
+
zero_point_dtype = weight_qparams["zero_point"].dtype if \
|
| 25 |
+
isinstance(weight_qparams["zero_point"], torch.Tensor) else \
|
| 26 |
+
torch.int
|
| 27 |
+
w_scale = weight_qparams["scale"]
|
| 28 |
+
w_scale_tensor = w_scale.clone().detach() \
|
| 29 |
+
if isinstance(w_scale, torch.Tensor) \
|
| 30 |
+
else torch.tensor(w_scale, dtype=torch.float, device=device)
|
| 31 |
+
self.register_buffer("weight_scale", w_scale_tensor)
|
| 32 |
+
w_zp = weight_qparams["zero_point"]
|
| 33 |
+
w_zp_tensor = w_zp.clone().detach() \
|
| 34 |
+
if isinstance(w_zp, torch.Tensor) \
|
| 35 |
+
else torch.tensor(w_zp, dtype=zero_point_dtype, device=device)
|
| 36 |
+
self.register_buffer("weight_zero_point", w_zp_tensor)
|
| 37 |
+
if self.weight_qscheme in [torch.per_channel_affine, torch.per_channel_affine_float_qparams]:
|
| 38 |
+
w_axis = weight_qparams["axis"]
|
| 39 |
+
w_axis_tensor = w_axis.clone().detach() \
|
| 40 |
+
if isinstance(w_axis, torch.Tensor) \
|
| 41 |
+
else torch.tensor(w_axis, dtype=torch.int, device=device)
|
| 42 |
+
self.register_buffer("weight_axis", w_axis_tensor)
|
| 43 |
+
else:
|
| 44 |
+
# added for TorchScriptability, not used
|
| 45 |
+
self.register_buffer(
|
| 46 |
+
"weight_axis", torch.tensor(0, dtype=torch.int, device=device))
|
| 47 |
+
else:
|
| 48 |
+
# added for TorchScriptability, and for torch.float
|
| 49 |
+
self.register_buffer("weight_scale", torch.tensor(1.0, dtype=torch.float, device=device))
|
| 50 |
+
self.register_buffer("weight_zero_point", torch.tensor(0, dtype=torch.int, device=device))
|
| 51 |
+
self.register_buffer(
|
| 52 |
+
"weight_axis", torch.tensor(0, dtype=torch.int, device=device))
|
| 53 |
+
self.is_decomposed: bool = weight_qparams.get("is_decomposed", False)
|
| 54 |
+
# store weight_axis as weight_axis_int due to some constraints of torchdynamo.export
|
| 55 |
+
# for capturing `.item` operations
|
| 56 |
+
self.weight_axis_int: int = self.weight_axis.item() # type: ignore[operator, assignment]
|
| 57 |
+
self.weight_quant_min: typing.Optional[int] = weight_qparams.get("quant_min", None)
|
| 58 |
+
self.weight_quant_max: typing.Optional[int] = weight_qparams.get("quant_max", None)
|
| 59 |
+
|
| 60 |
+
def get_weight(self):
|
| 61 |
+
"""
|
| 62 |
+
Fake quantize (quantize and dequantize) the weight with
|
| 63 |
+
the quantization parameters for weight, this is used to
|
| 64 |
+
simulate the numerics for the quantized weight in a quantized
|
| 65 |
+
model
|
| 66 |
+
"""
|
| 67 |
+
# suppress mypy warning
|
| 68 |
+
assert isinstance(self.weight_scale, torch.Tensor)
|
| 69 |
+
assert isinstance(self.weight_zero_point, torch.Tensor)
|
| 70 |
+
if self.is_decomposed:
|
| 71 |
+
return _quantize_and_dequantize_weight_decomposed(
|
| 72 |
+
self.weight, # type: ignore[arg-type]
|
| 73 |
+
self.weight_qscheme,
|
| 74 |
+
self.weight_dtype,
|
| 75 |
+
self.weight_scale,
|
| 76 |
+
self.weight_zero_point,
|
| 77 |
+
self.weight_axis_int,
|
| 78 |
+
self.weight_quant_min,
|
| 79 |
+
self.weight_quant_max)
|
| 80 |
+
else:
|
| 81 |
+
return _quantize_and_dequantize_weight(
|
| 82 |
+
self.weight, # type: ignore[arg-type]
|
| 83 |
+
self.weight_qscheme,
|
| 84 |
+
self.weight_dtype,
|
| 85 |
+
self.weight_scale,
|
| 86 |
+
self.weight_zero_point,
|
| 87 |
+
self.weight_axis_int)
|
| 88 |
+
|
| 89 |
+
def get_quantized_weight(self):
|
| 90 |
+
# suppress mypy warning
|
| 91 |
+
assert isinstance(self.weight_scale, torch.Tensor)
|
| 92 |
+
assert isinstance(self.weight_zero_point, torch.Tensor)
|
| 93 |
+
# assert isinstance(self.weight_axis, torch.Tensor)
|
| 94 |
+
if self.is_decomposed:
|
| 95 |
+
return _quantize_weight_decomposed(
|
| 96 |
+
self.weight, # type: ignore[arg-type]
|
| 97 |
+
self.weight_qscheme,
|
| 98 |
+
self.weight_dtype,
|
| 99 |
+
self.weight_scale,
|
| 100 |
+
self.weight_zero_point,
|
| 101 |
+
self.weight_axis_int,
|
| 102 |
+
self.weight_quant_min,
|
| 103 |
+
self.weight_quant_max)
|
| 104 |
+
else:
|
| 105 |
+
return _quantize_weight(
|
| 106 |
+
self.weight, # type: ignore[arg-type]
|
| 107 |
+
self.weight_qscheme,
|
| 108 |
+
self.weight_dtype,
|
| 109 |
+
self.weight_scale,
|
| 110 |
+
self.weight_zero_point,
|
| 111 |
+
self.weight_axis_int)
|
| 112 |
+
|
| 113 |
+
def _save_to_state_dict(self, destination, prefix, keep_vars):
|
| 114 |
+
super()._save_to_state_dict(destination, prefix, keep_vars)
|
| 115 |
+
_save_weight_qparams(
|
| 116 |
+
destination, prefix, self.weight_qscheme, self.weight_dtype,
|
| 117 |
+
self.weight_scale, self.weight_zero_point, self.weight_axis)
|
| 118 |
+
|
| 119 |
+
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
|
| 120 |
+
missing_keys, unexpected_keys, error_msgs):
|
| 121 |
+
for key in _get_weight_qparam_keys(state_dict, prefix):
|
| 122 |
+
setattr(self, key, state_dict[prefix + key])
|
| 123 |
+
state_dict.pop(prefix + key)
|
| 124 |
+
|
| 125 |
+
super()._load_from_state_dict(
|
| 126 |
+
state_dict, prefix, local_metadata, False,
|
| 127 |
+
missing_keys, unexpected_keys, error_msgs)
|
| 128 |
+
|
| 129 |
+
def _quantize_weight_decomposed(
|
| 130 |
+
weight: torch.Tensor,
|
| 131 |
+
weight_qscheme: torch.qscheme,
|
| 132 |
+
weight_dtype: torch.dtype,
|
| 133 |
+
weight_scale: torch.Tensor,
|
| 134 |
+
weight_zero_point: torch.Tensor,
|
| 135 |
+
weight_axis: int,
|
| 136 |
+
weight_quant_min: typing.Optional[int],
|
| 137 |
+
weight_quant_max: typing.Optional[int],
|
| 138 |
+
) -> torch.Tensor:
|
| 139 |
+
_DTYPE_TO_QVALUE_BOUNDS = {
|
| 140 |
+
torch.uint8: (0, 255),
|
| 141 |
+
torch.int8: (-128, 127),
|
| 142 |
+
torch.int32: (-(2**31), 2**31 - 1),
|
| 143 |
+
}
|
| 144 |
+
# TODO: add an util function for converting qdtype to dtype
|
| 145 |
+
_QDTYPE_TO_UNDERLYING_INT_REPR_DTYPE = {
|
| 146 |
+
torch.quint8: torch.uint8,
|
| 147 |
+
torch.qint8: torch.int8,
|
| 148 |
+
torch.qint32: torch.int32,
|
| 149 |
+
}
|
| 150 |
+
if weight_qscheme == torch.per_tensor_affine:
|
| 151 |
+
if weight_dtype in [torch.quint8, torch.qint8, torch.qint32]:
|
| 152 |
+
weight_dtype_ = _QDTYPE_TO_UNDERLYING_INT_REPR_DTYPE[weight_dtype]
|
| 153 |
+
if weight_quant_min is None or weight_quant_max is None:
|
| 154 |
+
weight_quant_min, weight_quant_max = _DTYPE_TO_QVALUE_BOUNDS[weight_dtype_]
|
| 155 |
+
weight = torch.ops.quantized_decomposed.quantize_per_tensor(
|
| 156 |
+
weight,
|
| 157 |
+
weight_scale,
|
| 158 |
+
weight_zero_point,
|
| 159 |
+
weight_quant_min,
|
| 160 |
+
weight_quant_max,
|
| 161 |
+
weight_dtype_
|
| 162 |
+
)
|
| 163 |
+
return weight
|
| 164 |
+
elif weight_qscheme in [torch.per_channel_affine, torch.per_channel_affine_float_qparams]:
|
| 165 |
+
# TODO: torch.quint4x2 is not supported
|
| 166 |
+
if weight_dtype in [torch.quint8, torch.qint8, torch.qint32]:
|
| 167 |
+
weight_dtype_ = _QDTYPE_TO_UNDERLYING_INT_REPR_DTYPE[weight_dtype]
|
| 168 |
+
if weight_quant_min is None or weight_quant_max is None:
|
| 169 |
+
weight_quant_min, weight_quant_max = _DTYPE_TO_QVALUE_BOUNDS[weight_dtype_]
|
| 170 |
+
weight = torch.ops.quantized_decomposed.quantize_per_channel(
|
| 171 |
+
weight,
|
| 172 |
+
weight_scale,
|
| 173 |
+
weight_zero_point,
|
| 174 |
+
weight_axis,
|
| 175 |
+
weight_quant_min,
|
| 176 |
+
weight_quant_max,
|
| 177 |
+
weight_dtype_) # type: ignore[arg-type]
|
| 178 |
+
return weight
|
| 179 |
+
raise Exception(f"Unsupported dtype and qscheme: {weight_dtype}, {weight_qscheme}")
|
| 180 |
+
|
| 181 |
+
def _dequantize_weight_decomposed(
|
| 182 |
+
weight: torch.Tensor,
|
| 183 |
+
weight_qscheme: torch.qscheme,
|
| 184 |
+
weight_dtype: torch.dtype,
|
| 185 |
+
weight_scale: torch.Tensor,
|
| 186 |
+
weight_zero_point: torch.Tensor,
|
| 187 |
+
weight_axis: int,
|
        weight_quant_min: typing.Optional[int],
        weight_quant_max: typing.Optional[int],
) -> torch.Tensor:
    # TODO: get the quant_min and quant_max from activation_post_process
    _DTYPE_TO_QVALUE_BOUNDS = {
        torch.uint8: (0, 255),
        torch.int8: (-128, 127),
        torch.int32: (-(2**31), 2**31 - 1),
    }
    # TODO: add an util function for converting qdtype to dtype
    _QDTYPE_TO_UNDERLYING_INT_REPR_DTYPE = {
        torch.quint8: torch.uint8,
        torch.qint8: torch.int8,
        torch.qint32: torch.int32,
    }
    weight_dtype_ = _QDTYPE_TO_UNDERLYING_INT_REPR_DTYPE[weight_dtype]
    if weight_quant_min is None or weight_quant_max is None:
        weight_quant_min, weight_quant_max = _DTYPE_TO_QVALUE_BOUNDS[weight_dtype_]
    if weight_qscheme == torch.per_tensor_affine:
        if weight_dtype in [torch.quint8, torch.qint8, torch.qint32]:
            weight = torch.ops.quantized_decomposed.dequantize_per_tensor(
                weight,
                weight_scale,
                weight_zero_point,
                weight_quant_min,
                weight_quant_max,
                weight_dtype_
            )
            return weight
    elif weight_qscheme in [torch.per_channel_affine, torch.per_channel_affine_float_qparams]:
        # TODO: torch.quint4x2 is not supported
        if weight_dtype in [torch.quint8, torch.qint8, torch.qint32]:
            weight = torch.ops.quantized_decomposed.dequantize_per_channel(
                weight,
                weight_scale,
                weight_zero_point,
                weight_axis,
                weight_quant_min,
                weight_quant_max,
                weight_dtype_)  # type: ignore[arg-type]
            return weight
    raise Exception(f"Unsupported dtype and qscheme: {weight_dtype}, {weight_qscheme}")

def _quantize_weight(
        weight: torch.Tensor,
        weight_qscheme: torch.qscheme,
        weight_dtype: torch.dtype,
        weight_scale: torch.Tensor,
        weight_zero_point: torch.Tensor,
        weight_axis_int: int
) -> torch.Tensor:
    if weight_dtype == torch.float16:
        weight = weight.to(weight_dtype)
        return weight

    if weight_qscheme == torch.per_tensor_affine:
        if weight_dtype in [torch.quint8, torch.qint8, torch.qint32]:
            weight = torch.quantize_per_tensor(weight, weight_scale, weight_zero_point, weight_dtype)
            return weight
    elif weight_qscheme in [torch.per_channel_affine, torch.per_channel_affine_float_qparams]:
        if weight_dtype in [torch.quint8, torch.qint8, torch.quint4x2, torch.qint32]:
            weight = torch.quantize_per_channel(
                weight, weight_scale,
                weight_zero_point, weight_axis_int, weight_dtype)  # type: ignore[arg-type]
            return weight
    raise Exception(f"Unsupported dtype and qscheme: {weight_dtype}, {weight_qscheme}")

def _quantize_and_dequantize_weight_decomposed(
        weight: torch.Tensor,
        weight_qscheme: torch.qscheme,
        weight_dtype: torch.dtype,
        weight_scale: torch.Tensor,
        weight_zero_point: torch.Tensor,
        weight_axis_int: int,
        weight_quant_min: typing.Optional[int],
        weight_quant_max: typing.Optional[int],
) -> torch.Tensor:
    """ Quantize and then dequantize the weight based on
    the quantization parameters
    """
    if weight_qscheme in [
            torch.per_tensor_affine,
            torch.per_channel_affine,
            torch.per_channel_affine_float_qparams]:
        weight_quant = _quantize_weight_decomposed(
            weight, weight_qscheme, weight_dtype, weight_scale, weight_zero_point, weight_axis_int,
            weight_quant_min, weight_quant_max)
        weight_dequant = _dequantize_weight_decomposed(
            weight_quant, weight_qscheme, weight_dtype, weight_scale, weight_zero_point,
            weight_axis_int, weight_quant_min, weight_quant_max)
    else:
        weight_dequant = weight
    return weight_dequant

def _quantize_and_dequantize_weight(
        weight: torch.Tensor,
        weight_qscheme: torch.qscheme,
        weight_dtype: torch.dtype,
        weight_scale: torch.Tensor,
        weight_zero_point: torch.Tensor,
        weight_axis_int: int
) -> torch.Tensor:
    """ Quantize and then dequantize the weight based on
    the quantization parameters
    """
    if weight_qscheme in [
            torch.per_tensor_affine,
            torch.per_channel_affine,
            torch.per_channel_affine_float_qparams]:
        weight_quant = _quantize_weight(
            weight, weight_qscheme, weight_dtype, weight_scale, weight_zero_point, weight_axis_int)
        weight_dequant = weight_quant.dequantize()
    else:
        weight_dequant = weight
    return weight_dequant

def _save_weight_qparams(destination, prefix, weight_qscheme, weight_dtype, weight_scale, weight_zero_point, weight_axis):
    destination[prefix + "weight_qscheme"] = weight_qscheme
    destination[prefix + "weight_dtype"] = weight_dtype
    if weight_qscheme is not None:
        destination[prefix + "weight_scale"] = weight_scale
        destination[prefix + "weight_zero_point"] = weight_zero_point
        if weight_qscheme == torch.per_channel_affine:
            destination[prefix + "weight_axis"] = weight_axis

def _get_weight_qparam_keys(
        state_dict: typing.Dict[str, typing.Any],
        prefix: str):
    keys = ["weight_qscheme", "weight_dtype"]
    weight_qscheme = state_dict[prefix + "weight_qscheme"]
    if weight_qscheme is not None:
        keys.append("weight_scale")
        keys.append("weight_zero_point")
        if weight_qscheme == torch.quantize_per_channel:
            keys.append("weight_axis")
    return keys
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (2.36 kB)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/dynamic/linear.py
ADDED
@@ -0,0 +1,139 @@
from typing import Optional

import torch
import torch.ao.nn.intrinsic as nni

from torch.ao.nn.sparse.quantized import linear
from torch.ao.nn.sparse.quantized.utils import LinearBlockSparsePattern
from torch.ao.nn.quantized.modules.utils import _quantize_weight, _hide_packed_params_repr

__all__ = ['Linear']

class Linear(torch.nn.Module):
    r"""
    A dynamically quantized sparse linear module with float tensor as inputs and outputs.
    """
    _version = 1
    _op_type = "sparse_dynamic"
    _FLOAT_MODULE = torch.nn.Linear

    def __init__(self, in_features, out_features, row_block_size, col_block_size, bias=True, dtype=torch.qint8):
        super().__init__()

        if dtype != torch.qint8:
            raise NotImplementedError("Only QINT8 is supported for Sparse Quantized Linear Dynamic")

        self.in_features = in_features
        self.out_features = out_features

        if bias:
            bias = torch.zeros(self.out_features, dtype=torch.float)
        else:
            bias = None

        qweight = torch._empty_affine_quantized([out_features, in_features],
                                                scale=1, zero_point=0, dtype=torch.qint8)
        self._packed_params = linear.LinearPackedParams(row_block_size=row_block_size,
                                                        col_block_size=col_block_size,
                                                        dtype=dtype)
        self._packed_params.set_weight_bias(qweight, bias, row_block_size, col_block_size)

    def _get_name(self):
        return 'SparseQuantizedDynamicLinear'

    def extra_repr(self):
        return f'in_features={self.in_features}, out_features={self.out_features}, qscheme={self.weight().qscheme()}'

    def __repr__(self):
        return _hide_packed_params_repr(self, linear.LinearPackedParams)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.ops.sparse.qlinear_dynamic(x, self._packed_params._packed_params)

    def _save_to_state_dict(self, destination, prefix, keep_vars):
        super()._save_to_state_dict(destination, prefix, keep_vars)
        destination[prefix + 'op_type'] = self._op_type

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                              missing_keys, unexpected_keys, error_msgs):
        op_type = int(state_dict[prefix + 'op_type'])
        assert op_type == 'sparse', \
            f"Cannot load from op_type [{op_type}], expecting [{self._op_type}]"
        state_dict.pop(prefix + 'op_type')

        version = local_metadata.get('version', None)
        assert version <= self._version

        # Is this code valid? In old quantization it seemed to be used to load
        # older model
        weight = state_dict.pop(prefix + 'weight')
        bias = state_dict.pop(prefix + 'bias')
        state_dict.update({prefix + '_packed_params.weight': weight,
                           prefix + '_packed_params.bias': bias})

        super()._load_from_state_dict(
            state_dict, prefix, local_metadata, False,
            missing_keys, unexpected_keys, error_msgs)

    def _weight_bias(self):
        return self._packed_params._weight_bias()

    def weight(self):
        return self._weight_bias()[0]

    def bias(self):
        return self._weight_bias()[1]

    def set_weight_bias(self, w: torch.Tensor, b: Optional[torch.Tensor],
                        row_block_size: Optional[int], col_block_size: Optional[int]) -> None:
        assert row_block_size is not None and col_block_size is not None
        self.out_features = w.shape[0]
        self.in_features = w.shape[1]
        self._packed_params.set_weight_bias(w, b, row_block_size, col_block_size)

    @classmethod
    def from_float(cls, mod):
        r"""Create a quantized sparse dynamic module from a float module.

        We only care about the convert at this stage, no need for observers just yet.
        """
        assert type(mod) == cls._FLOAT_MODULE, ' nnq.' + cls.__name__ + '.from_float only works for ' + \
            cls._FLOAT_MODULE.__name__
        # TODO: Need to add options to qconfig to avoid the calibration.
        # TODO: Add calibration for the sparsity
        assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
        if type(mod) == nni.LinearReLU:
            mod = mod[0]
        if mod.qconfig is not None and mod.qconfig.weight is not None:
            weight_observer = mod.qconfig.weight()
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the
            # import until we need it.
            from torch.ao.quantization.qconfig import default_dynamic_qconfig
            weight_observer = default_dynamic_qconfig.weight()

        # It is important to multiply by the mask BEFORE calling the `weight_observer`
        # TODO (zaf): Mask might not be part of the qconfig (T83295194)
        weight = mod.weight
        if getattr(mod.qconfig, 'mask', False):
            weight = mod.qconfig.mask * mod.weight

        weight_observer(weight)
        dtype = weight_observer.dtype
        assert dtype == torch.qint8, 'Weight observer must have dtype torch.qint8'
        w_sc, w_zp = weight_observer.calculate_qparams()
        if isinstance(w_zp, torch.Tensor):
            assert not torch.any(w_zp.bool()), "All weight zero points must map to 0"
        else:
            assert w_zp == 0, 'Weight zero point must map to 0'
        qweight = _quantize_weight(weight.float(), weight_observer)

        row_block_size, col_block_size = LinearBlockSparsePattern.block_size()
        qlinear = cls(mod.in_features,
                      mod.out_features,
                      row_block_size,
                      col_block_size,
                      dtype=dtype)
        qlinear.set_weight_bias(qweight, mod.bias, row_block_size, col_block_size)
        return qlinear
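A rough usage sketch for the module above, under a few assumptions: default_dynamic_qconfig supplies the required qint8 weight observer, LinearBlockSparsePattern is usable as a context manager to set the block size that from_float reads via block_size(), and the sparse qlinear_dynamic kernel is available in the current build.

import torch
from torch.ao.quantization import default_dynamic_qconfig
from torch.ao.nn.sparse.quantized.utils import LinearBlockSparsePattern
from torch.ao.nn.sparse.quantized.dynamic import Linear as SparseDynamicLinear

float_linear = torch.nn.Linear(16, 8)
float_linear.qconfig = default_dynamic_qconfig      # from_float asserts a qconfig is present

with LinearBlockSparsePattern(1, 4):                # assumed context-manager usage for a 1x4 block pattern
    sparse_qlinear = SparseDynamicLinear.from_float(float_linear)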
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (231 Bytes)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_scheduler/__init__.py
ADDED
@@ -0,0 +1,5 @@
from .base_data_scheduler import BaseDataScheduler

__all__ = [
    "BaseDataScheduler",
]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/__pycache__/quantization_utils.cpython-311.pyc
ADDED
Binary file (6.74 kB)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/lightning/__init__.py
ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/data_sparsifier/quantization_utils.py
ADDED
@@ -0,0 +1,130 @@
import torch
import torch.nn as nn
from torch.ao.pruning.sparsifier.utils import module_to_fqn, fqn_to_module
from typing import Dict, List, Optional

SUPPORTED_MODULES = {
    nn.Embedding,
    nn.EmbeddingBag
}


def _fetch_all_embeddings(model):
    """Fetches Embedding and EmbeddingBag modules from the model
    """
    embedding_modules = []
    stack = [model]
    while stack:
        module = stack.pop()
        for _, child in module.named_children():
            fqn_name = module_to_fqn(model, child)
            if type(child) in SUPPORTED_MODULES:
                embedding_modules.append((fqn_name, child))
            else:
                stack.append(child)
    return embedding_modules


def post_training_sparse_quantize(model,
                                  data_sparsifier_class,
                                  sparsify_first=True,
                                  select_embeddings: Optional[List[nn.Module]] = None,
                                  **sparse_config):
    """Takes in a model and applies sparsification and quantization to only embeddings & embeddingbags.
    The quantization step can happen before or after sparsification depending on the `sparsify_first` argument.

    Args:
        - model (nn.Module)
            model whose embeddings need to be sparsified
        - data_sparsifier_class (type of data sparsifier)
            Type of sparsification that needs to be applied to model
        - sparsify_first (bool)
            if true, sparsifies first and then quantizes
            otherwise, quantizes first and then sparsifies.
        - select_embeddings (List of Embedding modules)
            List of embedding modules in the model to be sparsified & quantized.
            If None, all embedding modules will be sparsified
        - sparse_config (Dict)
            config that will be passed to the constructor of data sparsifier object.

    Note:
        1. When `sparsify_first=False`, quantization occurs first followed by sparsification.
            - before sparsifying, the embedding layers are dequantized.
            - scales and zero-points are saved
            - embedding layers are sparsified and `squash_mask` is applied
            - embedding weights are requantized using the saved scales and zero-points
        2. When `sparsify_first=True`, sparsification occurs first followed by quantization.
            - embeddings are sparsified first
            - quantization is applied on the sparsified embeddings
    """
    data_sparsifier = data_sparsifier_class(**sparse_config)

    # if select_embeddings is None, perform it on all embeddings
    if select_embeddings is None:
        embedding_modules = _fetch_all_embeddings(model)

    else:
        embedding_modules = []
        assert isinstance(select_embeddings, List), "the embedding_modules must be a list of embedding modules"
        for emb in select_embeddings:
            assert type(emb) in SUPPORTED_MODULES, "the embedding_modules list must be an embedding or embedding bags"
            fqn_name = module_to_fqn(model, emb)
            assert fqn_name is not None, "the embedding modules must be part of input model"
            embedding_modules.append((fqn_name, emb))

    if sparsify_first:
        # sparsify
        for name, emb_module in embedding_modules:
            valid_name = name.replace('.', '_')
            data_sparsifier.add_data(name=valid_name, data=emb_module)

        data_sparsifier.step()
        data_sparsifier.squash_mask()

        # quantize
        for _, emb_module in embedding_modules:
            emb_module.qconfig = torch.ao.quantization.float_qparams_weight_only_qconfig

        torch.ao.quantization.prepare(model, inplace=True)
        torch.ao.quantization.convert(model, inplace=True)

    else:
        # quantize
        for _, emb_module in embedding_modules:
            emb_module.qconfig = torch.ao.quantization.float_qparams_weight_only_qconfig

        torch.ao.quantization.prepare(model, inplace=True)
        torch.ao.quantization.convert(model, inplace=True)

        # retrieve scale & zero_points
        quantize_params: Dict[str, Dict] = {'scales': {}, 'zero_points': {},
                                            'dequant_weights': {}, 'axis': {},
                                            'dtype': {}}

        for name, _ in embedding_modules:
            quantized_emb = fqn_to_module(model, name)
            assert quantized_emb is not None  # satisfy mypy

            quantized_weight = quantized_emb.weight()  # type: ignore[operator]
            quantize_params['scales'][name] = quantized_weight.q_per_channel_scales()
            quantize_params['zero_points'][name] = quantized_weight.q_per_channel_zero_points()
            quantize_params['dequant_weights'][name] = torch.dequantize(quantized_weight)
            quantize_params['axis'][name] = quantized_weight.q_per_channel_axis()
            quantize_params['dtype'][name] = quantized_weight.dtype

            # attach data to sparsifier
            data_sparsifier.add_data(name=name.replace('.', '_'), data=quantize_params['dequant_weights'][name])

        data_sparsifier.step()
        data_sparsifier.squash_mask()

        for name, _ in embedding_modules:
            quantized_emb = fqn_to_module(model, name)
            assert quantized_emb is not None  # satisfy mypy
            requantized_vector = torch.quantize_per_channel(quantize_params['dequant_weights'][name],
                                                            scales=quantize_params['scales'][name],
                                                            zero_points=quantize_params['zero_points'][name],
                                                            dtype=quantize_params['dtype'][name],
                                                            axis=quantize_params['axis'][name])

            quantized_emb.set_weight(requantized_vector)  # type: ignore[operator]
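A hedged usage sketch for post_training_sparse_quantize above, sparsifying and then quantizing the embeddings of a toy model. The DataNormSparsifier import path and its sparsity_level keyword are assumptions about this experimental package, not guarantees.

import torch.nn as nn
from torch.ao.pruning._experimental.data_sparsifier.data_norm_sparsifier import DataNormSparsifier
from torch.ao.pruning._experimental.data_sparsifier.quantization_utils import post_training_sparse_quantize

model = nn.Sequential(nn.EmbeddingBag(1000, 64), nn.Linear(64, 10))
# Embeddings are sparsified first, then quantized with the weight-only float-qparams qconfig.
post_training_sparse_quantize(model, DataNormSparsifier, sparsify_first=True, sparsity_level=0.8)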
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__pycache__/FPGM_pruner.cpython-311.pyc
ADDED
Binary file (5.52 kB)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (659 Bytes)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/_experimental/pruner/saliency_pruner.py
ADDED
@@ -0,0 +1,29 @@
from .base_structured_sparsifier import BaseStructuredSparsifier


class SaliencyPruner(BaseStructuredSparsifier):
    """
    Prune rows based on the saliency (L1 norm) of each row.

    This pruner works on N-Dimensional weight tensors.
    For each row, we calculate the saliency, which is the sum of the L1 norm of all weights in that row.
    We expect that the resulting saliency vector has the same shape as our mask.
    We then pick elements to remove until we reach the target sparsity_level.
    """

    def update_mask(self, module, tensor_name, **kwargs):
        # tensor_name will give you the FQN; all other entries in the sparse config are present in kwargs
        weights = getattr(module, tensor_name)
        mask = getattr(module.parametrizations, tensor_name)[0].mask

        # use negative weights so we can use topk (we prune out the smallest)
        if weights.dim() <= 1:
            raise Exception("Structured pruning can only be applied to a 2+dim weight tensor!")
        saliency = -weights.norm(dim=tuple(range(1, weights.dim())), p=1)
        assert saliency.shape == mask.shape

        num_to_pick = int(len(mask) * kwargs["sparsity_level"])
        prune = saliency.topk(num_to_pick).indices

        # Set the mask to be false for the rows we want to prune
        mask.data[prune] = False
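A hedged sketch of driving SaliencyPruner above. The prepare/step flow and the {"tensor_fqn": ...} config format follow the base structured-sparsifier API as understood here, and the import path is an assumption; treat the names as illustrative.

import torch.nn as nn
from torch.ao.pruning._experimental.pruner import SaliencyPruner

model = nn.Sequential(nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 4))
pruner = SaliencyPruner({"sparsity_level": 0.5})        # default config shared by all groups
pruner.prepare(model, [{"tensor_fqn": "0.weight"}])     # parametrize the first Linear's weight
pruner.step()                                           # update_mask zeroes the least-salient rows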
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/scheduler/__init__.py
ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/scheduler/__pycache__/base_scheduler.cpython-311.pyc
ADDED
Binary file (8.9 kB)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/scheduler/lambda_scheduler.py
ADDED
@@ -0,0 +1,47 @@
import warnings

from .base_scheduler import BaseScheduler

__all__ = ["LambdaSL"]

class LambdaSL(BaseScheduler):
    """Sets the sparsity level of each parameter group to the final sl
    times a given function. When last_epoch=-1, sets initial sl as zero.
    Args:
        sparsifier (BaseSparsifier): Wrapped sparsifier.
        sl_lambda (function or list): A function which computes a multiplicative
            factor given an integer parameter epoch, or a list of such
            functions, one for each group in sparsifier.param_groups.
        last_epoch (int): The index of last epoch. Default: -1.
        verbose (bool): If ``True``, prints a message to stdout for
            each update. Default: ``False``.
    Example:
        >>> # Assuming sparsifier has two groups.
        >>> lambda1 = lambda epoch: epoch // 30
        >>> lambda2 = lambda epoch: 0.95 ** epoch
        >>> # xdoctest: +SKIP
        >>> scheduler = LambdaSL(sparsifier, sl_lambda=[lambda1, lambda2])
        >>> for epoch in range(100):
        >>>     train(...)
        >>>     validate(...)
        >>>     scheduler.step()
    """

    def __init__(self, sparsifier, sl_lambda, last_epoch=-1, verbose=False):
        self.sparsifier = sparsifier

        if not isinstance(sl_lambda, list) and not isinstance(sl_lambda, tuple):
            self.sl_lambdas = [sl_lambda] * len(sparsifier.groups)
        else:
            if len(sl_lambda) != len(sparsifier.groups):
                raise ValueError(f"Expected {len(sparsifier.groups)} lr_lambdas, but got {len(sl_lambda)}")
            self.sl_lambdas = list(sl_lambda)
        super().__init__(sparsifier, last_epoch, verbose)

    def get_sl(self):
        if not self._get_sl_called_within_step:
            warnings.warn(
                "To get the last sparsity level computed by the scheduler, "
                "please use `get_last_sl()`.")
        return [base_sl * lmbda(self.last_epoch)
                for lmbda, base_sl in zip(self.sl_lambdas, self.base_sl)]
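The class docstring above already shows the training-loop usage; for reference, the arithmetic performed by get_sl is simply the group's base sparsity level multiplied by the lambda evaluated at the current epoch:

base_sl = 0.9
sl_lambda = lambda epoch: 0.95 ** epoch
schedule = [base_sl * sl_lambda(t) for t in range(5)]
# [0.9, 0.855, 0.81225, 0.7716375, 0.733055625]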
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (228 Bytes)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/__pycache__/base_sparsifier.cpython-311.pyc
ADDED
Binary file (17.5 kB)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/pruning/sparsifier/__pycache__/weight_norm_sparsifier.cpython-311.pyc
ADDED
Binary file (11.2 kB)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/_equalize.py
ADDED
@@ -0,0 +1,182 @@
import torch
import copy
from typing import Dict, Any

__all__ = [
    "set_module_weight",
    "set_module_bias",
    "get_module_weight",
    "get_module_bias",
    "max_over_ndim",
    "min_over_ndim",
    "channel_range",
    "cross_layer_equalization",
    "equalize",
    "converged",
]

_supported_types = {torch.nn.Conv2d, torch.nn.Linear}
_supported_intrinsic_types = {torch.ao.nn.intrinsic.ConvReLU2d, torch.ao.nn.intrinsic.LinearReLU}
_all_supported_types = _supported_types.union(_supported_intrinsic_types)

def set_module_weight(module, weight) -> None:
    if type(module) in _supported_types:
        module.weight = torch.nn.Parameter(weight)
    else:
        module[0].weight = torch.nn.Parameter(weight)

def set_module_bias(module, bias) -> None:
    if type(module) in _supported_types:
        module.bias = torch.nn.Parameter(bias)
    else:
        module[0].bias = torch.nn.Parameter(bias)

def get_module_weight(module):
    if type(module) in _supported_types:
        return module.weight
    else:
        return module[0].weight

def get_module_bias(module):
    if type(module) in _supported_types:
        return module.bias
    else:
        return module[0].bias

def max_over_ndim(input, axis_list, keepdim=False):
    """Apply 'torch.max' over the given axes."""
    axis_list.sort(reverse=True)
    for axis in axis_list:
        input, _ = input.max(axis, keepdim)
    return input

def min_over_ndim(input, axis_list, keepdim=False):
    """Apply 'torch.min' over the given axes."""
    axis_list.sort(reverse=True)
    for axis in axis_list:
        input, _ = input.min(axis, keepdim)
    return input

def channel_range(input, axis=0):
    """Find the range of weights associated with a specific channel."""
    size_of_tensor_dim = input.ndim
    axis_list = list(range(size_of_tensor_dim))
    axis_list.remove(axis)

    mins = min_over_ndim(input, axis_list)
    maxs = max_over_ndim(input, axis_list)

    assert mins.size(0) == input.size(axis), "Dimensions of resultant channel range do not match size of requested axis"
    return maxs - mins

def cross_layer_equalization(module1, module2, output_axis=0, input_axis=1):
    """Scale the range of Tensor1.output to equal Tensor2.input.

    Given two adjacent modules, the weights are scaled such that
    the ranges of the first module's output channels are equal to the
    ranges of the second module's input channels.
    """
    if type(module1) not in _all_supported_types or type(module2) not in _all_supported_types:
        raise ValueError("module type not supported:", type(module1), " ", type(module2))

    weight1 = get_module_weight(module1)
    weight2 = get_module_weight(module2)

    if weight1.size(output_axis) != weight2.size(input_axis):
        raise TypeError("Number of output channels of first arg does not match "
                        "number of input channels of second arg")

    bias = get_module_bias(module1)

    weight1_range = channel_range(weight1, output_axis)
    weight2_range = channel_range(weight2, input_axis)

    # producing scaling factors to be applied
    weight2_range += 1e-9
    scaling_factors = torch.sqrt(weight1_range / weight2_range)
    inverse_scaling_factors = torch.reciprocal(scaling_factors)

    bias = bias * inverse_scaling_factors

    # formatting the scaling (1D) tensors to be applied on the given argument tensors
    # pads axis to (1D) tensors to then be broadcasted
    size1 = [1] * weight1.ndim
    size1[output_axis] = weight1.size(output_axis)
    size2 = [1] * weight2.ndim
    size2[input_axis] = weight2.size(input_axis)

    scaling_factors = torch.reshape(scaling_factors, size2)
    inverse_scaling_factors = torch.reshape(inverse_scaling_factors, size1)

    weight1 = weight1 * inverse_scaling_factors
    weight2 = weight2 * scaling_factors

    set_module_weight(module1, weight1)
    set_module_bias(module1, bias)
    set_module_weight(module2, weight2)

def equalize(model, paired_modules_list, threshold=1e-4, inplace=True):
    """Equalize modules until convergence is achieved.

    Given a list of adjacent modules within a model, equalization will
    be applied between each pair; this is repeated until convergence is achieved.

    Keeps a copy of the changing modules from the previous iteration; if the copies
    are not that different from the current modules (as determined by `converged`),
    then the modules have converged enough that further equalizing is not necessary.

    The implementation references section 4.1 of this paper: https://arxiv.org/pdf/1906.04721.pdf

    Args:
        model: a model (nn.Module) that equalization is to be applied on
        paired_modules_list: a list of lists where each sublist is a pair of two
            submodules found in the model; for each pair the two submodules generally
            have to be adjacent in the model to get expected/reasonable results
        threshold: a number used by the converged function to determine what degree of
            similarity between models is necessary for them to be called equivalent
        inplace: determines if function is inplace or not
    """
    if not inplace:
        model = copy.deepcopy(model)

    name_to_module: Dict[str, torch.nn.Module] = {}
    previous_name_to_module: Dict[str, Any] = {}
    name_set = {name for pair in paired_modules_list for name in pair}

    for name, module in model.named_modules():
        if name in name_set:
            name_to_module[name] = module
            previous_name_to_module[name] = None
    while not converged(name_to_module, previous_name_to_module, threshold):
        for pair in paired_modules_list:
            previous_name_to_module[pair[0]] = copy.deepcopy(name_to_module[pair[0]])
            previous_name_to_module[pair[1]] = copy.deepcopy(name_to_module[pair[1]])

            cross_layer_equalization(name_to_module[pair[0]], name_to_module[pair[1]])

    return model

def converged(curr_modules, prev_modules, threshold=1e-4):
    """Test whether modules are converged to a specified threshold.

    Tests for the summed norm of the differences between each set of modules
    being less than the given threshold.

    Takes two dictionaries mapping names to modules; the set of names for each dictionary
    should be the same. Looping over the set of names, for each name take the difference
    between the associated modules in each dictionary.
    """
    if curr_modules.keys() != prev_modules.keys():
        raise ValueError("The keys to the given mappings must have the same set of names of modules")

    summed_norms = torch.tensor(0.)
    if None in prev_modules.values():
        return False
    for name in curr_modules.keys():
        curr_weight = get_module_weight(curr_modules[name])
        prev_weight = get_module_weight(prev_modules[name])

        difference = curr_weight.sub(prev_weight)
        summed_norms += torch.norm(difference)
    return bool(summed_norms < threshold)
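A minimal usage sketch for equalize above, pairing two adjacent Linear layers. The names "0" and "2" are nn.Sequential's default child names, and the intervening ReLU preserves the pair's overall function because the scaling factors are positive.

import torch.nn as nn
from torch.ao.quantization._equalize import equalize

model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))
equalized = equalize(model, [["0", "2"]], threshold=1e-4, inplace=False)
# Per-channel weight ranges of "0" (outputs) and "2" (inputs) are now balanced,
# which typically makes per-tensor quantization of both layers less lossy.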
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__init__.py
ADDED
@@ -0,0 +1,23 @@
from .backend_config import BackendConfig, BackendPatternConfig, DTypeConfig, DTypeWithConstraints, ObservationType
from .fbgemm import get_fbgemm_backend_config
from .native import get_native_backend_config, get_native_backend_config_dict
from .qnnpack import get_qnnpack_backend_config
from .tensorrt import get_tensorrt_backend_config, get_tensorrt_backend_config_dict
from .executorch import get_executorch_backend_config
from .onednn import get_onednn_backend_config

__all__ = [
    "get_fbgemm_backend_config",
    "get_native_backend_config",
    "get_native_backend_config_dict",
    "get_qnnpack_backend_config",
    "get_tensorrt_backend_config",
    "get_tensorrt_backend_config_dict",
    "get_executorch_backend_config",
    "BackendConfig",
    "BackendPatternConfig",
    "DTypeConfig",
    "DTypeWithConstraints",
    "ObservationType",
    "get_onednn_backend_config",
]
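A usage sketch for the exported helpers: passing a BackendConfig explicitly into FX graph-mode quantization. The backend_config keyword on prepare_fx/convert_fx is assumed to match this torch version.

import torch
from torch.ao.quantization import get_default_qconfig_mapping
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx
from torch.ao.quantization.backend_config import get_native_backend_config

model = torch.nn.Sequential(torch.nn.Linear(8, 4)).eval()
example_inputs = (torch.randn(1, 8),)
backend_config = get_native_backend_config()

prepared = prepare_fx(model, get_default_qconfig_mapping(), example_inputs,
                      backend_config=backend_config)
prepared(*example_inputs)                                 # calibration pass
quantized = convert_fx(prepared, backend_config=backend_config)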
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (1.08 kB)