|
import torch |
|
import torch.ao.nn.quantized.dynamic as nnqd |
|
import torch.ao.nn.intrinsic as nni |
|
|
|
class LinearReLU(nnqd.Linear): |
|
r""" |
|
A LinearReLU module fused from Linear and ReLU modules that can be used |
|
for dynamic quantization. |
|
Supports both, FP16 and INT8 quantization. |
|
|
|
We adopt the same interface as :class:`torch.ao.nn.quantized.dynamic.Linear`. |
|
|
|
Attributes: |
|
Same as torch.ao.nn.quantized.dynamic.Linear |
|
|
|
Examples:: |
|
|
|
>>> m = nn.intrinsic.quantized.dynamic.LinearReLU(20, 30) |
|
>>> input = torch.randn(128, 20) |
|
>>> # xdoctest: +SKIP |
|
>>> output = m(input) |
|
>>> print(output.size()) |
|
torch.Size([128, 30]) |
|
""" |
|
_FLOAT_MODULE = nni.LinearReLU |
|
|
|
def __init__(self, in_features, out_features, bias=True, dtype=torch.qint8): |
|
super().__init__(in_features, out_features, bias, dtype) |
|
|
|
def forward(self, x: torch.Tensor) -> torch.Tensor: |
|
if self._packed_params.dtype == torch.qint8: |
|
|
|
Y = torch.ops.quantized.linear_relu_dynamic( |
|
x, self._packed_params._packed_params, reduce_range=True) |
|
elif self._packed_params.dtype == torch.float16: |
|
Y = torch.ops.quantized.linear_relu_dynamic_fp16( |
|
x, self._packed_params._packed_params) |
|
else: |
|
raise RuntimeError('Unsupported dtype on dynamic quantized linear relu!') |
|
return Y.to(x.dtype) |
|
|
|
def _get_name(self): |
|
return 'DynamicQuantizedLinearReLU' |
|
|
|
@classmethod |
|
def from_float(cls, mod): |
|
return super(LinearReLU, cls).from_float(mod) |
|
|
|
@classmethod |
|
def from_reference(cls, ref_qlinear_relu): |
|
return super().from_reference(ref_qlinear_relu[0]) |
|
|