| | import torch |
| | from .backend_config import ( |
| | BackendConfig, |
| | BackendPatternConfig, |
| | DTypeConfig, |
| | ObservationType |
| | ) |
| | from ._common_operator_config_utils import ( |
| | _get_binary_op_configs, |
| | _get_linear_configs, |
| | _get_conv_configs, |
| | _get_share_qparams_op_configs, |
| | ) |
| |
|
| | def get_tensorrt_backend_config() -> BackendConfig: |
| | """ |
| | Return the `BackendConfig` for the TensorRT backend. |
| | NOTE: Current api will change in the future, it's just to unblock experimentation for |
| | new backends, please don't use it right now. |
| | TODO: add a README when it's more stable |
| | """ |
| | |
| | weighted_op_qint8_dtype_config = DTypeConfig( |
| | input_dtype=torch.qint8, |
| | output_dtype=torch.qint8, |
| | weight_dtype=torch.qint8, |
| | bias_dtype=torch.float, |
| | ) |
| | non_weighted_op_qint8_dtype_config = DTypeConfig( |
| | input_dtype=torch.qint8, |
| | output_dtype=torch.qint8, |
| | ) |
| |
|
| | addmm_config = BackendPatternConfig(torch.addmm) \ |
| | .set_observation_type(ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT) \ |
| | .add_dtype_config(weighted_op_qint8_dtype_config) \ |
| | ._set_input_type_to_index({ |
| | "bias": 0, |
| | "input": 1, |
| | "weight": 2, |
| | }) |
| | cat_config = BackendPatternConfig(torch.cat) \ |
| | .set_observation_type(ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT) \ |
| | .add_dtype_config(non_weighted_op_qint8_dtype_config) |
| | conv_dtype_configs = [ |
| | weighted_op_qint8_dtype_config, |
| | ] |
| | linear_dtype_configs = [ |
| | weighted_op_qint8_dtype_config, |
| | ] |
| | binary_op_dtype_configs = [ |
| | weighted_op_qint8_dtype_config, |
| | ] |
| | share_qparams_op_dtype_configs = [ |
| | non_weighted_op_qint8_dtype_config, |
| | ] |
| | |
| | |
| | return BackendConfig("tensorrt") \ |
| | .set_backend_pattern_configs(_get_conv_configs(conv_dtype_configs)) \ |
| | .set_backend_pattern_config(addmm_config) \ |
| | .set_backend_pattern_config(cat_config) \ |
| | .set_backend_pattern_configs(_get_linear_configs(linear_dtype_configs)) \ |
| | .set_backend_pattern_configs(_get_binary_op_configs(binary_op_dtype_configs)) \ |
| | .set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_op_dtype_configs)) |
| |
|
| | def get_tensorrt_backend_config_dict(): |
| | """ |
| | Return the `BackendConfig` for the TensorRT backend in dictionary form. |
| | """ |
| | return get_tensorrt_backend_config().to_dict() |
| |
|
| | __all__ = [ |
| | "get_tensorrt_backend_config", |
| | "get_tensorrt_backend_config_dict", |
| | ] |
| |
|