import warnings |
from inspect import signature |
from itertools import chain |
from pathlib import Path |
from typing import TYPE_CHECKING, Iterable, List, Tuple, Union |
import numpy as np |
from packaging.version import Version, parse |
from ..tokenization_utils_base import PreTrainedTokenizerBase |
from ..utils import ( |
TensorType, |
is_tf_available, |
is_torch_available, |
logging, |
) |
from .config import OnnxConfig |
if is_torch_available(): |
from ..modeling_utils import PreTrainedModel |
if is_tf_available(): |
from ..modeling_tf_utils import TFPreTrainedModel |
from ..feature_extraction_utils import FeatureExtractionMixin |
from ..processing_utils import ProcessorMixin |
from ..tokenization_utils import PreTrainedTokenizer |
logger = logging.get_logger(__name__) |
def check_onnxruntime_requirements(minimum_version: Version):
    """
    Check that onnxruntime is installed and that the installed version is recent enough.

    Args:
        minimum_version (`packaging.version.Version`):
            The oldest onnxruntime version that is accepted.

    Raises:
        ImportError: If onnxruntime is not installed, or if the installed version is older than `minimum_version`.
    """
    # Only the import itself lives in the `try`: the "too old" error raised below is an ImportError as well, and
    # it must not be caught here and re-reported as "onnxruntime is not installed".
    try:
        import onnxruntime
    except ImportError:
        raise ImportError(
            "onnxruntime doesn't seem to be currently installed. "
            "Please install the onnxruntime by running `pip install onnxruntime`"
            " and relaunch the conversion."
        )

    # Parse the version of the installed onnxruntime so it can be compared with the requested minimum
    ort_version = parse(onnxruntime.__version__)

    if ort_version < minimum_version:
        raise ImportError(
            f"We found an older version of onnxruntime ({onnxruntime.__version__}) "
            f"but we require onnxruntime to be >= {minimum_version} to enable all the conversions options.\n"
            "Please update onnxruntime by running `pip install --upgrade onnxruntime`"
        )
def export_pytorch(
    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin", "ProcessorMixin"],
    model: "PreTrainedModel",
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
    device: str = "cpu",
) -> Tuple[List[str], List[str]]:
    """
    Export a PyTorch model to an ONNX Intermediate Representation (IR)

    Args:
        preprocessor: ([`PreTrainedTokenizer`], [`FeatureExtractionMixin`] or [`ProcessorMixin`]):
            The preprocessor used for encoding the data.
        model ([`PreTrainedModel`]):
            The model to export.
        config ([`~onnx.config.OnnxConfig`]):
            The ONNX configuration associated with the exported model.
        opset (`int`):
            The version of the ONNX operator set to use.
        output (`Path`):
            Path the exported ONNX model is written to (handed to `torch.onnx.export` as `f`).
        tokenizer ([`PreTrainedTokenizer`], *optional*):
            Deprecated, use `preprocessor` instead. When given, it replaces `preprocessor` for the generation of
            the dummy inputs.
        device (`str`, *optional*, defaults to `cpu`):
            The device on which the ONNX model will be exported. Either `cpu` or `cuda`. The model and the dummy
            inputs are only moved when the device type is `cuda` and CUDA is available.

    Returns:
        `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named outputs from
        the ONNX configuration.

    Raises:
        ValueError: If both a tokenizer-like `preprocessor` and `tokenizer` are provided, or if the dummy inputs
            generated by `config` are not all parameters of the model's `forward`.
    """
    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
        raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.")
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use"
            " `preprocessor` instead.",
            FutureWarning,
        )
        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
        preprocessor = tokenizer

    if issubclass(type(model), PreTrainedModel):
        import torch
        from torch.onnx import export as onnx_export

        logger.info(f"Using framework PyTorch: {torch.__version__}")
        with torch.no_grad():
            model.config.return_dict = True
            model.eval()

            # Check if we need to override certain configuration items
            if config.values_override is not None:
                logger.info(f"Overriding {len(config.values_override)} configuration item(s)")
                for override_config_key, override_config_value in config.values_override.items():
                    logger.info(f"\t- {override_config_key} -> {override_config_value}")
                    setattr(model.config, override_config_key, override_config_value)

            model_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.PYTORCH)

            device = torch.device(device)
            if device.type == "cuda" and torch.cuda.is_available():
                model.to(device)
                # Mirror the (possibly nested) structure of the dummy inputs on the target device.
                # Entries that are not tensors are replaced by `None`.
                model_inputs_device = {}
                for k, v in model_inputs.items():
                    if isinstance(v, tuple):
                        model_inputs_device[k] = tuple(
                            x.to(device) if isinstance(x, torch.Tensor) else None for x in v
                        )
                    elif isinstance(v, list):
                        model_inputs_device[k] = [
                            tuple(x.to(device) if isinstance(x, torch.Tensor) else None for x in t) for t in v
                        ]
                    else:
                        model_inputs_device[k] = v.to(device)

                model_inputs = model_inputs_device

            # Ensure every generated input is accepted by the model's forward signature
            inputs_match, matched_inputs = ensure_model_and_config_inputs_match(model, model_inputs.keys())
            onnx_outputs = list(config.outputs.keys())

            if not inputs_match:
                raise ValueError("Model and config inputs doesn't match")

            config.patch_ops()
            try:
                onnx_export(
                    model,
                    (model_inputs,),
                    f=output.as_posix(),
                    input_names=list(config.inputs.keys()),
                    output_names=onnx_outputs,
                    dynamic_axes=dict(chain(config.inputs.items(), config.outputs.items())),
                    do_constant_folding=True,
                    opset_version=opset,
                )
            finally:
                # Always undo the patching, even when the export fails, so the patched ops do not leak out of
                # this call.
                config.restore_ops()

    return matched_inputs, onnx_outputs
def export_tensorflow(
    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin"],
    model: "TFPreTrainedModel",
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
) -> Tuple[List[str], List[str]]:
    """
    Convert a TensorFlow (Keras) model to ONNX with `tf2onnx` and save it.

    Args:
        preprocessor: ([`PreTrainedTokenizer`] or [`FeatureExtractionMixin`]):
            The preprocessor used to build the dummy inputs.
        model ([`TFPreTrainedModel`]):
            The model to export.
        config ([`~onnx.config.OnnxConfig`]):
            The ONNX configuration associated with the exported model.
        opset (`int`):
            The version of the ONNX operator set to use.
        output (`Path`):
            Path the converted ONNX model is saved to.
        tokenizer ([`PreTrainedTokenizer`], *optional*):
            Deprecated, use `preprocessor` instead. When given, it replaces `preprocessor`.

    Returns:
        `Tuple[List[str], List[str]]`: The model inputs that match the generated dummy inputs, in signature order,
        and the output names declared by the ONNX configuration.

    Raises:
        ValueError: If both a tokenizer-like `preprocessor` and `tokenizer` are provided.
    """
    import onnx
    import tensorflow as tf
    import tf2onnx

    tokenizer_given = tokenizer is not None
    if tokenizer_given and isinstance(preprocessor, PreTrainedTokenizerBase):
        raise ValueError("You cannot provide both a tokenizer and preprocessor to export the model.")
    if tokenizer_given:
        deprecation_message = (
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use"
            " `preprocessor` instead."
        )
        warnings.warn(deprecation_message, FutureWarning)
        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
        preprocessor = tokenizer

    model_config = model.config
    model_config.return_dict = True

    # Apply the configuration overrides requested by the ONNX config, if any
    overrides = config.values_override
    if overrides is not None:
        logger.info(f"Overriding {len(overrides)} configuration item(s)")
        for item_name, item_value in overrides.items():
            logger.info(f"\t- {item_name} -> {item_value}")
            setattr(model_config, item_name, item_value)

    dummy_inputs = config.generate_dummy_inputs(preprocessor, framework=TensorType.TENSORFLOW)
    # Only the ordered list of matching inputs is used here; the boolean match flag is not checked.
    _, matched_inputs = ensure_model_and_config_inputs_match(model, dummy_inputs.keys())
    onnx_outputs = list(config.outputs.keys())

    # One spec per dummy input, with every dimension left dynamic
    input_signature = []
    for input_name, tensor in dummy_inputs.items():
        dynamic_shape = [None] * tensor.ndim
        input_signature.append(tf.TensorSpec(dynamic_shape, dtype=tensor.dtype, name=input_name))

    onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature, opset=opset)
    onnx.save(onnx_model, output.as_posix())
    config.restore_ops()

    return matched_inputs, onnx_outputs
def export(
    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin", "ProcessorMixin"],
    model: Union["PreTrainedModel", "TFPreTrainedModel"],
    config: OnnxConfig,
    opset: int,
    output: Path,
    tokenizer: "PreTrainedTokenizer" = None,
    device: str = "cpu",
) -> Tuple[List[str], List[str]]:
    """
    Export a Pytorch or TensorFlow model to an ONNX Intermediate Representation (IR)

    Args:
        preprocessor: ([`PreTrainedTokenizer`], [`FeatureExtractionMixin`] or [`ProcessorMixin`]):
            The preprocessor used for encoding the data.
        model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
            The model to export.
        config ([`~onnx.config.OnnxConfig`]):
            The ONNX configuration associated with the exported model.
        opset (`int`):
            The version of the ONNX operator set to use.
        output (`Path`):
            Path handed to the framework-specific exporter to store the exported ONNX model.
        tokenizer ([`PreTrainedTokenizer`], *optional*):
            Deprecated, use `preprocessor` instead. When given, it replaces `preprocessor`.
        device (`str`, *optional*, defaults to `cpu`):
            The device on which the ONNX model will be exported. Either `cpu` or `cuda`. Only PyTorch is supported for
            export on CUDA devices.

    Returns:
        `Tuple[List[str], List[str]]`: A tuple with an ordered list of the model's inputs, and the named outputs from
        the ONNX configuration.

    Raises:
        ImportError: If neither PyTorch nor TensorFlow is installed.
        RuntimeError: If a TensorFlow model is exported with `device="cuda"`.
        ValueError: If both a tokenizer-like `preprocessor` and `tokenizer` are provided.
    """
    if not (is_torch_available() or is_tf_available()):
        raise ImportError(
            "Cannot convert because neither PyTorch nor TensorFlow are installed. "
            "Please install torch or tensorflow first."
        )

    if is_tf_available() and isinstance(model, TFPreTrainedModel) and device == "cuda":
        raise RuntimeError("`tf2onnx` does not support export on CUDA device.")

    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
        raise ValueError("You cannot provide both a tokenizer and a preprocessor to export the model.")
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use"
            " `preprocessor` instead.",
            FutureWarning,
        )
        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
        preprocessor = tokenizer

    if is_torch_available():
        from ..utils import get_torch_version

        if not config.is_torch_support_available:
            logger.warning(
                f"Unsupported PyTorch version for this model. Minimum required is {config.torch_onnx_minimum_version},"
                f" got: {get_torch_version()}"
            )

    # `tokenizer` is deliberately not forwarded: it has already been folded into `preprocessor` above. Passing it
    # again would make the framework-specific exporters reject the call with "You cannot provide both a tokenizer
    # and a preprocessor", and would emit the deprecation warning a second time.
    if is_torch_available() and issubclass(type(model), PreTrainedModel):
        return export_pytorch(preprocessor, model, config, opset, output, device=device)
    elif is_tf_available() and issubclass(type(model), TFPreTrainedModel):
        return export_tensorflow(preprocessor, model, config, opset, output)
    # NOTE: a model that is neither a `PreTrainedModel` nor a `TFPreTrainedModel` falls through and yields `None`.
def validate_model_outputs(
    config: OnnxConfig,
    preprocessor: Union["PreTrainedTokenizer", "FeatureExtractionMixin", "ProcessorMixin"],
    reference_model: Union["PreTrainedModel", "TFPreTrainedModel"],
    onnx_model: Path,
    onnx_named_outputs: List[str],
    atol: float,
    tokenizer: "PreTrainedTokenizer" = None,
):
    """
    Run the reference model and the exported ONNX model on the same dummy inputs and compare their outputs.

    Args:
        config ([`~onnx.config.OnnxConfig`]):
            The ONNX configuration used to generate the dummy inputs and to flatten nested inputs/outputs.
        preprocessor: ([`PreTrainedTokenizer`], [`FeatureExtractionMixin`] or [`ProcessorMixin`]):
            The preprocessor used to build the dummy inputs.
        reference_model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
            The model whose outputs serve as the reference.
        onnx_model (`Path`):
            Path of the exported ONNX model, loaded with onnxruntime on the CPU execution provider.
        onnx_named_outputs (`List[str]`):
            Names of the outputs requested from the ONNX model.
        atol (`float`):
            Absolute tolerance used when comparing output values.
        tokenizer ([`PreTrainedTokenizer`], *optional*):
            Deprecated, use `preprocessor` instead. When given, it replaces `preprocessor`.

    Raises:
        ValueError: If both a tokenizer-like `preprocessor` and `tokenizer` are provided, if an ONNX output name is
            missing from the reference outputs, or if an output differs in shape or in value (beyond `atol`).
    """
    from onnxruntime import InferenceSession, SessionOptions

    logger.info("Validating ONNX model...")

    if isinstance(preprocessor, PreTrainedTokenizerBase) and tokenizer is not None:
        raise ValueError("You cannot provide both a tokenizer and a preprocessor to validate the model outputs.")
    if tokenizer is not None:
        warnings.warn(
            "The `tokenizer` argument is deprecated and will be removed in version 5 of Transformers. Use"
            " `preprocessor` instead.",
            FutureWarning,
        )
        logger.info("Overwriting the `preprocessor` argument with `tokenizer` to generate dummmy inputs.")
        preprocessor = tokenizer

    # The framework of the reference model decides which tensors are generated and how they become numpy arrays
    is_torch_model = is_torch_available() and issubclass(type(reference_model), PreTrainedModel)
    framework = TensorType.PYTORCH if is_torch_model else TensorType.TENSORFLOW

    # Generate inputs one larger than the default fixed batch / sequence sizes of the config
    reference_model_inputs = config.generate_dummy_inputs(
        preprocessor,
        batch_size=config.default_fixed_batch + 1,
        seq_length=config.default_fixed_sequence + 1,
        framework=framework,
    )

    # Create the onnxruntime session
    session_options = SessionOptions()
    session = InferenceSession(onnx_model.as_posix(), session_options, providers=["CPUExecutionProvider"])

    # The comparison happens on numpy arrays, so a PyTorch reference model is moved to the CPU first
    if is_torch_model:
        reference_model.to("cpu")

    # Compute the reference outputs, flattening nested collections into individually named tensors
    ref_outputs = reference_model(**reference_model_inputs)
    ref_outputs_dict = {}
    for output_name, output_value in ref_outputs.items():
        # `past_key_values` is looked up under the name `present` on the ONNX side
        if output_name == "past_key_values":
            output_name = "present"
        if not isinstance(output_value, (list, tuple)):
            ref_outputs_dict[output_name] = output_value
            continue
        ref_outputs_dict.update(config.flatten_output_collection_property(output_name, output_value))

    # Build the inputs fed to onnxruntime: same flattening, converted to numpy
    onnx_inputs = {}
    ort_reference_inputs = config.generate_dummy_inputs_onnxruntime(reference_model_inputs)
    for input_name, input_value in ort_reference_inputs.items():
        if not isinstance(input_value, (list, tuple)):
            onnx_inputs[input_name] = input_value.numpy()
            continue
        flattened = config.flatten_output_collection_property(input_name, input_value)
        onnx_inputs.update({tensor_name: pt_tensor.numpy() for tensor_name, pt_tensor in flattened.items()})

    # Compute the outputs of the exported model
    onnx_outputs = session.run(onnx_named_outputs, onnx_inputs)

    # Every output requested from the ONNX model must exist among the reference outputs
    ref_outputs_set = set(ref_outputs_dict.keys())
    onnx_outputs_set = set(onnx_named_outputs)
    if not onnx_outputs_set.issubset(ref_outputs_set):
        logger.info(
            f"\t-[x] ONNX model output names {onnx_outputs_set} do not match reference model {ref_outputs_set}"
        )
        raise ValueError(
            "Outputs doesn't match between reference model and ONNX exported model: "
            f"{onnx_outputs_set.difference(ref_outputs_set)}"
        )
    logger.info(f"\t-[✓] ONNX model output names match reference model ({onnx_outputs_set})")

    # Compare each output: shape first, then values
    for name, ort_value in zip(onnx_named_outputs, onnx_outputs):
        reference_tensor = ref_outputs_dict[name]
        if is_torch_model:
            ref_value = reference_tensor.detach().numpy()
        else:
            ref_value = reference_tensor.numpy()
        logger.info(f'\t- Validating ONNX Model output "{name}":')

        if ort_value.shape != ref_value.shape:
            logger.info(f"\t\t-[x] shape {ort_value.shape} doesn't match {ref_value.shape}")
            raise ValueError(
                "Outputs shape doesn't match between reference model and ONNX exported model: "
                f"Got {ref_value.shape} (reference) and {ort_value.shape} (ONNX)"
            )
        logger.info(f"\t\t-[✓] {ort_value.shape} matches {ref_value.shape}")

        if not np.allclose(ref_value, ort_value, atol=atol):
            # Mask of the elements that fall outside the tolerance, used to report the offending values
            bad_indices = np.logical_not(np.isclose(ref_value, ort_value, atol=atol))
            logger.info(f"\t\t-[x] values not close enough (atol: {atol})")
            raise ValueError(
                "Outputs values doesn't match between reference model and ONNX exported model: "
                f"Got max absolute difference of: {np.amax(np.abs(ref_value - ort_value))} for "
                f"{ref_value[bad_indices]} vs {ort_value[bad_indices]}"
            )
        logger.info(f"\t\t-[✓] all values close (atol: {atol})")
def ensure_model_and_config_inputs_match(
    model: Union["PreTrainedModel", "TFPreTrainedModel"], model_inputs: Iterable[str]
) -> Tuple[bool, List[str]]:
    """
    Check the given input names against the parameters of the model's entry point.

    The entry point is `model.forward` for a PyTorch [`PreTrainedModel`] and `model.call` otherwise.

    Args:
        model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
            The model whose signature is inspected.
        model_inputs (`Iterable[str]`):
            The input names to look for in that signature.

    Returns:
        `Tuple[bool, List[str]]`: Whether every name in `model_inputs` is a parameter of the entry point, and the
        names that are, listed in the order they appear in the signature.
    """
    is_torch_model = is_torch_available() and issubclass(type(model), PreTrainedModel)
    entry_point = model.forward if is_torch_model else model.call
    parameter_names = list(signature(entry_point).parameters.keys())

    requested_names = set(model_inputs)
    all_accepted = requested_names <= set(parameter_names)

    # Walk the signature rather than the set so the result keeps the declaration order of the parameters
    ordered_inputs = [name for name in parameter_names if name in requested_names]
    return all_accepted, ordered_inputs