"""Utilities for exporting SetFit models (transformer body + head) to ONNX."""
import copy | |
import warnings | |
from typing import Callable, Optional, Union | |
import numpy as np | |
import onnx | |
import torch | |
from sentence_transformers import SentenceTransformer, models | |
from sklearn.linear_model import LogisticRegression | |
from transformers.modeling_utils import PreTrainedModel | |
from setfit.exporters.utils import mean_pooling | |
class OnnxSetFitModel(torch.nn.Module):
    """A wrapper around SetFit model body, pooler, and model head which makes ONNX exporting easier.

    This wrapper creates a `nn.Module` with different levels of connectivity. We can set
    `model_body` and `pooler` and have a Module which maps inputs to embeddings or we can set all three
    and have a model which maps inputs to final predictions. This is useful because `torch.onnx.export`
    will work with a `nn.Module`.

    Attributes:
        model_body (`PreTrainedModel`): The pretrained model body of a setfit model.
        pooler (`Union[nn.Module, Callable[[torch.Tensor], torch.Tensor]]`, *optional*, defaults to `None`): The
            callable function that can map tensors of shape (batch, sequence, embedding_dim) to shape
            (batch, embedding_dim).
        model_head: (`Union[nn.Module, LogisticRegression]`, *optional*, defaults to `None`): The model head from
            the pretrained SetFit model. If `None`, then the resulting `OnnxSetFitModel.forward` forward method will
            return embeddings instead of predictions.
    """

    def __init__(
        self,
        model_body: "PreTrainedModel",
        pooler: Optional[Union[torch.nn.Module, Callable[[torch.Tensor], torch.Tensor]]] = None,
        model_head: Optional[Union[torch.nn.Module, "LogisticRegression"]] = None,
    ):
        super().__init__()
        self.model_body = model_body
        if pooler is None:
            # Emit a real warning (consistent with the `warnings` usage elsewhere in
            # this module) rather than printing to stdout.
            warnings.warn("No pooler was set so defaulting to mean pooling.")
            self.pooler = mean_pooling
        else:
            self.pooler = pooler
        self.model_head = model_head

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor):
        """Map tokenized inputs to embeddings, or to head predictions when a head is set.

        Args:
            input_ids (`torch.Tensor`): Token ids of shape (batch, sequence).
            attention_mask (`torch.Tensor`): Attention mask of shape (batch, sequence).
            token_type_ids (`torch.Tensor`): Segment ids of shape (batch, sequence).

        Returns:
            `torch.Tensor`: Pooled embeddings when `model_head` is `None`, otherwise the
            head's predictions on those embeddings.
        """
        hidden_states = self.model_body(input_ids, attention_mask, token_type_ids)
        # The pooler consumes a dict of the last hidden state plus the mask
        # (sentence-transformers pooling convention).
        hidden_states = {"token_embeddings": hidden_states[0], "attention_mask": attention_mask}
        embeddings = self.pooler(hidden_states)

        # If the model_head is none we are using a sklearn head and only output
        # the embeddings from the setfit model
        if self.model_head is None:
            return embeddings

        # If head is set then we have a fully torch based model and make the final predictions
        # with the head.
        out = self.model_head(embeddings)
        return out
def export_onnx_setfit_model(setfit_model: OnnxSetFitModel, inputs, output_path, opset: int = 12):
    """Export the `OnnxSetFitModel`.

    This exports the model created by the `OnnxSetFitModel` wrapper using `torch.onnx.export`.

    Args:
        setfit_model (`OnnxSetFitModel`): The `OnnxSetFitModel` we want to export to .onnx format.
        inputs (`Dict[str, torch.Tensor]`): The inputs we would hypothetically pass to the model. These are
            generated using a tokenizer. Must contain `input_ids`, `attention_mask` and `token_type_ids`.
        output_path (`str`): The local path to save the onnx model to.
        opset (`int`): The ONNX opset to use for the export. Defaults to 12.
    """
    # `OnnxSetFitModel.forward` consumes its tensors positionally in exactly this
    # order. Drive both `args` and the exported graph's `input_names` from this one
    # list instead of the tokenizer dict's key order (which differs per tokenizer,
    # e.g. BERT yields input_ids, token_type_ids, attention_mask) so names, dynamic
    # axes, and positional arguments can never desynchronize.
    input_names = ["input_ids", "attention_mask", "token_type_ids"]
    output_names = ["logits"]

    # Setup the dynamic axes for onnx conversion: batch and sequence stay dynamic
    # on inputs; only batch is dynamic on the output.
    dynamic_axes_input = {input_name: {0: "batch_size", 1: "sequence"} for input_name in input_names}
    dynamic_axes_output = {output_name: {0: "batch_size"} for output_name in output_names}

    # Move inputs to the same device as the model body.
    target = setfit_model.model_body.device
    args = tuple(inputs[input_name].to(target) for input_name in input_names)

    setfit_model.eval()
    with torch.no_grad():
        torch.onnx.export(
            setfit_model,
            args=args,
            f=output_path,
            opset_version=opset,
            input_names=input_names,
            output_names=output_names,
            dynamic_axes={**dynamic_axes_input, **dynamic_axes_output},
        )
def export_sklearn_head_to_onnx(model_head: "LogisticRegression", opset: int) -> "onnx.onnx_ml_pb2.ModelProto":
    """Convert the Scikit-Learn head from a SetFitModel to ONNX format.

    Args:
        model_head (`LogisticRegression`): The trained SetFit model_head.
        opset (`int`): The ONNX opset to use for optimizing this model. The opset is not
            guaranteed and will default to the maximum version possible for the sklearn
            model.

    Returns:
        [`onnx.onnx_ml_pb2.ModelProto`] The ONNX model generated from the sklearn head.

    Raises:
        ImportError: If `skl2onnx` is not installed an error will be raised asking
            to install this package.
        ValueError: If the head (or its sub-estimators) exposes no `coef_` attribute,
            since the weight dtype cannot be determined in that case.
    """
    # Check if skl2onnx is installed
    try:
        import onnxconverter_common
        from skl2onnx import convert_sklearn
        from skl2onnx.common.data_types import guess_data_type
        from skl2onnx.sklapi import CastTransformer
        from sklearn.pipeline import Pipeline
    except ImportError:
        msg = """
        `skl2onnx` must be installed in order to convert a model with an sklearn head.
        Please install with `pip install skl2onnx`.
        """
        raise ImportError(msg)

    # Determine the initial type and the shape of the output.
    input_shape = (None, model_head.n_features_in_)
    if hasattr(model_head, "coef_"):
        dtype = guess_data_type(model_head.coef_, shape=input_shape)[0][1]
    elif hasattr(model_head, "estimators_"):
        # Meta-estimator (e.g. one-vs-rest): every sub-estimator must expose coef_
        # so its weight dtype can be inspected.
        if any(not hasattr(e, "coef_") for e in model_head.estimators_):
            raise ValueError(
                "The model_head is a meta-estimator but not all of the estimators have a coef_ attribute."
            )
        dtype = guess_data_type(model_head.estimators_[0].coef_, shape=input_shape)[0][1]
    else:
        raise ValueError(
            "The model_head either does not have a coef_ attribute or some estimators in model_head.estimators_ do not have a coef_ attribute. Conversion to ONNX only supports these cases."
        )
    dtype.shape = input_shape

    # If the datatype of the model is double we need to cast the outputs
    # from the setfit model to doubles for compatibility inside of ONNX.
    if isinstance(dtype, onnxconverter_common.data_types.DoubleTensorType):
        sklearn_model = Pipeline([("castdouble", CastTransformer(dtype=np.double)), ("head", model_head)])
    else:
        sklearn_model = model_head

    # Convert sklearn head into ONNX format; zipmap is disabled so the output is a
    # plain tensor rather than a list of dicts.
    onnx_model = convert_sklearn(
        sklearn_model,
        initial_types=[("model_head", dtype)],
        target_opset=opset,
        options={id(sklearn_model): {"zipmap": False}},
    )

    return onnx_model
def hummingbird_export(model, data_sample):
    """Convert a trained sklearn model head to ONNX using Hummingbird-ML.

    Args:
        model: The trained sklearn estimator to convert.
        data_sample: A sample input (e.g. a numpy array of embeddings) that
            Hummingbird uses to trace the model.

    Returns:
        The ONNX `ModelProto` extracted from Hummingbird's container.

    Raises:
        ImportError: If `hummingbird-ml` is not installed.
    """
    try:
        from hummingbird.ml import convert
    except ImportError:
        # The original message was two adjacent literals missing a separating
        # space ("installed.Run"); keep it as one correctly spaced sentence pair.
        raise ImportError(
            "Hummingbird-ML library is not installed. Run 'pip install hummingbird-ml' to use this type of export."
        )
    onnx_model = convert(model, "onnx", data_sample)
    return onnx_model._model
def export_onnx(
    model_body: SentenceTransformer,
    model_head: Union[torch.nn.Module, LogisticRegression],
    opset: int,
    output_path: str = "model.onnx",
    ignore_ir_version: bool = True,
    use_hummingbird: bool = False,
) -> None:
    """Export a PyTorch backed SetFit model to ONNX Intermediate Representation.

    Args:
        model_body (`SentenceTransformer`): The model_body from a SetFit model body. This should be a
            SentenceTransformer.
        model_head (`torch.nn.Module` or `LogisticRegression`): The SetFit model head. This can be either a
            dense layer SetFitHead or a Sklearn estimator.
        opset (`int`): The actual version of the ONNX operator set to use. The final opset used might be lower.
            ONNX will use the highest version supported by both the sklearn head and the model body. If versions
            can't be rectified an error will be thrown.
        output_path (`str`): The path where will be stored the generated ONNX model. At a minimum it needs to contain
            the name of the final file.
        ignore_ir_version (`bool`): Whether to ignore the IR version used in sklearn. The version is often missmatched
            with the transformer models. Setting this to true coerces the versions to be the same. This might
            cause errors but in practice works. If this is set to False you need to ensure that the IR versions
            align between the transformer and the sklearn onnx representation.
        use_hummingbird (`bool`): Whether to convert a sklearn head with Hummingbird-ML instead of skl2onnx.
    """
    # Load the model and get all of the parts.
    # NOTE(review): assumes the SentenceTransformer is module "0" (transformer) followed
    # by module "1" (pooling) — the standard two-module layout.
    model_body_module = model_body._modules["0"]
    model_pooler = model_body._modules["1"]
    tokenizer = model_body_module.tokenizer
    max_length = model_body_module.max_seq_length
    transformer = model_body_module.auto_model
    transformer.eval()

    # Create dummy data to use during onnx export.
    tokenizer_kwargs = dict(
        max_length=max_length,
        padding="max_length",
        return_attention_mask=True,
        return_token_type_ids=True,
        return_tensors="pt",
    )
    dummy_sample = "It's a test."
    dummy_inputs = tokenizer(dummy_sample, **tokenizer_kwargs)

    # Check to see if the model uses a sklearn head or a torch dense layer.
    if issubclass(type(model_head), models.Dense):
        setfit_model = OnnxSetFitModel(transformer, lambda x: model_pooler(x)["sentence_embedding"], model_head).cpu()
        export_onnx_setfit_model(setfit_model, dummy_inputs, output_path, opset)

        # store meta data of the tokenizer for getting the correct tokenizer during inference
        onnx_setfit_model = onnx.load(output_path)
        # One metadata key-value pair per tokenizer kwarg. (A stray `metadata_props.add()`
        # before this loop previously left an empty, never-populated entry in the model.)
        for key, value in tokenizer_kwargs.items():
            meta = onnx_setfit_model.metadata_props.add()  # create a new key-value pair to store
            meta.key = str(key)
            meta.value = str(value)
    else:
        # Export the sklearn head first to get the minimum opset.  sklearn is behind
        # in supported opsets.
        # Hummingbird-ML can be used as an option to export to standard opset
        if use_hummingbird:
            with torch.no_grad():
                test_input = copy.deepcopy(dummy_inputs)
                head_input = model_body(test_input)["sentence_embedding"]
                onnx_head = hummingbird_export(model_head, head_input.detach().numpy())
        else:
            onnx_head = export_sklearn_head_to_onnx(model_head, opset)

        max_opset = max(x.version for x in onnx_head.opset_import)

        if max_opset != opset:
            warnings.warn(
                f"sklearn onnx max opset is {max_opset} requested opset {opset} using opset {max_opset} for compatibility."
            )
        export_onnx_setfit_model(
            OnnxSetFitModel(transformer, lambda x: model_pooler(x)["sentence_embedding"]),
            dummy_inputs,
            output_path,
            max_opset,
        )

        onnx_body = onnx.load(output_path)

        # Check that the ir_versions are aligned and if not align them.
        if ignore_ir_version:
            onnx_head.ir_version = onnx_body.ir_version
        elif onnx_head.ir_version != onnx_body.ir_version:
            msg = f"""
            IR Version mismatch between head={onnx_head.ir_version} and body={onnx_body.ir_version}
            Make sure that the ONNX IR versions are aligned and supported between the chosen Sklearn model
            and the transformer.  You can set ignore_ir_version=True to coerce them but this might cause errors.
            """
            raise ValueError(msg)

        # Combine the onnx body and head by mapping the pooled output to the input of the sklearn model.
        head_input_name = next(iter(onnx_head.graph.input)).name
        onnx_setfit_model = onnx.compose.merge_models(
            onnx_body,
            onnx_head,
            io_map=[("logits", head_input_name)],
        )

    # Save the final model.
    onnx.save(onnx_setfit_model, output_path)