import copy
import warnings
from typing import Callable, Optional, Union
import numpy as np
import onnx
import torch
from sentence_transformers import SentenceTransformer, models
from sklearn.linear_model import LogisticRegression
from transformers.modeling_utils import PreTrainedModel
from setfit.exporters.utils import mean_pooling


class OnnxSetFitModel(torch.nn.Module):
    """A wrapper around SetFit model body, pooler, and model head which makes ONNX exporting easier.

    This wrapper creates an `nn.Module` with different levels of connectivity. We can set
    `model_body` and `pooler` and have a module which maps inputs to embeddings, or we can set all three
    and have a model which maps inputs to final predictions. This is useful because `torch.onnx.export`
    works with an `nn.Module`.

    Attributes:
        model_body (`PreTrainedModel`): The pretrained model body of a SetFit model.
        pooler (`Union[nn.Module, Callable[[torch.Tensor], torch.Tensor]]`, *optional*, defaults to `None`): The
            callable function that can map tensors of shape (batch, sequence, embedding_dim) to shape
            (batch, embedding_dim).
        model_head (`Union[nn.Module, LogisticRegression]`, *optional*, defaults to `None`): The model head from
            the pretrained SetFit model. If `None`, the resulting `OnnxSetFitModel.forward` method will
            return embeddings instead of predictions.
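
    Example (an illustrative sketch; the checkpoint name below is a placeholder for any BERT-like body):

        ```python
        from transformers import AutoModel, AutoTokenizer

        name = "sentence-transformers/paraphrase-albert-small-v2"
        body = AutoModel.from_pretrained(name)
        tokenizer = AutoTokenizer.from_pretrained(name)
        inputs = tokenizer("It's a test.", return_token_type_ids=True, return_tensors="pt")
        # With no pooler and no head set, forward() falls back to mean pooling
        # and returns sentence embeddings instead of predictions.
        wrapper = OnnxSetFitModel(body)
        embeddings = wrapper(**inputs)
        ```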
"""

    def __init__(
self,
model_body: PreTrainedModel,
pooler: Optional[Union[torch.nn.Module, Callable[[torch.Tensor], torch.Tensor]]] = None,
model_head: Optional[Union[torch.nn.Module, LogisticRegression]] = None,
):
super().__init__()
self.model_body = model_body
if pooler is None:
            print("No pooler was set, so defaulting to mean pooling.")
self.pooler = mean_pooling
else:
self.pooler = pooler
self.model_head = model_head

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, token_type_ids: torch.Tensor):
hidden_states = self.model_body(input_ids, attention_mask, token_type_ids)
hidden_states = {"token_embeddings": hidden_states[0], "attention_mask": attention_mask}
embeddings = self.pooler(hidden_states)
        # If the model_head is None, we are using an sklearn head and only output
        # the embeddings from the SetFit body.
        if self.model_head is None:
            return embeddings

        # If the head is set, we have a fully torch-based model and make the final
        # predictions with the head.
        out = self.model_head(embeddings)
        return out


def export_onnx_setfit_model(setfit_model: OnnxSetFitModel, inputs, output_path, opset: int = 12):
    """Export the `OnnxSetFitModel`.

    This exports the model created by the `OnnxSetFitModel` wrapper using `torch.onnx.export`.

    Args:
        setfit_model (`OnnxSetFitModel`): The `OnnxSetFitModel` we want to export to the .onnx format.
        inputs (`Dict[str, torch.Tensor]`): The inputs we would hypothetically pass to the model. These are
            generated using a tokenizer.
        output_path (`str`): The local path to save the ONNX model to.
        opset (`int`): The ONNX opset version to use for the export. Defaults to 12.
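
    Example (an illustrative sketch; the checkpoint name is a placeholder):

        ```python
        from transformers import AutoModel, AutoTokenizer

        name = "sentence-transformers/paraphrase-albert-small-v2"
        tokenizer = AutoTokenizer.from_pretrained(name)
        inputs = tokenizer("It's a test.", return_token_type_ids=True, return_tensors="pt")
        wrapper = OnnxSetFitModel(AutoModel.from_pretrained(name))
        export_onnx_setfit_model(wrapper, inputs, "model.onnx")
        ```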
"""
input_names = list(inputs.keys())
output_names = ["logits"]
# Setup the dynamic axes for onnx conversion.
dynamic_axes_input = {}
for input_name in input_names:
dynamic_axes_input[input_name] = {0: "batch_size", 1: "sequence"}
dynamic_axes_output = {}
for output_name in output_names:
dynamic_axes_output[output_name] = {0: "batch_size"}
# Move inputs to the right device
target = setfit_model.model_body.device
args = tuple(value.to(target) for value in inputs.values())
setfit_model.eval()
with torch.no_grad():
torch.onnx.export(
setfit_model,
args=args,
f=output_path,
opset_version=opset,
input_names=["input_ids", "attention_mask", "token_type_ids"],
output_names=output_names,
dynamic_axes={**dynamic_axes_input, **dynamic_axes_output},
)


def export_sklearn_head_to_onnx(model_head: LogisticRegression, opset: int) -> onnx.onnx_ml_pb2.ModelProto:
    """Convert the Scikit-Learn head from a SetFitModel to ONNX format.

    Args:
        model_head (`LogisticRegression`): The trained SetFit model_head.
        opset (`int`): The ONNX opset to use for optimizing this model. The opset is not
            guaranteed; it will fall back to the highest version supported by the sklearn
            model.

    Returns:
        [`onnx.onnx_ml_pb2.ModelProto`]: The ONNX model generated from the sklearn head.

    Raises:
        ImportError: If `skl2onnx` is not installed, an error will be raised asking
            to install this package.
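
    Example (an illustrative sketch; the head is fit on random data purely for demonstration):

        ```python
        import numpy as np
        from sklearn.linear_model import LogisticRegression

        head = LogisticRegression().fit(np.random.randn(16, 768), np.tile([0, 1], 8))
        onnx_head = export_sklearn_head_to_onnx(head, opset=12)
        ```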
"""
# Check if skl2onnx is installed
try:
import onnxconverter_common
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import guess_data_type
from skl2onnx.sklapi import CastTransformer
from sklearn.pipeline import Pipeline
except ImportError:
msg = """
`skl2onnx` must be installed in order to convert a model with an sklearn head.
Please install with `pip install skl2onnx`.
"""
raise ImportError(msg)
    # Determine the initial type and the shape of the output.
    input_shape = (None, model_head.n_features_in_)
    if hasattr(model_head, "coef_"):
        dtype = guess_data_type(model_head.coef_, shape=input_shape)[0][1]
    elif hasattr(model_head, "estimators_"):
        if any(not hasattr(e, "coef_") for e in model_head.estimators_):
            raise ValueError(
                "The model_head is a meta-estimator, but not all of its estimators have a coef_ attribute."
            )
        dtype = guess_data_type(model_head.estimators_[0].coef_, shape=input_shape)[0][1]
    else:
        raise ValueError(
            "The model_head has neither a coef_ attribute nor estimators with coef_ attributes. "
            "Conversion to ONNX only supports heads that expose their coefficients."
        )
    dtype.shape = input_shape
# If the datatype of the model is double we need to cast the outputs
# from the setfit model to doubles for compatibility inside of ONNX.
if isinstance(dtype, onnxconverter_common.data_types.DoubleTensorType):
sklearn_model = Pipeline([("castdouble", CastTransformer(dtype=np.double)), ("head", model_head)])
else:
sklearn_model = model_head
# Convert sklearn head into ONNX format
onnx_model = convert_sklearn(
sklearn_model,
initial_types=[("model_head", dtype)],
target_opset=opset,
options={id(sklearn_model): {"zipmap": False}},
)
return onnx_model


def hummingbird_export(model, data_sample):
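    """Convert a trained sklearn head to ONNX using Hummingbird-ML.

    Args:
        model: The trained sklearn estimator, e.g. the `LogisticRegression` head of a SetFit model.
        data_sample: A sample of the model's expected input, used by Hummingbird to trace the conversion.

    Returns:
        The converted model as an `onnx.onnx_ml_pb2.ModelProto`.
    """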
    try:
        from hummingbird.ml import convert
    except ImportError:
        raise ImportError(
            "Hummingbird-ML library is not installed. "
            "Run 'pip install hummingbird-ml' to use this type of export."
        )
onnx_model = convert(model, "onnx", data_sample)
return onnx_model._model


def export_onnx(
    model_body: SentenceTransformer,
    model_head: Union[torch.nn.Module, LogisticRegression],
    opset: int,
    output_path: str = "model.onnx",
    ignore_ir_version: bool = True,
    use_hummingbird: bool = False,
) -> None:
    """Export a PyTorch-backed SetFit model to ONNX Intermediate Representation.

    Args:
        model_body (`SentenceTransformer`): The model_body from a SetFit model. This should be a
            SentenceTransformer.
        model_head (`torch.nn.Module` or `LogisticRegression`): The SetFit model head. This can be either a
            dense-layer SetFitHead or an sklearn estimator.
        opset (`int`): The ONNX operator set version to target. The final opset used might be lower:
            ONNX will use the highest version supported by both the sklearn head and the model body. If the
            versions can't be reconciled, an error will be raised.
        output_path (`str`): The path where the generated ONNX model will be stored. At a minimum it needs to
            contain the name of the final file.
        ignore_ir_version (`bool`): Whether to ignore the IR version used by sklearn. The version is often
            mismatched with the one used by the transformer model. Setting this to `True` coerces the versions
            to be the same; this might cause errors but works in practice. If this is set to `False`, you need
            to ensure that the IR versions align between the transformer and the sklearn ONNX representation.
        use_hummingbird (`bool`): Whether to export the sklearn head with Hummingbird-ML instead of `skl2onnx`.
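
    Example (a hypothetical end-to-end sketch; the model id is a placeholder for any trained SetFit checkpoint):

        ```python
        from setfit import SetFitModel

        model = SetFitModel.from_pretrained("path-or-hub-id-of-a-trained-setfit-model")
        export_onnx(model.model_body, model.model_head, opset=12, output_path="model.onnx")
        ```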
"""
# Load the model and get all of the parts.
model_body_module = model_body._modules["0"]
model_pooler = model_body._modules["1"]
tokenizer = model_body_module.tokenizer
max_length = model_body_module.max_seq_length
transformer = model_body_module.auto_model
transformer.eval()
# Create dummy data to use during onnx export.
tokenizer_kwargs = dict(
max_length=max_length,
padding="max_length",
return_attention_mask=True,
return_token_type_ids=True,
return_tensors="pt",
)
dummy_sample = "It's a test."
dummy_inputs = tokenizer(dummy_sample, **tokenizer_kwargs)
# Check to see if the model uses a sklearn head or a torch dense layer.
if issubclass(type(model_head), models.Dense):
setfit_model = OnnxSetFitModel(transformer, lambda x: model_pooler(x)["sentence_embedding"], model_head).cpu()
export_onnx_setfit_model(setfit_model, dummy_inputs, output_path, opset)
        # Store the tokenizer settings as model metadata so that the correct
        # tokenizer configuration can be recovered at inference time.
        onnx_setfit_model = onnx.load(output_path)
        for key, value in tokenizer_kwargs.items():
            meta = onnx_setfit_model.metadata_props.add()  # create a new key-value pair to store
            meta.key = str(key)
            meta.value = str(value)
else:
# Export the sklearn head first to get the minimum opset. sklearn is behind
# in supported opsets.
# Hummingbird-ML can be used as an option to export to standard opset
if use_hummingbird:
with torch.no_grad():
test_input = copy.deepcopy(dummy_inputs)
head_input = model_body(test_input)["sentence_embedding"]
onnx_head = hummingbird_export(model_head, head_input.detach().numpy())
else:
onnx_head = export_sklearn_head_to_onnx(model_head, opset)
        max_opset = max(x.version for x in onnx_head.opset_import)
        if max_opset != opset:
            warnings.warn(
                f"sklearn ONNX max opset is {max_opset}, but opset {opset} was requested; "
                f"using opset {max_opset} for compatibility."
            )
export_onnx_setfit_model(
OnnxSetFitModel(transformer, lambda x: model_pooler(x)["sentence_embedding"]),
dummy_inputs,
output_path,
max_opset,
)
onnx_body = onnx.load(output_path)
# Check that the ir_versions are aligned and if not align them.
if ignore_ir_version:
onnx_head.ir_version = onnx_body.ir_version
elif onnx_head.ir_version != onnx_body.ir_version:
msg = f"""
IR Version mismatch between head={onnx_head.ir_version} and body={onnx_body.ir_version}
Make sure that the ONNX IR versions are aligned and supported between the chosen Sklearn model
and the transformer. You can set ignore_ir_version=True to coerce them but this might cause errors.
"""
raise ValueError(msg)
# Combine the onnx body and head by mapping the pooled output to the input of the sklearn model.
head_input_name = next(iter(onnx_head.graph.input)).name
onnx_setfit_model = onnx.compose.merge_models(
onnx_body,
onnx_head,
io_map=[("logits", head_input_name)],
)
# Save the final model.
onnx.save(onnx_setfit_model, output_path)