import json
import os
import tempfile
import warnings
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple, Union

# For Python 3.7 compatibility
try:
    from typing import Literal
except ImportError:
    from typing_extensions import Literal

import joblib
import numpy as np
import requests
import torch
from huggingface_hub import PyTorchModelHubMixin, hf_hub_download
from huggingface_hub.utils import validate_hf_hub_args
from sentence_transformers import SentenceTransformer, models
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multioutput import ClassifierChain, MultiOutputClassifier
from torch import nn
from torch.utils.data import DataLoader
from tqdm.auto import tqdm, trange
from transformers.utils import copy_func

from . import logging
from .data import SetFitDataset
from .model_card import SetFitModelCardData, generate_model_card
from .utils import set_docstring

logging.set_verbosity_info()
logger = logging.get_logger(__name__)

MODEL_HEAD_NAME = "model_head.pkl"
CONFIG_NAME = "config_setfit.json"


class SetFitHead(models.Dense):
    """
    A SetFit head that supports multi-class classification for end-to-end training.
    Binary classification is treated as 2-class classification.

    To be compatible with Sentence Transformers, we inherit `Dense` from:
    https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/models/Dense.py

    Args:
        in_features (`int`, *optional*):
            The embedding dimension from the output of the SetFit body. If `None`, defaults to `LazyLinear`.
        out_features (`int`, defaults to `2`):
            The number of targets. If `out_features` is set to 1 for binary classification, it will be changed
            to 2, since binary classification is treated as 2-class classification.
        temperature (`float`, defaults to `1.0`):
            A scaling factor for the logits. Higher values make the model less confident and lower values make
            it more confident.
        eps (`float`, defaults to `1e-5`):
            A value for numerical stability when scaling logits.
        bias (`bool`, *optional*, defaults to `True`):
            Whether to add bias to the head.
        device (`torch.device` or `str`, *optional*):
            The device the model will be sent to. If `None`, will check whether GPU is available.
        multitarget (`bool`, defaults to `False`):
            Enable multi-target classification by making `out_features` binary predictions instead of a single
            multinomial prediction.
    """

    def __init__(
        self,
        in_features: Optional[int] = None,
        out_features: int = 2,
        temperature: float = 1.0,
        eps: float = 1e-5,
        bias: bool = True,
        device: Optional[Union[torch.device, str]] = None,
        multitarget: bool = False,
    ) -> None:
        super(models.Dense, self).__init__()  # init on models.Dense's parent: nn.Module

        if out_features == 1:
            logger.warning(
                "Change `out_features` from 1 to 2 since we use `CrossEntropyLoss` for binary classification."
            )
            out_features = 2

        if in_features is not None:
            self.linear = nn.Linear(in_features, out_features, bias=bias)
        else:
            self.linear = nn.LazyLinear(out_features, bias=bias)

        self.in_features = in_features
        self.out_features = out_features
        self.temperature = temperature
        self.eps = eps
        self.bias = bias
        self._device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.multitarget = multitarget

        self.to(self._device)
        self.apply(self._init_weight)

    def forward(
        self,
        features: Union[Dict[str, torch.Tensor], torch.Tensor],
        temperature: Optional[float] = None,
    ) -> Union[Dict[str, torch.Tensor], Tuple[torch.Tensor]]:
        """
        SetFitHead can accept embeddings in:
        1. Output format (`dict`) from Sentence-Transformers.
        2. Pure `torch.Tensor`.

        Args:
            features (`Dict[str, torch.Tensor]` or `torch.Tensor`):
                The embeddings from the encoder. If using `dict` format, make sure to store embeddings under the
                key `'sentence_embedding'`; the outputs will then be stored under the keys `'logits'` and `'probs'`.
            temperature (`float`, *optional*):
                A scaling factor for the logits. Higher values make the model less confident and lower values make
                it more confident. Will override the temperature given during initialization.

        Returns:
            [`Dict[str, torch.Tensor]` or `Tuple[torch.Tensor]`]
        """
        temperature = temperature or self.temperature
        is_features_dict = False  # whether `features` is dict or not
        if isinstance(features, dict):
            assert "sentence_embedding" in features
            is_features_dict = True
        x = features["sentence_embedding"] if is_features_dict else features

        logits = self.linear(x)
        logits = logits / (temperature + self.eps)
        if self.multitarget:  # multiple targets per item
            probs = torch.sigmoid(logits)
        else:  # one target per item
            probs = nn.functional.softmax(logits, dim=-1)
        if is_features_dict:
            features.update(
                {
                    "logits": logits,
                    "probs": probs,
                }
            )
            return features

        return logits, probs

    def predict_proba(self, x_test: torch.Tensor) -> torch.Tensor:
        self.eval()
        return self(x_test)[1]

    def predict(self, x_test: torch.Tensor) -> torch.Tensor:
        probs = self.predict_proba(x_test)

        if self.multitarget:
            return torch.where(probs >= 0.5, 1, 0)
        return torch.argmax(probs, dim=-1)

    def get_loss_fn(self) -> nn.Module:
        if self.multitarget:  # if sigmoid output
            return torch.nn.BCEWithLogitsLoss()
        return torch.nn.CrossEntropyLoss()

    @property
    def device(self) -> torch.device:
        """
        `torch.device`: The device on which the model is placed.

        Reference from:
        https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/SentenceTransformer.py#L869
        """
        return next(self.parameters()).device

    def get_config_dict(self) -> Dict[str, Optional[Union[int, float, bool]]]:
        return {
            "in_features": self.in_features,
            "out_features": self.out_features,
            "temperature": self.temperature,
            "bias": self.bias,
            "device": self.device.type,  # store the string of the device, instead of `torch.device`
        }

    @staticmethod
    def _init_weight(module) -> None:
        if isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.constant_(module.bias, 1e-2)

    def __repr__(self) -> str:
        return "SetFitHead({})".format(self.get_config_dict())
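

# Illustrative sketch (kept as comments so that nothing runs on import): using `SetFitHead`
# directly on pre-computed embeddings. The 384-dimensional embedding size is an assumption
# matching MiniLM-style Sentence Transformer bodies.
#
#     head = SetFitHead(in_features=384, out_features=3)
#     embeddings = torch.randn(8, 384, device=head.device)
#     logits, probs = head(embeddings)  # a plain tensor input returns the `(logits, probs)` tuple
#     predictions = head.predict(embeddings)  # class indices with shape (8,)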


@dataclass
class SetFitModel(PyTorchModelHubMixin):
    """A SetFit model with integration to the [Hugging Face Hub](https://huggingface.co).

    Example::

        >>> from setfit import SetFitModel
        >>> model = SetFitModel.from_pretrained("tomaarsen/setfit-bge-small-v1.5-sst2-8-shot")
        >>> model.predict([
        ...     "It's a charming and often affecting journey.",
        ...     "It's slow -- very, very slow.",
        ...     "A sometimes tedious film.",
        ... ])
        ['positive', 'negative', 'negative']
    """

    model_body: Optional[SentenceTransformer] = None
    model_head: Optional[Union[SetFitHead, LogisticRegression]] = None
    multi_target_strategy: Optional[str] = None
    normalize_embeddings: bool = False
    labels: Optional[List[str]] = None
    model_card_data: Optional[SetFitModelCardData] = field(default_factory=SetFitModelCardData)

    attributes_to_save: Set[str] = field(
        init=False, repr=False, default_factory=lambda: {"normalize_embeddings", "labels"}
    )

    def __post_init__(self):
        self.model_card_data.register_model(self)

    @property
    def has_differentiable_head(self) -> bool:
        # if False, sklearn is assumed to be used instead
        return isinstance(self.model_head, nn.Module)

    @property
    def id2label(self) -> Dict[int, str]:
        """Return a mapping from integer IDs to string labels."""
        if self.labels is None:
            return {}
        return dict(enumerate(self.labels))

    @property
    def label2id(self) -> Dict[str, int]:
        """Return a mapping from string labels to integer IDs."""
        if self.labels is None:
            return {}
        return {label: idx for idx, label in enumerate(self.labels)}

    def fit(
        self,
        x_train: List[str],
        y_train: Union[List[int], List[List[int]]],
        num_epochs: int,
        batch_size: Optional[int] = None,
        body_learning_rate: Optional[float] = None,
        head_learning_rate: Optional[float] = None,
        end_to_end: bool = False,
        l2_weight: Optional[float] = None,
        max_length: Optional[int] = None,
        show_progress_bar: bool = True,
    ) -> None:
        """Train the classifier head, only used if a differentiable PyTorch head is used.

        Args:
            x_train (`List[str]`): A list of training sentences.
            y_train (`Union[List[int], List[List[int]]]`): A list of labels corresponding to the training sentences.
            num_epochs (`int`): The number of epochs to train for.
            batch_size (`int`, *optional*): The batch size to use.
            body_learning_rate (`float`, *optional*): The learning rate for the `SentenceTransformer` body in the
                `Adam` optimizer. Disregarded if `end_to_end=False`.
            head_learning_rate (`float`, *optional*): The learning rate for the differentiable torch head in the
                `Adam` optimizer.
            end_to_end (`bool`, defaults to `False`): If True, train the entire model end-to-end. Otherwise, freeze
                the `SentenceTransformer` body and only train the head.
            l2_weight (`float`, *optional*): The l2 weight for both the model body and head in the `Adam` optimizer.
            max_length (`int`, *optional*): The maximum token length a tokenizer can generate. If not provided, the
                maximum length for the `SentenceTransformer` body is used.
            show_progress_bar (`bool`, defaults to `True`): Whether to display a progress bar for the training epochs
                and iterations.
        """
        if self.has_differentiable_head:  # train with PyTorch
            self.model_body.train()
            self.model_head.train()
            if not end_to_end:
                self.freeze("body")

            dataloader = self._prepare_dataloader(x_train, y_train, batch_size, max_length)
            criterion = self.model_head.get_loss_fn()
            optimizer = self._prepare_optimizer(head_learning_rate, body_learning_rate, l2_weight)
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)
            # TODO: consider replacing StepLR with ReduceLROnPlateau, e.g.:
            # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            #     optimizer, factor=0.25, patience=10, threshold=5e-5, min_lr=1e-7, verbose=True
            # )
            for epoch_idx in trange(num_epochs, desc="Epoch", disable=not show_progress_bar):
                total_loss = 0.0
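                # Mini-batch loop: tokenized features are embedded by the SentenceTransformer body,
                # passed through the classification head, and the head (plus the body when
                # `end_to_end=True`) is updated from the classification loss.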
                for batch in tqdm(dataloader, desc="Iteration", disable=not show_progress_bar, leave=False):
                    features, labels = batch
                    optimizer.zero_grad()

                    # to model's device
                    features = {k: v.to(self.device) for k, v in features.items()}
                    labels = labels.to(self.device)

                    outputs = self.model_body(features)
                    if self.normalize_embeddings:
                        outputs["sentence_embedding"] = nn.functional.normalize(
                            outputs["sentence_embedding"], p=2, dim=1
                        )
                    outputs = self.model_head(outputs)
                    logits = outputs["logits"]

                    loss: torch.Tensor = criterion(logits, labels)
                    total_loss += loss.item()
                    loss.backward()
                    optimizer.step()

                if epoch_idx % 5 == 0:
                    logger.info(f"Epoch {epoch_idx + 1}: average loss {total_loss / len(dataloader):.4f}")
                scheduler.step()

            if not end_to_end:
                self.unfreeze("body")
        else:  # train with sklearn
            logger.info("Fitting the sklearn head (e.g. LogisticRegression) on the encoded training embeddings.")
            embeddings = self.model_body.encode(x_train, normalize_embeddings=self.normalize_embeddings)
            self.model_head.fit(embeddings, y_train)

    def _prepare_dataloader(
        self,
        x_train: List[str],
        y_train: Union[List[int], List[List[int]]],
        batch_size: Optional[int] = None,
        max_length: Optional[int] = None,
        shuffle: bool = True,
    ) -> DataLoader:
        max_acceptable_length = self.model_body.get_max_seq_length()
        if max_length is None:
            max_length = max_acceptable_length
            logger.warning(
                f"The `max_length` is `None`. Using the maximum acceptable length according to the current model body: {max_length}."
            )

        if max_length > max_acceptable_length:
            logger.warning(
                (
                    f"The specified `max_length`: {max_length} is greater than the maximum length of the current model body: {max_acceptable_length}. "
                    f"Using {max_acceptable_length} instead."
                )
            )
            max_length = max_acceptable_length

        dataset = SetFitDataset(
            x_train,
            y_train,
            tokenizer=self.model_body.tokenizer,
            max_length=max_length,
        )
        dataloader = DataLoader(
            dataset,
            batch_size=batch_size,
            collate_fn=dataset.collate_fn,
            shuffle=shuffle,
            pin_memory=True,
            # drop_last=True could be enabled here to drop the final incomplete batch.
        )

        return dataloader

    def _prepare_optimizer(
        self,
        head_learning_rate: float,
        body_learning_rate: Optional[float],
        l2_weight: float,
    ) -> torch.optim.Optimizer:
        body_learning_rate = body_learning_rate or head_learning_rate
        l2_weight = l2_weight or 1e-2
        optimizer = torch.optim.Adam(
            [
                {
                    "params": self.model_body.parameters(),
                    "lr": body_learning_rate,
                    "weight_decay": l2_weight,
                },
                {"params": self.model_head.parameters(), "lr": head_learning_rate, "weight_decay": l2_weight},
            ],
        )

        return optimizer

    def freeze(self, component: Optional[Literal["body", "head"]] = None) -> None:
        """Freeze the model body and/or the head, preventing further training on that component until unfrozen.

        Args:
            component (`Literal["body", "head"]`, *optional*): Either "body" or "head" to freeze that component.
                If no component is provided, freeze both. Defaults to None.
        """
        if component is None or component == "body":
            self._freeze_or_not(self.model_body, to_freeze=True)

        if (component is None or component == "head") and self.has_differentiable_head:
            self._freeze_or_not(self.model_head, to_freeze=True)

    def unfreeze(
        self, component: Optional[Literal["body", "head"]] = None, keep_body_frozen: Optional[bool] = None
    ) -> None:
        """Unfreeze the model body and/or the head, allowing further training on that component.

        Args:
            component (`Literal["body", "head"]`, *optional*): Either "body" or "head" to unfreeze that component.
                If no component is provided, unfreeze both. Defaults to None.
            keep_body_frozen (`bool`, *optional*): Deprecated argument, use `component` instead.
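
        Example::

            >>> # For instance: keep the body frozen while only the head is trained,
            >>> # then unfreeze it again for end-to-end fine-tuning.
            >>> model = SetFitModel.from_pretrained(...)
            >>> model.freeze("body")
            >>> model.unfreeze("body")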
""" if keep_body_frozen is not None: warnings.warn( "`keep_body_frozen` is deprecated and will be removed in v2.0.0 of SetFit. " 'Please either pass "head", "body" or no arguments to unfreeze both.', DeprecationWarning, stacklevel=2, ) # If the body must stay frozen, only unfreeze the head. Eventually, this entire if-branch # can be removed. if keep_body_frozen and not component: component = "head" if component is None or component == "body": self._freeze_or_not(self.model_body, to_freeze=False) if (component is None or component == "head") and self.has_differentiable_head: self._freeze_or_not(self.model_head, to_freeze=False) def _freeze_or_not(self, model: nn.Module, to_freeze: bool) -> None: """Set `requires_grad=not to_freeze` for all parameters in `model`""" for param in model.parameters(): param.requires_grad = not to_freeze def encode( self, inputs: List[str], batch_size: int = 32, show_progress_bar: Optional[bool] = None ) -> Union[torch.Tensor, np.ndarray]: """Convert input sentences to embeddings using the `SentenceTransformer` body. Args: inputs (`List[str]`): The input sentences to embed. batch_size (`int`, defaults to `32`): The batch size to use in encoding the sentences to embeddings. Higher often means faster processing but higher memory usage. show_progress_bar (`Optional[bool]`, defaults to `None`): Whether to show a progress bar while encoding. Returns: Union[torch.Tensor, np.ndarray]: A matrix with shape [INPUT_LENGTH, EMBEDDING_SIZE], as a torch Tensor if this model has a differentiable Torch head, or otherwise as a numpy array. """ return self.model_body.encode( inputs, batch_size=batch_size, normalize_embeddings=self.normalize_embeddings, convert_to_tensor=self.has_differentiable_head, show_progress_bar=show_progress_bar, ) def _output_type_conversion( self, outputs: Union[torch.Tensor, np.ndarray], as_numpy: bool = False ) -> Union[torch.Tensor, np.ndarray]: """Return `outputs` in the desired type: * Numpy array if no differentiable head is used. * Torch tensor if a differentiable head is used. Note: If the model is trained with string labels, which is only possible with a non-differentiable head, then we cannot output using torch Tensors, but only using a numpy array. Returns: Union[torch.Tensor, "ndarray"]: The input, correctly converted to the desired type. """ if as_numpy and self.has_differentiable_head: outputs = outputs.detach().cpu().numpy() elif not as_numpy and not self.has_differentiable_head and outputs.dtype.char != "U": # Only output as tensor if the output isn't a string outputs = torch.from_numpy(outputs) return outputs def predict_proba( self, inputs: Union[str, List[str]], batch_size: int = 32, as_numpy: bool = False, show_progress_bar: Optional[bool] = None, ) -> Union[torch.Tensor, np.ndarray]: """Predict the probabilities of the various classes. Args: inputs (`Union[str, List[str]]`): The input sentences to predict class probabilities for. batch_size (`int`, defaults to `32`): The batch size to use in encoding the sentences to embeddings. Higher often means faster processing but higher memory usage. as_numpy (`bool`, defaults to `False`): Whether to output as numpy array instead. show_progress_bar (`Optional[bool]`, defaults to `None`): Whether to show a progress bar while encoding. Example:: >>> model = SetFitModel.from_pretrained(...) 
>>> model.predict_proba(["What a boring display", "Exhilarating through and through", "I'm wowed!"]) tensor([[0.9367, 0.0633], [0.0627, 0.9373], [0.0890, 0.9110]], dtype=torch.float64) >>> model.predict_proba("That was cool!") tensor([0.8421, 0.1579], dtype=torch.float64) Returns: `Union[torch.Tensor, np.ndarray]`: A matrix with shape [INPUT_LENGTH, NUM_CLASSES] denoting probabilities of predicting an input as a class. If the input is a string, then the output is a vector with shape [NUM_CLASSES,]. """ is_singular = isinstance(inputs, str) if is_singular: inputs = [inputs] embeddings = self.encode(inputs, batch_size=batch_size, show_progress_bar=show_progress_bar) probs = self.model_head.predict_proba(embeddings) outputs = self._output_type_conversion(probs, as_numpy=as_numpy) return outputs[0] if is_singular else outputs def predict( self, inputs: Union[str, List[str]], batch_size: int = 32, as_numpy: bool = False, use_labels: bool = True, show_progress_bar: Optional[bool] = None, ) -> Union[torch.Tensor, np.ndarray, List[str], int, str]: """Predict the various classes. Args: inputs (`Union[str, List[str]]`): The input sentence or sentences to predict classes for. batch_size (`int`, defaults to `32`): The batch size to use in encoding the sentences to embeddings. Higher often means faster processing but higher memory usage. as_numpy (`bool`, defaults to `False`): Whether to output as numpy array instead. use_labels (`bool`, defaults to `True`): Whether to try and return elements of `SetFitModel.labels`. show_progress_bar (`Optional[bool]`, defaults to `None`): Whether to show a progress bar while encoding. Example:: >>> model = SetFitModel.from_pretrained(...) >>> model.predict(["What a boring display", "Exhilarating through and through", "I'm wowed!"]) ["negative", "positive", "positive"] >>> model.predict("That was cool!") "positive" Returns: `Union[torch.Tensor, np.ndarray, List[str], int, str]`: A list of string labels with equal length to the inputs if `use_labels` is `True` and `SetFitModel.labels` has been defined. Otherwise a vector with equal length to the inputs, denoting to which class each input is predicted to belong. If the inputs is a single string, then the output is a single label as well. """ is_singular = isinstance(inputs, str) if is_singular: inputs = [inputs] embeddings = self.encode(inputs, batch_size=batch_size, show_progress_bar=show_progress_bar) preds = self.model_head.predict(embeddings) # If labels are defined, we don't have multilabels & the output is not already strings, then we convert to string labels if ( use_labels and self.labels and preds.ndim == 1 and (self.has_differentiable_head or preds.dtype.char != "U") ): outputs = [self.labels[int(pred)] for pred in preds] else: outputs = self._output_type_conversion(preds, as_numpy=as_numpy) return outputs[0] if is_singular else outputs def __call__( self, inputs: Union[str, List[str]], batch_size: int = 32, as_numpy: bool = False, use_labels: bool = True, show_progress_bar: Optional[bool] = None, ) -> Union[torch.Tensor, np.ndarray, List[str], int, str]: """Predict the various classes. Args: inputs (`Union[str, List[str]]`): The input sentence or sentences to predict classes for. batch_size (`int`, defaults to `32`): The batch size to use in encoding the sentences to embeddings. Higher often means faster processing but higher memory usage. as_numpy (`bool`, defaults to `False`): Whether to output as numpy array instead. 
            use_labels (`bool`, defaults to `True`): Whether to try to return elements of `SetFitModel.labels`.
            show_progress_bar (`Optional[bool]`, defaults to `None`): Whether to show a progress bar while encoding.

        Example::

            >>> model = SetFitModel.from_pretrained(...)
            >>> model(["What a boring display", "Exhilarating through and through", "I'm wowed!"])
            ["negative", "positive", "positive"]
            >>> model("That was cool!")
            "positive"

        Returns:
            `Union[torch.Tensor, np.ndarray, List[str], int, str]`: A list of string labels with equal length to the
                inputs if `use_labels` is `True` and `SetFitModel.labels` has been defined. Otherwise a vector with
                equal length to the inputs, denoting to which class each input is predicted to belong. If the input
                is a single string, then the output is a single label as well.
        """
        return self.predict(
            inputs,
            batch_size=batch_size,
            as_numpy=as_numpy,
            use_labels=use_labels,
            show_progress_bar=show_progress_bar,
        )

    @property
    def device(self) -> torch.device:
        """Get the Torch device that this model is on.

        Returns:
            torch.device: The device that the model is on.
        """
        return self.model_body._target_device

    def to(self, device: Union[str, torch.device]) -> "SetFitModel":
        """Move this SetFitModel to `device`, and then return `self`. This method does not copy.

        Args:
            device (Union[str, torch.device]): The identifier of the device to move the model to.

        Example::

            >>> model = SetFitModel.from_pretrained(...)
            >>> model.to("cpu")
            >>> model(["cats are cute", "dogs are loyal"])

        Returns:
            SetFitModel: Returns the original model, but now on the desired device.
        """
        # Note that we must also set _target_device, or any SentenceTransformer.fit() call will reset
        # the body location
        self.model_body._target_device = device if isinstance(device, torch.device) else torch.device(device)
        self.model_body = self.model_body.to(device)

        if self.has_differentiable_head:
            self.model_head = self.model_head.to(device)
        return self

    def create_model_card(self, path: str, model_name: Optional[str] = "SetFit Model") -> None:
        """Creates and saves a model card for a SetFit model.

        Args:
            path (str): The path to save the model card to.
            model_name (str, *optional*): The name of the model. Defaults to `SetFit Model`.
        """
        if not os.path.exists(path):
            os.makedirs(path)

        # If `model_name` is a path to a local folder inside a temporary directory, i.e. when
        # `create_model_card` is called via `push_to_hub`, then only keep the last two directory
        # parts as the model ID.
        model_path = Path(model_name)
        if model_path.exists() and Path(tempfile.gettempdir()) in model_path.resolve().parents:
            self.model_card_data.model_id = "/".join(model_path.parts[-2:])

        with open(os.path.join(path, "README.md"), "w", encoding="utf-8") as f:
            f.write(self.generate_model_card())

    def generate_model_card(self) -> str:
        """Generate and return a model card string based on the model card data.

        Returns:
            str: The model card string.
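
        Example::

            >>> model = SetFitModel.from_pretrained(...)
            >>> model_card = model.generate_model_card()
            >>> with open("README.md", "w", encoding="utf-8") as f:
            ...     f.write(model_card)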
""" return generate_model_card(self) def _save_pretrained(self, save_directory: Union[Path, str]) -> None: save_directory = str(save_directory) # Save the config config_path = os.path.join(save_directory, CONFIG_NAME) with open(config_path, "w") as f: json.dump( { attr_name: getattr(self, attr_name) for attr_name in self.attributes_to_save if hasattr(self, attr_name) }, f, indent=2, ) # Save the body self.model_body.save(path=save_directory, create_model_card=False) # Save the README # # # # # # self.create_model_card(path=save_directory, model_name=save_directory) # # # # # # Move the head to the CPU before saving if self.has_differentiable_head: self.model_head.to("cpu") # Save the classification head joblib.dump(self.model_head, str(Path(save_directory) / MODEL_HEAD_NAME)) if self.has_differentiable_head: self.model_head.to(self.device) @classmethod @validate_hf_hub_args def _from_pretrained( cls, model_id: str, revision: Optional[str] = None, cache_dir: Optional[str] = None, force_download: Optional[bool] = None, proxies: Optional[Dict] = None, resume_download: Optional[bool] = None, local_files_only: Optional[bool] = None, token: Optional[Union[bool, str]] = None, multi_target_strategy: Optional[str] = None, use_differentiable_head: bool = False, device: Optional[Union[torch.device, str]] = None, **model_kwargs, ) -> "SetFitModel": model_body = SentenceTransformer(model_id, cache_folder=cache_dir, use_auth_token=token, device=device) device = model_body._target_device model_body.to(device) # put `model_body` on the target device # Try to load a SetFit config file config_file: Optional[str] = None if os.path.isdir(model_id): if CONFIG_NAME in os.listdir(model_id): config_file = os.path.join(model_id, CONFIG_NAME) else: try: config_file = hf_hub_download( repo_id=model_id, filename=CONFIG_NAME, revision=revision, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, token=token, local_files_only=local_files_only, ) except requests.exceptions.RequestException: pass model_kwargs = {key: value for key, value in model_kwargs.items() if value is not None} if config_file is not None: with open(config_file, "r", encoding="utf-8") as f: config = json.load(f) # Update model_kwargs + warnings for setting, value in config.items(): if setting in model_kwargs: if model_kwargs[setting] != value: logger.warning( f"Overriding {setting} in model configuration from {value} to {model_kwargs[setting]}." ) else: model_kwargs[setting] = value # Try to load a model head file if os.path.isdir(model_id): if MODEL_HEAD_NAME in os.listdir(model_id): model_head_file = os.path.join(model_id, MODEL_HEAD_NAME) else: logger.info( f"{MODEL_HEAD_NAME} not found in {Path(model_id).resolve()}," " initialising classification head with random weights." " You should TRAIN this model on a downstream task to use it for predictions and inference." ) model_head_file = None else: try: model_head_file = hf_hub_download( repo_id=model_id, filename=MODEL_HEAD_NAME, revision=revision, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, token=token, local_files_only=local_files_only, ) except requests.exceptions.RequestException: logger.info( f"{MODEL_HEAD_NAME} not found on HuggingFace Hub, initialising classification head with random weights." " You should TRAIN this model on a downstream task to use it for predictions and inference." 
                )
                model_head_file = None

        model_card_data: SetFitModelCardData = model_kwargs.pop("model_card_data", SetFitModelCardData())

        if model_head_file is not None:
            model_head = joblib.load(model_head_file)
            if isinstance(model_head, torch.nn.Module):
                model_head.to(device)
            model_card_data.infer_st_id(model_id)
        else:
            head_params = model_kwargs.pop("head_params", {})
            if use_differentiable_head:
                if multi_target_strategy is None:
                    use_multitarget = False
                else:
                    if multi_target_strategy in ["one-vs-rest", "multi-output"]:
                        use_multitarget = True
                    else:
                        raise ValueError(
                            f"multi_target_strategy '{multi_target_strategy}' is not supported for differentiable head"
                        )
                # Base `model_head` parameters
                # - get the sentence embedding dimension from the `model_body`
                # - follow the `model_body`, put `model_head` on the target device
                base_head_params = {
                    "in_features": model_body.get_sentence_embedding_dimension(),
                    "device": device,
                    "multitarget": use_multitarget,
                }
                model_head = SetFitHead(**{**head_params, **base_head_params})
            else:
                clf = LogisticRegression(**head_params)
                if multi_target_strategy is not None:
                    if multi_target_strategy == "one-vs-rest":
                        multilabel_classifier = OneVsRestClassifier(clf)
                    elif multi_target_strategy == "multi-output":
                        multilabel_classifier = MultiOutputClassifier(clf)
                    elif multi_target_strategy == "classifier-chain":
                        multilabel_classifier = ClassifierChain(clf)
                    else:
                        raise ValueError(f"multi_target_strategy {multi_target_strategy} is not supported.")

                    model_head = multilabel_classifier
                else:
                    model_head = clf
            model_card_data.set_st_id(model_id if "/" in model_id else f"sentence-transformers/{model_id}")

        # Remove the `transformers` config
        model_kwargs.pop("config", None)

        return cls(
            model_body=model_body,
            model_head=model_head,
            multi_target_strategy=multi_target_strategy,
            model_card_data=model_card_data,
            **model_kwargs,
        )


docstring = SetFitModel.from_pretrained.__doc__
cut_index = docstring.find("model_kwargs")
if cut_index != -1:
    docstring = (
        docstring[:cut_index]
        + """labels (`List[str]`, *optional*):
                If the training labels are integers ranging from `0` to `num_classes - 1`, then these `labels`
                provide the corresponding string labels.
            model_card_data (`SetFitModelCardData`, *optional*):
                A `SetFitModelCardData` instance storing data such as model language, license, dataset name, etc.
                to be used in the automatically generated model cards.
            multi_target_strategy (`str`, *optional*):
                The strategy to use with multi-label classification. One of "one-vs-rest", "multi-output",
                or "classifier-chain".
            use_differentiable_head (`bool`, *optional*):
                Whether to load SetFit using a differentiable (i.e., Torch) head instead of Logistic Regression.
            normalize_embeddings (`bool`, *optional*):
                Whether to apply normalization on the embeddings produced by the Sentence Transformer body.
            device (`Union[torch.device, str]`, *optional*):
                The device on which to load the SetFit model, e.g. `"cuda:0"`, `"mps"` or `torch.device("cuda")`.

        Example::

            >>> from setfit import SetFitModel
            >>> model = SetFitModel.from_pretrained(
            ...     "sentence-transformers/paraphrase-mpnet-base-v2",
            ...     labels=["positive", "negative"],
            ... )
        """
    )
    SetFitModel.from_pretrained = set_docstring(SetFitModel.from_pretrained, docstring)

SetFitModel.save_pretrained = copy_func(SetFitModel.save_pretrained)
SetFitModel.save_pretrained.__doc__ = SetFitModel.save_pretrained.__doc__.replace(
    "~ModelHubMixin._from_pretrained", "SetFitModel.push_to_hub"
)
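

# Illustrative end-to-end sketch (kept as comments so that nothing runs on import): loading a
# SetFit model with a differentiable head and training only that head. The model ID and the
# tiny dataset below are placeholders, not part of the library.
#
#     model = SetFitModel.from_pretrained(
#         "sentence-transformers/paraphrase-mpnet-base-v2",
#         use_differentiable_head=True,
#         head_params={"out_features": 2},
#     )
#     model.fit(x_train=["I loved it", "I hated it"], y_train=[1, 0], num_epochs=10, batch_size=2)
#     model.predict(["An unexpected delight"])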