import torch
from torch import nn
from transformers import AutoTokenizer, AutoModel


class AutoModelForSentenceEmbedding(nn.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, **kwargs):
        model_output = self.model(**kwargs)
        embeddings = self.mean_pooling(model_output, kwargs['attention_mask'])
        # L2-normalize so that dot products between embeddings equal cosine similarity
        embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
        return embeddings

    def mean_pooling(self, model_output, attention_mask):
        token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        # Average token embeddings, ignoring padding positions via the attention mask
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


device = "cuda" if torch.cuda.is_available() else "cpu"


def create_semantic_ranking_model(device=device):
    """Creates a Hugging Face all-MiniLM-L6-v2 sentence-embedding model.

    Args:
        device: Device string (e.g. "cuda" or "cpu") or torch.device to place the model on.

    Returns:
        A tuple of the model and tokenizer.
    """
    tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
    model = AutoModelForSentenceEmbedding(
        AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
    ).to(device)
    # Freeze the backbone so the encoder is used purely for inference
    for param in model.model.parameters():
        param.requires_grad = False
    return model, tokenizer


# Example usage
model, tokenizer = create_semantic_ranking_model()
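
# A minimal usage sketch (not from the original code): embed a query and a few
# candidate sentences with the frozen model above, then rank the candidates by
# cosine similarity. The query and candidate strings are illustrative placeholders.
query = "How do I reset my password?"
candidates = [
    "Instructions for changing your account password.",
    "Our office hours are 9am to 5pm on weekdays.",
    "Steps to recover access when you forget your password.",
]

with torch.no_grad():
    # Tokenize the query and candidates together so they share one padded batch.
    batch = tokenizer([query] + candidates, padding=True, truncation=True, return_tensors="pt").to(device)
    embeddings = model(**batch)  # shape: (1 + num_candidates, hidden_size), L2-normalized

# Because the embeddings are normalized, a dot product is the cosine similarity.
scores = embeddings[1:] @ embeddings[0]
ranking = torch.argsort(scores, descending=True)
for idx in ranking.tolist():
    print(f"{scores[idx]:.3f}  {candidates[idx]}")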