---
license: apache-2.0
---

## How to download the model?

```
!sudo apt-get install git-lfs
!git lfs install
# Then
!git clone https://huggingface.co/vamsibanda/sbert-onnx-all-roberta-large-v1
```

## How to generate embeddings?

```
from onnxruntime import InferenceSession
import torch
import torch.nn.functional as F
from transformers import RobertaTokenizerFast
from transformers.modeling_outputs import BaseModelOutput
from sentence_transformers import SentenceTransformer, util
from sentence_transformers.models import Pooling


class RobertaEncoder(torch.nn.Module):
    """Wraps an ONNX Runtime session so it can be called like a Hugging Face encoder."""

    def __init__(self, encoder_sess):
        super().__init__()
        self.encoder = encoder_sess

    def forward(
        self,
        input_ids,
        attention_mask,
        inputs_embeds=None,
        head_mask=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        encoder_hidden_state = torch.from_numpy(
            self.encoder.run(
                None,
                {
                    "input_ids": input_ids.cpu().numpy(),
                    "attention_mask": attention_mask.cpu().numpy(),
                },
            )[0]
        )
        return BaseModelOutput(encoder_hidden_state)


def mean_pooling(model_output, attention_mask):
    # First element of model_output contains all token embeddings
    token_embeddings = model_output[0]
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


def sbert_onnx_encode(sentence_input):
    token = roberta_tokenizer(sentence_input, return_tensors='pt')
    encoder_outputs = encoder_layer(input_ids=token['input_ids'], attention_mask=token['attention_mask'])
    sbert_embeddings = mean_pooling(encoder_outputs, token['attention_mask'])
    sbert_embeddings = F.normalize(sbert_embeddings, p=2, dim=1)
    return sbert_embeddings.tolist()[0]


roberta_tokenizer = RobertaTokenizerFast.from_pretrained('sbert-onnx-all-roberta-large-v1')
encoder_sess = InferenceSession('sbert-onnx-all-roberta-large-v1/sbert-roberta-large-quant.onnx')
encoder_layer = RobertaEncoder(encoder_sess)
# Saved pooling config; pooling itself is applied manually via mean_pooling above
pooling_layer = Pooling.load('./sbert-onnx-all-roberta-large-v1/1_Pooling/')

# Load the original sentence-transformers model to verify the ONNX embeddings against it
sbert = SentenceTransformer('sentence-transformers/all-roberta-large-v1')

m1 = sbert_onnx_encode('That is a happy person')
m2 = sbert.encode('That is a happy person').tolist()
print(util.cos_sim(m1, m2))
# tensor([[0.9925]])
```
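`sbert_onnx_encode` handles one sentence at a time. If you want to embed several sentences in a single ONNX Runtime call, a batched variant can reuse the tokenizer, encoder, and pooling defined above. The sketch below is illustrative and not part of the repository; `sbert_onnx_encode_batch` is a hypothetical helper name, and it assumes the objects from the previous block have already been created:

```
def sbert_onnx_encode_batch(sentences):
    # Hypothetical helper (not shipped with this repo): pad the batch to a
    # common length so all input_ids/attention_mask tensors share one shape
    tokens = roberta_tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
    encoder_outputs = encoder_layer(input_ids=tokens['input_ids'], attention_mask=tokens['attention_mask'])
    embeddings = mean_pooling(encoder_outputs, tokens['attention_mask'])
    # L2-normalize so dot products equal cosine similarities
    return F.normalize(embeddings, p=2, dim=1).tolist()


batch = sbert_onnx_encode_batch(['That is a happy person', 'That is a sad person'])
print(util.cos_sim(batch[0], batch[1]))
```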