|
--- |
|
pipeline_tag: sentence-similarity |
|
language: en |
|
license: apache-2.0 |
|
tags: |
|
- sentence-transformers |
|
- feature-extraction |
|
- sentence-similarity |
|
- transformers |
|
- onnx |
|
--- |
|
|
|
# sbert-onnx-all-roberta-large-v1
|
|
|
This is the ONNX export of [sentence-transformers/all-roberta-large-v1](https://www.sbert.net). Currently, Hugging Face does not natively support downloading an ONNX model and generating embeddings with it. I have created a workaround that uses sbert and optimum together to generate embeddings.
|
|
|
``` |
|
pip install onnx |
|
pip install onnxruntime==1.10.0 |
|
pip install "transformers>4.6.1"
|
pip install sentencepiece |
|
pip install sentence-transformers |
|
pip install optimum |
|
pip install torch==1.9.0 |
|
``` |
|
|
|
Then you can use the model like this: |
|
|
|
```python |
|
import os |
|
from sentence_transformers.util import snapshot_download |
|
from transformers import AutoTokenizer |
|
from optimum.onnxruntime import ORTModelForFeatureExtraction |
|
from sentence_transformers.models import Transformer, Pooling, Dense |
|
import torch |
|
from transformers.modeling_outputs import BaseModelOutput |
|
import torch.nn.functional as F |
|
import shutil |
|
|
|
model_name = 'vamsibanda/sbert-onnx-all-roberta-large-v1' |
|
cache_folder = './' |
|
model_path = os.path.join(cache_folder, model_name.replace("/", "_")) |
|
|
|
def download_onnx_model(model_name, cache_folder, model_path, force_download = False): |
|
if force_download and os.path.exists(model_path): |
|
shutil.rmtree(model_path) |
|
elif os.path.exists(model_path): |
|
return |
|
snapshot_download(model_name, |
|
cache_dir=cache_folder, |
|
library_name='sentence-transformers' |
|
) |
|
return |
|
|
|
def mean_pooling(model_output, attention_mask): |
|
token_embeddings = model_output[0] #First element of model_output contains all token embeddings |
|
input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() |
|
return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9) |
|
|
|
def generate_embedding(text): |
|
token = tokenizer(text, return_tensors='pt') |
|
embedding = model(input_ids=token['input_ids'], attention_mask=token['attention_mask']) |
|
embedding = mean_pooling(embedding, token['attention_mask']) |
|
embedding = F.normalize(embedding, p=2, dim=1) |
|
return embedding.tolist()[0] |
|
|
|
|
|
_ = download_onnx_model(model_name, cache_folder, model_path) |
|
tokenizer = AutoTokenizer.from_pretrained(model_path) |
|
model = ORTModelForFeatureExtraction.from_pretrained(model_path, force_download=False) |
|
pooling_layer = Pooling.load(f"{model_path}/1_Pooling") |
|
|
|
generate_embedding('That is a happy person') |
|
|
|
``` |