How to use the ONNX model? I need to accelerate the embedding

#6
by machine1116 - opened

I need Python code showing how to use the ONNX model. Can anybody give me an example? Thanks, my friends.

Jina AI org

hi @machine1116 :

from transformers import AutoTokenizer
from onnxruntime import InferenceSession
import torch
from numpy.linalg import norm


cos_sim = lambda a, b: (a @ b.T) / (norm(a) * norm(b))
tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-embeddings-v2-base-zh', trust_remote_code=True)
session = InferenceSession('your_downloaded_onnx_model_path')  # e.g. onnx/model_quantized.onnx downloaded from this repo

sentence_pairs = [
    ('how is the weather today', 'What is the current weather like today?'),
    ('I went to school today', 'where did you go this morning ?'),
    ('did you buy another iphone', 'question about buying a new apple device'),
    ('what did you learn at school', 'coding and programming'),
]

def embed_onnx(text):
    # ONNX Runtime expects NumPy arrays as input
    inputs = tokenizer(text, return_tensors="np")
    outputs = session.run(output_names=["last_hidden_state"], input_feed=dict(inputs))


    def mean_pooling(model_output, attention_mask):
        token_embeddings = model_output
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
            input_mask_expanded.sum(1), min=1e-9
        )

    embeddings = mean_pooling(torch.from_numpy(outputs[0]), torch.from_numpy(inputs['attention_mask']))

    return embeddings

for sentences in sentence_pairs:
    score = float(cos_sim(embed_onnx(sentences[0]), embed_onnx(sentences[1]))[0][0])
    print(f"{score:.4f}  {sentences[0]!r} vs {sentences[1]!r}")

If someone encounters this error when using the script from @bwang0911:

onnxruntime.capi.onnxruntime_pybind11_state.InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Unexpected input data type. Actual: (tensor(string)) , expected: (tensor(int64))

You might need to convert the values in inputs to the expected dtype (tensor(int64)):

inputs = tokenizer(text, return_tensors="np")
inputs = { name: np.array(tensor, dtype=np.int64) for name, tensor in inputs.items() }  # Add this line (requires import numpy as np)
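
For a self-contained view, here is a sketch of the first example's embed_onnx with that cast applied (the name embed_onnx_int64 is just illustrative; tokenizer and session are assumed to be the objects created in the first example):

import numpy as np
import torch

def embed_onnx_int64(text):
    # Tokenize to NumPy arrays, then cast every input tensor to int64 to match the ONNX graph
    inputs = tokenizer(text, return_tensors="np")
    inputs = {name: np.array(tensor, dtype=np.int64) for name, tensor in inputs.items()}
    outputs = session.run(output_names=["last_hidden_state"], input_feed=inputs)
    # Mean-pool the token embeddings with the attention mask, same pooling as before
    token_embeddings = torch.from_numpy(outputs[0])
    mask = torch.from_numpy(inputs["attention_mask"]).unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)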

And here is my example script, which supports both the original model and the ONNX model:

import os

import numpy as np
import torch

from pathlib import Path
from typing import Union

from huggingface_hub import hf_hub_download
from numpy.linalg import norm
from onnxruntime import InferenceSession
from tclogger import logger
from transformers import AutoTokenizer, AutoModel

from configs.envs import ENVS
from configs.constants import AVAILABLE_MODELS

if ENVS["HF_ENDPOINT"]:
    os.environ["HF_ENDPOINT"] = ENVS["HF_ENDPOINT"]
os.environ["HF_TOKEN"] = ENVS["HF_TOKEN"]


def cosine_similarity(a, b):
    return (a @ b.T) / (norm(a) * norm(b))


class JinaAIOnnxEmbedder:
    """https://huggingface.co/jinaai/jina-embeddings-v2-base-zh/discussions/6#65bc55a854ab5eb7b6300893"""

    def __init__(self):
        self.repo_name = "jinaai/jina-embeddings-v2-base-zh"
        self.download_model()
        self.load_model()

    def download_model(self):
        self.onnx_folder = Path(__file__).parent
        self.onnx_filename = "onnx/model_quantized.onnx"
        self.onnx_path = self.onnx_folder / self.onnx_filename
        if not self.onnx_path.exists():
            logger.note("> Downloading ONNX model")
            hf_hub_download(
                repo_id=self.repo_name,
                filename=self.onnx_filename,
                local_dir=self.onnx_folder,
                local_dir_use_symlinks=False,
            )
            logger.success(f"+ ONNX model downloaded: {self.onnx_path}")
        else:
            logger.success(f"+ ONNX model loaded: {self.onnx_path}")

    def load_model(self):
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.repo_name, trust_remote_code=True
        )
        self.session = InferenceSession(str(self.onnx_path))

    def mean_pooling(self, model_output, attention_mask):
        token_embeddings = model_output
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
            input_mask_expanded.sum(1), min=1e-9
        )

    def encode(self, text: str):
        inputs = self.tokenizer(text, return_tensors="np")
        inputs = {
            name: np.array(tensor, dtype=np.int64) for name, tensor in inputs.items()
        }
        outputs = self.session.run(
            output_names=["last_hidden_state"], input_feed=dict(inputs)
        )
        embeddings = self.mean_pooling(
            torch.from_numpy(outputs[0]), torch.from_numpy(inputs["attention_mask"])
        )
        return embeddings


class JinaAIEmbedder:
    def __init__(self, model_name: str = AVAILABLE_MODELS[0]):
        self.model_name = model_name
        self.load_model()

    def check_model_name(self):
        if self.model_name not in AVAILABLE_MODELS:
            self.model_name = AVAILABLE_MODELS[0]
        return True

    def load_model(self):
        self.check_model_name()
        self.model = AutoModel.from_pretrained(self.model_name, trust_remote_code=True)

    def switch_model(self, model_name: str):
        if model_name != self.model_name:
            self.model_name = model_name
            self.load_model()

    def encode(self, text: Union[str, list[str]]):
        if isinstance(text, str):
            text = [text]
        return self.model.encode(text)


if __name__ == "__main__":
    # embedder = JinaAIEmbedder()
    embedder = JinaAIOnnxEmbedder()
    texts = ["How is the weather today?", "今天天气怎么样?"]
    embeddings = []
    for text in texts:
        embeddings.append(embedder.encode(text))
    logger.success(embeddings)
    print(cosine_similarity(embeddings[0], embeddings[1]))
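
Since the original question is about accelerating the embedding, a rough latency comparison between the two embedders could look like the sketch below (the benchmark helper is hypothetical, and actual numbers depend on your hardware and on which ONNX file you use):

import time

def benchmark(embedder, texts, rounds=10):
    # Warm up once so first-call overhead is not counted
    for text in texts:
        embedder.encode(text)
    start = time.perf_counter()
    for _ in range(rounds):
        for text in texts:
            embedder.encode(text)
    return (time.perf_counter() - start) / (rounds * len(texts))

texts = ["How is the weather today?", "今天天气怎么样?"]
print(f"onnx  : {benchmark(JinaAIOnnxEmbedder(), texts) * 1000:.2f} ms per text")
print(f"torch : {benchmark(JinaAIEmbedder(), texts) * 1000:.2f} ms per text")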

If you would like to use this script, replace the following part with your own environment config, which sets the required environment variables and constants:

from configs.envs import ENVS
from configs.constants import AVAILABLE_MODELS

if ENVS["HF_ENDPOINT"]:
    os.environ["HF_ENDPOINT"] = ENVS["HF_ENDPOINT"]
os.environ["HF_TOKEN"] = ENVS["HF_TOKEN"]

The format of AVAILABLE_MODELS looks like this:

AVAILABLE_MODELS = [ "jinaai/jina-embeddings-v2-base-zh" ]
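
For a quick local test without the configs package, a minimal inline replacement could look like this sketch (the endpoint and token values are placeholders to fill in yourself):

import os

# Inline stand-ins for configs.envs.ENVS and configs.constants.AVAILABLE_MODELS
ENVS = {
    "HF_ENDPOINT": "",                 # optional mirror endpoint; leave empty to use the default
    "HF_TOKEN": "your-hf-token-here",  # your Hugging Face access token
}
AVAILABLE_MODELS = ["jinaai/jina-embeddings-v2-base-zh"]

if ENVS["HF_ENDPOINT"]:
    os.environ["HF_ENDPOINT"] = ENVS["HF_ENDPOINT"]
os.environ["HF_TOKEN"] = ENVS["HF_TOKEN"]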

thanks
