How to use the ONNX model? I need to accelerate the embedding

#6
by machine1116 - opened

I need Python code showing how to use the ONNX model. Can anybody give me an example? Thanks, my friends.

Jina AI org

hi @machine1116 :

from transformers import AutoTokenizer
from onnxruntime import InferenceSession
import torch
from numpy.linalg import norm


cos_sim = lambda a, b: (a @ b.T) / (norm(a) * norm(b))
tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-embeddings-v2-base-zh', trust_remote_code=True)
session = InferenceSession('your_downloaded_onnx_model_path')  # e.g. onnx/model_quantized.onnx downloaded from this repo

sentence_pairs = [
    ('how is the weather today', 'What is the current weather like today?'),
    ('I went to school today', 'where did you go this morning ?'),
    ('did you buy another iphone', 'question about buying a new apple device'),
    ('what did you learn at school', 'coding and programming'),
]

def embed_onnx(text):
    # ONNX Runtime expects NumPy arrays as input
    inputs = tokenizer(text, return_tensors="np")
    outputs = session.run(output_names=["last_hidden_state"], input_feed=dict(inputs))


    def mean_pooling(model_output, attention_mask):
        token_embeddings = model_output
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
            input_mask_expanded.sum(1), min=1e-9
        )

    embeddings = mean_pooling(torch.from_numpy(outputs[0]), torch.from_numpy(inputs['attention_mask']))

    return embeddings

for sentences in sentence_pairs:
    score = float(cos_sim(embed_onnx(sentences[0]), embed_onnx(sentences[1]))[0][0])
    print(f"{score:.4f}  {sentences[0]!r} vs {sentences[1]!r}")

If someone encounters this error when using the script from @bwang0911:

onnxruntime.capi.onnxruntime_pybind11_state.InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Unexpected input data type. Actual: (tensor(string)) , expected: (tensor(int64))

You might need to convert the values in inputs to the expected dtype (tensor(int64)):

inputs = tokenizer(text, return_tensors="np")
inputs = { name: np.array(tensor, dtype=np.int64) for name, tensor in inputs.items() }  # Add this line (requires import numpy as np)
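
For a self-contained view, here is a sketch of the first example's embed_onnx with that cast applied (the name embed_onnx_int64 is just illustrative; tokenizer and session are assumed to be the objects created in the first example):

import numpy as np
import torch

def embed_onnx_int64(text):
    # Tokenize to NumPy arrays, then cast every input tensor to int64 to match the ONNX graph
    inputs = tokenizer(text, return_tensors="np")
    inputs = {name: np.array(tensor, dtype=np.int64) for name, tensor in inputs.items()}
    outputs = session.run(output_names=["last_hidden_state"], input_feed=inputs)
    # Mean-pool the token embeddings with the attention mask, same pooling as before
    token_embeddings = torch.from_numpy(outputs[0])
    mask = torch.from_numpy(inputs["attention_mask"]).unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)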

And here is my example script, which supports both the original model and the ONNX model:

import os

import numpy as np
import torch

from pathlib import Path
from typing import Union

from huggingface_hub import hf_hub_download
from numpy.linalg import norm
from onnxruntime import InferenceSession
from tclogger import logger
from transformers import AutoTokenizer, AutoModel

from configs.envs import ENVS
from configs.constants import AVAILABLE_MODELS

if ENVS["HF_ENDPOINT"]:
    os.environ["HF_ENDPOINT"] = ENVS["HF_ENDPOINT"]
os.environ["HF_TOKEN"] = ENVS["HF_TOKEN"]


def cosine_similarity(a, b):
    return (a @ b.T) / (norm(a) * norm(b))


class JinaAIOnnxEmbedder:
    """https://huggingface.co/jinaai/jina-embeddings-v2-base-zh/discussions/6#65bc55a854ab5eb7b6300893"""

    def __init__(self):
        self.repo_name = "jinaai/jina-embeddings-v2-base-zh"
        self.download_model()
        self.load_model()

    def download_model(self):
        self.onnx_folder = Path(__file__).parent
        self.onnx_filename = "onnx/model_quantized.onnx"
        self.onnx_path = self.onnx_folder / self.onnx_filename
        if not self.onnx_path.exists():
            logger.note("> Downloading ONNX model")
            hf_hub_download(
                repo_id=self.repo_name,
                filename=self.onnx_filename,
                local_dir=self.onnx_folder,
                local_dir_use_symlinks=False,
            )
            logger.success(f"+ ONNX model downloaded: {self.onnx_path}")
        else:
            logger.success(f"+ ONNX model loaded: {self.onnx_path}")

    def load_model(self):
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.repo_name, trust_remote_code=True
        )
        self.session = InferenceSession(str(self.onnx_path))

    def mean_pooling(self, model_output, attention_mask):
        token_embeddings = model_output
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
            input_mask_expanded.sum(1), min=1e-9
        )

    def encode(self, text: str):
        inputs = self.tokenizer(text, return_tensors="np")
        inputs = {
            name: np.array(tensor, dtype=np.int64) for name, tensor in inputs.items()
        }
        outputs = self.session.run(
            output_names=["last_hidden_state"], input_feed=dict(inputs)
        )
        embeddings = self.mean_pooling(
            torch.from_numpy(outputs[0]), torch.from_numpy(inputs["attention_mask"])
        )
        return embeddings


class JinaAIEmbedder:
    def __init__(self, model_name: str = AVAILABLE_MODELS[0]):
        self.model_name = model_name
        self.load_model()

    def check_model_name(self):
        if self.model_name not in AVAILABLE_MODELS:
            self.model_name = AVAILABLE_MODELS[0]
        return True

    def load_model(self):
        self.check_model_name()
        self.model = AutoModel.from_pretrained(self.model_name, trust_remote_code=True)

    def switch_model(self, model_name: str):
        if model_name != self.model_name:
            self.model_name = model_name
            self.load_model()

    def encode(self, text: Union[str, list[str]]):
        if isinstance(text, str):
            text = [text]
        return self.model.encode(text)


if __name__ == "__main__":
    # embedder = JinaAIEmbedder()
    embedder = JinaAIOnnxEmbedder()
    texts = ["How is the weather today?", "今天天气怎么样?"]
    embeddings = []
    for text in texts:
        embeddings.append(embedder.encode(text))
    logger.success(embeddings)
    print(cosine_similarity(embeddings[0], embeddings[1]))
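
Since the original question is about accelerating the embedding, a rough latency comparison between the two embedders could look like the sketch below (the benchmark helper is hypothetical, and actual numbers depend on your hardware and on which ONNX file you use):

import time

def benchmark(embedder, texts, rounds=10):
    # Warm up once so first-call overhead is not counted
    for text in texts:
        embedder.encode(text)
    start = time.perf_counter()
    for _ in range(rounds):
        for text in texts:
            embedder.encode(text)
    return (time.perf_counter() - start) / (rounds * len(texts))

texts = ["How is the weather today?", "今天天气怎么样?"]
print(f"onnx  : {benchmark(JinaAIOnnxEmbedder(), texts) * 1000:.2f} ms per text")
print(f"torch : {benchmark(JinaAIEmbedder(), texts) * 1000:.2f} ms per text")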

If you would like to use this script, replace the following part with your own environment config, which sets the required environment variables and constants:

from configs.envs import ENVS
from configs.constants import AVAILABLE_MODELS

if ENVS["HF_ENDPOINT"]:
    os.environ["HF_ENDPOINT"] = ENVS["HF_ENDPOINT"]
os.environ["HF_TOKEN"] = ENVS["HF_TOKEN"]

The format of AVAILABLE_MODELS looks like this:

AVAILABLE_MODELS = [ "jinaai/jina-embeddings-v2-base-zh" ]
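
For a quick local test without the configs package, a minimal inline replacement could look like this sketch (the endpoint and token values are placeholders to fill in yourself):

import os

# Inline stand-ins for configs.envs.ENVS and configs.constants.AVAILABLE_MODELS
ENVS = {
    "HF_ENDPOINT": "",                 # optional mirror endpoint; leave empty to use the default
    "HF_TOKEN": "your-hf-token-here",  # your Hugging Face access token
}
AVAILABLE_MODELS = ["jinaai/jina-embeddings-v2-base-zh"]

if ENVS["HF_ENDPOINT"]:
    os.environ["HF_ENDPOINT"] = ENVS["HF_ENDPOINT"]
os.environ["HF_TOKEN"] = ENVS["HF_TOKEN"]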

thanks
