Gladiaio
/

sentence-transformers_all-MiniLM-L12-v2_onnx

Model card Files Files and versions Community

Thytu committed on Jan 10, 2023

Commit

87b4d0f

1 Parent(s): e1b7da6

feat: paraphrase-multilingual-MiniLM-L12-v2_*_onnx/

Browse files

Files changed (11) hide show

paraphrase-multilingual-MiniLM-L12-v2_inference_onnx/config.pbtxt +66 -0
paraphrase-multilingual-MiniLM-L12-v2_model_onnx/1/model.bin +3 -0
paraphrase-multilingual-MiniLM-L12-v2_model_onnx/config.pbtxt +35 -0
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/__pycache__/model.cpython-38.pyc +0 -0
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/config.json +3 -0
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/model.py +75 -0
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/special_tokens_map.json +3 -0
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/tokenizer.json +3 -0
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/tokenizer_config.json +3 -0
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/unigram.json +3 -0
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/config.pbtxt +36 -0

paraphrase-multilingual-MiniLM-L12-v2_inference_onnx/config.pbtxt ADDED Viewed

	@@ -0,0 +1,66 @@

+name: "paraphrase-multilingual-MiniLM-L12-v2_inference_onnx"
+max_batch_size: 0
+platform: "ensemble"
+input [
+{
+    name: "TEXT"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+}
+]
+output {
+    name: "output"
+    data_type: TYPE_FP32
+    dims: [-1, -1]
+}
+ensemble_scheduling {
+    step [
+        {
+            model_name: "paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx"
+            model_version: -1
+            input_map {
+            key: "TEXT"
+            value: "TEXT"
+        }
+        output_map [
+{
+    key: "input_ids"
+    value: "input_ids"
+},
+{
+    key: "token_type_ids"
+    value: "token_type_ids"
+},
+{
+    key: "attention_mask"
+    value: "attention_mask"
+}
+        ]
+        },
+        {
+            model_name: "paraphrase-multilingual-MiniLM-L12-v2_model_onnx"
+            model_version: -1
+            input_map [
+{
+    key: "input_ids"
+    value: "input_ids"
+},
+{
+    key: "token_type_ids"
+    value: "token_type_ids"
+},
+{
+    key: "attention_mask"
+    value: "attention_mask"
+}
+            ]
+        output_map {
+                key: "output"
+                value: "output"
+            }
+        }
+    ]
+}

paraphrase-multilingual-MiniLM-L12-v2_model_onnx/1/model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:86aba795cdd9aac515d308b3aadaf8090fc6e4542fe5e83c97b43015d04bba38
+size 237818533

paraphrase-multilingual-MiniLM-L12-v2_model_onnx/config.pbtxt ADDED Viewed

	@@ -0,0 +1,35 @@

+name: "paraphrase-multilingual-MiniLM-L12-v2_model_onnx"
+max_batch_size: 0
+platform: "onnxruntime_onnx"
+default_model_filename: "model.bin"
+input [
+{
+    name: "input_ids"
+    data_type: TYPE_INT32
+    dims: [-1, -1]
+},
+{
+    name: "token_type_ids"
+    data_type: TYPE_INT32
+    dims: [-1, -1]
+},
+{
+    name: "attention_mask"
+    data_type: TYPE_INT32
+    dims: [-1, -1]
+}
+]
+output {
+    name: "output"
+    data_type: TYPE_FP32
+    dims: [-1, 384]
+}
+instance_group [
+    {
+      count: 1
+      kind: KIND_GPU
+    }
+]

paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/__pycache__/model.cpython-38.pyc ADDED Viewed

Binary file (2.43 kB). View file

paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fedccd318db4fb26df118947449b7a6b7aacda3f1c0309c77a94f9856b652c58
+size 673

paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/model.py ADDED Viewed

	@@ -0,0 +1,75 @@

+#  Copyright 2022, Lefebvre Dalloz Services
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""
+This module is copy-pasted into the generated Triton configuration folder to perform the tokenization step.
+"""
+# noinspection DuplicatedCode
+from pathlib import Path
+from typing import Dict, List
+import numpy as np
+try:
+    # noinspection PyUnresolvedReferences
+    import triton_python_backend_utils as pb_utils
+except ImportError:
+    pass  # triton_python_backend_utils exists only inside Triton Python backend.
+from transformers import AutoTokenizer, BatchEncoding, PreTrainedTokenizer, TensorType
+class TritonPythonModel:
+    """
+    Triton Python backend model performing the tokenization step.
+    It reads the "TEXT" input tensor of each request and returns the tokenizer outputs as int32 tensors.
+    """
+    # tokenizer instance, loaded once in initialize() and reused by every execute() call
+    tokenizer: PreTrainedTokenizer
+    def initialize(self, args: Dict[str, str]) -> None:
+        """
+        Initialize the tokenization process
+        :param args: arguments from Triton config file
+        """
+        # more variables in https://github.com/triton-inference-server/python_backend/blob/main/src/python.cc
+        # the tokenizer files are read from the "1" sub-folder of the directory given by args["model_repository"]
+        path: str = str(Path(args["model_repository"]).absolute()) + "/1/"
+        self.tokenizer = AutoTokenizer.from_pretrained(path)
+    def execute(self, requests) -> "List[pb_utils.InferenceResponse]":
+        """
+        Parse and tokenize each request
+        :param requests: 1 or more requests received by Triton server.
+        :return: one inference response per request, holding the tokenizer outputs as tensors
+        """
+        responses = []
+        # for loop for batch requests (disabled in our case)
+        for request in requests:
+            # binary data typed back to string
+            text_tensor = pb_utils.get_input_tensor_by_name(request, "TEXT")
+            query = [t.decode("UTF-8") for t in text_tensor.as_numpy().tolist()]
+            tokens: BatchEncoding = self.tokenizer(
+                text=query, return_tensors=TensorType.NUMPY, padding=True, pad_to_multiple_of=8
+            )
+            # tensorrt uses int32 as input type, ort uses int64
+            # every tokenizer output is cast to int32 here before being sent back
+            tokens_dict = {k: v.astype(np.int32) for k, v in tokens.items()}
+            # communicate the tokenization results to Triton server,
+            # one tensor per name listed in the tokenizer's model_input_names
+            outputs = [
+                pb_utils.Tensor(input_name, tokens_dict[input_name])
+                for input_name in self.tokenizer.model_input_names
+            ]
+            responses.append(pb_utils.InferenceResponse(output_tensors=outputs))
+        return responses

paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06e405a36dfe4b9604f484f6a1e619af1a7f7d09e34a8555eb0b77b66318067f
+size 280

paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b60b6b43406a48bf3638526314f3d232d97058bc93472ff2de930d43686fa441
+size 17082913

paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:303b2300b7e7248b9ba2e8d7e3d77a66316aa83f7e87be42f317178c8483087a
+size 572

paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/unigram.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71b44701d7efd054205115acfa6ef126c5d2f84bd3affe0c59e48163674d19a6
+size 14763234

paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/config.pbtxt ADDED Viewed

	@@ -0,0 +1,36 @@

+name: "paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx"
+max_batch_size: 0
+backend: "python"
+input [
+{
+    name: "TEXT"
+    data_type: TYPE_STRING
+    dims: [ -1 ]
+}
+]
+output [
+{
+    name: "input_ids"
+    data_type: TYPE_INT32
+    dims: [-1, -1]
+},
+{
+    name: "token_type_ids"
+    data_type: TYPE_INT32
+    dims: [-1, -1]
+},
+{
+    name: "attention_mask"
+    data_type: TYPE_INT32
+    dims: [-1, -1]
+}
+]
+instance_group [
+    {
+      count: 1
+      kind: KIND_GPU
+    }
+]