Thytu
committed on
Commit
·
87b4d0f
1
Parent(s):
e1b7da6
feat: paraphrase-multilingual-MiniLM-L12-v2_*_onnx/
Browse files- paraphrase-multilingual-MiniLM-L12-v2_inference_onnx/config.pbtxt +66 -0
- paraphrase-multilingual-MiniLM-L12-v2_model_onnx/1/model.bin +3 -0
- paraphrase-multilingual-MiniLM-L12-v2_model_onnx/config.pbtxt +35 -0
- paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/__pycache__/model.cpython-38.pyc +0 -0
- paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/config.json +3 -0
- paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/model.py +75 -0
- paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/special_tokens_map.json +3 -0
- paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/tokenizer.json +3 -0
- paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/tokenizer_config.json +3 -0
- paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/unigram.json +3 -0
- paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/config.pbtxt +36 -0
paraphrase-multilingual-MiniLM-L12-v2_inference_onnx/config.pbtxt
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: "paraphrase-multilingual-MiniLM-L12-v2_inference_onnx"
|
2 |
+
max_batch_size: 0
|
3 |
+
platform: "ensemble"
|
4 |
+
|
5 |
+
input [
|
6 |
+
{
|
7 |
+
name: "TEXT"
|
8 |
+
data_type: TYPE_STRING
|
9 |
+
dims: [ -1 ]
|
10 |
+
}
|
11 |
+
]
|
12 |
+
|
13 |
+
output {
|
14 |
+
name: "output"
|
15 |
+
data_type: TYPE_FP32
|
16 |
+
dims: [-1, -1]
|
17 |
+
}
|
18 |
+
|
19 |
+
ensemble_scheduling {
|
20 |
+
step [
|
21 |
+
{
|
22 |
+
model_name: "paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx"
|
23 |
+
model_version: -1
|
24 |
+
input_map {
|
25 |
+
key: "TEXT"
|
26 |
+
value: "TEXT"
|
27 |
+
}
|
28 |
+
output_map [
|
29 |
+
{
|
30 |
+
key: "input_ids"
|
31 |
+
value: "input_ids"
|
32 |
+
},
|
33 |
+
{
|
34 |
+
key: "token_type_ids"
|
35 |
+
value: "token_type_ids"
|
36 |
+
},
|
37 |
+
{
|
38 |
+
key: "attention_mask"
|
39 |
+
value: "attention_mask"
|
40 |
+
}
|
41 |
+
]
|
42 |
+
},
|
43 |
+
{
|
44 |
+
model_name: "paraphrase-multilingual-MiniLM-L12-v2_model_onnx"
|
45 |
+
model_version: -1
|
46 |
+
input_map [
|
47 |
+
{
|
48 |
+
key: "input_ids"
|
49 |
+
value: "input_ids"
|
50 |
+
},
|
51 |
+
{
|
52 |
+
key: "token_type_ids"
|
53 |
+
value: "token_type_ids"
|
54 |
+
},
|
55 |
+
{
|
56 |
+
key: "attention_mask"
|
57 |
+
value: "attention_mask"
|
58 |
+
}
|
59 |
+
]
|
60 |
+
output_map {
|
61 |
+
key: "output"
|
62 |
+
value: "output"
|
63 |
+
}
|
64 |
+
}
|
65 |
+
]
|
66 |
+
}
|
paraphrase-multilingual-MiniLM-L12-v2_model_onnx/1/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86aba795cdd9aac515d308b3aadaf8090fc6e4542fe5e83c97b43015d04bba38
|
3 |
+
size 237818533
|
paraphrase-multilingual-MiniLM-L12-v2_model_onnx/config.pbtxt
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: "paraphrase-multilingual-MiniLM-L12-v2_model_onnx"
|
2 |
+
max_batch_size: 0
|
3 |
+
platform: "onnxruntime_onnx"
|
4 |
+
default_model_filename: "model.bin"
|
5 |
+
|
6 |
+
input [
|
7 |
+
{
|
8 |
+
name: "input_ids"
|
9 |
+
data_type: TYPE_INT32
|
10 |
+
dims: [-1, -1]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
name: "token_type_ids"
|
14 |
+
data_type: TYPE_INT32
|
15 |
+
dims: [-1, -1]
|
16 |
+
},
|
17 |
+
{
|
18 |
+
name: "attention_mask"
|
19 |
+
data_type: TYPE_INT32
|
20 |
+
dims: [-1, -1]
|
21 |
+
}
|
22 |
+
]
|
23 |
+
|
24 |
+
output {
|
25 |
+
name: "output"
|
26 |
+
data_type: TYPE_FP32
|
27 |
+
dims: [-1, 384]
|
28 |
+
}
|
29 |
+
|
30 |
+
instance_group [
|
31 |
+
{
|
32 |
+
count: 1
|
33 |
+
kind: KIND_GPU
|
34 |
+
}
|
35 |
+
]
|
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/__pycache__/model.cpython-38.pyc
ADDED
Binary file (2.43 kB). View file
|
|
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fedccd318db4fb26df118947449b7a6b7aacda3f1c0309c77a94f9856b652c58
|
3 |
+
size 673
|
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/model.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2022, Lefebvre Dalloz Services
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
"""
|
16 |
+
This module is copy-pasted in generated Triton configuration folder to perform the tokenization step.
|
17 |
+
"""
|
18 |
+
|
19 |
+
# noinspection DuplicatedCode
|
20 |
+
from pathlib import Path
|
21 |
+
from typing import Dict, List
|
22 |
+
|
23 |
+
import numpy as np
|
24 |
+
|
25 |
+
|
26 |
+
try:
|
27 |
+
# noinspection PyUnresolvedReferences
|
28 |
+
import triton_python_backend_utils as pb_utils
|
29 |
+
except ImportError:
|
30 |
+
pass # triton_python_backend_utils exists only inside Triton Python backend.
|
31 |
+
|
32 |
+
from transformers import AutoTokenizer, BatchEncoding, PreTrainedTokenizer, TensorType
|
33 |
+
|
34 |
+
|
35 |
+
class TritonPythonModel:
    """
    Triton Python-backend model performing the tokenization step.

    It reads the ``TEXT`` input tensor of each request, tokenizes it, and returns
    one tensor per name listed in the tokenizer's ``model_input_names``.
    """

    tokenizer: PreTrainedTokenizer

    def initialize(self, args: Dict[str, str]) -> None:
        """
        Initialize the tokenization process by loading the tokenizer from disk.

        :param args: arguments from Triton config file (only ``model_repository`` is read)
        """
        # more variables in https://github.com/triton-inference-server/python_backend/blob/main/src/python.cc

        # The tokenizer files are loaded from the "1" sub-folder of the model repository path.
        # A previous, immediately-overwritten assignment based on `.parent` was dead code and
        # has been removed.
        # NOTE(review): the version folder "1" is hard-coded; confirm no other version is served.
        path: str = str(Path(args["model_repository"]).absolute()) + "/1/"

        self.tokenizer = AutoTokenizer.from_pretrained(path)

    def execute(self, requests) -> "List[List[pb_utils.Tensor]]":
        """
        Parse and tokenize each request.

        :param requests: 1 or more requests received by Triton server.
        :return: one inference response per request, holding the tokenizer outputs as tensors
        """
        responses = []
        # for loop for batch requests (disabled in our case)
        for request in requests:
            text_tensor = pb_utils.get_input_tensor_by_name(request, "TEXT")
            # binary data typed back to string
            query = [raw.decode("UTF-8") for raw in text_tensor.as_numpy().tolist()]
            tokens: BatchEncoding = self.tokenizer(
                text=query, return_tensors=TensorType.NUMPY, padding=True, pad_to_multiple_of=8
            )
            # tensorrt uses int32 as input type, ort uses int64
            # every tokenizer output is cast to int32 here before being sent downstream
            tokens_dict = {k: v.astype(np.int32) for k, v in tokens.items()}
            # communicate the tokenization results to Triton server
            outputs = [
                pb_utils.Tensor(input_name, tokens_dict[input_name])
                for input_name in self.tokenizer.model_input_names
            ]

            responses.append(pb_utils.InferenceResponse(output_tensors=outputs))

        return responses
|
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/special_tokens_map.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06e405a36dfe4b9604f484f6a1e619af1a7f7d09e34a8555eb0b77b66318067f
|
3 |
+
size 280
|
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b60b6b43406a48bf3638526314f3d232d97058bc93472ff2de930d43686fa441
|
3 |
+
size 17082913
|
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:303b2300b7e7248b9ba2e8d7e3d77a66316aa83f7e87be42f317178c8483087a
|
3 |
+
size 572
|
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/1/unigram.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71b44701d7efd054205115acfa6ef126c5d2f84bd3affe0c59e48163674d19a6
|
3 |
+
size 14763234
|
paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx/config.pbtxt
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: "paraphrase-multilingual-MiniLM-L12-v2_tokenizer_onnx"
|
2 |
+
max_batch_size: 0
|
3 |
+
backend: "python"
|
4 |
+
|
5 |
+
input [
|
6 |
+
{
|
7 |
+
name: "TEXT"
|
8 |
+
data_type: TYPE_STRING
|
9 |
+
dims: [ -1 ]
|
10 |
+
}
|
11 |
+
]
|
12 |
+
|
13 |
+
output [
|
14 |
+
{
|
15 |
+
name: "input_ids"
|
16 |
+
data_type: TYPE_INT32
|
17 |
+
dims: [-1, -1]
|
18 |
+
},
|
19 |
+
{
|
20 |
+
name: "token_type_ids"
|
21 |
+
data_type: TYPE_INT32
|
22 |
+
dims: [-1, -1]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
name: "attention_mask"
|
26 |
+
data_type: TYPE_INT32
|
27 |
+
dims: [-1, -1]
|
28 |
+
}
|
29 |
+
]
|
30 |
+
|
31 |
+
instance_group [
|
32 |
+
{
|
33 |
+
count: 1
|
34 |
+
kind: KIND_GPU
|
35 |
+
}
|
36 |
+
]
|