Joshua Perk
committed on
Commit
•
2e0d57d
1
Parent(s):
c38dc50
attempt to convert onnx in cloud
Browse files- export_model.py +0 -22
- handler.py +62 -0
- requirements.txt +1 -24
export_model.py
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
"""
A script to convert this normal/deepset model into an ONNX model for further
optimization.

Use the local .env and install the requirements.
Make sure the model checkpoint name is correct (or convert to do this locally).
Make sure the auth token has access to the model.
"""

import os

from optimum.onnxruntime import ORTModelForSeq2SeqLM
from transformers import AutoTokenizer

model_checkpoint = "getvector/deepset-earnings-transcript-summary"
save_directory = "tmp/onnx/"
# SECURITY: an auth token was previously hard-coded here. Any token committed
# to version control is compromised and must be revoked/rotated immediately.
# Read it from the environment (e.g. the local .env) instead; raises KeyError
# with a clear name if it is missing.
huggingface_auth_token = os.environ["HUGGINGFACE_AUTH_TOKEN"]

# Load the model from transformers and export it to ONNX.
ort_model = ORTModelForSeq2SeqLM.from_pretrained(
    model_checkpoint, from_transformers=True, use_auth_token=huggingface_auth_token
)
tokenizer = AutoTokenizer.from_pretrained(
    model_checkpoint, use_auth_token=huggingface_auth_token
)

# Save the ONNX model and tokenizer side by side so they load as one artifact.
ort_model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
handler.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from optimum.onnxruntime import ORTOptimizer, ORTQuantizer, ORTModelForSeq2SeqLM
|
2 |
+
from optimum.onnxruntime.configuration import OptimizationConfig, AutoQuantizationConfig
|
3 |
+
from transformers import AutoTokenizer, pipeline
|
4 |
+
|
5 |
+
save_dir = "/" # NOTE(review): saving to filesystem root looks unintended — probably "." or a tmp dir; confirm write permissions in the deployment container before changing
|
6 |
+
|
7 |
+
class EndpointHandler():
    """Custom inference-endpoint handler.

    On startup it exports the checkpoint found at ``path`` to ONNX,
    graph-optimizes and dynamically quantizes it with ONNX Runtime, then
    serves requests through a ``summarization`` pipeline.
    """

    def __init__(self, path=""):
        # Export the PyTorch checkpoint at `path` to the ONNX format.
        model = ORTModelForSeq2SeqLM.from_pretrained(path, from_transformers=True)

        # Graph-level optimization (level 2, CPU-targeted).
        optimizer = ORTOptimizer.from_pretrained(model)
        optimization_config = OptimizationConfig(
            optimization_level=2,
            optimize_with_onnxruntime_only=False,
            optimize_for_gpu=False,
        )
        optimizer.optimize(save_dir=save_dir, optimization_config=optimization_config)

        # One quantizer per exported sub-model (encoder, decoder,
        # decoder-with-past).
        quantizers = [
            ORTQuantizer.from_pretrained(path, file_name="encoder_model.onnx"),
            ORTQuantizer.from_pretrained(path, file_name="decoder_model.onnx"),
            ORTQuantizer.from_pretrained(path, file_name="decoder_with_past_model.onnx"),
        ]

        # Dynamic (weight-only) quantization tuned for AVX-512 VNNI CPUs.
        dqconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)

        # Plain loop for side effects instead of the original
        # `[print(q.quantize(...)) for q in quantizer]` comprehension.
        for quantizer in quantizers:
            quantizer.quantize(save_dir=save_dir, quantization_config=dqconfig)

        # NOTE(review): this reloads from `path`, so the optimized/quantized
        # artifacts written to `save_dir` above are never actually served —
        # presumably it should load from `save_dir` with the *_quantized
        # file names. Left as-is pending confirmation.
        new_model = ORTModelForSeq2SeqLM.from_pretrained(path)
        tokenizer = AutoTokenizer.from_pretrained(path)

        self.pipeline = pipeline("summarization", model=new_model, tokenizer=tokenizer)

    def __call__(self, data):
        """Handle one request.

        Args:
            data: the request payload — a dict with ``"inputs"`` (the text to
                summarize) and an optional ``"parameters"`` dict of keyword
                arguments forwarded to the pipeline, or the raw inputs
                themselves.

        Returns:
            The pipeline's summarization output.
        """
        # Pop "parameters" BEFORE falling back to the whole payload as the
        # inputs: the original popped "inputs" first, so when that key was
        # absent the later pop("parameters") silently mutated the inputs.
        if isinstance(data, dict):
            parameters = data.pop("parameters", None)
            inputs = data.pop("inputs", data)
        else:
            parameters = None
            inputs = data
        if parameters is not None:
            return self.pipeline(inputs, **parameters)
        return self.pipeline(inputs)
|
requirements.txt
CHANGED
@@ -1,24 +1 @@
|
|
1 |
-
|
2 |
-
charset-normalizer==2.1.1
|
3 |
-
coloredlogs==15.0.1
|
4 |
-
filelock==3.8.0
|
5 |
-
huggingface-hub==0.10.0
|
6 |
-
humanfriendly==10.0
|
7 |
-
idna==3.4
|
8 |
-
mpmath==1.2.1
|
9 |
-
numpy==1.23.3
|
10 |
-
optimum==1.4.0
|
11 |
-
packaging==21.3
|
12 |
-
protobuf==3.20.1
|
13 |
-
pyparsing==3.0.9
|
14 |
-
PyYAML==6.0
|
15 |
-
regex==2022.9.13
|
16 |
-
requests==2.28.1
|
17 |
-
sentencepiece==0.1.97
|
18 |
-
sympy==1.11.1
|
19 |
-
tokenizers==0.12.1
|
20 |
-
torch==1.12.1
|
21 |
-
tqdm==4.64.1
|
22 |
-
transformers==4.22.2
|
23 |
-
typing_extensions==4.3.0
|
24 |
-
urllib3==1.26.12
|
1 |
+
optimum[onnxruntime]==1.4.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|