Joshua Perk committed on
Commit
2e0d57d
1 Parent(s): c38dc50

attempt to convert to ONNX in the cloud

Files changed (3)
  1. export_model.py +0 -22
  2. handler.py +62 -0
  3. requirements.txt +1 -24
export_model.py DELETED
@@ -1,22 +0,0 @@
- '''
- A script to convert the regular (PyTorch) deepset model into an ONNX model for further optimization.
- Use the local .env and install the requirements.
- Make sure the model checkpoint name is correct (or adapt the script to run the conversion locally).
- Make sure the auth token has access to the model.
- '''
-
-
- from optimum.onnxruntime import ORTModelForSeq2SeqLM
- from transformers import AutoTokenizer
-
- model_checkpoint = "getvector/deepset-earnings-transcript-summary"
- save_directory = "tmp/onnx/"
- huggingface_auth_token = "hf_***"  # token redacted
-
- # Load a model from transformers and export it to ONNX
- ort_model = ORTModelForSeq2SeqLM.from_pretrained(model_checkpoint, from_transformers=True, use_auth_token=huggingface_auth_token)
- tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_auth_token=huggingface_auth_token)
-
- # Save the ONNX model and tokenizer
- ort_model.save_pretrained(save_directory)
- tokenizer.save_pretrained(save_directory)
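For reference, a minimal sketch of exercising the export the deleted script produced, assuming it was run and tmp/onnx/ exists (the sample input text is invented):

from optimum.onnxruntime import ORTModelForSeq2SeqLM
from transformers import AutoTokenizer, pipeline

# Load the ONNX export and tokenizer saved by the script above
ort_model = ORTModelForSeq2SeqLM.from_pretrained("tmp/onnx/")
tokenizer = AutoTokenizer.from_pretrained("tmp/onnx/")

summarizer = pipeline("summarization", model=ort_model, tokenizer=tokenizer)
print(summarizer("Revenue grew 12% year over year while operating margins held steady.")[0]["summary_text"])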
handler.py ADDED
@@ -0,0 +1,62 @@
+ from optimum.onnxruntime import ORTOptimizer, ORTQuantizer, ORTModelForSeq2SeqLM
+ from optimum.onnxruntime.configuration import OptimizationConfig, AutoQuantizationConfig
+ from transformers import AutoTokenizer, pipeline
+
+ # Use a writable scratch directory for the exported/optimized files
+ save_dir = "/tmp/onnx"
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # Load the PyTorch model and export it to the ONNX format
+         model = ORTModelForSeq2SeqLM.from_pretrained(path, from_transformers=True)
+         # Save the exported ONNX files so the quantizers below can find them
+         model.save_pretrained(save_dir)
+
+         # Create the optimizer
+         optimizer = ORTOptimizer.from_pretrained(model)
+         # Define the optimization strategy by creating the appropriate configuration
+         optimization_config = OptimizationConfig(
+             optimization_level=2,
+             optimize_with_onnxruntime_only=False,
+             optimize_for_gpu=False,
+         )
+         # Optimize the model
+         optimizer.optimize(save_dir=save_dir, optimization_config=optimization_config)
+
+         # Load the resulting optimized model
+         # optimized_model = ORTModelForSeq2SeqLM.from_pretrained(
+         #     save_dir,
+         #     encoder_file_name="encoder_model_optimized.onnx",
+         #     decoder_file_name="decoder_model_optimized.onnx",
+         #     decoder_file_with_past_name="decoder_with_past_model_optimized.onnx",
+         # )
+
+         # Create one quantizer per exported ONNX component
+         encoder_quantizer = ORTQuantizer.from_pretrained(save_dir, file_name="encoder_model.onnx")
+         decoder_quantizer = ORTQuantizer.from_pretrained(save_dir, file_name="decoder_model.onnx")
+         decoder_wp_quantizer = ORTQuantizer.from_pretrained(save_dir, file_name="decoder_with_past_model.onnx")
+         quantizers = [encoder_quantizer, decoder_quantizer, decoder_wp_quantizer]
+
+         # Define the quantization strategy (dynamic, per-tensor, AVX512-VNNI)
+         dqconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
+
+         # Quantize each component
+         for quantizer in quantizers:
+             quantizer.quantize(save_dir=save_dir, quantization_config=dqconfig)
+
+         # Reload the exported model; the *_quantized.onnx files can be selected
+         # via the encoder/decoder file-name arguments if desired
+         new_model = ORTModelForSeq2SeqLM.from_pretrained(save_dir)
+
+         tokenizer = AutoTokenizer.from_pretrained(path)
+
+         self.pipeline = pipeline("summarization", model=new_model, tokenizer=tokenizer)
+
+     def __call__(self, data):
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", None)
+         if parameters is not None:
+             summary = self.pipeline(inputs, **parameters)
+         else:
+             summary = self.pipeline(inputs)
+         return summary
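The class follows the custom-handler convention used by Hugging Face Inference Endpoints: the service instantiates EndpointHandler with the repository path and invokes it with a JSON-style payload. A hedged sketch of driving it locally (the path and payload values are assumptions):

from handler import EndpointHandler

# Point at a local clone of the model repository (assumption)
handler = EndpointHandler(path=".")

# Payload shape mirrors what __call__ above expects
result = handler({
    "inputs": "Management reported record quarterly revenue and raised full-year guidance.",
    "parameters": {"max_length": 128, "min_length": 32},
})
print(result)  # e.g. [{"summary_text": "..."}]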
requirements.txt CHANGED
@@ -1,24 +1 @@
- certifi==2022.9.24
- charset-normalizer==2.1.1
- coloredlogs==15.0.1
- filelock==3.8.0
- huggingface-hub==0.10.0
- humanfriendly==10.0
- idna==3.4
- mpmath==1.2.1
- numpy==1.23.3
- optimum==1.4.0
- packaging==21.3
- protobuf==3.20.1
- pyparsing==3.0.9
- PyYAML==6.0
- regex==2022.9.13
- requests==2.28.1
- sentencepiece==0.1.97
- sympy==1.11.1
- tokenizers==0.12.1
- torch==1.12.1
- tqdm==4.64.1
- transformers==4.22.2
- typing_extensions==4.3.0
- urllib3==1.26.12
+ optimum[onnxruntime]==1.4.0
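The single optimum[onnxruntime] extra pulls in transformers, onnxruntime, and their dependencies transitively, replacing the fully pinned list. A quick hedged sanity check that the handler's imports resolve after installation:

# Assumes `pip install "optimum[onnxruntime]==1.4.0"` has been run; note that
# sentencepiece is no longer pinned and may still be needed by the tokenizer.
from optimum.onnxruntime import ORTModelForSeq2SeqLM, ORTOptimizer, ORTQuantizer
from transformers import AutoTokenizer, pipeline
print("ONNX Runtime stack importable")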