tlwu committed on
Commit
c110ffc
•
1 Parent(s): 1c94a6f

models from Olive

Browse files
README.md CHANGED
@@ -20,6 +20,11 @@ tags:
20
 
21
  This repository hosts the optimized versions of **SD Turbo** to accelerate inference with ONNX Runtime CUDA execution provider.
22
 
 
 
 
 
 
23
  See the [usage instructions](#usage-example) for how to run the SD Turbo pipeline with the ONNX files hosted in this repository.
24
 
25
  ## Model Description
@@ -37,10 +42,10 @@ Below is average latency of generating an image of size 512x512 using NVIDIA A10
37
 
38
  | Engine | Batch Size | Steps | PyTorch 2.1 | ONNX Runtime CUDA |
39
  |-------------|------------|------ | ----------------|-------------------|
40
- | Static | 1 | 1 | 85.3 ms | 32.9 ms |
41
- | Static | 4 | 1 | 213.8 ms | 97.5 ms |
42
- | Static | 1 | 4 | 117.4 ms | 62.5 ms |
43
- | Static | 4 | 4 | 294.3 ms | 168.3 ms |
44
 
45
 
46
  Static means the engine is built for the given batch size and image size combination, and CUDA graph is used to speed up.
@@ -61,7 +66,7 @@ cd onnxruntime
61
  2. Download the SD Turbo ONNX files from this repo
62
  ```shell
63
  git lfs install
64
- git clone https://huggingface.co/tlwu/sdxl-turbo-onnxruntime
65
  ```
66
 
67
  3. Launch the docker
@@ -97,5 +102,5 @@ python3 -m pip install --upgrade polygraphy onnx-graphsurgeon --extra-index-url
97
  python3 demo_txt2img.py \
98
  "starry night over Golden Gate Bridge by van gogh" \
99
  --version sd-turbo \
100
- --work-dir /workspace/sd-turbo-onnxruntime
101
  ```
 
20
 
21
  This repository hosts the optimized versions of **SD Turbo** to accelerate inference with ONNX Runtime CUDA execution provider.
22
 
23
+ The models are generated by [Olive](https://github.com/microsoft/Olive/tree/main/examples/stable_diffusion) with a command like the following:
24
+ ```
25
+ python stable_diffusion.py --provider cuda --model_id stabilityai/sd-turbo --optimize --use_fp16_fixed_vae
26
+ ```
27
+
28
  See the [usage instructions](#usage-example) for how to run the SD Turbo pipeline with the ONNX files hosted in this repository.
29
 
30
  ## Model Description
 
42
 
43
  | Engine | Batch Size | Steps | PyTorch 2.1 | ONNX Runtime CUDA |
44
  |-------------|------------|------ | ----------------|-------------------|
45
+ | Static | 1 | 1 | 85.3 ms | 38.2 ms |
46
+ | Static | 4 | 1 | 213.8 ms | 120.2 ms |
47
+ | Static | 1 | 4 | 117.4 ms | 68.7 ms |
48
+ | Static | 4 | 4 | 294.3 ms | 192.6 ms |
49
 
50
 
51
  Static means the engine is built for the given batch size and image size combination, and CUDA graph is used to speed up.
 
66
  2. Download the SD Turbo ONNX files from this repo
67
  ```shell
68
  git lfs install
69
+ git clone https://huggingface.co/tlwu/sd-turbo-onnxruntime
70
  ```
71
 
72
  3. Launch the docker
 
102
  python3 demo_txt2img.py \
103
  "starry night over Golden Gate Bridge by van gogh" \
104
  --version sd-turbo \
105
+ --engine-dir /workspace/sd-turbo-onnxruntime
106
  ```
model_index.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "OnnxStableDiffusionPipeline",
3
+ "_diffusers_version": "0.24.0",
4
+ "feature_extractor": [
5
+ null,
6
+ null
7
+ ],
8
+ "requires_safety_checker": true,
9
+ "safety_checker": [
10
+ null,
11
+ null
12
+ ],
13
+ "scheduler": [
14
+ "diffusers",
15
+ "EulerDiscreteScheduler"
16
+ ],
17
+ "text_encoder": [
18
+ "diffusers",
19
+ "OnnxRuntimeModel"
20
+ ],
21
+ "tokenizer": [
22
+ "transformers",
23
+ "CLIPTokenizer"
24
+ ],
25
+ "unet": [
26
+ "diffusers",
27
+ "OnnxRuntimeModel"
28
+ ],
29
+ "vae_decoder": [
30
+ "diffusers",
31
+ "OnnxRuntimeModel"
32
+ ],
33
+ "vae_encoder": [
34
+ "diffusers",
35
+ "OnnxRuntimeModel"
36
+ ]
37
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "EulerDiscreteScheduler",
3
+ "_diffusers_version": "0.24.0",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "interpolation_type": "linear",
9
+ "num_train_timesteps": 1000,
10
+ "prediction_type": "epsilon",
11
+ "sample_max_value": 1.0,
12
+ "set_alpha_to_one": false,
13
+ "sigma_max": null,
14
+ "sigma_min": null,
15
+ "skip_prk_steps": true,
16
+ "steps_offset": 1,
17
+ "timestep_spacing": "trailing",
18
+ "timestep_type": "discrete",
19
+ "trained_betas": null,
20
+ "use_karras_sigmas": false
21
+ }
{ORT_CUDA/sd-turbo/engine/clip.ort_cuda.fp16 → text_encoder}/model.onnx RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f1b1d827fb6013c67ac3f349b06d1a159373c2faf5253555e20b14ce2ebaacf
3
- size 680852028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0d07fcce1858783b635c4027a562c89690d7c9f3f36c6129f1aa35af380f3d8
3
+ size 680854339
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "!",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "!",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "49406": {
13
+ "content": "<|startoftext|>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "49407": {
21
+ "content": "<|endoftext|>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "bos_token": "<|startoftext|>",
30
+ "clean_up_tokenization_spaces": true,
31
+ "do_lower_case": true,
32
+ "eos_token": "<|endoftext|>",
33
+ "errors": "replace",
34
+ "model_max_length": 77,
35
+ "pad_token": "!",
36
+ "tokenizer_class": "CLIPTokenizer",
37
+ "unk_token": "<|endoftext|>"
38
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
ORT_CUDA/sd-turbo/engine/unet.ort_cuda.fp16/model.onnx.data → unet/model.onnx RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:402069ca12429e3f7b810770a49f0121a3f0f025a38317ebfd4b956d7de1c41e
3
- size 1732024320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b1170b54ca0ceafe1c709cd0268bd98892928e5589a7d72aec817669472beda
3
+ size 1732410827
{ORT_CUDA/sd-turbo/engine/unet.ort_cuda.fp16 → vae_decoder}/model.onnx RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf77e0fc1a30bd77cbe86ddefccaadc30e2dc3a667c92418f3811f5417ce2af1
3
- size 371766
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b5a9263fbd3ebd2647bae8e99cc99fdcff70a0b1b6a77ca7544c5f3e9e91649
3
+ size 99072656
{ORT_CUDA/sd-turbo/engine/vae.ort_cuda.fp16 → vae_encoder}/model.onnx RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a72b771027dde10c6bb0af1347c9e30b0df54f186f3ff7c688f2b89354bcc7a2
3
- size 99070385
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dac0a94d703ba0ed900efaadc9d6e8dc4e9f1977d0f87f050aa05cb40d7c1638
3
+ size 68412355