philschmid HF staff commited on
Commit
3d0c874
1 Parent(s): a5ad144

add handler

Browse files
.gitattributes CHANGED
@@ -7,7 +7,6 @@
7
  *.h5 filter=lfs diff=lfs merge=lfs -text
8
  *.joblib filter=lfs diff=lfs merge=lfs -text
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
13
  *.npy filter=lfs diff=lfs merge=lfs -text
 
7
  *.h5 filter=lfs diff=lfs merge=lfs -text
8
  *.joblib filter=lfs diff=lfs merge=lfs -text
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
10
  *.model filter=lfs diff=lfs merge=lfs -text
11
  *.msgpack filter=lfs diff=lfs merge=lfs -text
12
  *.npy filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,84 @@
1
  ---
2
  license: other
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: other
3
+ tags:
4
+ - stable-diffusion
5
+ - stable-diffusion-diffusers
6
+ - text-to-image
7
+ - endpoints-template
8
+ inference: false
9
+ extra_gated_prompt: |-
10
+ One more step before getting this model.
11
+ This model is open access and available to all, with a CreativeML OpenRAIL-M license further specifying rights and usage.
12
+ The CreativeML OpenRAIL License specifies:
13
+
14
+ 1. You can't use the model to deliberately produce nor share illegal or harmful outputs or content
15
+ 2. CompVis claims no rights on the outputs you generate, you are free to use them and are accountable for their use which must not go against the provisions set in the license
16
+ 3. You may re-distribute the weights and use the model commercially and/or as a service. If you do, please be aware you have to include the same use restrictions as the ones in the license and share a copy of the CreativeML OpenRAIL-M to all your users (please read the license entirely and carefully)
17
+ Please read the full license here: https://huggingface.co/spaces/CompVis/stable-diffusion-license
18
+
19
+ By clicking on "Access repository" below, you accept that your *contact information* (email address and username) can be shared with the model authors as well.
20
+
21
+ extra_gated_fields:
22
+ I have read the License and agree with its terms: checkbox
23
  ---
24
+
25
+ # Fork of [CompVis/stable-diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4)
26
+
27
+ > Stable Diffusion is a latent text-to-image diffusion model capable of generating photo-realistic images given any text input.
28
+ > For more information about how Stable Diffusion functions, please have a look at [🤗's Stable Diffusion with 🧨Diffusers blog](https://huggingface.co/blog/stable_diffusion).
29
+
30
+ For more information about the model, license and limitations check the original model card at [CompVis/stable-diffusion-v1-4](https://huggingface.co/CompVis/stable-diffusion-v1-4).
31
+
32
+ ### License (CreativeML OpenRAIL-M)
33
+
34
+ The full license can be found here: https://huggingface.co/spaces/CompVis/stable-diffusion-license
35
+
36
+ ---
37
+
38
+ This repository implements a custom `handler` task for `text-to-image` for 🤗 Inference Endpoints. The code for the customized pipeline is in the [pipeline.py](https://huggingface.cophilschmid/stable-diffusion-v1-4-endpoints/blob/main/handler.py).
39
+
40
+ There is also a [notebook](https://huggingface.cophilschmid/stable-diffusion-v1-4-endpoints/blob/main/create_handler.ipynb) included, on how to create the `handler.py`
41
+
42
+ ### expected Request payload
43
+ ```json
44
+ {
45
+ "inputs": "A prompt used for image generation"
46
+ }
47
+ ```
48
+
49
+ below is an example on how to run a request using Python and `requests`.
50
+
51
+ ## Run Request
52
+ ```python
53
+ import json
54
+ from typing import List
55
+ import requests as r
56
+ import base64
57
+ from PIL import Image
58
+ from io import BytesIO
59
+
60
+ ENDPOINT_URL = ""
61
+ HF_TOKEN = ""
62
+
63
+ # helper decoder
64
+ def decode_base64_image(image_string):
65
+ base64_image = base64.b64decode(image_string)
66
+ buffer = BytesIO(base64_image)
67
+ return Image.open(buffer)
68
+
69
+
70
+ def predict(prompt:str=None):
71
+ payload = {"inputs": code_snippet,"parameters": parameters}
72
+ response = r.post(
73
+ ENDPOINT_URL, headers={"Authorization": f"Bearer {HF_TOKEN}"}, json={"inputs": prompt}
74
+ )
75
+ resp = response.json()
76
+ return decode_base64_image(resp["image"])
77
+
78
+ prediction = predict(
79
+ prompt="the first animal on the mars"
80
+ )
81
+ ```
82
+ expected output
83
+
84
+ ![sample](sample.jpg)
create_handler.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": 224,
3
+ "do_center_crop": true,
4
+ "do_convert_rgb": true,
5
+ "do_normalize": true,
6
+ "do_resize": true,
7
+ "feature_extractor_type": "CLIPFeatureExtractor",
8
+ "image_mean": [
9
+ 0.48145466,
10
+ 0.4578275,
11
+ 0.40821073
12
+ ],
13
+ "image_std": [
14
+ 0.26862954,
15
+ 0.26130258,
16
+ 0.27577711
17
+ ],
18
+ "resample": 3,
19
+ "size": 224
20
+ }
handler.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+ import torch
3
+ from torch import autocast
4
+ from diffusers import StableDiffusionPipeline
5
+ import base64
6
+ from io import BytesIO
7
+
8
+
9
+ # set device
10
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
11
+
12
+ class EndpointHandler():
13
+ def __init__(self, path=""):
14
+ # load the optimized model
15
+ self.pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
16
+ self.pipe = self.pipe.to(device)
17
+
18
+
19
+ def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
20
+ """
21
+ Args:
22
+ data (:obj:):
23
+ includes the input data and the parameters for the inference.
24
+ Return:
25
+ A :obj:`dict`:. base64 encoded image
26
+ """
27
+ inputs = data.pop("inputs", data)
28
+
29
+ # run inference pipeline
30
+ with autocast(device.type):
31
+ image = self.pipe(inputs, guidance_scale=7.5)["sample"][0]
32
+
33
+ # encode image as base 64
34
+ buffered = BytesIO()
35
+ image.save(buffered, format="JPEG")
36
+ img_str = base64.b64encode(buffered.getvalue())
37
+
38
+ # postprocess the prediction
39
+ return {"image": img_str}
model_index.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableDiffusionPipeline",
3
+ "_diffusers_version": "0.2.3",
4
+ "feature_extractor": [
5
+ "transformers",
6
+ "CLIPFeatureExtractor"
7
+ ],
8
+ "safety_checker": [
9
+ "stable_diffusion",
10
+ "StableDiffusionSafetyChecker"
11
+ ],
12
+ "scheduler": [
13
+ "diffusers",
14
+ "PNDMScheduler"
15
+ ],
16
+ "text_encoder": [
17
+ "transformers",
18
+ "CLIPTextModel"
19
+ ],
20
+ "tokenizer": [
21
+ "transformers",
22
+ "CLIPTokenizer"
23
+ ],
24
+ "unet": [
25
+ "diffusers",
26
+ "UNet2DConditionModel"
27
+ ],
28
+ "vae": [
29
+ "diffusers",
30
+ "AutoencoderKL"
31
+ ]
32
+ }
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ diffusers
safety_checker/config.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./safety_checker",
3
+ "architectures": [
4
+ "StableDiffusionSafetyChecker"
5
+ ],
6
+ "initializer_factor": 1.0,
7
+ "logit_scale_init_value": 2.6592,
8
+ "model_type": "clip",
9
+ "projection_dim": 768,
10
+ "text_config": {
11
+ "_name_or_path": "",
12
+ "add_cross_attention": false,
13
+ "architectures": null,
14
+ "attention_dropout": 0.0,
15
+ "bad_words_ids": null,
16
+ "bos_token_id": 0,
17
+ "chunk_size_feed_forward": 0,
18
+ "cross_attention_hidden_size": null,
19
+ "decoder_start_token_id": null,
20
+ "diversity_penalty": 0.0,
21
+ "do_sample": false,
22
+ "dropout": 0.0,
23
+ "early_stopping": false,
24
+ "encoder_no_repeat_ngram_size": 0,
25
+ "eos_token_id": 2,
26
+ "exponential_decay_length_penalty": null,
27
+ "finetuning_task": null,
28
+ "forced_bos_token_id": null,
29
+ "forced_eos_token_id": null,
30
+ "hidden_act": "quick_gelu",
31
+ "hidden_size": 768,
32
+ "id2label": {
33
+ "0": "LABEL_0",
34
+ "1": "LABEL_1"
35
+ },
36
+ "initializer_factor": 1.0,
37
+ "initializer_range": 0.02,
38
+ "intermediate_size": 3072,
39
+ "is_decoder": false,
40
+ "is_encoder_decoder": false,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1
44
+ },
45
+ "layer_norm_eps": 1e-05,
46
+ "length_penalty": 1.0,
47
+ "max_length": 20,
48
+ "max_position_embeddings": 77,
49
+ "min_length": 0,
50
+ "model_type": "clip_text_model",
51
+ "no_repeat_ngram_size": 0,
52
+ "num_attention_heads": 12,
53
+ "num_beam_groups": 1,
54
+ "num_beams": 1,
55
+ "num_hidden_layers": 12,
56
+ "num_return_sequences": 1,
57
+ "output_attentions": false,
58
+ "output_hidden_states": false,
59
+ "output_scores": false,
60
+ "pad_token_id": 1,
61
+ "prefix": null,
62
+ "problem_type": null,
63
+ "pruned_heads": {},
64
+ "remove_invalid_values": false,
65
+ "repetition_penalty": 1.0,
66
+ "return_dict": true,
67
+ "return_dict_in_generate": false,
68
+ "sep_token_id": null,
69
+ "task_specific_params": null,
70
+ "temperature": 1.0,
71
+ "tf_legacy_loss": false,
72
+ "tie_encoder_decoder": false,
73
+ "tie_word_embeddings": true,
74
+ "tokenizer_class": null,
75
+ "top_k": 50,
76
+ "top_p": 1.0,
77
+ "torch_dtype": null,
78
+ "torchscript": false,
79
+ "transformers_version": "4.21.1",
80
+ "typical_p": 1.0,
81
+ "use_bfloat16": false,
82
+ "vocab_size": 49408
83
+ },
84
+ "text_config_dict": {
85
+ "hidden_size": 768,
86
+ "intermediate_size": 3072,
87
+ "num_attention_heads": 12,
88
+ "num_hidden_layers": 12
89
+ },
90
+ "torch_dtype": "float16",
91
+ "transformers_version": null,
92
+ "vision_config": {
93
+ "_name_or_path": "",
94
+ "add_cross_attention": false,
95
+ "architectures": null,
96
+ "attention_dropout": 0.0,
97
+ "bad_words_ids": null,
98
+ "bos_token_id": null,
99
+ "chunk_size_feed_forward": 0,
100
+ "cross_attention_hidden_size": null,
101
+ "decoder_start_token_id": null,
102
+ "diversity_penalty": 0.0,
103
+ "do_sample": false,
104
+ "dropout": 0.0,
105
+ "early_stopping": false,
106
+ "encoder_no_repeat_ngram_size": 0,
107
+ "eos_token_id": null,
108
+ "exponential_decay_length_penalty": null,
109
+ "finetuning_task": null,
110
+ "forced_bos_token_id": null,
111
+ "forced_eos_token_id": null,
112
+ "hidden_act": "quick_gelu",
113
+ "hidden_size": 1024,
114
+ "id2label": {
115
+ "0": "LABEL_0",
116
+ "1": "LABEL_1"
117
+ },
118
+ "image_size": 224,
119
+ "initializer_factor": 1.0,
120
+ "initializer_range": 0.02,
121
+ "intermediate_size": 4096,
122
+ "is_decoder": false,
123
+ "is_encoder_decoder": false,
124
+ "label2id": {
125
+ "LABEL_0": 0,
126
+ "LABEL_1": 1
127
+ },
128
+ "layer_norm_eps": 1e-05,
129
+ "length_penalty": 1.0,
130
+ "max_length": 20,
131
+ "min_length": 0,
132
+ "model_type": "clip_vision_model",
133
+ "no_repeat_ngram_size": 0,
134
+ "num_attention_heads": 16,
135
+ "num_beam_groups": 1,
136
+ "num_beams": 1,
137
+ "num_channels": 3,
138
+ "num_hidden_layers": 24,
139
+ "num_return_sequences": 1,
140
+ "output_attentions": false,
141
+ "output_hidden_states": false,
142
+ "output_scores": false,
143
+ "pad_token_id": null,
144
+ "patch_size": 14,
145
+ "prefix": null,
146
+ "problem_type": null,
147
+ "pruned_heads": {},
148
+ "remove_invalid_values": false,
149
+ "repetition_penalty": 1.0,
150
+ "return_dict": true,
151
+ "return_dict_in_generate": false,
152
+ "sep_token_id": null,
153
+ "task_specific_params": null,
154
+ "temperature": 1.0,
155
+ "tf_legacy_loss": false,
156
+ "tie_encoder_decoder": false,
157
+ "tie_word_embeddings": true,
158
+ "tokenizer_class": null,
159
+ "top_k": 50,
160
+ "top_p": 1.0,
161
+ "torch_dtype": null,
162
+ "torchscript": false,
163
+ "transformers_version": "4.21.1",
164
+ "typical_p": 1.0,
165
+ "use_bfloat16": false
166
+ },
167
+ "vision_config_dict": {
168
+ "hidden_size": 1024,
169
+ "intermediate_size": 4096,
170
+ "num_attention_heads": 16,
171
+ "num_hidden_layers": 24,
172
+ "patch_size": 14
173
+ }
174
+ }
safety_checker/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d37ca6e57ace94e4c2f03ed0f67b6dc83e1ef1160892074917aa68b28e2afc1
3
+ size 608098599
sample.jpg ADDED
scheduler/.ipynb_checkpoints/scheduler_config-checkpoint.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "PNDMScheduler",
3
+ "_diffusers_version": "0.2.2",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "num_train_timesteps": 1000,
8
+ "skip_prk_steps": true
9
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "PNDMScheduler",
3
+ "_diffusers_version": "0.2.3",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "num_train_timesteps": 1000,
8
+ "skip_prk_steps": true
9
+ }
text_encoder/config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./text_encoder",
3
+ "architectures": [
4
+ "CLIPTextModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dropout": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "quick_gelu",
11
+ "hidden_size": 768,
12
+ "initializer_factor": 1.0,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 77,
17
+ "model_type": "clip_text_model",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "torch_dtype": "float16",
22
+ "transformers_version": "4.21.1",
23
+ "vocab_size": 49408
24
+ }
text_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88bd85efb0f84e70521633f578715afb2873db4f2615fdfb1f66e99934715865
3
+ size 246184375
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "do_lower_case": true,
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 77,
22
+ "name_or_path": "./tokenizer",
23
+ "pad_token": "<|endoftext|>",
24
+ "special_tokens_map_file": "./special_tokens_map.json",
25
+ "tokenizer_class": "CLIPTokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
unet/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.2.3",
4
+ "_name_or_path": "./unet",
5
+ "act_fn": "silu",
6
+ "attention_head_dim": 8,
7
+ "block_out_channels": [
8
+ 320,
9
+ 640,
10
+ 1280,
11
+ 1280
12
+ ],
13
+ "center_input_sample": false,
14
+ "cross_attention_dim": 768,
15
+ "down_block_types": [
16
+ "CrossAttnDownBlock2D",
17
+ "CrossAttnDownBlock2D",
18
+ "CrossAttnDownBlock2D",
19
+ "DownBlock2D"
20
+ ],
21
+ "downsample_padding": 1,
22
+ "flip_sin_to_cos": true,
23
+ "freq_shift": 0,
24
+ "in_channels": 4,
25
+ "layers_per_block": 2,
26
+ "mid_block_scale_factor": 1,
27
+ "norm_eps": 1e-05,
28
+ "norm_num_groups": 32,
29
+ "out_channels": 4,
30
+ "sample_size": 64,
31
+ "up_block_types": [
32
+ "UpBlock2D",
33
+ "CrossAttnUpBlock2D",
34
+ "CrossAttnUpBlock2D",
35
+ "CrossAttnUpBlock2D"
36
+ ]
37
+ }
unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d98edd280d5e040ee77f5802b8e3be3513de757335d1dedc4e495647e7c2d573
3
+ size 1719312805
vae/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.2.3",
4
+ "_name_or_path": "./vae",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 256,
9
+ 512,
10
+ 512
11
+ ],
12
+ "down_block_types": [
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D",
16
+ "DownEncoderBlock2D"
17
+ ],
18
+ "in_channels": 3,
19
+ "latent_channels": 4,
20
+ "layers_per_block": 2,
21
+ "out_channels": 3,
22
+ "sample_size": 512,
23
+ "up_block_types": [
24
+ "UpDecoderBlock2D",
25
+ "UpDecoderBlock2D",
26
+ "UpDecoderBlock2D",
27
+ "UpDecoderBlock2D"
28
+ ]
29
+ }
vae/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51c8904bc921e1e6f354b5fa8e99a1c82ead2f0540114de21557b8abfbb24ad0
3
+ size 167399505