pengdaqian committed
Commit
a5fe30f
1 Parent(s): c947846
Files changed (3)
  1. app.py +87 -66
  2. pipeline_openvino_stable_diffusion.py +0 -404
  3. requirements.txt +1 -8
app.py CHANGED
@@ -4,21 +4,22 @@ import gradio as gr
 from datasets import load_dataset
 from PIL import Image
 
-from model import get_sd_small, get_sd_tiny, get_sd_every
+# from model import get_sd_small, get_sd_tiny, get_sd_every
 from trans_google import google_translator
+import replicate
 
 from i18n import i18nTranslator
 
 word_list_dataset = load_dataset("Gustavosta/Stable-Diffusion-Prompts")
 word_list = word_list_dataset["train"]['Prompt']
-
-from diffusers import EulerDiscreteScheduler, DDIMScheduler, KDPM2AncestralDiscreteScheduler, \
-    UniPCMultistepScheduler, DPMSolverSinglestepScheduler, DEISMultistepScheduler, PNDMScheduler, \
-    DPMSolverMultistepScheduler, HeunDiscreteScheduler, EulerAncestralDiscreteScheduler, DDPMScheduler, \
-    LMSDiscreteScheduler, KDPM2DiscreteScheduler
-import torch
-import base64
-from io import BytesIO
+#
+# from diffusers import EulerDiscreteScheduler, DDIMScheduler, KDPM2AncestralDiscreteScheduler, \
+#     UniPCMultistepScheduler, DPMSolverSinglestepScheduler, DEISMultistepScheduler, PNDMScheduler, \
+#     DPMSolverMultistepScheduler, HeunDiscreteScheduler, EulerAncestralDiscreteScheduler, DDPMScheduler, \
+#     LMSDiscreteScheduler, KDPM2DiscreteScheduler
+# import torch
+# import base64
+# from io import BytesIO
 
 is_gpu_busy = False
 
@@ -39,22 +40,30 @@ samplers = [
     "DDIM",
     "LMSDiscrete",
 ]
+re_sampler = [
+    "DDIM",
+    "K_EULER",
+    "DPMSolverMultistep",
+    "K_EULER_ANCESTRAL",
+    "PNDM",
+    "KLMS"
+]
 
 rand = random.Random()
 translator = google_translator()
 
-tiny_pipe = get_sd_tiny()
-small_pipe = get_sd_small()
-every_pipe = get_sd_every()
+# tiny_pipe = get_sd_tiny()
+# small_pipe = get_sd_small()
+# every_pipe = get_sd_every()
 
 
-def get_pipe(width: int, height: int):
-    if width == 512 and height == 512:
-        return tiny_pipe
-    elif width == 256 and height == 256:
-        return small_pipe
-    else:
-        return every_pipe
+# def get_pipe(width: int, height: int):
+#     if width == 512 and height == 512:
+#         return tiny_pipe
+#     elif width == 256 and height == 256:
+#         return small_pipe
+#     else:
+#         return every_pipe
 
 
 def infer(prompt: str, negative: str, width: int, height: int, sampler: str, steps: int, seed: int, scale):
@@ -64,40 +73,40 @@ def infer(prompt: str, negative: str, width: int, height: int, sampler: str, steps: int, seed: int, scale):
         seed = rand.randint(0, 10000)
     else:
         seed = int(seed)
-
-    pipeline = get_pipe(width, height)
-
+    #
+    # pipeline = get_pipe(width, height)
+    #
     images = []
-    if torch.cuda.is_available():
-        generator = torch.Generator(device="cuda").manual_seed(seed)
-    else:
-        generator = None
-    if sampler == "EulerDiscrete":
-        pipeline.scheduler = EulerDiscreteScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "EulerAncestralDiscrete":
-        pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "KDPM2Discrete":
-        pipeline.scheduler = KDPM2DiscreteScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "KDPM2AncestralDiscrete":
-        pipeline.scheduler = KDPM2AncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "UniPCMultistep":
-        pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "DPMSolverSinglestep":
-        pipeline.scheduler = DPMSolverSinglestepScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "DPMSolverMultistep":
-        pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "HeunDiscrete":
-        pipeline.scheduler = HeunDiscreteScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "DEISMultistep":
-        pipeline.scheduler = DEISMultistepScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "PNDM":
-        pipeline.scheduler = PNDMScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "DDPM":
-        pipeline.scheduler = DDPMScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "DDIM":
-        pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
-    elif sampler == "LMSDiscrete":
-        pipeline.scheduler = LMSDiscreteScheduler.from_config(pipeline.scheduler.config)
+    # if torch.cuda.is_available():
+    #     generator = torch.Generator(device="cuda").manual_seed(seed)
+    # else:
+    #     generator = None
+    # if sampler == "EulerDiscrete":
+    #     pipeline.scheduler = EulerDiscreteScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "EulerAncestralDiscrete":
+    #     pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "KDPM2Discrete":
+    #     pipeline.scheduler = KDPM2DiscreteScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "KDPM2AncestralDiscrete":
+    #     pipeline.scheduler = KDPM2AncestralDiscreteScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "UniPCMultistep":
+    #     pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "DPMSolverSinglestep":
+    #     pipeline.scheduler = DPMSolverSinglestepScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "DPMSolverMultistep":
+    #     pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "HeunDiscrete":
+    #     pipeline.scheduler = HeunDiscreteScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "DEISMultistep":
+    #     pipeline.scheduler = DEISMultistepScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "PNDM":
+    #     pipeline.scheduler = PNDMScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "DDPM":
+    #     pipeline.scheduler = DDPMScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "DDIM":
+    #     pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
+    # elif sampler == "LMSDiscrete":
+    #     pipeline.scheduler = LMSDiscreteScheduler.from_config(pipeline.scheduler.config)
 
     try:
         translate_prompt = translator.translate(prompt, lang_tgt='en')
@@ -107,20 +116,32 @@ def infer(prompt: str, negative: str, width: int, height: int, sampler: str, steps: int, seed: int, scale):
         translate_prompt = prompt
         translate_negative = negative
 
-    image = pipeline(prompt=translate_prompt,
-                     negative_prompt=translate_negative,
-                     guidance_scale=scale,
-                     num_inference_steps=steps,
-                     generator=generator,
-                     height=height,
-                     width=width).images[0]
+    output = replicate.run(
+        "stability-ai/stable-diffusion:db21e45d3f7023abc2a46ee38a23973f6dce16bb082a930b0c49861f96d1e5bf",
+        input={
+            "prompt": translate_prompt,
+            "negative_prompt": translate_negative,
+            "guidance_scale": scale,
+            "num_inference_steps": steps,
+            "seed": seed,
+            "scheduler": sampler,
+        }
+    )
+
+    # image = pipeline(prompt=translate_prompt,
+    #                  negative_prompt=translate_negative,
+    #                  guidance_scale=scale,
+    #                  num_inference_steps=steps,
+    #                  generator=generator,
+    #                  height=height,
+    #                  width=width).images[0]
 
-    buffered = BytesIO()
-    image.save(buffered, format="JPEG")
-    img_str = base64.b64encode(buffered.getvalue())
-    img_base64 = bytes("data:image/jpeg;base64,", encoding='utf-8') + img_str
+    # buffered = BytesIO()
+    # image.save(buffered, format="JPEG")
+    # img_str = base64.b64encode(buffered.getvalue())
+    # img_base64 = bytes("data:image/jpeg;base64,", encoding='utf-8') + img_str
 
-    images.append(img_base64)
+    images.append(output[0])
 
     return images
 
@@ -146,7 +167,7 @@ css = """
     padding-top: 1.5rem;
 }
 #prompt-column {
-    min-height: 520px
+    min-height: 500px
 }
 #gallery {
     min-height: 22rem;
@@ -416,7 +437,7 @@ with block:
     with gr.Row(elem_id="txt2img_sampler", scale=4):
         seed = gr.Number(value=0, label="Seed", elem_id="txt2img_seed")
        sampler = gr.Dropdown(
-            samplers, value="",
+            re_sampler, value="DPMSolverMultistep",
             multiselect=False,
             label="Sampler",
             info="sampler select"
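
Note: with this change the image is produced by the hosted Replicate model instead of a local diffusers pipeline, so the width/height arguments and the CUDA seed generator no longer affect inference, and the gallery now receives an image URL rather than a base64 data URI. A minimal standalone sketch of the same call, assuming the replicate package is installed and REPLICATE_API_TOKEN is set in the environment (prompt and parameter values below are illustrative):

import replicate  # the client reads REPLICATE_API_TOKEN from the environment

# Same pinned model version as in app.py; for this model the call
# returns a list of output image URLs.
output = replicate.run(
    "stability-ai/stable-diffusion:db21e45d3f7023abc2a46ee38a23973f6dce16bb082a930b0c49861f96d1e5bf",
    input={
        "prompt": "a photo of an astronaut riding a horse",
        "negative_prompt": "",
        "guidance_scale": 7.5,
        "num_inference_steps": 25,
        "seed": 42,
        "scheduler": "DPMSolverMultistep",  # one of the re_sampler choices
    },
)
print(output[0])  # URL of the first generated image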
pipeline_openvino_stable_diffusion.py DELETED
@@ -1,404 +0,0 @@
-# Copyright 2022 The OFA-Sys Team.
-# This source code is licensed under the Apache 2.0 license
-# found in the LICENSE file in the root directory.
-# Copyright 2022 The HuggingFace Inc. team.
-# All rights reserved.
-# This source code is licensed under the Apache 2.0 license
-# found in the LICENSE file in the root directory.
-
-import inspect
-from typing import Callable, List, Optional, Union
-
-import numpy as np
-import torch
-import os
-
-from transformers import CLIPFeatureExtractor, CLIPTokenizer
-
-from diffusers.configuration_utils import FrozenDict
-from diffusers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler
-from diffusers.utils import deprecate, logging
-from diffusers import OnnxRuntimeModel
-
-from diffusers import OnnxStableDiffusionPipeline, DiffusionPipeline
-from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
-from openvino.runtime import Core
-ORT_TO_NP_TYPE = {
-    "tensor(bool)": np.bool_,
-    "tensor(int8)": np.int8,
-    "tensor(uint8)": np.uint8,
-    "tensor(int16)": np.int16,
-    "tensor(uint16)": np.uint16,
-    "tensor(int32)": np.int32,
-    "tensor(uint32)": np.uint32,
-    "tensor(int64)": np.int64,
-    "tensor(uint64)": np.uint64,
-    "tensor(float16)": np.float16,
-    "tensor(float)": np.float32,
-    "tensor(double)": np.float64,
-}
-
-logger = logging.get_logger(__name__)
-
-
-class OpenVINOStableDiffusionPipeline(DiffusionPipeline):
-    vae_encoder: OnnxRuntimeModel
-    vae_decoder: OnnxRuntimeModel
-    text_encoder: OnnxRuntimeModel
-    tokenizer: CLIPTokenizer
-    unet: OnnxRuntimeModel
-    scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler]
-    safety_checker: OnnxRuntimeModel
-    feature_extractor: CLIPFeatureExtractor
-
-    _optional_components = ["safety_checker", "feature_extractor"]
-
-    def __init__(
-        self,
-        vae_encoder: OnnxRuntimeModel,
-        vae_decoder: OnnxRuntimeModel,
-        text_encoder: OnnxRuntimeModel,
-        tokenizer: CLIPTokenizer,
-        unet: OnnxRuntimeModel,
-        scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler],
-        safety_checker: OnnxRuntimeModel,
-        feature_extractor: CLIPFeatureExtractor,
-        requires_safety_checker: bool = True,
-    ):
-        super().__init__()
-
-        if hasattr(scheduler.config,
-                   "steps_offset") and scheduler.config.steps_offset != 1:
-            deprecation_message = (
-                f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
-                f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
-                "to update the config accordingly as leaving `steps_offset` might led to incorrect results"
-                " in future versions. If you have downloaded this checkpoint from the Hugging Face Hub,"
-                " it would be very nice if you could open a Pull request for the `scheduler/scheduler_config.json`"
-                " file")
-            deprecate("steps_offset!=1",
-                      "1.0.0",
-                      deprecation_message,
-                      standard_warn=False)
-            new_config = dict(scheduler.config)
-            new_config["steps_offset"] = 1
-            scheduler._internal_dict = FrozenDict(new_config)
-
-        if hasattr(scheduler.config,
-                   "clip_sample") and scheduler.config.clip_sample is True:
-            deprecation_message = (
-                f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
-                " `clip_sample` should be set to False in the configuration file. Please make sure to update the"
-                " config accordingly as not setting `clip_sample` in the config might lead to incorrect results in"
-                " future versions. If you have downloaded this checkpoint from the Hugging Face Hub, it would be very"
-                " nice if you could open a Pull request for the `scheduler/scheduler_config.json` file"
-            )
-            deprecate("clip_sample not set",
-                      "1.0.0",
-                      deprecation_message,
-                      standard_warn=False)
-            new_config = dict(scheduler.config)
-            new_config["clip_sample"] = False
-            scheduler._internal_dict = FrozenDict(new_config)
-
-        if safety_checker is None and requires_safety_checker:
-            logger.warning(
-                f"You have disabled the safety checker for {self.__class__} by passing `safety_checker=None`. Ensure"
-                " that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered"
-                " results in services or applications open to the public. Both the diffusers team and Hugging Face"
-                " strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling"
-                " it only for use-cases that involve analyzing network behavior or auditing its results. For more"
-                " information, please have a look at https://github.com/huggingface/diffusers/pull/254 ."
-            )
-
-        if safety_checker is not None and feature_extractor is None:
-            raise ValueError(
-                "Make sure to define a feature extractor when loading {self.__class__} if you want to use the safety"
-                " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
-            )
-
-        self.register_modules(
-            vae_encoder=vae_encoder,
-            vae_decoder=vae_decoder,
-            text_encoder=text_encoder,
-            tokenizer=tokenizer,
-            unet=unet,
-            scheduler=scheduler,
-            safety_checker=safety_checker,
-            feature_extractor=feature_extractor,
-        )
-        self.convert_to_openvino()
-        self.register_to_config(
-            requires_safety_checker=requires_safety_checker)
-
-    @classmethod
-    def from_onnx_pipeline(cls, onnx_pipe: OnnxStableDiffusionPipeline):
-        r"""
-        Create OpenVINOStableDiffusionPipeline from a onnx stable pipeline.
-        Parameters:
-            onnx_pipe (OnnxStableDiffusionPipeline)
-        """
-        return cls(onnx_pipe.vae_encoder, onnx_pipe.vae_decoder,
-                   onnx_pipe.text_encoder, onnx_pipe.tokenizer, onnx_pipe.unet,
-                   onnx_pipe.scheduler, onnx_pipe.safety_checker,
-                   onnx_pipe.feature_extractor, True)
-
-    def convert_to_openvino(self):
-        ie = Core()
-
-        # VAE decoder
-        vae_decoder_onnx = ie.read_model(
-            model=os.path.join(self.vae_decoder.model_save_dir, "model.onnx"))
-        vae_decoder = ie.compile_model(model=vae_decoder_onnx,
-                                       device_name="CPU")
-
-        # Text encoder
-        text_encoder_onnx = ie.read_model(
-            model=os.path.join(self.text_encoder.model_save_dir, "model.onnx"))
-        text_encoder = ie.compile_model(model=text_encoder_onnx,
-                                        device_name="CPU")
-
-        # Unet
-        unet_onnx = ie.read_model(model=os.path.join(self.unet.model_save_dir, "model.onnx"))
-        unet = ie.compile_model(model=unet_onnx, device_name="CPU")
-
-        self.register_modules(vae_decoder=vae_decoder,
-                              text_encoder=text_encoder,
-                              unet=unet)
-
-    def _encode_prompt(self, prompt, num_images_per_prompt,
-                       do_classifier_free_guidance, negative_prompt):
-        r"""
-        Encodes the prompt into text encoder hidden states.
-        Args:
-            prompt (`str` or `List[str]`):
-                prompt to be encoded
-            num_images_per_prompt (`int`):
-                number of images that should be generated per prompt
-            do_classifier_free_guidance (`bool`):
-                whether to use classifier free guidance or not
-            negative_prompt (`str` or `List[str]`):
-                The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
-                if `guidance_scale` is less than `1`).
-        """
-        batch_size = len(prompt) if isinstance(prompt, list) else 1
-
-        # get prompt text embeddings
-        text_inputs = self.tokenizer(
-            prompt,
-            padding="max_length",
-            max_length=self.tokenizer.model_max_length,
-            truncation=True,
-            return_tensors="np",
-        )
-        text_input_ids = text_inputs.input_ids
-        untruncated_ids = self.tokenizer(prompt,
-                                         padding="max_length",
-                                         return_tensors="np").input_ids
-
-        if not np.array_equal(text_input_ids, untruncated_ids):
-            removed_text = self.tokenizer.batch_decode(
-                untruncated_ids[:, self.tokenizer.model_max_length - 1:-1])
-            logger.warning(
-                "The following part of your input was truncated because CLIP can only handle sequences up to"
-                f" {self.tokenizer.model_max_length} tokens: {removed_text}")
-
-        prompt_embeds = self.text_encoder(
-            {"input_ids":
-             text_input_ids.astype(np.int32)})[self.text_encoder.outputs[0]]
-        prompt_embeds = np.repeat(prompt_embeds, num_images_per_prompt, axis=0)
-
-        # get unconditional embeddings for classifier free guidance
-        if do_classifier_free_guidance:
-            uncond_tokens: List[str]
-            if negative_prompt is None:
-                uncond_tokens = [""] * batch_size
-            elif type(prompt) is not type(negative_prompt):
-                raise TypeError(
-                    f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
-                    f" {type(prompt)}.")
-            elif isinstance(negative_prompt, str):
-                uncond_tokens = [negative_prompt] * batch_size
-            elif batch_size != len(negative_prompt):
-                raise ValueError(
-                    f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
-                    f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
-                    " the batch size of `prompt`.")
-            else:
-                uncond_tokens = negative_prompt
-
-            max_length = text_input_ids.shape[-1]
-            uncond_input = self.tokenizer(
-                uncond_tokens,
-                padding="max_length",
-                max_length=max_length,
-                truncation=True,
-                return_tensors="np",
-            )
-            negative_prompt_embeds = self.text_encoder({
-                "input_ids":
-                uncond_input.input_ids.astype(np.int32)
-            })[self.text_encoder.outputs[0]]
-            negative_prompt_embeds = np.repeat(negative_prompt_embeds,
-                                               num_images_per_prompt,
-                                               axis=0)
-
-            # For classifier free guidance, we need to do two forward passes.
-            # Here we concatenate the unconditional and text embeddings into a single batch
-            # to avoid doing two forward passes
-            prompt_embeds = np.concatenate(
-                [negative_prompt_embeds, prompt_embeds])
-
-        return prompt_embeds
-
-    def __call__(
-        self,
-        prompt: Union[str, List[str]],
-        height: Optional[int] = 512,
-        width: Optional[int] = 512,
-        num_inference_steps: Optional[int] = 50,
-        guidance_scale: Optional[float] = 7.5,
-        negative_prompt: Optional[Union[str, List[str]]] = None,
-        num_images_per_prompt: Optional[int] = 1,
-        eta: Optional[float] = 0.0,
-        generator: Optional[np.random.RandomState] = None,
-        latents: Optional[np.ndarray] = None,
-        output_type: Optional[str] = "pil",
-        return_dict: bool = True,
-        callback: Optional[Callable[[int, int, np.ndarray], None]] = None,
-        callback_steps: Optional[int] = 1,
-    ):
-        if isinstance(prompt, str):
-            batch_size = 1
-        elif isinstance(prompt, list):
-            batch_size = len(prompt)
-        else:
-            raise ValueError(
-                f"`prompt` has to be of type `str` or `list` but is {type(prompt)}"
-            )
-
-        if height % 8 != 0 or width % 8 != 0:
-            raise ValueError(
-                f"`height` and `width` have to be divisible by 8 but are {height} and {width}."
-            )
-
-        if (callback_steps is None) or (callback_steps is not None and
-                                        (not isinstance(callback_steps, int)
-                                         or callback_steps <= 0)):
-            raise ValueError(
-                f"`callback_steps` has to be a positive integer but is {callback_steps} of type"
-                f" {type(callback_steps)}.")
-
-        if generator is None:
-            generator = np.random
-
-        # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
-        # corresponds to doing no classifier free guidance.
-        do_classifier_free_guidance = guidance_scale > 1.0
-
-        prompt_embeds = self._encode_prompt(prompt, num_images_per_prompt,
-                                            do_classifier_free_guidance,
-                                            negative_prompt)
-
-        # get the initial random noise unless the user supplied it
-        latents_dtype = prompt_embeds.dtype
-        latents_shape = (batch_size * num_images_per_prompt, 4, height // 8,
-                         width // 8)
-        if latents is None:
-            latents = generator.randn(*latents_shape).astype(latents_dtype)
-        elif latents.shape != latents_shape:
-            raise ValueError(
-                f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}"
-            )
-
-        # set timesteps
-        self.scheduler.set_timesteps(num_inference_steps)
-
-        latents = latents * np.float64(self.scheduler.init_noise_sigma)
-
-        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
-        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
-        # and should be between [0, 1]
-        accepts_eta = "eta" in set(
-            inspect.signature(self.scheduler.step).parameters.keys())
-        extra_step_kwargs = {}
-        if accepts_eta:
-            extra_step_kwargs["eta"] = eta
-
-        # timestep_dtype = next(
-        #     (input.type for input in self.unet.model.get_inputs() if input.name == "timestep"), "tensor(float)"
-        # )
-        timestep_dtype = 'tensor(int64)'
-        timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype]
-
-        for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)):
-            # expand the latents if we are doing classifier free guidance
-            latent_model_input = np.concatenate(
-                [latents] * 2) if do_classifier_free_guidance else latents
-            latent_model_input = self.scheduler.scale_model_input(
-                torch.from_numpy(latent_model_input), t)
-            latent_model_input = latent_model_input.cpu().numpy()
-
-            # predict the noise residual
-            timestep = np.array([t], dtype=timestep_dtype)
-            unet_input = {
-                "sample": latent_model_input,
-                "timestep": timestep,
-                "encoder_hidden_states": prompt_embeds
-            }
-            noise_pred = self.unet(unet_input)[self.unet.outputs[0]]
-            # noise_pred = noise_pred[0]
-
-            # perform guidance
-            if do_classifier_free_guidance:
-                noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2)
-                noise_pred = noise_pred_uncond + guidance_scale * (
-                    noise_pred_text - noise_pred_uncond)
-
-            # compute the previous noisy sample x_t -> x_t-1
-            scheduler_output = self.scheduler.step(
-                torch.from_numpy(noise_pred), t, torch.from_numpy(latents),
-                **extra_step_kwargs)
-            latents = scheduler_output.prev_sample.numpy()
-
-            # call the callback, if provided
-            if callback is not None and i % callback_steps == 0:
-                callback(i, t, latents)
-
-        latents = 1 / 0.18215 * latents
-        image = self.vae_decoder({"latent_sample":
-                                  latents})[self.vae_decoder.outputs[0]]
-
-        image = np.clip(image / 2 + 0.5, 0, 1)
-        image = image.transpose((0, 2, 3, 1))
-
-        if self.safety_checker is not None:
-            safety_checker_input = self.feature_extractor(
-                self.numpy_to_pil(image),
-                return_tensors="np").pixel_values.astype(image.dtype)
-
-            image, has_nsfw_concepts = self.safety_checker(
-                clip_input=safety_checker_input, images=image)
-
-            # There will throw an error if use safety_checker batchsize>1
-            images, has_nsfw_concept = [], []
-            for i in range(image.shape[0]):
-                image_i, has_nsfw_concept_i = self.safety_checker(
-                    clip_input=safety_checker_input[i:i + 1],
-                    images=image[i:i + 1])
-                images.append(image_i)
-                has_nsfw_concept.append(has_nsfw_concept_i[0])
-            image = np.concatenate(images)
-        else:
-            has_nsfw_concept = None
-
-        if output_type == "pil":
-            image = self.numpy_to_pil(image)
-
-        if not return_dict:
-            return (image, has_nsfw_concept)
-
-        return StableDiffusionPipelineOutput(
-            images=image, nsfw_content_detected=has_nsfw_concept)
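
For context, the deleted module wrapped an ONNX-exported Stable Diffusion pipeline and recompiled its VAE decoder, text encoder, and UNet with OpenVINO for CPU inference. A rough sketch of how it was used before this commit, assuming the module is still on the import path and an ONNX export of a checkpoint is available (the model id below is illustrative):

from diffusers import OnnxStableDiffusionPipeline
from pipeline_openvino_stable_diffusion import OpenVINOStableDiffusionPipeline  # module removed in this commit

# Load an ONNX export of Stable Diffusion on CPU, then convert it to OpenVINO.
onnx_pipe = OnnxStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # illustrative; any ONNX-exported SD checkpoint
    revision="onnx",
    provider="CPUExecutionProvider",
)
ov_pipe = OpenVINOStableDiffusionPipeline.from_onnx_pipeline(onnx_pipe)
image = ov_pipe("a photo of an astronaut riding a horse", num_inference_steps=25).images[0]
image.save("astronaut.png")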
requirements.txt CHANGED
@@ -1,11 +1,4 @@
 python-dotenv
-diffusers
-transformers<5
-accelerate
-scipy
-safetensors
-onnx
-openvino
-onnxruntime-openvino
+replicate
 ftfy
 py-cpuinfo
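
After this change the trimmed requirements.txt should read, in full:

python-dotenv
replicate
ftfy
py-cpuinfo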