diffuse-custom

Runtime error

App Files Files Community

diffuse-custom / diffusers /pipelines /latent_diffusion /pipeline_latent_diffusion_superresolution.py

Jackflack09

Duplicate from YeOldHermit/Super-Resolution-Anime-Diffusion

522606a over 1 year ago

raw

history blame

7.38 kB

	import inspect
	from typing import Optional, Tuple, Union

	import numpy as np
	import torch
	import torch.utils.checkpoint

	import PIL

	from ...models import UNet2DModel, VQModel
	from ...pipeline_utils import DiffusionPipeline, ImagePipelineOutput
	from ...schedulers import (
	DDIMScheduler,
	DPMSolverMultistepScheduler,
	EulerAncestralDiscreteScheduler,
	EulerDiscreteScheduler,
	LMSDiscreteScheduler,
	PNDMScheduler,
	)
	from ...utils import PIL_INTERPOLATION, deprecate


	def preprocess(image):
	    """
	    Convert a PIL image into a normalized, batched tensor for the pipeline.

	    The image is converted to RGB if it is in any other mode, resized (Lanczos resampling) so that both sides are
	    integer multiples of 32, scaled to floating point, and laid out channels-first with a leading batch axis.

	    Args:
	        image (`PIL.Image.Image`):
	            The low resolution input image.

	    Returns:
	        `torch.Tensor`: A float32 tensor of shape `(1, 3, height, width)` with values in `[-1, 1]`.

	    Raises:
	        ValueError: If either side of `image` is shorter than 32 pixels, since it would be rounded down to 0.
	    """
	    if image.mode != "RGB":
	        # Grayscale / palette images become 2-D arrays, which breaks the 4-axis transpose below, and an alpha
	        # channel would add a fourth channel to the low resolution input. Normalize everything to 3-channel RGB.
	        image = image.convert("RGB")

	    w, h = (x - x % 32 for x in image.size)  # round down to an integer multiple of 32
	    if w == 0 or h == 0:
	        raise ValueError(
	            f"`image` has to be at least 32x32 pixels but is {image.size[0]}x{image.size[1]}"
	        )

	    image = image.resize((w, h), resample=PIL_INTERPOLATION["lanczos"])
	    image = np.array(image).astype(np.float32) / 255.0  # [0, 255] -> [0, 1]
	    image = image[None].transpose(0, 3, 1, 2)  # (H, W, C) -> (1, C, H, W)
	    image = torch.from_numpy(image)
	    return 2.0 * image - 1.0  # [0, 1] -> [-1, 1]


	class LDMSuperResolutionPipeline(DiffusionPipeline):
	    r"""
	    A pipeline for image super-resolution using Latent Diffusion.

	    This class inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
	    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

	    Parameters:
	        vqvae ([`VQModel`]):
	            Vector-quantized (VQ) VAE Model to encode and decode images to and from latent representations.
	        unet ([`UNet2DModel`]): U-Net architecture to denoise the encoded image.
	        scheduler ([`SchedulerMixin`]):
	            A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
	            [`DDIMScheduler`], [`LMSDiscreteScheduler`], [`EulerDiscreteScheduler`],
	            [`EulerAncestralDiscreteScheduler`], [`DPMSolverMultistepScheduler`], or [`PNDMScheduler`].
	    """

	    def __init__(
	        self,
	        vqvae: VQModel,
	        unet: UNet2DModel,
	        scheduler: Union[
	            DDIMScheduler,
	            PNDMScheduler,
	            LMSDiscreteScheduler,
	            EulerDiscreteScheduler,
	            EulerAncestralDiscreteScheduler,
	            DPMSolverMultistepScheduler,
	        ],
	    ):
	        super().__init__()
	        self.register_modules(vqvae=vqvae, unet=unet, scheduler=scheduler)

	    @torch.no_grad()
	    def __call__(
	        self,
	        image: Union[torch.Tensor, PIL.Image.Image],
	        batch_size: Optional[int] = 1,
	        num_inference_steps: Optional[int] = 100,
	        eta: Optional[float] = 0.0,
	        generator: Optional[torch.Generator] = None,
	        output_type: Optional[str] = "pil",
	        return_dict: bool = True,
	        **kwargs,
	    ) -> Union[Tuple, ImagePipelineOutput]:
	        r"""
	        Args:
	            image (`torch.Tensor` or `PIL.Image.Image`):
	                `Image`, or tensor representing an image batch, that will be used as the starting point for the
	                process. A `PIL.Image.Image` is run through `preprocess`; a tensor is used as-is.
	            batch_size (`int`, optional, defaults to 1):
	                Ignored. The batch size is always derived from `image`: 1 for a `PIL.Image.Image`, otherwise
	                `image.shape[0]`. The parameter is kept so existing callers keep working.
	            num_inference_steps (`int`, optional, defaults to 100):
	                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
	                expense of slower inference.
	            eta (`float`, optional, defaults to 0.0):
	                Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
	                [`schedulers.DDIMScheduler`], will be ignored for others.
	            generator (`torch.Generator`, optional):
	                A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation
	                deterministic.
	            output_type (`str`, optional, defaults to `"pil"`):
	                The output format of the generated image. `"pil"` returns `PIL.Image.Image` objects; any other
	                value returns the images as an `np.array`.
	            return_dict (`bool`, optional):
	                Whether or not to return a [`~pipeline_utils.ImagePipelineOutput`] instead of a plain tuple.
	            kwargs:
	                Only used for the deprecated `init_image` argument, which replaces `image` when it is given.

	        Returns:
	            [`~pipeline_utils.ImagePipelineOutput`] or `tuple`: [`~pipelines.utils.ImagePipelineOutput`] if
	            `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the
	            generated images.

	        Raises:
	            ValueError: If `image` is neither a `PIL.Image.Image` nor a `torch.Tensor`.
	        """
	        message = "Please use `image` instead of `init_image`."
	        init_image = deprecate("init_image", "0.12.0", message, take_from=kwargs)
	        # Compare against None explicitly: `init_image or image` would ask for the truth value of a tensor, which
	        # raises a RuntimeError for any tensor with more than one element.
	        if init_image is not None:
	            image = init_image

	        if isinstance(image, PIL.Image.Image):
	            batch_size = 1
	            image = preprocess(image)
	        elif isinstance(image, torch.Tensor):
	            batch_size = image.shape[0]
	        else:
	            raise ValueError(f"`image` has to be of type `PIL.Image.Image` or `torch.Tensor` but is {type(image)}")

	        height, width = image.shape[-2:]

	        # in_channels should be 6: 3 for latents, 3 for low resolution image
	        latents_shape = (batch_size, self.unet.in_channels // 2, height, width)
	        latents_dtype = next(self.unet.parameters()).dtype

	        if self.device.type == "mps":
	            # randn does not work reproducibly on mps
	            latents = torch.randn(latents_shape, generator=generator, device="cpu", dtype=latents_dtype)
	            latents = latents.to(self.device)
	        else:
	            latents = torch.randn(latents_shape, generator=generator, device=self.device, dtype=latents_dtype)

	        image = image.to(device=self.device, dtype=latents_dtype)

	        # set timesteps and move to the correct device
	        self.scheduler.set_timesteps(num_inference_steps, device=self.device)
	        timesteps_tensor = self.scheduler.timesteps

	        # scale the initial noise by the standard deviation required by the scheduler
	        latents = latents * self.scheduler.init_noise_sigma

	        # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature.
	        # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
	        # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
	        # and should be between [0, 1]
	        extra_kwargs = {}
	        if "eta" in inspect.signature(self.scheduler.step).parameters:
	            extra_kwargs["eta"] = eta

	        for t in self.progress_bar(timesteps_tensor):
	            # concat latents and low resolution image in the channel dimension.
	            latents_input = torch.cat([latents, image], dim=1)
	            latents_input = self.scheduler.scale_model_input(latents_input, t)
	            # predict the noise residual
	            noise_pred = self.unet(latents_input, t).sample
	            # compute the previous noisy sample x_t -> x_t-1
	            latents = self.scheduler.step(noise_pred, t, latents, **extra_kwargs).prev_sample

	        # decode the image latents with the VQVAE
	        image = self.vqvae.decode(latents).sample
	        # map the decoded values from [-1, 1] to [0, 1] and switch to channels-last numpy
	        image = torch.clamp(image, -1.0, 1.0)
	        image = image / 2 + 0.5
	        image = image.cpu().permute(0, 2, 3, 1).numpy()

	        if output_type == "pil":
	            image = self.numpy_to_pil(image)

	        if not return_dict:
	            return (image,)

	        return ImagePipelineOutput(images=image)