Spaces:

svjack
/

Genshin-Impact-XL-MasaCtrl

Running

App Files Files Community

Genshin-Impact-XL-MasaCtrl / run_synthesis_sdxl_processor.py

svjack

Upload 23 files

f070657 verified 5 days ago

raw

history blame contribute delete

3.15 kB

	import os
	import torch
	import torch.nn as nn
	import torch.nn.functional as F

	import numpy as np

	from tqdm import tqdm
	from einops import rearrange, repeat
	from omegaconf import OmegaConf
	from diffusers import DDIMScheduler, StableDiffusionPipeline, DiffusionPipeline
	from torchvision.utils import save_image
	from torchvision.io import read_image
	from pytorch_lightning import seed_everything

	from masactrl.masactrl_processor import register_attention_processor

	# Pin CUDA device 0 only when CUDA is actually present. Calling
	# torch.cuda.set_device unconditionally makes the script fail on CPU-only
	# hosts before the CPU fallback chosen below can take effect.
	if torch.cuda.is_available():
	    torch.cuda.set_device(0)

	# Note that you may add your Hugging Face token to get access to the models
	device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
	# NOTE(review): half precision is kept for every device to preserve the
	# original behaviour; float16 inference on CPU may be slow or unsupported
	# depending on the installed torch version -- confirm before relying on it.
	weight_dtype = torch.float16
	model_path = "stabilityai/stable-diffusion-xl-base-1.0"

	# DDIM scheduler that replaces the pipeline's default scheduler.
	scheduler = DDIMScheduler(
	    beta_start=0.00085,
	    beta_end=0.012,
	    beta_schedule="scaled_linear",
	    clip_sample=False,
	    set_alpha_to_one=False,
	)

	# Load the SDXL pipeline at import time and move it to the selected device.
	pipe = DiffusionPipeline.from_pretrained(
	    model_path,
	    scheduler=scheduler,
	    torch_dtype=weight_dtype,
	).to(device)


	def consistent_synthesis():
	    """Synthesize a prompt pair with and without MasaCtrl and save the results.

	    Calls ``seed_everything(42)``, runs the module-level ``pipe`` once on both
	    prompts from a shared initial latent, then, for every entry of
	    ``LAYER_LIST``, registers the ``MasaCtrlProcessor`` attention processor on
	    ``pipe.unet`` and runs the pipeline again from the same latent.

	    Each loop iteration writes three PNG files and a ``prompts.txt`` (the
	    prompts followed by the seed) into a new ``sample_<k>`` directory under
	    ``./workdir/masactrl_exp/oldman_smiling``, where ``<k>`` is the number of
	    entries already present in that directory.

	    Returns:
	        None. Results are written to disk and the output path is printed.
	    """
	    seed = 42
	    seed_everything(seed)

	    out_dir_ori = "./workdir/masactrl_exp/oldman_smiling"
	    os.makedirs(out_dir_ori, exist_ok=True)

	    # prompts[0] is saved as the "source" image; prompts[1] is the variant
	    # saved both without and with MasaCtrl.
	    prompts = [
	        "A portrait of an old man, facing camera, best quality",
	        "A portrait of an old man, facing camera, smiling, best quality",
	    ]

	    # inference the synthesized image with MasaCtrl
	    # TODO: note that the hyperparameters of MasaCtrl for SDXL may not be optimal
	    STEP = 4
	    LAYER_LIST = [44, 54, 64]  # run the synthesis with MasaCtrl at three different layer configs
	    MODEL_TYPE = "SDXL"

	    # initialize the noise map: a single random latent is expanded across the
	    # batch so that every prompt starts from the same noise
	    start_code = torch.randn([1, 4, 128, 128], dtype=weight_dtype, device=device)
	    start_code = start_code.expand(len(prompts), -1, -1, -1)

	    # inference the synthesized image without MasaCtrl
	    image_ori = pipe(prompts, latents=start_code, guidance_scale=7.5).images

	    for LAYER in LAYER_LIST:
	        # hijack the attention module with MasaCtrl processor
	        # NOTE(review): processor_args is assembled but never handed to
	        # register_attention_processor, so as written the registration call is
	        # identical in every iteration and STEP/LAYER only influence the output
	        # file names. Confirm against masactrl.masactrl_processor whether these
	        # values are meant to be forwarded.
	        processor_args = {
	            "start_step": STEP,
	            "start_layer": LAYER,
	            "model_type": MODEL_TYPE,
	        }
	        register_attention_processor(pipe.unet, processor_type="MasaCtrlProcessor")

	        # inference the synthesized image
	        image_masactrl = pipe(prompts, latents=start_code, guidance_scale=7.5).images

	        # The directory index is re-read on every iteration, so each layer
	        # setting ends up in its own sample_<k> directory.
	        sample_count = len(os.listdir(out_dir_ori))
	        out_dir = os.path.join(out_dir_ori, f"sample_{sample_count}")
	        os.makedirs(out_dir, exist_ok=True)
	        image_ori[0].save(os.path.join(out_dir, f"source_step{STEP}_layer{LAYER}.png"))
	        image_ori[1].save(os.path.join(out_dir, f"without_step{STEP}_layer{LAYER}.png"))
	        image_masactrl[-1].save(os.path.join(out_dir, f"masactrl_step{STEP}_layer{LAYER}.png"))

	        # Explicit encoding keeps the record independent of the platform locale.
	        with open(os.path.join(out_dir, "prompts.txt"), "w", encoding="utf-8") as f:
	            f.writelines(f"{p}\n" for p in prompts)
	            f.write(f"seed: {seed}\n")
	        print("Syntheiszed images are saved in", out_dir)


	# Script entry point: run the synthesis experiment only when this file is
	# executed directly rather than imported.
	if __name__ == "__main__":
	consistent_synthesis()