CRM / pipelines.py
Zhengyi's picture
fix
c5236d2
import torch
from libs.base_utils import do_resize_content
from imagedream.ldm.util import (
instantiate_from_config,
get_obj_from_str,
)
from omegaconf import OmegaConf
from PIL import Image
import numpy as np
class TwoStagePipeline(object):
    """Two-stage image generation pipeline.

    Stage 1 is conditioned on a single input image and generates multi-view
    images. Stage 2 is conditioned on the input image together with the
    stage-1 multi-view images and generates the final images.
    """

    def __init__(
        self,
        stage1_model_config,
        stage2_model_config,
        stage1_sampler_config,
        stage2_sampler_config,
        device="cuda",
        dtype=torch.float16,
        resize_rate=1,
    ) -> None:
        """Build both models and their samplers.

        Args:
            stage1_model_config: Config node with a ``config`` attribute (path
                of a config file whose ``model`` node is instantiated) and a
                ``resume`` attribute (checkpoint path given to ``torch.load``).
                The original author notes this is based on the "v2pp" config.
            stage2_model_config: Same layout as ``stage1_model_config``, for
                the second stage. The original author notes this is based on
                the "stage2-test" config.
            stage1_sampler_config: Config node with ``target`` (dotted name
                resolved through ``get_obj_from_str``) and ``params`` (keyword
                arguments forwarded to the sampler constructor).
            stage2_sampler_config: Same layout as ``stage1_sampler_config``.
            device: Device both models are moved to and the samplers run on.
            dtype: Dtype both models are cast to and the samplers use.
            resize_rate: Forwarded to ``do_resize_content`` in ``__call__``.
        """
        self.resize_rate = resize_rate
        self.device = device
        self.dtype = dtype

        self.stage1_model = self._load_model(stage1_model_config, device, dtype)
        self.stage2_model = self._load_model(stage2_model_config, device, dtype)

        self.stage1_sampler = get_obj_from_str(stage1_sampler_config.target)(
            self.stage1_model, device=device, dtype=dtype, **stage1_sampler_config.params
        )
        self.stage2_sampler = get_obj_from_str(stage2_sampler_config.target)(
            self.stage2_model, device=device, dtype=dtype, **stage2_sampler_config.params
        )

    @staticmethod
    def _load_model(model_config, device, dtype):
        """Instantiate one stage's model, load its checkpoint and place it on ``device``."""
        model = instantiate_from_config(OmegaConf.load(model_config.config).model)
        # NOTE(security): torch.load unpickles the file, so the checkpoint at
        # ``model_config.resume`` must come from a trusted source.
        state_dict = torch.load(model_config.resume, map_location="cpu")
        # strict=False: keys that do not match the model are tolerated rather
        # than raising.
        model.load_state_dict(state_dict, strict=False)
        model = model.to(device).to(dtype)
        # The device is also recorded as a plain attribute on the model itself.
        model.device = device
        return model

    @staticmethod
    def _to_rgb(pixel_img):
        """Normalise ``pixel_img`` to an RGB ``PIL.Image.Image``.

        Args:
            pixel_img: Either a path string (opened with ``Image.open``) or an
                already loaded PIL image.

        Returns:
            The image converted to mode ``"RGB"``. RGBA input is first
            alpha-composited over a fully transparent black canvas.

        Raises:
            TypeError: If ``pixel_img`` is neither a ``str`` nor a PIL image.
        """
        if isinstance(pixel_img, str):
            pixel_img = Image.open(pixel_img)
        if not isinstance(pixel_img, Image.Image):
            # A bare ``raise`` here used to surface as the unrelated
            # "RuntimeError: No active exception to reraise".
            raise TypeError(
                "pixel_img must be a file path (str) or a PIL.Image.Image, "
                f"got {type(pixel_img).__name__}"
            )
        if pixel_img.mode == "RGBA":
            background = Image.new("RGBA", pixel_img.size, (0, 0, 0, 0))
            return Image.alpha_composite(background, pixel_img).convert("RGB")
        return pixel_img.convert("RGB")

    def stage1_sample(
        self,
        pixel_img,
        prompt="3D assets",
        neg_texts="uniform low no texture ugly, boring, bad anatomy, blurry, pixelated, obscure, unnatural colors, poor lighting, dull, and unclear.",
        step=50,
        scale=5,
        ddim_eta=0.0,
    ):
        """Run the first stage on a single input image.

        Args:
            pixel_img: Path string or PIL image used as the image condition.
            prompt: Text prompt forwarded to the sampler.
            neg_texts: Negative prompt, encoded with the model's
                ``get_learned_conditioning`` and passed as ``uc``.
            step: Forwarded to the sampler as ``step``.
            scale: Forwarded to the sampler as ``scale``.
            ddim_eta: Forwarded to the sampler as ``ddim_eta``.

        Returns:
            List of PIL images produced by the sampler, with the entry at the
            sampler's ``ref_position`` removed (presumably the reference view
            -- confirm against the sampler implementation).

        Raises:
            TypeError: If ``pixel_img`` is neither a ``str`` nor a PIL image.
        """
        pixel_img = self._to_rgb(pixel_img)
        sampler = self.stage1_sampler
        uc = sampler.model.get_learned_conditioning([neg_texts]).to(self.device)
        raw_views = sampler.i2i(
            sampler.model,
            sampler.size,
            prompt,
            uc=uc,
            sampler=sampler.sampler,
            ip=pixel_img,
            step=step,
            scale=scale,
            batch_size=sampler.batch_size,
            ddim_eta=ddim_eta,
            dtype=sampler.dtype,
            device=sampler.device,
            camera=sampler.camera,
            num_frames=sampler.num_frames,
            pixel_control=(sampler.mode == "pixel"),
            transform=sampler.image_transform,
            offset_noise=sampler.offset_noise,
        )
        stage1_images = [Image.fromarray(view) for view in raw_views]
        stage1_images.pop(sampler.ref_position)
        return stage1_images

    def stage2_sample(self, pixel_img, stage1_images, scale=5, step=50):
        """Run the second stage on the input image plus the stage-1 views.

        The prompt is fixed to ``"3D assets"``, ``ddim_eta`` to ``0.0``, and the
        unconditional embedding is taken from the sampler's own ``uc``.

        Args:
            pixel_img: Path string or PIL image used as the image condition.
            stage1_images: Stage-1 output, forwarded as ``pixel_images``.
            scale: Forwarded to the sampler as ``scale``.
            step: Forwarded to the sampler as ``step``.

        Returns:
            List of PIL images built from the sampler output.

        Raises:
            TypeError: If ``pixel_img`` is neither a ``str`` nor a PIL image.
        """
        pixel_img = self._to_rgb(pixel_img)
        sampler = self.stage2_sampler
        raw_views = sampler.i2iStage2(
            sampler.model,
            sampler.size,
            "3D assets",
            sampler.uc,
            sampler.sampler,
            pixel_images=stage1_images,
            ip=pixel_img,
            step=step,
            scale=scale,
            batch_size=sampler.batch_size,
            ddim_eta=0.0,
            dtype=sampler.dtype,
            device=sampler.device,
            camera=sampler.camera,
            num_frames=sampler.num_frames,
            pixel_control=(sampler.mode == "pixel"),
            transform=sampler.image_transform,
            offset_noise=sampler.offset_noise,
        )
        return [Image.fromarray(view) for view in raw_views]

    def set_seed(self, seed):
        """Assign ``seed`` to the ``seed`` attribute of both samplers."""
        self.stage1_sampler.seed = seed
        self.stage2_sampler.seed = seed

    def __call__(self, pixel_img, prompt="3D assets", scale=5, step=50):
        """Run both stages on ``pixel_img``.

        Args:
            pixel_img: Input image, first passed through ``do_resize_content``
                with this pipeline's ``resize_rate``.
            prompt: Text prompt for stage 1 (stage 2 uses a fixed prompt).
            scale: Forwarded to both stages.
            step: Forwarded to both stages.

        Returns:
            Dict with ``"ref_img"`` (the resized input), ``"stage1_images"``
            and ``"stage2_images"``.
        """
        pixel_img = do_resize_content(pixel_img, self.resize_rate)
        stage1_images = self.stage1_sample(pixel_img, prompt, scale=scale, step=step)
        stage2_images = self.stage2_sample(pixel_img, stage1_images, scale=scale, step=step)
        return {
            "ref_img": pixel_img,
            "stage1_images": stage1_images,
            "stage2_images": stage2_images,
        }
if __name__ == "__main__":
    # Demo run: build the pipeline from the two stage configs, process the
    # bundled sample image and save each stage's output as one image strip.
    cfg_stage1 = OmegaConf.load("configs/nf7_v3_SNR_rd_size_stroke.yaml").config
    cfg_stage2 = OmegaConf.load("configs/stage2-v2-snr.yaml").config

    pipeline = TwoStagePipeline(
        stage1_model_config=cfg_stage1.models,
        stage2_model_config=cfg_stage2.models,
        stage1_sampler_config=cfg_stage1.sampler,
        stage2_sampler_config=cfg_stage2.sampler,
    )

    outputs = pipeline(Image.open("assets/astronaut.png"))

    # Join each stage's images along axis 1 into a single array.
    strip_stage1 = np.concatenate(outputs["stage1_images"], 1)
    strip_stage2 = np.concatenate(outputs["stage2_images"], 1)

    Image.fromarray(strip_stage1).save("pixel_images.png")
    Image.fromarray(strip_stage2).save("xyz_images.png")