import gradio as gr
import numpy as np
import torch
import PIL
from PIL import Image
import os
import sys
import rembg
import time
import json
import cv2
from datetime import datetime
from einops import repeat, rearrange
from omegaconf import OmegaConf
from typing import Dict, Optional, Tuple, List
from dataclasses import dataclass
from .utils import *
from huggingface_hub import hf_hub_download
import spaces

parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


class GenMVImage(object):
    def __init__(self, device):
        self.seed = 1024
        self.guidance_scale = 7.5
        self.step = 50
        self.pipelines = {}  # lazily constructed pipelines, cached per model
        self.device = device

    @spaces.GPU
    def gen_image_from_crm(self, image):
        from .third_party.CRM.pipelines import TwoStagePipeline

        stage1_config = OmegaConf.load(
            f"{parent_dir}/apps/third_party/CRM/configs/nf7_v3_SNR_rd_size_stroke.yaml"
        ).config
        stage1_sampler_config = stage1_config.sampler
        stage1_model_config = stage1_config.models
        stage1_model_config.resume = hf_hub_download(
            repo_id="Zhengyi/CRM", filename="pixel-diffusion.pth", repo_type="model"
        )
        stage1_model_config.config = f"{parent_dir}/apps/third_party/CRM/" + stage1_model_config.config
        if "crm" in self.pipelines:
            pipeline = self.pipelines["crm"]
        else:
            self.pipelines["crm"] = TwoStagePipeline(
                stage1_model_config,
                stage1_sampler_config,
                device=self.device,
                dtype=torch.float16,
            )
            pipeline = self.pipelines["crm"]
        pipeline.set_seed(self.seed)
        rt_dict = pipeline(image, scale=self.guidance_scale, step=self.step)
        mv_imgs = rt_dict["stage1_images"]
        return mv_imgs[5], mv_imgs[3], mv_imgs[2], mv_imgs[0]

    @spaces.GPU
    def gen_image_from_mvdream(self, image, text):
        from .third_party.mvdream_diffusers.pipeline_mvdream import MVDreamPipeline

        if image is None:
            # Text-only input: use MVDream.
            if "mvdream" in self.pipelines:
                pipe_MVDream = self.pipelines["mvdream"]
            else:
                self.pipelines["mvdream"] = MVDreamPipeline.from_pretrained(
                    "ashawkey/mvdream-sd2.1-diffusers",  # remote weights
                    torch_dtype=torch.float16,
                    trust_remote_code=True,
                )
                self.pipelines["mvdream"] = self.pipelines["mvdream"].to(self.device)
                pipe_MVDream = self.pipelines["mvdream"]
            mv_imgs = pipe_MVDream(
                text,
                negative_prompt="ugly, deformed, disfigured, poor details, bad anatomy",
                num_inference_steps=self.step,
                guidance_scale=self.guidance_scale,
                generator=torch.Generator(self.device).manual_seed(self.seed),
            )
        else:
            # Image (plus optional text) input: use ImageDream.
            # Composite the RGBA input over a white background.
            image = np.array(image)
            image = image.astype(np.float32) / 255.0
            image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
            if "imagedream" in self.pipelines:
                pipe_imagedream = self.pipelines["imagedream"]
            else:
                self.pipelines["imagedream"] = MVDreamPipeline.from_pretrained(
                    "ashawkey/imagedream-ipmv-diffusers",  # remote weights
                    torch_dtype=torch.float16,
                    trust_remote_code=True,
                )
                self.pipelines["imagedream"] = self.pipelines["imagedream"].to(self.device)
                pipe_imagedream = self.pipelines["imagedream"]
            mv_imgs = pipe_imagedream(
                text,
                image,
                negative_prompt="ugly, deformed, disfigured, poor details, bad anatomy",
                num_inference_steps=self.step,
                guidance_scale=self.guidance_scale,
                generator=torch.Generator(self.device).manual_seed(self.seed),
            )
        return mv_imgs[1], mv_imgs[2], mv_imgs[3], mv_imgs[0]

    @spaces.GPU
    def gen_image_from_wonder3d(self, image, crop_size):
        sys.path.append(f"{parent_dir}/apps/third_party/Wonder3D")
        # Only tested on diffusers[torch]==0.19.3; may conflict with newer diffusers versions.
        from diffusers import DiffusionPipeline

        weight_dtype = torch.float16
        batch = prepare_data(image, crop_size)
        if "wonder3d" in self.pipelines:
            pipeline = self.pipelines["wonder3d"]
        else:
            self.pipelines["wonder3d"] = DiffusionPipeline.from_pretrained(
                "flamehaze1115/wonder3d-v1.0",  # or use local checkpoint './ckpts'
                custom_pipeline="flamehaze1115/wonder3d-pipeline",
                torch_dtype=torch.float16,
            )
            self.pipelines["wonder3d"].unet.enable_xformers_memory_efficient_attention()
            self.pipelines["wonder3d"].to(self.device)
            self.pipelines["wonder3d"].set_progress_bar_config(disable=True)
            pipeline = self.pipelines["wonder3d"]
        generator = torch.Generator(device=pipeline.unet.device).manual_seed(self.seed)

        # Duplicate the batch: the first half is conditioned on the normal-map task
        # embeddings, the second half on the color-image task embeddings.
        # repeat (2B, Nv, 3, H, W)
        imgs_in = torch.cat([batch["imgs_in"]] * 2, dim=0).to(weight_dtype)
        # (2B, Nv, Nce)
        camera_embeddings = torch.cat([batch["camera_embeddings"]] * 2, dim=0).to(weight_dtype)
        task_embeddings = torch.cat(
            [batch["normal_task_embeddings"], batch["color_task_embeddings"]], dim=0
        ).to(weight_dtype)
        camera_embeddings = torch.cat([camera_embeddings, task_embeddings], dim=-1).to(weight_dtype)
        # (B*Nv, 3, H, W)
        imgs_in = rearrange(imgs_in, "Nv C H W -> (Nv) C H W")
        out = pipeline(
            imgs_in,
            # camera_embeddings,
            generator=generator,
            guidance_scale=self.guidance_scale,
            num_inference_steps=self.step,
            output_type="pt",
            num_images_per_prompt=1,
            eta=1.0,
        ).images
        # The first half of the outputs are normal maps, the second half color images.
        bsz = out.shape[0] // 2
        normals_pred = out[:bsz]
        images_pred = out[bsz:]
        normals_pred = [save_image(normals_pred[i]) for i in range(bsz)]
        images_pred = [save_image(images_pred[i]) for i in range(bsz)]
        mv_imgs = images_pred
        return mv_imgs[0], mv_imgs[2], mv_imgs[4], mv_imgs[5]

    def run(self, mvimg_model, text, image, crop_size, seed, guidance_scale, step):
        self.seed = seed
        self.guidance_scale = guidance_scale
        self.step = step
        if mvimg_model.upper() == "CRM":
            return self.gen_image_from_crm(image)
        elif mvimg_model.upper() == "IMAGEDREAM":
            return self.gen_image_from_mvdream(image, text)
        elif mvimg_model.upper() == "WONDER3D":
            return self.gen_image_from_wonder3d(image, crop_size)
        else:
            raise ValueError(f"Unknown multi-view model: {mvimg_model}")
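

# Usage sketch (illustrative only, not invoked by the app): assumes this module
# is imported as part of its package so the relative imports resolve, that a
# CUDA device is available, and that "input.png" is a hypothetical RGBA image.
#
#     from PIL import Image
#     gen = GenMVImage(device="cuda")
#     views = gen.run(
#         mvimg_model="CRM",              # or "ImageDream" / "Wonder3D"
#         text="",                        # used by the (Image)Dream branch
#         image=Image.open("input.png"),  # None triggers text-only MVDream
#         crop_size=192,                  # used by the Wonder3D branch
#         seed=1024,
#         guidance_scale=7.5,
#         step=50,
#     )
#     # `views` is a tuple of four multi-view images, one per returned viewpoint.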