# adapted from https://huggingface.co/spaces/HumanAIGC/OutfitAnyone/blob/main/app.py
"""Gradio demo for StableGarment: garment-conditioned text-to-image generation.

At import time this script installs the ``stablegarment`` package, loads the
VAE, scheduler, garment encoder and text-to-image pipeline, and builds the
Gradio UI.  The virtual try-on pipeline is not wired up yet
(``pipeline_tryon`` is ``None``).
"""
import os
import subprocess
import sys
from os.path import join as opj

token = os.getenv("ACCESS_TOKEN")
# Only embed credentials when a token is actually configured; otherwise the
# URL would carry the literal string "None" as the user name.
_credentials = f"{token}@" if token else ""
# Argument list + sys.executable: no shell ever sees the token, and the
# package is installed into the interpreter that is running this script.
# Best effort (check=False): a failed install surfaces as an ImportError below.
subprocess.run(
    [
        sys.executable, "-m", "pip", "install",
        f"git+https://{_credentials}github.com/logn-2024/StableGarment.git",
    ],
    check=False,
)

import torch
import gradio as gr
from PIL import Image
import numpy as np
from torchvision import transforms

from transformers import CLIPTextModel, CLIPTokenizer
from diffusers import UniPCMultistepScheduler
from diffusers import AutoencoderKL
from diffusers import StableDiffusionPipeline
from diffusers.loaders import LoraLoaderMixin

from stablegarment.models import GarmentEncoderModel, ControlNetModel
from stablegarment.piplines import StableGarmentPipeline, StableGarmentControlNetPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.bfloat16 if device == "cpu" else torch.float16
height = 512
width = 384

base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse").to(dtype=torch_dtype, device=device)
scheduler = UniPCMultistepScheduler.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="scheduler")

pretrained_garment_encoder_path = "loooooong/StableGarment_text2img"
garment_encoder = GarmentEncoderModel.from_pretrained(
    pretrained_garment_encoder_path, torch_dtype=torch_dtype, subfolder="garment_encoder"
)
garment_encoder = garment_encoder.to(device=device, dtype=torch_dtype)

pipeline_t2i = StableGarmentPipeline.from_pretrained(
    base_model_path, vae=vae, torch_dtype=torch_dtype, use_safetensors=True,
).to(device=device)  # variant="fp16"
# pipeline = StableDiffusionPipeline.from_pretrained("SG161222/Realistic_Vision_V4.0_noVAE", vae=vae, torch_dtype=torch_dtype).to(device=device)
pipeline_t2i.scheduler = scheduler

if device == "cpu":  # speed up for cpu
    # IPEX is only used on the CPU path, so import it here; a CUDA host
    # without the package installed can still start the app.
    import intel_extension_for_pytorch as ipex

    # to channels last
    pipeline_t2i.unet = pipeline_t2i.unet.to(memory_format=torch.channels_last)
    pipeline_t2i.vae = pipeline_t2i.vae.to(memory_format=torch.channels_last)
    pipeline_t2i.text_encoder = pipeline_t2i.text_encoder.to(memory_format=torch.channels_last)
    # pipeline_t2i.safety_checker = pipeline_t2i.safety_checker.to(memory_format=torch.channels_last)

    # Create random input to enable JIT compilation
    sample = torch.randn(2, 4, 64, 64).type(torch_dtype)
    timestep = torch.rand(1) * 999
    encoder_hidden_status = torch.randn(2, 77, 768).type(torch_dtype)
    input_example = (sample, timestep, encoder_hidden_status)

    # optimize with IPEX
    pipeline_t2i.unet = ipex.optimize(
        pipeline_t2i.unet.eval(), dtype=torch.bfloat16, inplace=True, sample_input=input_example
    )
    pipeline_t2i.vae = ipex.optimize(pipeline_t2i.vae.eval(), dtype=torch.bfloat16, inplace=True)
    pipeline_t2i.text_encoder = ipex.optimize(pipeline_t2i.text_encoder.eval(), dtype=torch.bfloat16, inplace=True)
    # pipeline_t2i.safety_checker = ipex.optimize(pipeline_t2i.safety_checker.eval(), dtype=torch.bfloat16, inplace=True)

pipeline_tryon = None
# not ready
# pretrained_model_path = "part_module_controlnet_imp2"
# controlnet = ControlNetModel.from_pretrained(pretrained_model_path,subfolder="controlnet")
# text_encoder = CLIPTextModel.from_pretrained(base_model_path, subfolder='text_encoder')
# tokenizer = CLIPTokenizer.from_pretrained(base_model_path, subfolder='tokenizer')
# pipeline_tryon = StableGarmentControlNetPipeline(
#     vae,
#     text_encoder,
#     tokenizer,
#     pipeline_t2i.unet,
#     controlnet,
#     scheduler,
# ).to(device=device,dtype=torch_dtype)


def _open_resized(path):
    """Open the image at ``path`` and return a copy resized to (width, height).

    The source file is closed before returning; ``resize`` yields a new,
    fully loaded image, so nothing depends on the file staying open.
    """
    with Image.open(path) as img:
        return img.resize((width, height))


def prepare_controlnet_inputs(agn_mask_list, densepose_list):
    """Build the stacked ControlNet condition tensor from masks and poses.

    Each mask image is converted to grayscale, binarised at 128 and inverted
    (pixels >= 128 become 0., the rest 1.), then concatenated along the last
    axis with the matching densepose image scaled by 1/255.  Every pair is
    moved to channel-first layout and all pairs are stacked on a new leading
    dimension.

    Args:
        agn_mask_list: sequence of PIL images (must support ``.convert("L")``).
        densepose_list: sequence of images convertible with ``np.array``;
            paired with ``agn_mask_list`` via ``zip``.

    Returns:
        A ``torch.Tensor`` with one entry per (mask, pose) pair.

    The input lists are left untouched (the previous implementation
    overwrote the entries of ``agn_mask_list`` in place).
    """
    controlnet_inputs = []
    for agn_mask_img, densepose_img in zip(agn_mask_list, densepose_list):
        mask = np.expand_dims(np.array(agn_mask_img.convert("L")), axis=-1)
        mask = 1. - (mask >= 128).astype(np.float32)  # inverted binary mask: 0 or 1
        pose = np.array(densepose_img) / 255.
        condition = np.concatenate([mask, pose], axis=-1)
        controlnet_inputs.append(torch.tensor(condition).permute(2, 0, 1))
    return torch.stack(controlnet_inputs)


def tryon(prompt, init_image, garment_top, garment_down,):
    """Run virtual try-on for the selected model image and top garment.

    Args:
        prompt: text prompt forwarded to the try-on pipeline.
        init_image: file path of the model image; its base name selects the
            ``*_agn.jpg``, ``*_mask.png`` and ``*_densepose.png`` files in
            ``parse_dir``.
        garment_top: file path of the top garment image.
        garment_down: unused.

    Returns:
        The first image produced by ``pipeline_tryon``.

    Raises:
        gr.Error: if the try-on pipeline is not loaded or no model image
            is selected.
    """
    if pipeline_tryon is None:
        # Previously this fell through to calling ``None`` and raised an
        # opaque TypeError.
        raise gr.Error("The try-on pipeline is not available yet.")
    if not init_image:
        raise gr.Error("Please select a model image first.")

    basename = os.path.splitext(os.path.basename(init_image))[0]
    image_agn = _open_resized(opj(parse_dir, basename + "_agn.jpg"))
    image_agn_mask = _open_resized(opj(parse_dir, basename + "_mask.png"))
    densepose_image = _open_resized(opj(parse_dir, basename + "_densepose.png"))
    garment_top = _open_resized(garment_top)

    garment_images = [garment_top,]
    prompt = [prompt,]
    cloth_prompt = ["",]
    controlnet_condition = prepare_controlnet_inputs([image_agn_mask], [densepose_image])

    images = pipeline_tryon(
        prompt, negative_prompt="", cloth_prompt=cloth_prompt,  # negative_cloth_prompt = n_prompt,
        height=height, width=width, num_inference_steps=25, guidance_scale=1.5, eta=0.0,
        controlnet_condition=controlnet_condition, reference_image=garment_images,
        garment_encoder=garment_encoder, condition_extra=image_agn,
        generator=None,
    ).images
    return images[0]


def text2image(prompt, init_image, garment_top, garment_down, style_fidelity=1.):
    """Generate an image from a text prompt conditioned on a top garment.

    Args:
        prompt: text prompt.
        init_image: unused.
        garment_top: file path of the top garment image.
        garment_down: unused.
        style_fidelity: forwarded to the pipeline as ``style_fidelity``.

    Returns:
        The first image produced by ``pipeline_t2i``.

    Raises:
        gr.Error: if no garment image is provided.
    """
    if not garment_top:
        raise gr.Error("Please provide a top garment image.")

    garment_top = _open_resized(garment_top)
    # NOTE(review): the image has already been resized to (width, height)
    # above, so this resize + center crop trims the borders of the stretched
    # image. Kept exactly as in the original; confirm this is intended.
    garment_top = transforms.CenterCrop((height, width))(transforms.Resize(max(height, width))(garment_top))

    # always enable classifier-free-guidance as it is related to garment
    cfg = 4  # if prompt else 0

    garment_images = [garment_top,]
    prompt = [prompt,]
    cloth_prompt = ["",]
    n_prompt = "nsfw, unsaturated, abnormal, unnatural, artifact"
    negative_prompt = [n_prompt]

    images = pipeline_t2i(
        prompt, negative_prompt=negative_prompt, cloth_prompt=cloth_prompt, height=height, width=width,
        num_inference_steps=30, guidance_scale=cfg, num_images_per_prompt=1, style_fidelity=style_fidelity,
        garment_encoder=garment_encoder, garment_image=garment_images,
    ).images
    return images[0]

# def text2image(prompt,init_image,garment_top,garment_down,):
#     return pipeline(prompt).images[0]


def infer(prompt, init_image, garment_top, garment_down, t2i_only, style_fidelity):
    """Dispatch the Run button to ``text2image`` or ``tryon`` based on ``t2i_only``."""
    if t2i_only:
        return text2image(prompt, init_image, garment_top, garment_down, style_fidelity)
    return tryon(prompt, init_image, garment_top, garment_down)


# UI state remembered across mode switches. These are module globals, so
# they are shared by everything running in this process.
init_state, prompt_state = None, ""
t2i_only_state = True


def set_mode(t2i_only, person_condition, prompt):
    """Update the model image and prompt widgets when the mode checkbox changes.

    Args:
        t2i_only: new value of the "t2i with garment" checkbox.
        person_condition: current value of the model image component.
        prompt: current value of the prompt textbox.

    Returns:
        A two-element list ``[gr.Image, gr.Textbox]`` for the model image and
        prompt components: in t2i mode the image is cleared and the
        remembered prompt restored; otherwise the remembered model image is
        restored and the prompt is cleared and locked.
    """
    global init_state, prompt_state, t2i_only_state
    # Take the mode from the checkbox value rather than toggling blindly, so
    # the flag cannot drift out of sync with the widget.
    t2i_only_state = bool(t2i_only)
    # Remember the latest non-empty values. The prompt previously used
    # ``prompt_state or prompt``, which kept the first prompt forever
    # instead of the most recent one.
    init_state, prompt_state = person_condition or init_state, prompt or prompt_state
    if t2i_only:
        return [
            gr.Image(sources='clipboard', type="filepath", label="model", value=None, interactive=False),
            gr.Textbox(placeholder="", label="prompt(for t2i)", value=prompt_state, interactive=True),
        ]
    return [
        gr.Image(sources='clipboard', type="filepath", label="model", value=init_state, interactive=False),
        gr.Textbox(placeholder="", label="prompt(for t2i)", value="", interactive=False),
    ]


def example_fn(inputs,):
    """Return the model image component for a clicked example.

    In t2i mode the model image stays empty; otherwise it shows ``inputs``.
    """
    shown = None if t2i_only_state else inputs
    return gr.Image(sources='clipboard', type="filepath", label="model", value=shown, interactive=False)


gr.set_static_paths(paths=["assets/images/model"])
model_dir = opj(os.path.dirname(__file__), "assets/images/model")
garment_dir = opj(os.path.dirname(__file__), "assets/images/garment")
parse_dir = opj(os.path.dirname(__file__), "assets/images/image_parse")

model = opj(model_dir, "13987_00.jpg")
all_person = [opj(model_dir, fname) for fname in os.listdir(model_dir) if fname.endswith(".jpg")]

# NOTE(review): the nesting of rows/columns below was reconstructed from a
# whitespace-collapsed source; confirm the layout against the running app.
with gr.Blocks(css = ".output-image, .input-image, .image-preview {height: 400px !important} ", ) as gradio_app:
    gr.Markdown("# StableGarment")
    with gr.Row():
        with gr.Column():
            init_image = gr.Image(sources='clipboard', type="filepath", label="model", value=None, interactive=False)
            example = gr.Examples(
                inputs=gr.Image(visible=False),  # init_image,
                examples_per_page=4,
                examples=all_person,
                run_on_click=True,
                outputs=init_image,
                fn=example_fn,
            )
        with gr.Column():
            with gr.Row():
                images_top = [opj(garment_dir, fname) for fname in os.listdir(garment_dir) if fname.endswith(".jpg")]
                garment_top = gr.Image(
                    sources='upload', type="filepath", label="top garment",
                    # Start empty instead of raising IndexError when no garment is bundled.
                    value=images_top[0] if images_top else None,
                )  # ,interactive=False
                example_top = gr.Examples(inputs=garment_top, examples_per_page=4, examples=images_top)
                images_down = []
                garment_down = gr.Image(
                    sources='upload', type="filepath", label="lower garment", interactive=False, visible=False
                )
                example_down = gr.Examples(inputs=garment_down, examples_per_page=4, examples=images_down)
            prompt = gr.Textbox(placeholder="", label="prompt(for t2i)",)  # interactive=False
            with gr.Row():
                t2i_only = gr.Checkbox(
                    label="t2i with garment", info="Only text and garment.",
                    elem_id="t2i_switch", value=True, interactive=False,
                )
                run_button = gr.Button(value="Run")
                t2i_only.change(fn=set_mode, inputs=[t2i_only, init_image, prompt], outputs=[init_image, prompt,])
            with gr.Accordion("advance options", open=False):
                gr.Markdown("Garment fidelity control(Tune down it to reduce white edge).")
                style_fidelity = gr.Slider(0, 1, value=1, label="fidelity(only for t2i)")  # , info=""
        with gr.Column():
            gallery = gr.Image()
            run_button.click(
                fn=infer,
                inputs=[
                    prompt,
                    init_image,
                    garment_top,
                    garment_down,
                    t2i_only,
                    style_fidelity,
                ],
                outputs=[gallery],
            )

if __name__ == "__main__":
    gradio_app.launch()