import os import torch import numpy as np import cv2 import gradio as gr from PIL import Image from datetime import datetime from morph_attn import DiffMorpherPipeline from lora_utils import train_lora LENGTH=480 def train_lora_interface( image, prompt, model_path, output_path, lora_steps, lora_rank, lora_lr, num ): os.makedirs(output_path, exist_ok=True) train_lora(image, prompt, output_path, model_path, lora_steps=lora_steps, lora_lr=lora_lr, lora_rank=lora_rank, weight_name=f"lora_{num}.ckpt", progress=gr.Progress()) return f"Train LoRA {'A' if num == 0 else 'B'} Done!" def run_diffmorpher( image_0, image_1, prompt_0, prompt_1, model_path, lora_mode, lamb, use_adain, use_reschedule, num_frames, fps, load_lora_path_0, load_lora_path_1, output_path ): run_id = datetime.now().strftime("%H%M") + "_" + datetime.now().strftime("%Y%m%d") os.makedirs(output_path, exist_ok=True) morpher_pipeline = DiffMorpherPipeline.from_pretrained(model_path, torch_dtype=torch.float32).to("cuda") if lora_mode == "Fix LoRA 0": fix_lora = 0 elif lora_mode == "Fix LoRA 1": fix_lora = 1 else: fix_lora = None if not load_lora_path_0: load_lora_path_0 = f"{output_path}/lora_0.ckpt" if not load_lora_path_1: load_lora_path_1 = f"{output_path}/lora_1.ckpt" images = morpher_pipeline( img_0=image_0, img_1=image_1, prompt_0=prompt_0, prompt_1=prompt_1, load_lora_path_0=load_lora_path_0, load_lora_path_1=load_lora_path_1, lamb=lamb, use_adain=use_adain, use_reschedule=use_reschedule, num_frames=num_frames, fix_lora=fix_lora, progress=gr.Progress() ) video_path = f"{output_path}/{run_id}.mp4" video = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (LENGTH, LENGTH)) for image in images: video.write(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)) video.release() cv2.destroyAllWindows() return output_video.update(value=video_path) def run_all( image_0, image_1, prompt_0, prompt_1, model_path, lora_mode, lamb, use_adain, use_reschedule, num_frames, fps, load_lora_path_0, load_lora_path_1, output_path, lora_steps, lora_rank, lora_lr ): os.makedirs(output_path, exist_ok=True) train_lora(image_0, prompt_0, output_path, model_path, lora_steps=lora_steps, lora_lr=lora_lr, lora_rank=lora_rank, weight_name=f"lora_0.ckpt", progress=gr.Progress()) train_lora(image_1, prompt_1, output_path, model_path, lora_steps=lora_steps, lora_lr=lora_lr, lora_rank=lora_rank, weight_name=f"lora_1.ckpt", progress=gr.Progress()) return run_diffmorpher( image_0, image_1, prompt_0, prompt_1, model_path, lora_mode, lamb, use_adain, use_reschedule, num_frames, fps, load_lora_path_0, load_lora_path_1, output_path ) with gr.Blocks() as demo: with gr.Row(): gr.Markdown(""" # Official Implementation of [DiffMorpher](https://kevin-thu.github.io/DiffMorpher_page/) """) original_image_0, original_image_1 = gr.State(Image.open("Trump.jpg").convert("RGB").resize((512,512), Image.BILINEAR)), gr.State(Image.open("Biden.jpg").convert("RGB").resize((512,512), Image.BILINEAR)) # key_points_0, key_points_1 = gr.State([]), gr.State([]) # to_change_points = gr.State([]) with gr.Row(): with gr.Column(): input_img_0 = gr.Image(type="numpy", label="Input image A", value="Trump.jpg", show_label=True, height=LENGTH, width=LENGTH, interactive=True) prompt_0 = gr.Textbox(label="Prompt for image A", value="a photo of an American man", interactive=True) with gr.Row(): train_lora_0_button = gr.Button("Train LoRA A") train_lora_1_button = gr.Button("Train LoRA B") # show_correspond_button = gr.Button("Show correspondence points") with gr.Column(): input_img_1 = gr.Image(type="numpy", label="Input image B ", value="Biden.jpg", show_label=True, height=LENGTH, width=LENGTH, interactive=True) prompt_1 = gr.Textbox(label="Prompt for image B", value="a photo of an American man", interactive=True) with gr.Row(): clear_button = gr.Button("Clear All") run_button = gr.Button("Run w/o LoRA training") with gr.Column(): output_video = gr.Video(format="mp4", label="Output video", show_label=True, height=LENGTH, width=LENGTH, interactive=False) lora_progress_bar = gr.Textbox(label="Display LoRA training progress", interactive=False) run_all_button = gr.Button("Run!") # with gr.Column(): # output_video = gr.Video(label="Output video", show_label=True, height=LENGTH, width=LENGTH) with gr.Row(): gr.Markdown(""" ### Usage: 1. Upload two images (with correspondence) and fill out the prompts. 2. Click **"Run!"** Or: 1. Upload two images (with correspondence) and fill out the prompts. 2. Click the **"Train LoRA A/B"** button to fit two LoRAs for two images respectively.
   If you have trained LoRA A or LoRA B before, you can skip the step and fill the specific LoRA path in LoRA settings.
   Trained LoRAs are saved to `[Output Path]/lora_0.ckpt` and `[Output Path]/lora_1.ckpt` by default. 3. You might also change the settings below. 4. Click **"Run w/o LoRA training"** ### Note: 1. To speed up the generation process, you can **ruduce the number of frames** or **turn off "Use Reschedule"** ("Use Reschedule" will double the generation time). 2. You can try the influence of different prompts. It seems that using the same prompts or aligned prompts works better. ### Have fun! """) with gr.Accordion(label="Algorithm Parameters"): with gr.Tab("Basic Settings"): with gr.Row(): # local_models_dir = 'local_pretrained_models' # local_models_choice = \ # [os.path.join(local_models_dir,d) for d in os.listdir(local_models_dir) if os.path.isdir(os.path.join(local_models_dir,d))] model_path = gr.Text(value="stabilityai/stable-diffusion-2-1-base", label="Diffusion Model Path", interactive=True ) lamb = gr.Slider(value=0.6, minimum=0, maximum=1, step=0.1, label="Lambda for attention replacement", interactive=True) lora_mode = gr.Dropdown(value="LoRA Interp", label="LoRA Interp. or Fix LoRA", choices=["LoRA Interp", "Fix LoRA A", "Fix LoRA B"], interactive=True ) use_adain = gr.Checkbox(value=True, label="Use AdaIN", interactive=True) use_reschedule = gr.Checkbox(value=True, label="Use Reschedule", interactive=True) with gr.Row(): num_frames = gr.Number(value=15, minimum=0, label="Number of Frames", precision=0, interactive=True) fps = gr.Number(value=8, minimum=0, label="FPS (Frame rate)", precision=0, interactive=True) output_path = gr.Text(value="./results", label="Output Path", interactive=True) with gr.Tab("LoRA Settings"): with gr.Row(): lora_steps = gr.Number(value=200, label="LoRA training steps", precision=0, interactive=True) lora_lr = gr.Number(value=0.0002, label="LoRA learning rate", interactive=True) lora_rank = gr.Number(value=16, label="LoRA rank", precision=0, interactive=True) # save_lora_dir = gr.Text(value="./lora", label="LoRA model save path", interactive=True) load_lora_path_0 = gr.Text(value="", label="LoRA model load path for image A", interactive=True) load_lora_path_1 = gr.Text(value="", label="LoRA model load path for image B", interactive=True) def store_img(img): image = Image.fromarray(img).convert("RGB").resize((512,512), Image.BILINEAR) # resize the input to 512x512 # image = image.resize((512,512), Image.BILINEAR) # image = np.array(image) # when new image is uploaded, `selected_points` should be empty return image input_img_0.upload( store_img, [input_img_0], [original_image_0] ) input_img_1.upload( store_img, [input_img_1], [original_image_1] ) def clear(LENGTH): return gr.Image.update(value=None, width=LENGTH, height=LENGTH), \ gr.Image.update(value=None, width=LENGTH, height=LENGTH), \ None, None, None, None clear_button.click( clear, [gr.Number(value=LENGTH, visible=False, precision=0)], [input_img_0, input_img_1, original_image_0, original_image_1, prompt_0, prompt_1] ) train_lora_0_button.click( train_lora_interface, [ original_image_0, prompt_0, model_path, output_path, lora_steps, lora_rank, lora_lr, gr.Number(value=0, visible=False, precision=0) ], [lora_progress_bar] ) train_lora_1_button.click( train_lora_interface, [ original_image_1, prompt_1, model_path, output_path, lora_steps, lora_rank, lora_lr, gr.Number(value=1, visible=False, precision=0) ], [lora_progress_bar] ) run_button.click( run_diffmorpher, [ original_image_0, original_image_1, prompt_0, prompt_1, model_path, lora_mode, lamb, use_adain, use_reschedule, num_frames, fps, load_lora_path_0, load_lora_path_1, output_path ], [output_video] ) run_all_button.click( run_all, [ original_image_0, original_image_1, prompt_0, prompt_1, model_path, lora_mode, lamb, use_adain, use_reschedule, num_frames, fps, load_lora_path_0, load_lora_path_1, output_path, lora_steps, lora_rank, lora_lr ], [output_video] ) demo.queue().launch(debug=True)