zhiweili committed
Commit b312a31
1 Parent(s): fe60d00

test app base

Files changed (4):
  1. app.py +1 -1
  2. app_base.py +118 -0
  3. app_haircolor.py +2 -2
  4. inversion_run_base.py +219 -0
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 
 # from app_base import create_demo as create_demo_face
-from app_haircolor import create_demo as create_demo_haircolor
+from app_base import create_demo as create_demo_haircolor
 
 with gr.Blocks(css="style.css") as demo:
     with gr.Tabs():
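
Note: the hunk keeps the create_demo_haircolor alias, so the tab wiring below line 7 presumably stays unchanged. A minimal sketch of that wiring, assuming the usual multi-tab Space pattern (the tab label and the launch call are guesses; the hunk cuts off at line 7):

import gradio as gr

from app_base import create_demo as create_demo_haircolor

with gr.Blocks(css="style.css") as demo:
    with gr.Tabs():
        with gr.Tab(label="Hair Color"):  # hypothetical label, not shown in the hunk
            create_demo_haircolor()

demo.launch()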
app_base.py ADDED
@@ -0,0 +1,118 @@
+import spaces
+import gradio as gr
+import time
+import torch
+
+from PIL import Image
+from segment_utils import (
+    segment_image,
+    restore_result,
+)
+from enhance_utils import enhance_image
+
+DEFAULT_SRC_PROMPT = "a woman, photo"
+DEFAULT_EDIT_PROMPT = "a beautiful woman, photo, hollywood style face, 8k, high quality"
+
+DEFAULT_CATEGORY = "hair"
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+def create_demo() -> gr.Blocks:
+    from inversion_run_base import run as base_run
+
+    @spaces.GPU(duration=10)
+    def image_to_image(
+        input_image: Image,
+        input_image_prompt: str,
+        edit_prompt: str,
+        seed: int,
+        w1: float,
+        num_steps: int,
+        start_step: int,
+        guidance_scale: float,
+        strength: float,
+        generate_size: int,
+    ):
+        w2 = 1.0
+        run_task_time = 0
+        time_cost_str = ''
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+        run_model = base_run
+        res_image = run_model(
+            input_image,
+            input_image_prompt,
+            edit_prompt,
+            generate_size,
+            seed,
+            w1,
+            w2,
+            num_steps,
+            start_step,
+            guidance_scale,
+            strength,
+        )
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+        enhanced_image = enhance_image(res_image, False)
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+
+        return enhanced_image, res_image, time_cost_str
+
+    def get_time_cost(run_task_time, time_cost_str):
+        now_time = int(time.time()*1000)
+        if run_task_time == 0:
+            time_cost_str = 'start'
+        else:
+            if time_cost_str != '':
+                time_cost_str += '-->'
+            time_cost_str += f'{now_time - run_task_time}'
+        run_task_time = now_time
+        return run_task_time, time_cost_str
+
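+    # layout: prompts and sampler controls in the top row, image panes below;
+    # the Edit Image button chains segment_image -> image_to_image ->
+    # restore_result, each stage gated on the previous one via .success()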
+    with gr.Blocks() as demo:
+        croper = gr.State()
+        with gr.Row():
+            with gr.Column():
+                input_image_prompt = gr.Textbox(lines=1, label="Input Image Prompt", value=DEFAULT_SRC_PROMPT)
+                edit_prompt = gr.Textbox(lines=1, label="Edit Prompt", value=DEFAULT_EDIT_PROMPT)
+                category = gr.Textbox(label="Category", value=DEFAULT_CATEGORY, visible=False)
+            with gr.Column():
+                num_steps = gr.Slider(minimum=1, maximum=100, value=20, step=1, label="Num Steps")
+                start_step = gr.Slider(minimum=1, maximum=100, value=15, step=1, label="Start Step")
+                strength = gr.Slider(minimum=0, maximum=2, value=0.3, step=0.1, label="Strength")
+                with gr.Accordion("Advanced Options", open=False):
+                    guidance_scale = gr.Slider(minimum=0, maximum=20, value=0, step=0.5, label="Guidance Scale")
+                    generate_size = gr.Number(label="Generate Size", value=1024)
+                    mask_expansion = gr.Number(label="Mask Expansion", value=50, visible=True)
+                    mask_dilation = gr.Slider(minimum=0, maximum=10, value=2, step=1, label="Mask Dilation")
+            with gr.Column():
+                seed = gr.Number(label="Seed", value=8)
+                w1 = gr.Number(label="W1", value=2)
+                g_btn = gr.Button("Edit Image")
+
+        with gr.Row():
+            with gr.Column():
+                input_image = gr.Image(label="Input Image", type="pil")
+            with gr.Column():
+                restored_image = gr.Image(label="Restored Image", type="pil", interactive=False)
+                download_path = gr.File(label="Download the output image", interactive=False)
+            with gr.Column():
+                origin_area_image = gr.Image(label="Origin Area Image", type="pil", interactive=False)
+                enhanced_image = gr.Image(label="Enhanced Image", type="pil", interactive=False)
+                generated_cost = gr.Textbox(label="Time cost by step (ms):", visible=True, interactive=False)
+                generated_image = gr.Image(label="Generated Image", type="pil", interactive=False)
+
+        g_btn.click(
+            fn=segment_image,
+            inputs=[input_image, category, generate_size, mask_expansion, mask_dilation],
+            outputs=[origin_area_image, croper],
+        ).success(
+            fn=image_to_image,
+            inputs=[origin_area_image, input_image_prompt, edit_prompt, seed, w1, num_steps, start_step, guidance_scale, strength, generate_size],
+            outputs=[enhanced_image, generated_image, generated_cost],
+        ).success(
+            fn=restore_result,
+            inputs=[croper, category, enhanced_image],
+            outputs=[restored_image, download_path],
+        )
+
+    return demo
app_haircolor.py CHANGED
@@ -12,8 +12,8 @@ from enhance_utils import enhance_image
 from inversion_run_adapter import run as adapter_run
 
 
-DEFAULT_SRC_PROMPT = "a woman, with hair"
-DEFAULT_EDIT_PROMPT = "a woman, with red hair, 8k, high quality"
+DEFAULT_SRC_PROMPT = "RAW photo"
+DEFAULT_EDIT_PROMPT = "RAW photo, Fujifilm XT3, sharp hair, high resolution hair, hair tones, natural hair, magazine hair, white color hair"
 
 DEFAULT_CATEGORY = "hair"
 
inversion_run_base.py ADDED
@@ -0,0 +1,219 @@
+import torch
+
+from diffusers import (
+    DDPMScheduler,
+    StableDiffusionXLImg2ImgPipeline,
+)
+from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img import retrieve_timesteps, retrieve_latents
+from PIL import Image
+from inversion_utils import get_ddpm_inversion_scheduler, create_xts
+from config import get_config, get_num_steps_actual
+from functools import partial
+from compel import Compel, ReturnedEmbeddingsType
+
+class Object(object):
+    pass
+
+args = Object()
+args.images_paths = None
+args.images_folder = None
+args.force_use_cpu = False
+args.folder_name = 'test_measure_time'
+args.config_from_file = 'run_configs/noise_shift_guidance_1_5.yaml'
+args.save_intermediate_results = False
+args.batch_size = None
+args.skip_p_to_p = True
+args.only_p_to_p = False
+args.fp16 = False
+args.prompts_file = 'dataset_measure_time/dataset.json'
+args.images_in_prompts_file = None
+args.seed = 986
+args.time_measure_n = 1
+
+
+assert (
+    args.batch_size is None or args.save_intermediate_results is False
+), "save_intermediate_results is not implemented for batch_size > 1"
+
+generator = None
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# BASE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
+BASE_MODEL = "stabilityai/sdxl-turbo"
+
+
+pipeline = StableDiffusionXLImg2ImgPipeline.from_pretrained(
+    BASE_MODEL,
+    torch_dtype=torch.float16,
+    variant="fp16",
+    use_safetensors=True,
+)
+pipeline = pipeline.to(device)
+
+pipeline.scheduler = DDPMScheduler.from_pretrained(
+    BASE_MODEL,
+    subfolder="scheduler",
+)
+
+config = get_config(args)
+
+compel_proc = Compel(
+    tokenizer=[pipeline.tokenizer, pipeline.tokenizer_2],
+    text_encoder=[pipeline.text_encoder, pipeline.text_encoder_2],
+    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+    requires_pooled=[False, True]
+)
+
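+# run() performs a DDPM-inversion style edit (the mechanics live in
+# inversion_utils, not shown here): the input image is encoded and noised
+# to an intermediate timestep, the scheduler is swapped for an inversion
+# scheduler, and the pipeline then denoises a 3-way batch [src, src, tgt];
+# the tgt result is returned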
+def run(
+    input_image: Image,
+    src_prompt: str,
+    tgt_prompt: str,
+    generate_size: int,
+    seed: int,
+    w1: float,
+    w2: float,
+    num_steps: int,
+    start_step: int,
+    guidance_scale: float,
+    strength: float,
+):
+    generator = torch.Generator().manual_seed(seed)
+
+    config.num_steps_inversion = num_steps
+    config.step_start = start_step
+    num_steps_actual = get_num_steps_actual(config)
+
+
+    num_steps_inversion = config.num_steps_inversion
+    denoising_start = (num_steps_inversion - num_steps_actual) / num_steps_inversion
+    print(f"-------->num_steps_inversion: {num_steps_inversion} num_steps_actual: {num_steps_actual} denoising_start: {denoising_start}")
+
+    timesteps, num_inference_steps = retrieve_timesteps(
+        pipeline.scheduler, num_steps_inversion, device, None
+    )
+    timesteps, num_inference_steps = pipeline.get_timesteps(
+        num_inference_steps=num_inference_steps,
+        denoising_start=denoising_start,
+        strength=strength,
+        device=device,
+    )
+    timesteps = timesteps.type(torch.int64)
+
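+    # strength/denoising_start may trim the schedule, so re-derive step_start
+    # and size the per-step max_norm_zs list to match the actual number of
+    # timesteps the inversion scheduler will see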
+    timesteps = [torch.tensor(t) for t in timesteps.tolist()]
+    timesteps_len = len(timesteps)
+    config.step_start = start_step + num_steps_actual - timesteps_len
+    num_steps_actual = timesteps_len
+    config.max_norm_zs = [-1] * (num_steps_actual - 1) + [15.5]
+    print(f"-------->num_steps_inversion: {num_steps_inversion} num_steps_actual: {num_steps_actual} step_start: {config.step_start}")
+    print(f"-------->timesteps len: {len(timesteps)} max_norm_zs len: {len(config.max_norm_zs)}")
+    pipeline.__call__ = partial(
+        pipeline.__call__,
+        num_inference_steps=num_steps_inversion,
+        guidance_scale=guidance_scale,
+        generator=generator,
+        denoising_start=denoising_start,
+        strength=strength,
+    )
+
+    x_0_image = input_image
+    x_0 = encode_image(x_0_image, pipeline)
+    x_ts = create_xts(1, None, 0, generator, pipeline.scheduler, timesteps, x_0, no_add_noise=False)
+    x_ts = [xt.to(dtype=torch.float16) for xt in x_ts]
+    latents = [x_ts[0]]
+    x_ts_c_hat = [None]
+    config.ws1 = [w1] * num_steps_actual
+    config.ws2 = [w2] * num_steps_actual
+    pipeline.scheduler = get_ddpm_inversion_scheduler(
+        pipeline.scheduler,
+        config.step_function,
+        config,
+        timesteps,
+        config.save_timesteps,
+        latents,
+        x_ts,
+        x_ts_c_hat,
+        args.save_intermediate_results,
+        pipeline,
+        x_0,
+        v1s_images := [],
+        v2s_images := [],
+        deltas_images := [],
+        v1_x0s := [],
+        v2_x0s := [],
+        deltas_x0s := [],
+        "res12",
+        image_name="im_name",
+        time_measure_n=args.time_measure_n,
+    )
+    latent = latents[0].expand(3, -1, -1, -1)
+    prompt = [src_prompt, src_prompt, tgt_prompt]
+    conditioning, pooled = compel_proc(prompt)
+    image = pipeline.__call__(
+        image=latent,
+        prompt_embeds=conditioning,
+        pooled_prompt_embeds=pooled,
+        eta=1,
+    ).images
+    return image[2]
+
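+# helper: VAE-encode a PIL image into scaled fp16 latents; the VAE is
+# temporarily upcast to fp32 when force_upcast is set, since the fp16
+# SDXL VAE is prone to numerical overflow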
+def encode_image(image, pipe):
+    image = pipe.image_processor.preprocess(image)
+    originDtype = pipe.dtype
+    image = image.to(device=device, dtype=originDtype)
+
+    if pipe.vae.config.force_upcast:
+        image = image.float()
+        pipe.vae.to(dtype=torch.float32)
+
+    if isinstance(generator, list):
+        init_latents = [
+            retrieve_latents(pipe.vae.encode(image[i : i + 1]), generator=generator[i])
+            for i in range(1)
+        ]
+        init_latents = torch.cat(init_latents, dim=0)
+    else:
+        init_latents = retrieve_latents(pipe.vae.encode(image), generator=generator)
+
+    if pipe.vae.config.force_upcast:
+        pipe.vae.to(originDtype)
+
+    init_latents = init_latents.to(originDtype)
+    init_latents = pipe.vae.config.scaling_factor * init_latents
+
+    return init_latents.to(dtype=torch.float16)
+
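+# vendored copy of diffusers' StableDiffusionXLImg2ImgPipeline.get_timesteps;
+# run() above calls pipeline.get_timesteps, so this module-level copy is unused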
+def get_timesteps(pipe, num_inference_steps, strength, device, denoising_start=None):
+    # get the original timestep using init_timestep
+    if denoising_start is None:
+        init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
+        t_start = max(num_inference_steps - init_timestep, 0)
+    else:
+        t_start = 0
+
+    timesteps = pipe.scheduler.timesteps[t_start * pipe.scheduler.order :]
+
+    # Strength is irrelevant if we directly request a timestep to start at;
+    # that is, strength is determined by the denoising_start instead.
+    if denoising_start is not None:
+        discrete_timestep_cutoff = int(
+            round(
+                pipe.scheduler.config.num_train_timesteps
+                - (denoising_start * pipe.scheduler.config.num_train_timesteps)
+            )
+        )
+
+        num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
+        if pipe.scheduler.order == 2 and num_inference_steps % 2 == 0:
+            # if the scheduler is a 2nd order scheduler we might have to do +1
+            # because `num_inference_steps` might be even given that every timestep
+            # (except the highest one) is duplicated. If `num_inference_steps` is even it would
+            # mean that we cut the timesteps in the middle of the denoising step
+            # (between 1st and 2nd derivative) which leads to incorrect results. By adding 1
+            # we ensure that the denoising process always ends after the 2nd derivative step of the scheduler
+            num_inference_steps = num_inference_steps + 1
+
+        # because t_n+1 >= t_n, we slice the timesteps starting from the end
+        timesteps = timesteps[-num_inference_steps:]
+        return timesteps, num_inference_steps
+
+    return timesteps, num_inference_steps - t_start
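
For reference, a minimal driver for the new inversion_run_base module, assuming the sibling files it imports (inversion_utils.py, config.py, run_configs/noise_shift_guidance_1_5.yaml) are present; the positional argument order follows the run() signature above, and the values mirror the app_base.py UI defaults:

from PIL import Image
from inversion_run_base import run

src = Image.open("face.png").convert("RGB")  # hypothetical input path

edited = run(
    src,
    "a woman, photo",                                                    # src_prompt
    "a beautiful woman, photo, hollywood style face, 8k, high quality",  # tgt_prompt
    1024,  # generate_size
    8,     # seed
    2.0,   # w1
    1.0,   # w2 (app_base.py hardcodes this)
    20,    # num_steps
    15,    # start_step
    0.0,   # guidance_scale
    0.3,   # strength
)
edited.save("edited.png")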