Commit 7fe0dcd (parent 48c5d15), committed by zhiweili: fix pipeline

Files changed:
- app.py +3 -3
- app_haircolor.py +1 -1
- pipelines/pipeline_sdxl_adapter_img2img.py +60 -14
app.py CHANGED
@@ -1,12 +1,12 @@
 import gradio as gr
 
-from app_base import create_demo as create_demo_face
+# from app_base import create_demo as create_demo_face
 from app_haircolor import create_demo as create_demo_haircolor
 
 with gr.Blocks(css="style.css") as demo:
     with gr.Tabs():
-        with gr.Tab(label="Face"):
-            create_demo_face()
+        # with gr.Tab(label="Face"):
+        #     create_demo_face()
         with gr.Tab(label="Hair Color"):
             create_demo_haircolor()
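Note: after this change only the Hair Color tab is built; the Face tab and its import are commented out rather than deleted. A minimal, self-contained sketch of the remaining tab wiring (illustrative only: the stub below stands in for app_haircolor.create_demo, and css is omitted so it runs without style.css):

import gradio as gr

def create_demo_haircolor_stub():
    # placeholder for app_haircolor.create_demo(), which builds the real hair-color UI
    gr.Markdown("Hair Color demo placeholder")
    gr.Image(label="input image")

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab(label="Hair Color"):
            create_demo_haircolor_stub()

# demo.launch()  # uncomment to serve the sketch locally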
app_haircolor.py CHANGED
@@ -13,6 +13,7 @@ from segment_utils import(
 from gfpgan.utils import GFPGANer
 from basicsr.archs.srvgg_arch import SRVGGNetCompact
 from realesrgan.utils import RealESRGANer
+from inversion_run_realvxl_adapter import run as realvxl_run
 
 
 DEFAULT_SRC_PROMPT = "a woman"
@@ -23,7 +24,6 @@ DEFAULT_CATEGORY = "hair"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 def create_demo() -> gr.Blocks:
-    from inversion_run_realvxl_adapter import run as realvxl_run
     model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
     model_path = 'realesr-general-x4v3.pth'
    half = True if torch.cuda.is_available() else False
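Note: the only change here hoists the inversion_run_realvxl_adapter import from inside create_demo() to module level, so a missing or broken module now fails when app_haircolor.py is imported instead of when the demo is first built. A tiny illustration of that difference using a standard-library stand-in (not the Space's actual module):

from json import dumps  # module-level import: resolved as soon as this file is imported

def build_demo_sketch():
    from json import loads  # function-local import: resolved only when the function runs
    return loads(dumps({"demo": "built"}))

print(build_demo_sketch())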
pipelines/pipeline_sdxl_adapter_img2img.py CHANGED
@@ -89,6 +89,7 @@ from diffusers.pipelines.stable_diffusion_xl.pipeline_output import (
     StableDiffusionXLPipelineOutput,
 )
 
+
 if is_invisible_watermark_available():
     from diffusers.pipelines.stable_diffusion_xl.watermark import (
         StableDiffusionXLWatermarker,
@@ -123,6 +124,7 @@ EXAMPLE_DOC_STRING = """
         ```
 """
 
+
 def _preprocess_adapter_image(image, height, width):
     if isinstance(image, torch.Tensor):
         return image
@@ -591,6 +593,52 @@ class StableDiffusionXLImg2ImgPipeline(
 
         return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_ip_adapter_image_embeds
+    def prepare_ip_adapter_image_embeds(
+        self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
+    ):
+        image_embeds = []
+        if do_classifier_free_guidance:
+            negative_image_embeds = []
+        if ip_adapter_image_embeds is None:
+            if not isinstance(ip_adapter_image, list):
+                ip_adapter_image = [ip_adapter_image]
+
+            if len(ip_adapter_image) != len(self.unet.encoder_hid_proj.image_projection_layers):
+                raise ValueError(
+                    f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
+                )
+
+            for single_ip_adapter_image, image_proj_layer in zip(
+                ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
+            ):
+                output_hidden_state = not isinstance(image_proj_layer, ImageProjection)
+                single_image_embeds, single_negative_image_embeds = self.encode_image(
+                    single_ip_adapter_image, device, 1, output_hidden_state
+                )
+
+                image_embeds.append(single_image_embeds[None, :])
+                if do_classifier_free_guidance:
+                    negative_image_embeds.append(single_negative_image_embeds[None, :])
+        else:
+            for single_image_embeds in ip_adapter_image_embeds:
+                if do_classifier_free_guidance:
+                    single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
+                    negative_image_embeds.append(single_negative_image_embeds)
+                image_embeds.append(single_image_embeds)
+
+        ip_adapter_image_embeds = []
+        for i, single_image_embeds in enumerate(image_embeds):
+            single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
+            if do_classifier_free_guidance:
+                single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
+                single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)
+
+            single_image_embeds = single_image_embeds.to(device=device)
+            ip_adapter_image_embeds.append(single_image_embeds)
+
+        return ip_adapter_image_embeds
+
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
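Note: the added prepare_ip_adapter_image_embeds (copied from the upstream StableDiffusionPipeline) encodes each IP-Adapter image, repeats the embeddings num_images_per_prompt times, and, under classifier-free guidance, concatenates negative embeddings in front of positive ones. A minimal tensor-level sketch of that tiling and stacking for a precomputed embedding; the shapes are invented for illustration and this is not the pipeline method itself:

import torch

num_images_per_prompt = 2
precomputed = torch.randn(2, 4, 768)  # first half negative, second half positive (illustrative shape)

negative, positive = precomputed.chunk(2)                        # split, as the method does for precomputed embeds
positive = torch.cat([positive] * num_images_per_prompt, dim=0)  # repeat per generated image
negative = torch.cat([negative] * num_images_per_prompt, dim=0)
stacked = torch.cat([negative, positive], dim=0)                 # [negative, positive] ordering for CFG

print(stacked.shape)  # torch.Size([4, 4, 768])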
@@ -704,16 +752,14 @@ class StableDiffusionXLImg2ImgPipeline(
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
+        else:
+            t_start = 0
 
-            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
-            if hasattr(self.scheduler, "set_begin_index"):
-                self.scheduler.set_begin_index(t_start * self.scheduler.order)
-
-            return timesteps, num_inference_steps - t_start
+        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
 
-        else:
-            # Strength is irrelevant if we directly request a timestep to start at;
-            # that is, strength is determined by the denoising_start instead.
+        # Strength is irrelevant if we directly request a timestep to start at;
+        # that is, strength is determined by the denoising_start instead.
+        if denoising_start is not None:
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -721,7 +767,7 @@ class StableDiffusionXLImg2ImgPipeline(
                 )
             )
 
-            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -732,12 +778,11 @@ class StableDiffusionXLImg2ImgPipeline(
                 num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            t_start = len(self.scheduler.timesteps) - num_inference_steps
-            timesteps = self.scheduler.timesteps[t_start:]
-            if hasattr(self.scheduler, "set_begin_index"):
-                self.scheduler.set_begin_index(t_start)
+            timesteps = timesteps[-num_inference_steps:]
             return timesteps, num_inference_steps
 
+        return timesteps, num_inference_steps - t_start
+
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
     ):
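Note: the three hunks above revert get_timesteps to the older upstream behaviour: t_start is derived from strength (or forced to 0 when denoising_start is given), the set_begin_index calls are dropped, and the cutoff is counted against the already-sliced timesteps. A standalone sketch of the strength arithmetic on the denoising_start-is-None path (not the pipeline method itself):

def steps_from_strength(num_inference_steps: int, strength: float) -> int:
    init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
    t_start = max(num_inference_steps - init_timestep, 0)
    return num_inference_steps - t_start  # denoising steps actually run on the init image

# e.g. 30 scheduled steps at strength 0.6 keep the last 18 steps
assert steps_from_strength(30, 0.6) == 18
print(steps_from_strength(30, 0.6))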
@@ -1409,6 +1454,7 @@ class StableDiffusionXLImg2ImgPipeline(
         target_size = target_size or (height, width)
 
         # 8. Prepare added time ids & embeddings
+        # adapter_input = adapter_input.type(latents.dtype)
         if isinstance(self.adapter, MultiAdapter):
             adapter_state = self.adapter(adapter_input, adapter_conditioning_scale)
             for k, v in enumerate(adapter_state):
@@ -1521,7 +1567,7 @@ class StableDiffusionXLImg2ImgPipeline(
                    down_intrablock_additional_residuals = [state.clone() for state in adapter_state]
                 else:
                     down_intrablock_additional_residuals = None
-
+
                 noise_pred = self.unet(
                     latent_model_input,
                     t,