Spaces:
Runtime error
Runtime error
Commit
Β·
e53bd2f
1
Parent(s):
a5c3b38
feat: add `_get_crops_coords_list` function to automatically define the (ctop, cleft) crop coordinates used to focus image generation for each tile; this helps to better harmonize the image and corrects the problem of flattened elements.
Browse files- app.py +5 -4
- mixture_tiling_sdxl.py +58 -9
app.py
CHANGED
|
@@ -32,7 +32,7 @@ pipe = StableDiffusionXLTilingPipeline.from_pretrained(
|
|
| 32 |
#variant="fp16",
|
| 33 |
).to("cuda")
|
| 34 |
|
| 35 |
-
|
| 36 |
pipe.enable_vae_tiling()
|
| 37 |
pipe.enable_vae_slicing()
|
| 38 |
|
|
@@ -50,6 +50,8 @@ def select_scheduler(scheduler_name):
|
|
| 50 |
scheduler = scheduler.from_config(pipe.scheduler.config, **add_kwargs)
|
| 51 |
return scheduler
|
| 52 |
|
|
|
|
|
|
|
| 53 |
@spaces.GPU
|
| 54 |
def predict(left_prompt, center_prompt, right_prompt, negative_prompt, left_gs, center_gs, right_gs, overlap_pixels, steps, generation_seed, scheduler, tile_height, tile_width, target_height, target_width):
|
| 55 |
global pipe
|
|
@@ -80,10 +82,9 @@ def predict(left_prompt, center_prompt, right_prompt, negative_prompt, left_gs,
|
|
| 80 |
tile_width=tile_width,
|
| 81 |
tile_row_overlap=0,
|
| 82 |
tile_col_overlap=overlap_pixels,
|
| 83 |
-
guidance_scale_tiles=[[left_gs, center_gs, right_gs]],
|
| 84 |
height=target_height,
|
| 85 |
-
width=target_width,
|
| 86 |
-
target_size=(target_height, target_width),
|
| 87 |
generator=generator,
|
| 88 |
num_inference_steps=steps,
|
| 89 |
)["images"][0]
|
|
|
|
| 32 |
#variant="fp16",
|
| 33 |
).to("cuda")
|
| 34 |
|
| 35 |
+
pipe.enable_model_cpu_offload() #<< Enable this if you have limited VRAM
|
| 36 |
pipe.enable_vae_tiling()
|
| 37 |
pipe.enable_vae_slicing()
|
| 38 |
|
|
|
|
| 50 |
scheduler = scheduler.from_config(pipe.scheduler.config, **add_kwargs)
|
| 51 |
return scheduler
|
| 52 |
|
| 53 |
+
|
| 54 |
+
|
| 55 |
@spaces.GPU
|
| 56 |
def predict(left_prompt, center_prompt, right_prompt, negative_prompt, left_gs, center_gs, right_gs, overlap_pixels, steps, generation_seed, scheduler, tile_height, tile_width, target_height, target_width):
|
| 57 |
global pipe
|
|
|
|
| 82 |
tile_width=tile_width,
|
| 83 |
tile_row_overlap=0,
|
| 84 |
tile_col_overlap=overlap_pixels,
|
| 85 |
+
guidance_scale_tiles=[[left_gs, center_gs, right_gs]],
|
| 86 |
height=target_height,
|
| 87 |
+
width=target_width,
|
|
|
|
| 88 |
generator=generator,
|
| 89 |
num_inference_steps=steps,
|
| 90 |
)["images"][0]
|
mixture_tiling_sdxl.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# Copyright
|
| 2 |
#
|
| 3 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
# you may not use this file except in compliance with the License.
|
|
@@ -150,6 +150,49 @@ def _tile2latent_exclusive_indices(
|
|
| 150 |
# return row_init, row_end, col_init, col_end
|
| 151 |
return row_segment[0], row_segment[1], col_segment[0], col_segment[1]
|
| 152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
| 155 |
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
@@ -757,10 +800,10 @@ class StableDiffusionXLTilingPipeline(
|
|
| 757 |
return_dict: bool = True,
|
| 758 |
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
| 759 |
original_size: Optional[Tuple[int, int]] = None,
|
| 760 |
-
crops_coords_top_left: Tuple[int, int] =
|
| 761 |
target_size: Optional[Tuple[int, int]] = None,
|
| 762 |
negative_original_size: Optional[Tuple[int, int]] = None,
|
| 763 |
-
negative_crops_coords_top_left: Tuple[int, int] =
|
| 764 |
negative_target_size: Optional[Tuple[int, int]] = None,
|
| 765 |
clip_skip: Optional[int] = None,
|
| 766 |
tile_height: Optional[int] = 1024,
|
|
@@ -826,7 +869,7 @@ class StableDiffusionXLTilingPipeline(
|
|
| 826 |
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
| 827 |
explained in section 2.2 of
|
| 828 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
| 829 |
-
crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
|
| 830 |
`crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
|
| 831 |
`crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
|
| 832 |
`crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
|
|
@@ -840,7 +883,7 @@ class StableDiffusionXLTilingPipeline(
|
|
| 840 |
micro-conditioning as explained in section 2.2 of
|
| 841 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
|
| 842 |
information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
|
| 843 |
-
negative_crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
|
| 844 |
To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
|
| 845 |
micro-conditioning as explained in section 2.2 of
|
| 846 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
|
|
@@ -883,6 +926,8 @@ class StableDiffusionXLTilingPipeline(
|
|
| 883 |
|
| 884 |
original_size = original_size or (height, width)
|
| 885 |
target_size = target_size or (height, width)
|
|
|
|
|
|
|
| 886 |
|
| 887 |
self._guidance_scale = guidance_scale
|
| 888 |
self._clip_skip = clip_skip
|
|
@@ -890,8 +935,7 @@ class StableDiffusionXLTilingPipeline(
|
|
| 890 |
self._interrupt = False
|
| 891 |
|
| 892 |
grid_rows = len(prompt)
|
| 893 |
-
grid_cols = len(prompt[0])
|
| 894 |
-
|
| 895 |
tiles_mode = [mode.value for mode in self.SeedTilesMode]
|
| 896 |
|
| 897 |
if isinstance(seed_tiles_mode, str):
|
|
@@ -913,6 +957,11 @@ class StableDiffusionXLTilingPipeline(
|
|
| 913 |
batch_size = 1
|
| 914 |
|
| 915 |
device = self._execution_device
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 916 |
|
| 917 |
# update height and width tile size and tile overlap size
|
| 918 |
height = tile_height + (grid_rows - 1) * (tile_height - tile_row_overlap)
|
|
@@ -1020,7 +1069,7 @@ class StableDiffusionXLTilingPipeline(
|
|
| 1020 |
text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
|
| 1021 |
add_time_ids = self._get_add_time_ids(
|
| 1022 |
original_size,
|
| 1023 |
-
crops_coords_top_left,
|
| 1024 |
target_size,
|
| 1025 |
dtype=prompt_embeds.dtype,
|
| 1026 |
text_encoder_projection_dim=text_encoder_projection_dim,
|
|
@@ -1028,7 +1077,7 @@ class StableDiffusionXLTilingPipeline(
|
|
| 1028 |
if negative_original_size is not None and negative_target_size is not None:
|
| 1029 |
negative_add_time_ids = self._get_add_time_ids(
|
| 1030 |
negative_original_size,
|
| 1031 |
-
negative_crops_coords_top_left,
|
| 1032 |
negative_target_size,
|
| 1033 |
dtype=prompt_embeds.dtype,
|
| 1034 |
text_encoder_projection_dim=text_encoder_projection_dim,
|
|
|
|
| 1 |
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
| 2 |
#
|
| 3 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
# you may not use this file except in compliance with the License.
|
|
|
|
| 150 |
# return row_init, row_end, col_init, col_end
|
| 151 |
return row_segment[0], row_segment[1], col_segment[0], col_segment[1]
|
| 152 |
|
| 153 |
+
def _get_crops_coords_list(num_rows, num_cols, output_width):
|
| 154 |
+
"""
|
| 155 |
+
Generates a list of lists of `crops_coords_top_left` tuples for focusing on
|
| 156 |
+
different horizontal parts of an image, and repeats this list for the specified
|
| 157 |
+
number of rows in the output structure.
|
| 158 |
+
|
| 159 |
+
This function calculates `crops_coords_top_left` tuples to create horizontal
|
| 160 |
+
focus variations (like left, center, right focus) based on `output_width`
|
| 161 |
+
and `num_cols` (which represents the number of horizontal focus points/columns).
|
| 162 |
+
It then repeats the *list* of these horizontal focus tuples `num_rows` times to
|
| 163 |
+
create the final list of lists output structure.
|
| 164 |
+
|
| 165 |
+
Args:
|
| 166 |
+
num_rows (int): The desired number of rows in the output list of lists.
|
| 167 |
+
This determines how many times the list of horizontal
|
| 168 |
+
focus variations will be repeated.
|
| 169 |
+
num_cols (int): The number of horizontal focus points (columns) to generate.
|
| 170 |
+
This determines how many horizontal focus variations are
|
| 171 |
+
created based on dividing the `output_width`.
|
| 172 |
+
output_width (int): The desired width of the output image.
|
| 173 |
+
|
| 174 |
+
Returns:
|
| 175 |
+
list[list[tuple[int, int]]]: A list of lists of tuples. Each inner list
|
| 176 |
+
contains `num_cols` tuples of `(ctop, cleft)`,
|
| 177 |
+
representing horizontal focus points. The outer list
|
| 178 |
+
contains `num_rows` such inner lists.
|
| 179 |
+
"""
|
| 180 |
+
crops_coords_list = []
|
| 181 |
+
if num_cols <= 0:
|
| 182 |
+
crops_coords_list = []
|
| 183 |
+
elif num_cols == 1:
|
| 184 |
+
crops_coords_list = [(0, 0)]
|
| 185 |
+
else:
|
| 186 |
+
section_width = output_width / num_cols
|
| 187 |
+
for i in range(num_cols):
|
| 188 |
+
cleft = int(round(i * section_width))
|
| 189 |
+
crops_coords_list.append((0, cleft))
|
| 190 |
+
|
| 191 |
+
result_list = []
|
| 192 |
+
for _ in range(num_rows):
|
| 193 |
+
result_list.append(list(crops_coords_list))
|
| 194 |
+
|
| 195 |
+
return result_list
|
| 196 |
|
| 197 |
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.rescale_noise_cfg
|
| 198 |
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
|
|
| 800 |
return_dict: bool = True,
|
| 801 |
cross_attention_kwargs: Optional[Dict[str, Any]] = None,
|
| 802 |
original_size: Optional[Tuple[int, int]] = None,
|
| 803 |
+
crops_coords_top_left: Optional[List[List[Tuple[int, int]]]] = None,
|
| 804 |
target_size: Optional[Tuple[int, int]] = None,
|
| 805 |
negative_original_size: Optional[Tuple[int, int]] = None,
|
| 806 |
+
negative_crops_coords_top_left: Optional[List[List[Tuple[int, int]]]] = None,
|
| 807 |
negative_target_size: Optional[Tuple[int, int]] = None,
|
| 808 |
clip_skip: Optional[int] = None,
|
| 809 |
tile_height: Optional[int] = 1024,
|
|
|
|
| 869 |
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
| 870 |
explained in section 2.2 of
|
| 871 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
|
| 872 |
+
crops_coords_top_left (`List[List[Tuple[int, int]]]`, *optional*, defaults to `None`):
|
| 873 |
`crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
|
| 874 |
`crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
|
| 875 |
`crops_coords_top_left` to (0, 0). Part of SDXL's micro-conditioning as explained in section 2.2 of
|
|
|
|
| 883 |
micro-conditioning as explained in section 2.2 of
|
| 884 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
|
| 885 |
information, refer to this issue thread: https://github.com/huggingface/diffusers/issues/4208.
|
| 886 |
+
negative_crops_coords_top_left (`List[List[Tuple[int, int]]]`, *optional*, defaults to `None`):
|
| 887 |
To negatively condition the generation process based on a specific crop coordinates. Part of SDXL's
|
| 888 |
micro-conditioning as explained in section 2.2 of
|
| 889 |
[https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952). For more
|
|
|
|
| 926 |
|
| 927 |
original_size = original_size or (height, width)
|
| 928 |
target_size = target_size or (height, width)
|
| 929 |
+
negative_original_size = negative_original_size or (height, width)
|
| 930 |
+
negative_target_size = negative_target_size or (height, width)
|
| 931 |
|
| 932 |
self._guidance_scale = guidance_scale
|
| 933 |
self._clip_skip = clip_skip
|
|
|
|
| 935 |
self._interrupt = False
|
| 936 |
|
| 937 |
grid_rows = len(prompt)
|
| 938 |
+
grid_cols = len(prompt[0])
|
|
|
|
| 939 |
tiles_mode = [mode.value for mode in self.SeedTilesMode]
|
| 940 |
|
| 941 |
if isinstance(seed_tiles_mode, str):
|
|
|
|
| 957 |
batch_size = 1
|
| 958 |
|
| 959 |
device = self._execution_device
|
| 960 |
+
|
| 961 |
+
# update crops coords list
|
| 962 |
+
crops_coords_top_left = _get_crops_coords_list(grid_rows, grid_cols, tile_width)
|
| 963 |
+
if negative_original_size is not None and negative_target_size is not None:
|
| 964 |
+
negative_crops_coords_top_left = _get_crops_coords_list(grid_rows, grid_cols, tile_width)
|
| 965 |
|
| 966 |
# update height and width tile size and tile overlap size
|
| 967 |
height = tile_height + (grid_rows - 1) * (tile_height - tile_row_overlap)
|
|
|
|
| 1069 |
text_encoder_projection_dim = self.text_encoder_2.config.projection_dim
|
| 1070 |
add_time_ids = self._get_add_time_ids(
|
| 1071 |
original_size,
|
| 1072 |
+
crops_coords_top_left[row][col],
|
| 1073 |
target_size,
|
| 1074 |
dtype=prompt_embeds.dtype,
|
| 1075 |
text_encoder_projection_dim=text_encoder_projection_dim,
|
|
|
|
| 1077 |
if negative_original_size is not None and negative_target_size is not None:
|
| 1078 |
negative_add_time_ids = self._get_add_time_ids(
|
| 1079 |
negative_original_size,
|
| 1080 |
+
negative_crops_coords_top_left[row][col],
|
| 1081 |
negative_target_size,
|
| 1082 |
dtype=prompt_embeds.dtype,
|
| 1083 |
text_encoder_projection_dim=text_encoder_projection_dim,
|