import time
import math
import logging
from PIL import Image, ImageDraw, ImageFilter
from backend.models.lcmdiffusion_setting import DiffusionTask
from context import Context
from constants import DEVICE


def _build_default_tiles(image_size, upscale_settings):
    """Return the default tile definitions covering the whole source image.

    The image is split into a grid of ``tile_size`` cells. Every tile except
    the ones in the last column/row is enlarged by ``tile_overlap`` so that
    neighbouring tiles overlap, and every tile except the ones in the first
    column/row gets a mask box offset by ``tile_overlap``.

    Args:
        image_size: ``(width, height)`` of the source image in pixels.
        upscale_settings: Settings dict; reads ``tile_size``,
            ``scale_factor``, ``tile_overlap`` and ``prompt``.

    Returns:
        A list of tile dicts with the keys ``x``, ``y``, ``w``, ``h``,
        ``mask_box``, ``prompt`` and ``scale_factor``.
    """
    tile_size = upscale_settings["tile_size"]
    scale_factor = upscale_settings["scale_factor"]
    tile_overlap = upscale_settings["tile_overlap"]
    total_cols = math.ceil(image_size[0] / tile_size)  # Image width / tile size
    total_rows = math.ceil(image_size[1] / tile_size)  # Image height / tile size

    tiles = []
    for row in range(total_rows):
        y_offset = tile_overlap if row > 0 else 0  # Tile mask offset
        h = tile_size + (tile_overlap if row < total_rows - 1 else 0)
        for col in range(total_cols):
            x_offset = tile_overlap if col > 0 else 0  # Tile mask offset
            w = tile_size + (tile_overlap if col < total_cols - 1 else 0)
            tiles.append(
                {
                    "x": col * tile_size,
                    "y": row * tile_size,
                    "w": w,
                    "h": h,
                    # Default tile mask box definition
                    "mask_box": (
                        x_offset,
                        y_offset,
                        int(w * scale_factor),
                        int(h * scale_factor),
                    ),
                    # Use top level prompt if available
                    "prompt": upscale_settings["prompt"],
                    "scale_factor": scale_factor,
                }
            )
    return tiles


def generate_upscaled_image(
    config,
    input_path=None,
    strength=0.3,
    scale_factor=2.0,
    tile_overlap=16,
    upscale_settings=None,
    context: Context = None,
    output_path=None,
    image_format="PNG",
):
    """Upscale an image tile by tile and save the result to ``output_path``.

    Either ``input_path`` or a ready-made ``upscale_settings`` dict must be
    given. When ``upscale_settings`` is omitted it is built from the function
    arguments with a tile size of 256 and an empty tile list; when it is
    given, the individual ``strength``/``scale_factor``/``tile_overlap``/
    ``image_format`` arguments are ignored in favour of the dict entries.

    Args:
        config: Application settings object; its ``lcm_diffusion_setting``
            is modified while the tiles are generated.
        input_path: Path of the source image (used only when
            ``upscale_settings`` is not provided).
        strength: Image-to-image strength stored in the settings dict.
        scale_factor: Scale factor of the target image.
        tile_overlap: Overlap in pixels between neighbouring tiles.
        upscale_settings: Optional settings dict; it is updated in place
            with ``source_image``, ``target_image`` and, if its ``tiles``
            list is empty, the default tile definitions.
        context: Generation context forwarded to the tile generator.
        output_path: Destination passed to ``Image.save``.
        image_format: Output format name; ``"JPEG"`` (case-insensitive)
            makes the result be converted to RGB before saving.

    Returns:
        None. Invalid arguments are logged and the function returns early.
    """
    # The input path is only mandatory when no settings dict is supplied;
    # the parentheses matter because `and` binds tighter than `or`.
    if config is None or (not input_path and upscale_settings is None):
        logging.error("Wrong arguments in tiled upscale function call!")
        return

    # Use the upscale_settings dict if provided; otherwise, build the
    # upscale_settings dict using the function arguments and default values
    if upscale_settings is None:
        upscale_settings = {
            "source_file": input_path,
            "target_file": None,
            "output_format": image_format,
            "strength": strength,
            "scale_factor": scale_factor,
            "prompt": config.lcm_diffusion_setting.prompt,
            "tile_overlap": tile_overlap,
            "tile_size": 256,
            "tiles": [],
        }
    source_image = Image.open(upscale_settings["source_file"])  # PIL image
    upscale_settings["source_image"] = source_image

    result = None
    try:
        if upscale_settings["target_file"]:
            result = Image.open(upscale_settings["target_file"])
        else:
            # Scale first and truncate afterwards so the canvas matches the
            # paste coordinates (int(x * scale_factor)) for fractional
            # scale factors as well
            target_scale = upscale_settings["scale_factor"]
            result = Image.new(
                mode="RGBA",
                size=(
                    int(source_image.size[0] * target_scale),
                    int(source_image.size[1] * target_scale),
                ),
                color=(0, 0, 0, 0),
            )
        upscale_settings["target_image"] = result

        # If the custom tile definition array 'tiles' is empty, proceed with
        # the default tiled upscale task by defining all the possible image
        # tiles; note that the actual tile size is 'tile_size' +
        # 'tile_overlap' and the target image width and height are no longer
        # constrained to multiples of 256 but are instead multiples of the
        # actual tile size
        if not upscale_settings["tiles"]:
            upscale_settings["tiles"].extend(
                _build_default_tiles(source_image.size, upscale_settings)
            )

        # Generate the output image tiles
        for index in range(len(upscale_settings["tiles"])):
            generate_upscaled_tile(
                config,
                index,
                upscale_settings,
                context=context,
            )

        # Save completed upscaled image
        if upscale_settings["output_format"].upper() == "JPEG":
            result_rgb = result.convert("RGB")
            result.close()
            result = result_rgb
        result.save(output_path)
    finally:
        # Release the images even when tile generation or saving fails
        if result is not None:
            result.close()
        source_image.close()
    return


def get_current_tile(
    config,
    context,
    strength,
):
    """Generate one tile with the image-to-image task and return it.

    Sets ``strength`` and the image-to-image diffusion task on
    ``config.lcm_diffusion_setting``, switches the tiny auto encoder off when
    it is enabled together with OpenVINO, and returns the first image
    produced by ``context.generate_text_to_image``.
    """
    settings = config.lcm_diffusion_setting
    settings.strength = strength
    settings.diffusion_task = DiffusionTask.image_to_image.value
    # NOTE(review): the tiny auto encoder is disabled for OpenVINO runs;
    # presumably the combination is unsupported here - confirm
    if settings.use_tiny_auto_encoder and settings.use_openvino:
        settings.use_tiny_auto_encoder = False
    generated_images = context.generate_text_to_image(
        settings=config,
        reshape=True,
        device=DEVICE,
        save_images=False,
        save_config=False,
    )
    return generated_images[0]


# Generates a single tile from the source image as defined in the
# upscale_settings["tiles"] array with the corresponding index and pastes the
# generated tile into the target image using the corresponding mask and scale
# factor; note that scale factor for the target image and the individual tiles
# can be different, this function will adjust scale factors as needed
def generate_upscaled_tile(
    config,
    index,
    upscale_settings,
    context: Context = None,
):
    """Generate tile ``index`` and paste it into the target image.

    Args:
        config: Application settings object; its ``lcm_diffusion_setting``
            is updated with the tile prompt, size and init image.
        index: Position of the tile in ``upscale_settings["tiles"]``.
        upscale_settings: Settings dict holding the tile definitions, the
            global ``scale_factor`` and ``strength``, and the opened
            ``source_image`` and ``target_image``.
        context: Generation context used to produce the tile.

    Returns:
        None. Invalid arguments are logged and the function returns early.
    """
    if config is None or upscale_settings is None:
        logging.error("Wrong arguments in tile creation function call!")
        return

    tile = upscale_settings["tiles"][index]
    x = tile["x"]
    y = tile["y"]
    w = tile["w"]
    h = tile["h"]
    tile_prompt = tile["prompt"]
    scale_factor = upscale_settings["scale_factor"]
    tile_scale_factor = tile["scale_factor"]
    target_width = int(w * tile_scale_factor)
    target_height = int(h * tile_scale_factor)
    strength = upscale_settings["strength"]
    source_image = upscale_settings["source_image"]
    target_image = upscale_settings["target_image"]
    mask_image = generate_tile_mask(config, index, upscale_settings)

    settings = config.lcm_diffusion_setting
    settings.number_of_images = 1
    settings.prompt = tile_prompt
    settings.image_width = target_width
    settings.image_height = target_height
    settings.init_image = source_image.crop((x, y, x + w, y + h))

    print(f"[SD Upscale] Generating tile {index + 1}/{len(upscale_settings['tiles'])} ")
    if tile_prompt is None or tile_prompt == "":
        # Without a tile prompt, generate with empty positive and negative
        # prompts
        settings.prompt = ""
        settings.negative_prompt = ""

    current_tile = None
    try:
        # Use img2img with low denoising strength to generate the tile,
        # with the extra aid of a prompt when one is available
        current_tile = get_current_tile(config, context, strength)
        paste_position = (int(x * scale_factor), int(y * scale_factor))
        if math.isclose(scale_factor, tile_scale_factor):
            target_image.paste(current_tile, paste_position, mask_image)
        else:
            # The tile was generated at its own scale factor; bring both the
            # tile and its mask to the target image scale before pasting
            paste_size = (int(w * scale_factor), int(h * scale_factor))
            resized_tile = current_tile.resize(paste_size)
            resized_mask = mask_image.resize(paste_size)
            try:
                target_image.paste(resized_tile, paste_position, resized_mask)
            finally:
                resized_tile.close()
                resized_mask.close()
    finally:
        mask_image.close()
        if current_tile is not None:
            current_tile.close()
        config.lcm_diffusion_setting.init_image.close()


# Generate tile mask using the box definition in the upscale_settings["tiles"]
# array with the corresponding index; note that tile masks for the default
# tiled upscale task can be reused but that would complicate the code, so
# new tile masks are instead created for each tile
def generate_tile_mask(
    config,
    index,
    upscale_settings,
):
    """Build the blurred paste mask for tile ``index``.

    Args:
        config: Unused; kept for signature compatibility.
        index: Position of the tile in ``upscale_settings["tiles"]``.
        upscale_settings: Settings dict; reads ``tile_overlap`` and the
            tile's ``w``, ``h``, ``scale_factor`` and optional ``mask_box``.

    Returns:
        A new RGBA PIL image with a filled rectangle over ``mask_box``,
        box-blurred with a radius of ``tile_overlap - 1`` (never below 0).
    """
    tile = upscale_settings["tiles"][index]
    tile_overlap = upscale_settings["tile_overlap"]
    tile_scale_factor = tile["scale_factor"]
    w = int(tile["w"] * tile_scale_factor)
    h = int(tile["h"] * tile_scale_factor)
    # The Stable Diffusion pipeline automatically adjusts the output size
    # to multiples of 8 pixels; the mask must be created with the same
    # size as the output tile
    w -= w % 8
    h -= h % 8

    # Custom tile definitions may leave the mask box out entirely
    mask_box = tile.get("mask_box")
    if mask_box is None:
        # Build a default solid mask with soft/transparent edges
        mask_box = (
            tile_overlap,
            tile_overlap,
            w - tile_overlap,
            h - tile_overlap,
        )

    mask_image = Image.new(mode="RGBA", size=(w, h), color=(0, 0, 0, 0))
    try:
        mask_draw = ImageDraw.Draw(mask_image)
        mask_draw.rectangle(tuple(mask_box), fill=(0, 0, 0))
        # Clamp the radius: a tile overlap of 0 would otherwise request a
        # negative blur radius
        blur_radius = max(tile_overlap - 1, 0)
        return mask_image.filter(ImageFilter.BoxBlur(blur_radius))
    finally:
        mask_image.close()