toto10 committed on Jul 30, 2023

Commit

55ce6e1

•

1 Parent(s): 82b129b

79a9c1df70e530032ecec83bd18723ea452cf5fbcce098c3153421501b705acf

Browse files

Files changed (50) hide show

extensions-builtin/LDSR/__pycache__/ldsr_model_arch.cpython-310.pyc +0 -0
extensions-builtin/LDSR/__pycache__/preload.cpython-310.pyc +0 -0
extensions-builtin/LDSR/__pycache__/sd_hijack_autoencoder.cpython-310.pyc +0 -0
extensions-builtin/LDSR/__pycache__/sd_hijack_ddpm_v1.cpython-310.pyc +0 -0
extensions-builtin/LDSR/__pycache__/vqvae_quantize.cpython-310.pyc +0 -0
extensions-builtin/LDSR/ldsr_model_arch.py +250 -0
extensions-builtin/LDSR/preload.py +6 -0
extensions-builtin/LDSR/scripts/__pycache__/ldsr_model.cpython-310.pyc +0 -0
extensions-builtin/LDSR/scripts/ldsr_model.py +68 -0
extensions-builtin/LDSR/sd_hijack_autoencoder.py +293 -0
extensions-builtin/LDSR/sd_hijack_ddpm_v1.py +1443 -0
extensions-builtin/LDSR/vqvae_quantize.py +147 -0
extensions-builtin/Lora/__pycache__/extra_networks_lora.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/lora.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/lyco_helpers.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/network.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/network_full.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/network_hada.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/network_ia3.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/network_lokr.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/network_lora.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/networks.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/preload.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/ui_edit_user_metadata.cpython-310.pyc +0 -0
extensions-builtin/Lora/__pycache__/ui_extra_networks_lora.cpython-310.pyc +0 -0
extensions-builtin/Lora/extra_networks_lora.py +59 -0
extensions-builtin/Lora/lora.py +9 -0
extensions-builtin/Lora/lyco_helpers.py +21 -0
extensions-builtin/Lora/network.py +154 -0
extensions-builtin/Lora/network_full.py +22 -0
extensions-builtin/Lora/network_hada.py +55 -0
extensions-builtin/Lora/network_ia3.py +30 -0
extensions-builtin/Lora/network_lokr.py +64 -0
extensions-builtin/Lora/network_lora.py +86 -0
extensions-builtin/Lora/networks.py +468 -0
extensions-builtin/Lora/preload.py +7 -0
extensions-builtin/Lora/scripts/__pycache__/lora_script.cpython-310.pyc +0 -0
extensions-builtin/Lora/scripts/lora_script.py +123 -0
extensions-builtin/Lora/ui_edit_user_metadata.py +216 -0
extensions-builtin/Lora/ui_extra_networks_lora.py +78 -0
extensions-builtin/ScuNET/__pycache__/preload.cpython-310.pyc +0 -0
extensions/stable-diffusion-webui-images-browser/scripts/wib/__pycache__/wib_db.cpython-310.pyc +0 -0
extensions/stable-diffusion-webui-images-browser/scripts/wib/wib_db.py +888 -0
extensions/stable-diffusion-webui-images-browser/style.css +23 -0
extensions/stable-diffusion-webui-images-browser/wib.sqlite3 +0 -0
extensions/ultimate-upscale-for-automatic1111/.gitignore +1 -0
extensions/ultimate-upscale-for-automatic1111/LICENSE +674 -0
extensions/ultimate-upscale-for-automatic1111/README.md +43 -0
extensions/ultimate-upscale-for-automatic1111/scripts/__pycache__/ultimate-upscale.cpython-310.pyc +0 -0
extensions/ultimate-upscale-for-automatic1111/scripts/ultimate-upscale.py +557 -0

extensions-builtin/LDSR/__pycache__/ldsr_model_arch.cpython-310.pyc ADDED Viewed

Binary file (6.68 kB). View file

extensions-builtin/LDSR/__pycache__/preload.cpython-310.pyc ADDED Viewed

Binary file (483 Bytes). View file

extensions-builtin/LDSR/__pycache__/sd_hijack_autoencoder.cpython-310.pyc ADDED Viewed

Binary file (8.92 kB). View file

extensions-builtin/LDSR/__pycache__/sd_hijack_ddpm_v1.cpython-310.pyc ADDED Viewed

Binary file (42.4 kB). View file

extensions-builtin/LDSR/__pycache__/vqvae_quantize.cpython-310.pyc ADDED Viewed

Binary file (3.64 kB). View file

extensions-builtin/LDSR/ldsr_model_arch.py ADDED Viewed

	@@ -0,0 +1,250 @@

+import os
+import gc
+import time
+import numpy as np
+import torch
+import torchvision
+from PIL import Image
+from einops import rearrange, repeat
+from omegaconf import OmegaConf
+import safetensors.torch
+from ldm.models.diffusion.ddim import DDIMSampler
+from ldm.util import instantiate_from_config, ismap
+from modules import shared, sd_hijack, devices
+# Module-level cache for the loaded LDSR model; it is only filled and reused when shared.opts.ldsr_cached is enabled (see LDSR.load_model_from_config).
+cached_ldsr_model: torch.nn.Module = None
+# Create LDSR Class
+class LDSR:
+    def load_model_from_config(self, half_attention):
+        global cached_ldsr_model
+        if shared.opts.ldsr_cached and cached_ldsr_model is not None:
+            print("Loading model from cache")
+            model: torch.nn.Module = cached_ldsr_model
+        else:
+            print(f"Loading model from {self.modelPath}")
+            _, extension = os.path.splitext(self.modelPath)
+            if extension.lower() == ".safetensors":
+                pl_sd = safetensors.torch.load_file(self.modelPath, device="cpu")
+            else:
+                pl_sd = torch.load(self.modelPath, map_location="cpu")
+            sd = pl_sd["state_dict"] if "state_dict" in pl_sd else pl_sd
+            config = OmegaConf.load(self.yamlPath)
+            config.model.target = "ldm.models.diffusion.ddpm.LatentDiffusionV1"
+            model: torch.nn.Module = instantiate_from_config(config.model)
+            model.load_state_dict(sd, strict=False)
+            model = model.to(shared.device)
+            if half_attention:
+                model = model.half()
+            if shared.cmd_opts.opt_channelslast:
+                model = model.to(memory_format=torch.channels_last)
+            sd_hijack.model_hijack.hijack(model) # apply optimization
+            model.eval()
+            if shared.opts.ldsr_cached:
+                cached_ldsr_model = model
+        return {"model": model}
+    def __init__(self, model_path, yaml_path):
+        self.modelPath = model_path
+        self.yamlPath = yaml_path
+    @staticmethod
+    def run(model, selected_path, custom_steps, eta):
+        example = get_cond(selected_path)
+        n_runs = 1
+        guider = None
+        ckwargs = None
+        ddim_use_x0_pred = False
+        temperature = 1.
+        eta = eta
+        custom_shape = None
+        height, width = example["image"].shape[1:3]
+        split_input = height >= 128 and width >= 128
+        if split_input:
+            ks = 128
+            stride = 64
+            vqf = 4  #
+            model.split_input_params = {"ks": (ks, ks), "stride": (stride, stride),
+                                        "vqf": vqf,
+                                        "patch_distributed_vq": True,
+                                        "tie_braker": False,
+                                        "clip_max_weight": 0.5,
+                                        "clip_min_weight": 0.01,
+                                        "clip_max_tie_weight": 0.5,
+                                        "clip_min_tie_weight": 0.01}
+        else:
+            if hasattr(model, "split_input_params"):
+                delattr(model, "split_input_params")
+        x_t = None
+        logs = None
+        for _ in range(n_runs):
+            if custom_shape is not None:
+                x_t = torch.randn(1, custom_shape[1], custom_shape[2], custom_shape[3]).to(model.device)
+                x_t = repeat(x_t, '1 c h w -> b c h w', b=custom_shape[0])
+            logs = make_convolutional_sample(example, model,
+                                             custom_steps=custom_steps,
+                                             eta=eta, quantize_x0=False,
+                                             custom_shape=custom_shape,
+                                             temperature=temperature, noise_dropout=0.,
+                                             corrector=guider, corrector_kwargs=ckwargs, x_T=x_t,
+                                             ddim_use_x0_pred=ddim_use_x0_pred
+                                             )
+        return logs
+    def super_resolution(self, image, steps=100, target_scale=2, half_attention=False):
+        model = self.load_model_from_config(half_attention)
+        # Run settings
+        diffusion_steps = int(steps)
+        eta = 1.0
+        gc.collect()
+        devices.torch_gc()
+        im_og = image
+        width_og, height_og = im_og.size
+        # If we can adjust the max upscale size, then the 4 below should be our variable
+        down_sample_rate = target_scale / 4
+        wd = width_og * down_sample_rate
+        hd = height_og * down_sample_rate
+        width_downsampled_pre = int(np.ceil(wd))
+        height_downsampled_pre = int(np.ceil(hd))
+        if down_sample_rate != 1:
+            print(
+                f'Downsampling from [{width_og}, {height_og}] to [{width_downsampled_pre}, {height_downsampled_pre}]')
+            im_og = im_og.resize((width_downsampled_pre, height_downsampled_pre), Image.LANCZOS)
+        else:
+            print(f"Down sample rate is 1 from {target_scale} / 4 (Not downsampling)")
+        # pad width and height to multiples of 64, pads with the edge values of image to avoid artifacts
+        pad_w, pad_h = np.max(((2, 2), np.ceil(np.array(im_og.size) / 64).astype(int)), axis=0) * 64 - im_og.size
+        im_padded = Image.fromarray(np.pad(np.array(im_og), ((0, pad_h), (0, pad_w), (0, 0)), mode='edge'))
+        logs = self.run(model["model"], im_padded, diffusion_steps, eta)
+        sample = logs["sample"]
+        sample = sample.detach().cpu()
+        sample = torch.clamp(sample, -1., 1.)
+        sample = (sample + 1.) / 2. * 255
+        sample = sample.numpy().astype(np.uint8)
+        sample = np.transpose(sample, (0, 2, 3, 1))
+        a = Image.fromarray(sample[0])
+        # remove padding
+        a = a.crop((0, 0) + tuple(np.array(im_og.size) * 4))
+        del model
+        gc.collect()
+        devices.torch_gc()
+        return a
+def get_cond(selected_path):
+    example = {}
+    up_f = 4
+    c = selected_path.convert('RGB')
+    c = torch.unsqueeze(torchvision.transforms.ToTensor()(c), 0)
+    c_up = torchvision.transforms.functional.resize(c, size=[up_f * c.shape[2], up_f * c.shape[3]],
+                                                    antialias=True)
+    c_up = rearrange(c_up, '1 c h w -> 1 h w c')
+    c = rearrange(c, '1 c h w -> 1 h w c')
+    c = 2. * c - 1.
+    c = c.to(shared.device)
+    example["LR_image"] = c
+    example["image"] = c_up
+    return example
+@torch.no_grad()
+def convsample_ddim(model, cond, steps, shape, eta=1.0, callback=None, normals_sequence=None,
+                    mask=None, x0=None, quantize_x0=False, temperature=1., score_corrector=None,
+                    corrector_kwargs=None, x_t=None
+                    ):
+    ddim = DDIMSampler(model)
+    bs = shape[0]
+    shape = shape[1:]
+    print(f"Sampling with eta = {eta}; steps: {steps}")
+    samples, intermediates = ddim.sample(steps, batch_size=bs, shape=shape, conditioning=cond, callback=callback,
+                                         normals_sequence=normals_sequence, quantize_x0=quantize_x0, eta=eta,
+                                         mask=mask, x0=x0, temperature=temperature, verbose=False,
+                                         score_corrector=score_corrector,
+                                         corrector_kwargs=corrector_kwargs, x_t=x_t)
+    return samples, intermediates
+@torch.no_grad()
+def make_convolutional_sample(batch, model, custom_steps=None, eta=1.0, quantize_x0=False, custom_shape=None, temperature=1., noise_dropout=0., corrector=None,
+                              corrector_kwargs=None, x_T=None, ddim_use_x0_pred=False):
+    log = {}
+    z, c, x, xrec, xc = model.get_input(batch, model.first_stage_key,
+                                        return_first_stage_outputs=True,
+                                        force_c_encode=not (hasattr(model, 'split_input_params')
+                                                            and model.cond_stage_key == 'coordinates_bbox'),
+                                        return_original_cond=True)
+    if custom_shape is not None:
+        z = torch.randn(custom_shape)
+        print(f"Generating {custom_shape[0]} samples of shape {custom_shape[1:]}")
+    z0 = None
+    log["input"] = x
+    log["reconstruction"] = xrec
+    if ismap(xc):
+        log["original_conditioning"] = model.to_rgb(xc)
+        if hasattr(model, 'cond_stage_key'):
+            log[model.cond_stage_key] = model.to_rgb(xc)
+    else:
+        log["original_conditioning"] = xc if xc is not None else torch.zeros_like(x)
+        if model.cond_stage_model:
+            log[model.cond_stage_key] = xc if xc is not None else torch.zeros_like(x)
+            if model.cond_stage_key == 'class_label':
+                log[model.cond_stage_key] = xc[model.cond_stage_key]
+    with model.ema_scope("Plotting"):
+        t0 = time.time()
+        sample, intermediates = convsample_ddim(model, c, steps=custom_steps, shape=z.shape,
+                                                eta=eta,
+                                                quantize_x0=quantize_x0, mask=None, x0=z0,
+                                                temperature=temperature, score_corrector=corrector, corrector_kwargs=corrector_kwargs,
+                                                x_t=x_T)
+        t1 = time.time()
+        if ddim_use_x0_pred:
+            sample = intermediates['pred_x0'][-1]
+    x_sample = model.decode_first_stage(sample)
+    try:
+        x_sample_noquant = model.decode_first_stage(sample, force_not_quantize=True)
+        log["sample_noquant"] = x_sample_noquant
+        log["sample_diff"] = torch.abs(x_sample_noquant - x_sample)
+    except Exception:
+        pass
+    log["sample"] = x_sample
+    log["time"] = t1 - t0
+    return log

extensions-builtin/LDSR/preload.py ADDED Viewed

	@@ -0,0 +1,6 @@

+import os
+from modules import paths
+def preload(parser):
+    parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(paths.models_path, 'LDSR'))

extensions-builtin/LDSR/scripts/__pycache__/ldsr_model.cpython-310.pyc ADDED Viewed

Binary file (3.18 kB). View file

extensions-builtin/LDSR/scripts/ldsr_model.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import os
+from modules.modelloader import load_file_from_url
+from modules.upscaler import Upscaler, UpscalerData
+from ldsr_model_arch import LDSR
+from modules import shared, script_callbacks, errors
+import sd_hijack_autoencoder  # noqa: F401
+import sd_hijack_ddpm_v1  # noqa: F401
+class UpscalerLDSR(Upscaler):
+    def __init__(self, user_path):
+        self.name = "LDSR"
+        self.user_path = user_path
+        self.model_url = "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1"
+        self.yaml_url = "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1"
+        super().__init__()
+        scaler_data = UpscalerData("LDSR", None, self)
+        self.scalers = [scaler_data]
+    def load_model(self, path: str):
+        # Remove incorrect project.yaml file if too big
+        yaml_path = os.path.join(self.model_path, "project.yaml")
+        old_model_path = os.path.join(self.model_path, "model.pth")
+        new_model_path = os.path.join(self.model_path, "model.ckpt")
+        local_model_paths = self.find_models(ext_filter=[".ckpt", ".safetensors"])
+        local_ckpt_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("model.ckpt")]), None)
+        local_safetensors_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("model.safetensors")]), None)
+        local_yaml_path = next(iter([local_model for local_model in local_model_paths if local_model.endswith("project.yaml")]), None)
+        if os.path.exists(yaml_path):
+            statinfo = os.stat(yaml_path)
+            if statinfo.st_size >= 10485760:
+                print("Removing invalid LDSR YAML file.")
+                os.remove(yaml_path)
+        if os.path.exists(old_model_path):
+            print("Renaming model from model.pth to model.ckpt")
+            os.rename(old_model_path, new_model_path)
+        if local_safetensors_path is not None and os.path.exists(local_safetensors_path):
+            model = local_safetensors_path
+        else:
+            model = local_ckpt_path or load_file_from_url(self.model_url, model_dir=self.model_download_path, file_name="model.ckpt")
+        yaml = local_yaml_path or load_file_from_url(self.yaml_url, model_dir=self.model_download_path, file_name="project.yaml")
+        return LDSR(model, yaml)
+    def do_upscale(self, img, path):
+        try:
+            ldsr = self.load_model(path)
+        except Exception:
+            errors.report(f"Failed loading LDSR model {path}", exc_info=True)
+            return img
+        ddim_steps = shared.opts.ldsr_steps
+        return ldsr.super_resolution(img, ddim_steps, self.scale)
+def on_ui_settings():
+    import gradio as gr
+    shared.opts.add_option("ldsr_steps", shared.OptionInfo(100, "LDSR processing steps. Lower = faster", gr.Slider, {"minimum": 1, "maximum": 200, "step": 1}, section=('upscaling', "Upscaling")))
+    shared.opts.add_option("ldsr_cached", shared.OptionInfo(False, "Cache LDSR model in memory", gr.Checkbox, {"interactive": True}, section=('upscaling', "Upscaling")))
+script_callbacks.on_ui_settings(on_ui_settings)

extensions-builtin/LDSR/sd_hijack_autoencoder.py ADDED Viewed

	@@ -0,0 +1,293 @@

+# The content of this file comes from the ldm/models/autoencoder.py file of the compvis/stable-diffusion repo
+# The VQModel & VQModelInterface were subsequently removed from ldm/models/autoencoder.py when we moved to the stability-ai/stablediffusion repo
+# As the LDSR upscaler relies on VQModel & VQModelInterface, the hijack aims to put them back into the ldm.models.autoencoder
+import numpy as np
+import torch
+import pytorch_lightning as pl
+import torch.nn.functional as F
+from contextlib import contextmanager
+from torch.optim.lr_scheduler import LambdaLR
+from ldm.modules.ema import LitEma
+from vqvae_quantize import VectorQuantizer2 as VectorQuantizer
+from ldm.modules.diffusionmodules.model import Encoder, Decoder
+from ldm.util import instantiate_from_config
+import ldm.models.autoencoder
+from packaging import version
+class VQModel(pl.LightningModule):
+    def __init__(self,
+                 ddconfig,
+                 lossconfig,
+                 n_embed,
+                 embed_dim,
+                 ckpt_path=None,
+                 ignore_keys=None,
+                 image_key="image",
+                 colorize_nlabels=None,
+                 monitor=None,
+                 batch_resize_range=None,
+                 scheduler_config=None,
+                 lr_g_factor=1.0,
+                 remap=None,
+                 sane_index_shape=False, # tell vector quantizer to return indices as bhw
+                 use_ema=False
+                 ):
+        super().__init__()
+        self.embed_dim = embed_dim
+        self.n_embed = n_embed
+        self.image_key = image_key
+        self.encoder = Encoder(**ddconfig)
+        self.decoder = Decoder(**ddconfig)
+        self.loss = instantiate_from_config(lossconfig)
+        self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25,
+                                        remap=remap,
+                                        sane_index_shape=sane_index_shape)
+        self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1)
+        self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
+        if colorize_nlabels is not None:
+            assert type(colorize_nlabels)==int
+            self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))
+        if monitor is not None:
+            self.monitor = monitor
+        self.batch_resize_range = batch_resize_range
+        if self.batch_resize_range is not None:
+            print(f"{self.__class__.__name__}: Using per-batch resizing in range {batch_resize_range}.")
+        self.use_ema = use_ema
+        if self.use_ema:
+            self.model_ema = LitEma(self)
+            print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
+        if ckpt_path is not None:
+            self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys or [])
+        self.scheduler_config = scheduler_config
+        self.lr_g_factor = lr_g_factor
+    @contextmanager
+    def ema_scope(self, context=None):
+        if self.use_ema:
+            self.model_ema.store(self.parameters())
+            self.model_ema.copy_to(self)
+            if context is not None:
+                print(f"{context}: Switched to EMA weights")
+        try:
+            yield None
+        finally:
+            if self.use_ema:
+                self.model_ema.restore(self.parameters())
+                if context is not None:
+                    print(f"{context}: Restored training weights")
+    def init_from_ckpt(self, path, ignore_keys=None):
+        sd = torch.load(path, map_location="cpu")["state_dict"]
+        keys = list(sd.keys())
+        for k in keys:
+            for ik in ignore_keys or []:
+                if k.startswith(ik):
+                    print("Deleting key {} from state_dict.".format(k))
+                    del sd[k]
+        missing, unexpected = self.load_state_dict(sd, strict=False)
+        print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
+        if missing:
+            print(f"Missing Keys: {missing}")
+        if unexpected:
+            print(f"Unexpected Keys: {unexpected}")
+    def on_train_batch_end(self, *args, **kwargs):
+        if self.use_ema:
+            self.model_ema(self)
+    def encode(self, x):
+        h = self.encoder(x)
+        h = self.quant_conv(h)
+        quant, emb_loss, info = self.quantize(h)
+        return quant, emb_loss, info
+    def encode_to_prequant(self, x):
+        h = self.encoder(x)
+        h = self.quant_conv(h)
+        return h
+    def decode(self, quant):
+        quant = self.post_quant_conv(quant)
+        dec = self.decoder(quant)
+        return dec
+    def decode_code(self, code_b):
+        quant_b = self.quantize.embed_code(code_b)
+        dec = self.decode(quant_b)
+        return dec
+    def forward(self, input, return_pred_indices=False):
+        quant, diff, (_,_,ind) = self.encode(input)
+        dec = self.decode(quant)
+        if return_pred_indices:
+            return dec, diff, ind
+        return dec, diff
+    def get_input(self, batch, k):
+        x = batch[k]
+        if len(x.shape) == 3:
+            x = x[..., None]
+        x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float()
+        if self.batch_resize_range is not None:
+            lower_size = self.batch_resize_range[0]
+            upper_size = self.batch_resize_range[1]
+            if self.global_step <= 4:
+                # do the first few batches with max size to avoid later oom
+                new_resize = upper_size
+            else:
+                new_resize = np.random.choice(np.arange(lower_size, upper_size+16, 16))
+            if new_resize != x.shape[2]:
+                x = F.interpolate(x, size=new_resize, mode="bicubic")
+            x = x.detach()
+        return x
+    def training_step(self, batch, batch_idx, optimizer_idx):
+        # https://github.com/pytorch/pytorch/issues/37142
+        # try not to fool the heuristics
+        x = self.get_input(batch, self.image_key)
+        xrec, qloss, ind = self(x, return_pred_indices=True)
+        if optimizer_idx == 0:
+            # autoencode
+            aeloss, log_dict_ae = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
+                                            last_layer=self.get_last_layer(), split="train",
+                                            predicted_indices=ind)
+            self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True)
+            return aeloss
+        if optimizer_idx == 1:
+            # discriminator
+            discloss, log_dict_disc = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
+                                            last_layer=self.get_last_layer(), split="train")
+            self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True)
+            return discloss
+    def validation_step(self, batch, batch_idx):
+        log_dict = self._validation_step(batch, batch_idx)
+        with self.ema_scope():
+            self._validation_step(batch, batch_idx, suffix="_ema")
+        return log_dict
+    def _validation_step(self, batch, batch_idx, suffix=""):
+        x = self.get_input(batch, self.image_key)
+        xrec, qloss, ind = self(x, return_pred_indices=True)
+        aeloss, log_dict_ae = self.loss(qloss, x, xrec, 0,
+                                        self.global_step,
+                                        last_layer=self.get_last_layer(),
+                                        split="val"+suffix,
+                                        predicted_indices=ind
+                                        )
+        discloss, log_dict_disc = self.loss(qloss, x, xrec, 1,
+                                            self.global_step,
+                                            last_layer=self.get_last_layer(),
+                                            split="val"+suffix,
+                                            predicted_indices=ind
+                                            )
+        rec_loss = log_dict_ae[f"val{suffix}/rec_loss"]
+        self.log(f"val{suffix}/rec_loss", rec_loss,
+                   prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
+        self.log(f"val{suffix}/aeloss", aeloss,
+                   prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
+        if version.parse(pl.__version__) >= version.parse('1.4.0'):
+            del log_dict_ae[f"val{suffix}/rec_loss"]
+        self.log_dict(log_dict_ae)
+        self.log_dict(log_dict_disc)
+        return self.log_dict
+    def configure_optimizers(self):
+        lr_d = self.learning_rate
+        lr_g = self.lr_g_factor*self.learning_rate
+        print("lr_d", lr_d)
+        print("lr_g", lr_g)
+        opt_ae = torch.optim.Adam(list(self.encoder.parameters())+
+                                  list(self.decoder.parameters())+
+                                  list(self.quantize.parameters())+
+                                  list(self.quant_conv.parameters())+
+                                  list(self.post_quant_conv.parameters()),
+                                  lr=lr_g, betas=(0.5, 0.9))
+        opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(),
+                                    lr=lr_d, betas=(0.5, 0.9))
+        if self.scheduler_config is not None:
+            scheduler = instantiate_from_config(self.scheduler_config)
+            print("Setting up LambdaLR scheduler...")
+            scheduler = [
+                {
+                    'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule),
+                    'interval': 'step',
+                    'frequency': 1
+                },
+                {
+                    'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule),
+                    'interval': 'step',
+                    'frequency': 1
+                },
+            ]
+            return [opt_ae, opt_disc], scheduler
+        return [opt_ae, opt_disc], []
+    def get_last_layer(self):
+        return self.decoder.conv_out.weight
+    def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs):
+        log = {}
+        x = self.get_input(batch, self.image_key)
+        x = x.to(self.device)
+        if only_inputs:
+            log["inputs"] = x
+            return log
+        xrec, _ = self(x)
+        if x.shape[1] > 3:
+            # colorize with random projection
+            assert xrec.shape[1] > 3
+            x = self.to_rgb(x)
+            xrec = self.to_rgb(xrec)
+        log["inputs"] = x
+        log["reconstructions"] = xrec
+        if plot_ema:
+            with self.ema_scope():
+                xrec_ema, _ = self(x)
+                if x.shape[1] > 3:
+                    xrec_ema = self.to_rgb(xrec_ema)
+                log["reconstructions_ema"] = xrec_ema
+        return log
+    def to_rgb(self, x):
+        assert self.image_key == "segmentation"
+        if not hasattr(self, "colorize"):
+            self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x))
+        x = F.conv2d(x, weight=self.colorize)
+        x = 2.*(x-x.min())/(x.max()-x.min()) - 1.
+        return x
+class VQModelInterface(VQModel):
+    def __init__(self, embed_dim, *args, **kwargs):
+        super().__init__(*args, embed_dim=embed_dim, **kwargs)
+        self.embed_dim = embed_dim
+    def encode(self, x):
+        h = self.encoder(x)
+        h = self.quant_conv(h)
+        return h
+    def decode(self, h, force_not_quantize=False):
+        # also go through quantization layer
+        if not force_not_quantize:
+            quant, emb_loss, info = self.quantize(h)
+        else:
+            quant = h
+        quant = self.post_quant_conv(quant)
+        dec = self.decoder(quant)
+        return dec
+# Put both classes back onto the ldm.models.autoencoder module so code that looks them up there finds them.
+ldm.models.autoencoder.VQModel = VQModel
+ldm.models.autoencoder.VQModelInterface = VQModelInterface

extensions-builtin/LDSR/sd_hijack_ddpm_v1.py ADDED Viewed

	@@ -0,0 +1,1443 @@

+# This script is copied from the compvis/stable-diffusion repo (aka the SD V1 repo)
+# Original filename: ldm/models/diffusion/ddpm.py
+# The purpose is to reinstate the old DDPM logic, which works with VQ, whereas the V2 one doesn't
+# Some models such as LDSR require VQ to work correctly
+# The classes are suffixed with "V1" and added back to the "ldm.models.diffusion.ddpm" module
+import torch
+import torch.nn as nn
+import numpy as np
+import pytorch_lightning as pl
+from torch.optim.lr_scheduler import LambdaLR
+from einops import rearrange, repeat
+from contextlib import contextmanager
+from functools import partial
+from tqdm import tqdm
+from torchvision.utils import make_grid
+from pytorch_lightning.utilities.distributed import rank_zero_only
+from ldm.util import log_txt_as_img, exists, default, ismap, isimage, mean_flat, count_params, instantiate_from_config
+from ldm.modules.ema import LitEma
+from ldm.modules.distributions.distributions import normal_kl, DiagonalGaussianDistribution
+from ldm.models.autoencoder import VQModelInterface, IdentityFirstStage, AutoencoderKL
+from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like
+from ldm.models.diffusion.ddim import DDIMSampler
+import ldm.models.diffusion.ddpm
+# Maps a conditioning mode name to a key name.
+# NOTE(review): presumably these are the keyword names the wrapped diffusion model expects - confirm against its callers.
+__conditioning_keys__ = {'concat': 'c_concat',
+                         'crossattn': 'c_crossattn',
+                         'adm': 'y'}
+def disabled_train(self, mode=True):
+    """Replacement for ``model.train`` that ignores ``mode`` and returns ``self`` unchanged.
+    Overwrite model.train with this function to make sure train/eval mode
+    does not change anymore."""
+    return self
+def uniform_on_device(r1, r2, shape, device):
+    return (r1 - r2) * torch.rand(*shape, device=device) + r2
+class DDPMV1(pl.LightningModule):
+    # classic DDPM with Gaussian diffusion, in image space
+    def __init__(self,
+                 unet_config,
+                 timesteps=1000,
+                 beta_schedule="linear",
+                 loss_type="l2",
+                 ckpt_path=None,
+                 ignore_keys=None,
+                 load_only_unet=False,
+                 monitor="val/loss",
+                 use_ema=True,
+                 first_stage_key="image",
+                 image_size=256,
+                 channels=3,
+                 log_every_t=100,
+                 clip_denoised=True,
+                 linear_start=1e-4,
+                 linear_end=2e-2,
+                 cosine_s=8e-3,
+                 given_betas=None,
+                 original_elbo_weight=0.,
+                 v_posterior=0.,  # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta
+                 l_simple_weight=1.,
+                 conditioning_key=None,
+                 parameterization="eps",  # all assuming fixed variance schedules
+                 scheduler_config=None,
+                 use_positional_encodings=False,
+                 learn_logvar=False,
+                 logvar_init=0.,
+                 ):
+        """Set up the wrapped denoising model, the optional EMA copy and the noise schedule.
+
+        :param unet_config: passed, together with ``conditioning_key``, to ``DiffusionWrapperV1``.
+        :param timesteps, beta_schedule, linear_start, linear_end, cosine_s, given_betas:
+            forwarded to ``register_schedule``.
+        :param loss_type: stored for ``get_loss`` (which handles ``'l1'`` and ``'l2'``).
+        :param ckpt_path: when not None, ``init_from_ckpt`` is called with ``ignore_keys``
+            and ``only_model=load_only_unet``.
+        :param monitor: stored as ``self.monitor`` unless None.
+        :param use_ema: when true, a ``LitEma`` of the wrapped model is kept in ``self.model_ema``.
+        :param parameterization: ``"eps"`` or ``"x0"``; anything else fails the assertion.
+        :param scheduler_config: stored when not None; ``self.use_scheduler`` records whether it was given.
+        :param learn_logvar, logvar_init: ``self.logvar`` holds one entry per timestep filled with
+            ``logvar_init``; it is wrapped in ``nn.Parameter`` when ``learn_logvar`` is true.
+        """
+        super().__init__()
+        assert parameterization in ["eps", "x0"], 'currently only supporting "eps" and "x0"'
+        self.parameterization = parameterization
+        print(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode")
+        self.cond_stage_model = None
+        self.clip_denoised = clip_denoised
+        self.log_every_t = log_every_t
+        self.first_stage_key = first_stage_key
+        self.image_size = image_size  # try conv?
+        self.channels = channels
+        self.use_positional_encodings = use_positional_encodings
+        self.model = DiffusionWrapperV1(unet_config, conditioning_key)
+        count_params(self.model, verbose=True)
+        self.use_ema = use_ema
+        if self.use_ema:
+            self.model_ema = LitEma(self.model)
+            print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
+        self.use_scheduler = scheduler_config is not None
+        if self.use_scheduler:
+            self.scheduler_config = scheduler_config
+        # v_posterior must be set before register_schedule, which reads it
+        self.v_posterior = v_posterior
+        self.original_elbo_weight = original_elbo_weight
+        self.l_simple_weight = l_simple_weight
+        if monitor is not None:
+            self.monitor = monitor
+        # the checkpoint is loaded before the schedule buffers below are registered
+        if ckpt_path is not None:
+            self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys or [], only_model=load_only_unet)
+        self.register_schedule(given_betas=given_betas, beta_schedule=beta_schedule, timesteps=timesteps,
+                               linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s)
+        self.loss_type = loss_type
+        self.learn_logvar = learn_logvar
+        # plain tensor attribute (not registered as a buffer) unless promoted to a Parameter below
+        self.logvar = torch.full(fill_value=logvar_init, size=(self.num_timesteps,))
+        if self.learn_logvar:
+            self.logvar = nn.Parameter(self.logvar, requires_grad=True)
+    def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000,
+                          linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
+        """Compute the beta schedule and register every derived coefficient as a buffer.
+
+        Uses ``given_betas`` when provided, otherwise builds the betas with
+        ``make_beta_schedule``. Sets ``self.num_timesteps``, ``self.linear_start`` and
+        ``self.linear_end``, and registers the forward-process, posterior and
+        ``lvlb_weights`` buffers. Reads ``self.v_posterior`` and ``self.parameterization``.
+
+        :raises NotImplementedError: if ``self.parameterization`` is neither "eps" nor "x0".
+        """
+        if exists(given_betas):
+            betas = given_betas
+        else:
+            betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end,
+                                       cosine_s=cosine_s)
+        alphas = 1. - betas
+        alphas_cumprod = np.cumprod(alphas, axis=0)
+        # cumulative product shifted by one step, with 1.0 for the step before the first
+        alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])
+        # the effective number of timesteps is taken from the betas themselves (must be 1-D)
+        timesteps, = betas.shape
+        self.num_timesteps = int(timesteps)
+        self.linear_start = linear_start
+        self.linear_end = linear_end
+        assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep'
+        to_torch = partial(torch.tensor, dtype=torch.float32)
+        self.register_buffer('betas', to_torch(betas))
+        self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
+        self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev))
+        # calculations for diffusion q(x_t | x_{t-1}) and others
+        self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod)))
+        self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod)))
+        self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod)))
+        self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod)))
+        self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1)))
+        # calculations for posterior q(x_{t-1} | x_t, x_0)
+        posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / (
+                    1. - alphas_cumprod) + self.v_posterior * betas
+        # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t)
+        self.register_buffer('posterior_variance', to_torch(posterior_variance))
+        # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain
+        self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20))))
+        self.register_buffer('posterior_mean_coef1', to_torch(
+            betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod)))
+        self.register_buffer('posterior_mean_coef2', to_torch(
+            (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod)))
+        # per-timestep weights applied to the loss in p_losses to form the 'loss_vlb' term
+        if self.parameterization == "eps":
+            lvlb_weights = self.betas ** 2 / (
+                        2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod))
+        elif self.parameterization == "x0":
+            # NOTE(review): `2. * 1 - x` evaluates to `2 - x`; `2. * (1 - x)` may have been
+            # intended. Left unchanged because it affects training weights; confirm.
+            lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod))
+        else:
+            raise NotImplementedError("mu not supported")
+        # TODO how to choose this term
+        # the first entry is overwritten with the second one
+        lvlb_weights[0] = lvlb_weights[1]
+        # non-persistent: this buffer is not written to the state_dict
+        self.register_buffer('lvlb_weights', lvlb_weights, persistent=False)
+        # NOTE(review): with `.all()` this only fails when every weight is NaN;
+        # `.any()` may have been intended. Confirm.
+        assert not torch.isnan(self.lvlb_weights).all()
+    @contextmanager
+    def ema_scope(self, context=None):
+        if self.use_ema:
+            self.model_ema.store(self.model.parameters())
+            self.model_ema.copy_to(self.model)
+            if context is not None:
+                print(f"{context}: Switched to EMA weights")
+        try:
+            yield None
+        finally:
+            if self.use_ema:
+                self.model_ema.restore(self.model.parameters())
+                if context is not None:
+                    print(f"{context}: Restored training weights")
+    def init_from_ckpt(self, path, ignore_keys=None, only_model=False):
+        sd = torch.load(path, map_location="cpu")
+        if "state_dict" in list(sd.keys()):
+            sd = sd["state_dict"]
+        keys = list(sd.keys())
+        for k in keys:
+            for ik in ignore_keys or []:
+                if k.startswith(ik):
+                    print("Deleting key {} from state_dict.".format(k))
+                    del sd[k]
+        missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(
+            sd, strict=False)
+        print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
+        if missing:
+            print(f"Missing Keys: {missing}")
+        if unexpected:
+            print(f"Unexpected Keys: {unexpected}")
+    def q_mean_variance(self, x_start, t):
+        """
+        Get the distribution q(x_t | x_0).
+        :param x_start: the [N x C x ...] tensor of noiseless inputs.
+        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
+        :return: A tuple (mean, variance, log_variance), all of x_start's shape.
+        """
+        mean = (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start)
+        variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape)
+        log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape)
+        return mean, variance, log_variance
+    def predict_start_from_noise(self, x_t, t, noise):
+        return (
+                extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t -
+                extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise
+        )
+    def q_posterior(self, x_start, x_t, t):
+        posterior_mean = (
+                extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start +
+                extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t
+        )
+        posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape)
+        posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape)
+        return posterior_mean, posterior_variance, posterior_log_variance_clipped
+    def p_mean_variance(self, x, t, clip_denoised: bool):
+        model_out = self.model(x, t)
+        if self.parameterization == "eps":
+            x_recon = self.predict_start_from_noise(x, t=t, noise=model_out)
+        elif self.parameterization == "x0":
+            x_recon = model_out
+        if clip_denoised:
+            x_recon.clamp_(-1., 1.)
+        model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t)
+        return model_mean, posterior_variance, posterior_log_variance
+    @torch.no_grad()
+    def p_sample(self, x, t, clip_denoised=True, repeat_noise=False):
+        b, *_, device = *x.shape, x.device
+        model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised)
+        noise = noise_like(x.shape, device, repeat_noise)
+        # no noise when t == 0
+        nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))
+        return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise
+    @torch.no_grad()
+    def p_sample_loop(self, shape, return_intermediates=False):
+        device = self.betas.device
+        b = shape[0]
+        img = torch.randn(shape, device=device)
+        intermediates = [img]
+        for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps):
+            img = self.p_sample(img, torch.full((b,), i, device=device, dtype=torch.long),
+                                clip_denoised=self.clip_denoised)
+            if i % self.log_every_t == 0 or i == self.num_timesteps - 1:
+                intermediates.append(img)
+        if return_intermediates:
+            return img, intermediates
+        return img
+    @torch.no_grad()
+    def sample(self, batch_size=16, return_intermediates=False):
+        image_size = self.image_size
+        channels = self.channels
+        return self.p_sample_loop((batch_size, channels, image_size, image_size),
+                                  return_intermediates=return_intermediates)
+    def q_sample(self, x_start, t, noise=None):
+        noise = default(noise, lambda: torch.randn_like(x_start))
+        return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start +
+                extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise)
+    def get_loss(self, pred, target, mean=True):
+        if self.loss_type == 'l1':
+            loss = (target - pred).abs()
+            if mean:
+                loss = loss.mean()
+        elif self.loss_type == 'l2':
+            if mean:
+                loss = torch.nn.functional.mse_loss(target, pred)
+            else:
+                loss = torch.nn.functional.mse_loss(target, pred, reduction='none')
+        else:
+            raise NotImplementedError("unknown loss type '{loss_type}'")
+        return loss
+    def p_losses(self, x_start, t, noise=None):
+        noise = default(noise, lambda: torch.randn_like(x_start))
+        x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
+        model_out = self.model(x_noisy, t)
+        loss_dict = {}
+        if self.parameterization == "eps":
+            target = noise
+        elif self.parameterization == "x0":
+            target = x_start
+        else:
+            raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported")
+        loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3])
+        log_prefix = 'train' if self.training else 'val'
+        loss_dict.update({f'{log_prefix}/loss_simple': loss.mean()})
+        loss_simple = loss.mean() * self.l_simple_weight
+        loss_vlb = (self.lvlb_weights[t] * loss).mean()
+        loss_dict.update({f'{log_prefix}/loss_vlb': loss_vlb})
+        loss = loss_simple + self.original_elbo_weight * loss_vlb
+        loss_dict.update({f'{log_prefix}/loss': loss})
+        return loss, loss_dict
+    def forward(self, x, *args, **kwargs):
+        # b, c, h, w, device, img_size, = *x.shape, x.device, self.image_size
+        # assert h == img_size and w == img_size, f'height and width of image must be {img_size}'
+        t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long()
+        return self.p_losses(x, t, *args, **kwargs)
+    def get_input(self, batch, k):
+        x = batch[k]
+        if len(x.shape) == 3:
+            x = x[..., None]
+        x = rearrange(x, 'b h w c -> b c h w')
+        x = x.to(memory_format=torch.contiguous_format).float()
+        return x
+    def shared_step(self, batch):
+        x = self.get_input(batch, self.first_stage_key)
+        loss, loss_dict = self(x)
+        return loss, loss_dict
+    def training_step(self, batch, batch_idx):
+        loss, loss_dict = self.shared_step(batch)
+        self.log_dict(loss_dict, prog_bar=True,
+                      logger=True, on_step=True, on_epoch=True)
+        self.log("global_step", self.global_step,
+                 prog_bar=True, logger=True, on_step=True, on_epoch=False)
+        if self.use_scheduler:
+            lr = self.optimizers().param_groups[0]['lr']
+            self.log('lr_abs', lr, prog_bar=True, logger=True, on_step=True, on_epoch=False)
+        return loss
+    @torch.no_grad()
+    def validation_step(self, batch, batch_idx):
+        _, loss_dict_no_ema = self.shared_step(batch)
+        with self.ema_scope():
+            _, loss_dict_ema = self.shared_step(batch)
+            loss_dict_ema = {key + '_ema': loss_dict_ema[key] for key in loss_dict_ema}
+        self.log_dict(loss_dict_no_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True)
+        self.log_dict(loss_dict_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True)
+    def on_train_batch_end(self, *args, **kwargs):
+        if self.use_ema:
+            self.model_ema(self.model)
+    def _get_rows_from_list(self, samples):
+        n_imgs_per_row = len(samples)
+        denoise_grid = rearrange(samples, 'n b c h w -> b n c h w')
+        denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w')
+        denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row)
+        return denoise_grid
+    @torch.no_grad()
+    def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs):
+        log = {}
+        x = self.get_input(batch, self.first_stage_key)
+        N = min(x.shape[0], N)
+        n_row = min(x.shape[0], n_row)
+        x = x.to(self.device)[:N]
+        log["inputs"] = x
+        # get diffusion row
+        diffusion_row = []
+        x_start = x[:n_row]
+        for t in range(self.num_timesteps):
+            if t % self.log_every_t == 0 or t == self.num_timesteps - 1:
+                t = repeat(torch.tensor([t]), '1 -> b', b=n_row)
+                t = t.to(self.device).long()
+                noise = torch.randn_like(x_start)
+                x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
+                diffusion_row.append(x_noisy)
+        log["diffusion_row"] = self._get_rows_from_list(diffusion_row)
+        if sample:
+            # get denoise row
+            with self.ema_scope("Plotting"):
+                samples, denoise_row = self.sample(batch_size=N, return_intermediates=True)
+            log["samples"] = samples
+            log["denoise_row"] = self._get_rows_from_list(denoise_row)
+        if return_keys:
+            if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0:
+                return log
+            else:
+                return {key: log[key] for key in return_keys}
+        return log
+    def configure_optimizers(self):
+        lr = self.learning_rate
+        params = list(self.model.parameters())
+        if self.learn_logvar:
+            params = params + [self.logvar]
+        opt = torch.optim.AdamW(params, lr=lr)
+        return opt
+class LatentDiffusionV1(DDPMV1):
+    """main class"""
+    def __init__(self,
+                 first_stage_config,
+                 cond_stage_config,
+                 num_timesteps_cond=None,
+                 cond_stage_key="image",
+                 cond_stage_trainable=False,
+                 concat_mode=True,
+                 cond_stage_forward=None,
+                 conditioning_key=None,
+                 scale_factor=1.0,
+                 scale_by_std=False,
+                 *args, **kwargs):
+        self.num_timesteps_cond = default(num_timesteps_cond, 1)
+        self.scale_by_std = scale_by_std
+        assert self.num_timesteps_cond <= kwargs['timesteps']
+        # for backwards compatibility after implementation of DiffusionWrapper
+        if conditioning_key is None:
+            conditioning_key = 'concat' if concat_mode else 'crossattn'
+        if cond_stage_config == '__is_unconditional__':
+            conditioning_key = None
+        ckpt_path = kwargs.pop("ckpt_path", None)
+        ignore_keys = kwargs.pop("ignore_keys", [])
+        super().__init__(*args, conditioning_key=conditioning_key, **kwargs)
+        self.concat_mode = concat_mode
+        self.cond_stage_trainable = cond_stage_trainable
+        self.cond_stage_key = cond_stage_key
+        try:
+            self.num_downs = len(first_stage_config.params.ddconfig.ch_mult) - 1
+        except Exception:
+            self.num_downs = 0
+        if not scale_by_std:
+            self.scale_factor = scale_factor
+        else:
+            self.register_buffer('scale_factor', torch.tensor(scale_factor))
+        self.instantiate_first_stage(first_stage_config)
+        self.instantiate_cond_stage(cond_stage_config)
+        self.cond_stage_forward = cond_stage_forward
+        self.clip_denoised = False
+        self.bbox_tokenizer = None
+        self.restarted_from_ckpt = False
+        if ckpt_path is not None:
+            self.init_from_ckpt(ckpt_path, ignore_keys)
+            self.restarted_from_ckpt = True
+    def make_cond_schedule(self, ):
+        self.cond_ids = torch.full(size=(self.num_timesteps,), fill_value=self.num_timesteps - 1, dtype=torch.long)
+        ids = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long()
+        self.cond_ids[:self.num_timesteps_cond] = ids
+    @rank_zero_only
+    @torch.no_grad()
+    def on_train_batch_start(self, batch, batch_idx, dataloader_idx):
+        # only for very first batch
+        if self.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0 and not self.restarted_from_ckpt:
+            assert self.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously'
+            # set rescale weight to 1./std of encodings
+            print("### USING STD-RESCALING ###")
+            x = super().get_input(batch, self.first_stage_key)
+            x = x.to(self.device)
+            encoder_posterior = self.encode_first_stage(x)
+            z = self.get_first_stage_encoding(encoder_posterior).detach()
+            del self.scale_factor
+            self.register_buffer('scale_factor', 1. / z.flatten().std())
+            print(f"setting self.scale_factor to {self.scale_factor}")
+            print("### USING STD-RESCALING ###")
+    def register_schedule(self,
+                          given_betas=None, beta_schedule="linear", timesteps=1000,
+                          linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
+        super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s)
+        self.shorten_cond_schedule = self.num_timesteps_cond > 1
+        if self.shorten_cond_schedule:
+            self.make_cond_schedule()
+    def instantiate_first_stage(self, config):
+        model = instantiate_from_config(config)
+        self.first_stage_model = model.eval()
+        self.first_stage_model.train = disabled_train
+        for param in self.first_stage_model.parameters():
+            param.requires_grad = False
+    def instantiate_cond_stage(self, config):
+        if not self.cond_stage_trainable:
+            if config == "__is_first_stage__":
+                print("Using first stage also as cond stage.")
+                self.cond_stage_model = self.first_stage_model
+            elif config == "__is_unconditional__":
+                print(f"Training {self.__class__.__name__} as an unconditional model.")
+                self.cond_stage_model = None
+                # self.be_unconditional = True
+            else:
+                model = instantiate_from_config(config)
+                self.cond_stage_model = model.eval()
+                self.cond_stage_model.train = disabled_train
+                for param in self.cond_stage_model.parameters():
+                    param.requires_grad = False
+        else:
+            assert config != '__is_first_stage__'
+            assert config != '__is_unconditional__'
+            model = instantiate_from_config(config)
+            self.cond_stage_model = model
+    def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False):
+        denoise_row = []
+        for zd in tqdm(samples, desc=desc):
+            denoise_row.append(self.decode_first_stage(zd.to(self.device),
+                                                            force_not_quantize=force_no_decoder_quantization))
+        n_imgs_per_row = len(denoise_row)
+        denoise_row = torch.stack(denoise_row)  # n_log_step, n_row, C, H, W
+        denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w')
+        denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w')
+        denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row)
+        return denoise_grid
+    def get_first_stage_encoding(self, encoder_posterior):
+        if isinstance(encoder_posterior, DiagonalGaussianDistribution):
+            z = encoder_posterior.sample()
+        elif isinstance(encoder_posterior, torch.Tensor):
+            z = encoder_posterior
+        else:
+            raise NotImplementedError(f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented")
+        return self.scale_factor * z
+    def get_learned_conditioning(self, c):
+        if self.cond_stage_forward is None:
+            if hasattr(self.cond_stage_model, 'encode') and callable(self.cond_stage_model.encode):
+                c = self.cond_stage_model.encode(c)
+                if isinstance(c, DiagonalGaussianDistribution):
+                    c = c.mode()
+            else:
+                c = self.cond_stage_model(c)
+        else:
+            assert hasattr(self.cond_stage_model, self.cond_stage_forward)
+            c = getattr(self.cond_stage_model, self.cond_stage_forward)(c)
+        return c
+    def meshgrid(self, h, w):
+        y = torch.arange(0, h).view(h, 1, 1).repeat(1, w, 1)
+        x = torch.arange(0, w).view(1, w, 1).repeat(h, 1, 1)
+        arr = torch.cat([y, x], dim=-1)
+        return arr
+    def delta_border(self, h, w):
+        """
+        :param h: height
+        :param w: width
+        :return: normalized distance to image border,
+         wtith min distance = 0 at border and max dist = 0.5 at image center
+        """
+        lower_right_corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2)
+        arr = self.meshgrid(h, w) / lower_right_corner
+        dist_left_up = torch.min(arr, dim=-1, keepdims=True)[0]
+        dist_right_down = torch.min(1 - arr, dim=-1, keepdims=True)[0]
+        edge_dist = torch.min(torch.cat([dist_left_up, dist_right_down], dim=-1), dim=-1)[0]
+        return edge_dist
+    def get_weighting(self, h, w, Ly, Lx, device):
+        weighting = self.delta_border(h, w)
+        weighting = torch.clip(weighting, self.split_input_params["clip_min_weight"],
+                               self.split_input_params["clip_max_weight"], )
+        weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device)
+        if self.split_input_params["tie_braker"]:
+            L_weighting = self.delta_border(Ly, Lx)
+            L_weighting = torch.clip(L_weighting,
+                                     self.split_input_params["clip_min_tie_weight"],
+                                     self.split_input_params["clip_max_tie_weight"])
+            L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device)
+            weighting = weighting * L_weighting
+        return weighting
+    def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1):  # todo load once not every time, shorten code
+        """
+        :param x: img of size (bs, c, h, w)
+        :return: n img crops of size (n, bs, c, kernel_size[0], kernel_size[1])
+        """
+        bs, nc, h, w = x.shape
+        # number of crops in image
+        Ly = (h - kernel_size[0]) // stride[0] + 1
+        Lx = (w - kernel_size[1]) // stride[1] + 1
+        if uf == 1 and df == 1:
+            fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride)
+            unfold = torch.nn.Unfold(**fold_params)
+            fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params)
+            weighting = self.get_weighting(kernel_size[0], kernel_size[1], Ly, Lx, x.device).to(x.dtype)
+            normalization = fold(weighting).view(1, 1, h, w)  # normalizes the overlap
+            weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx))
+        elif uf > 1 and df == 1:
+            fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride)
+            unfold = torch.nn.Unfold(**fold_params)
+            fold_params2 = dict(kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf),
+                                dilation=1, padding=0,
+                                stride=(stride[0] * uf, stride[1] * uf))
+            fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2)
+            weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype)
+            normalization = fold(weighting).view(1, 1, h * uf, w * uf)  # normalizes the overlap
+            weighting = weighting.view((1, 1, kernel_size[0] * uf, kernel_size[1] * uf, Ly * Lx))
+        elif df > 1 and uf == 1:
+            fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride)
+            unfold = torch.nn.Unfold(**fold_params)
+            fold_params2 = dict(kernel_size=(kernel_size[0] // df, kernel_size[0] // df),
+                                dilation=1, padding=0,
+                                stride=(stride[0] // df, stride[1] // df))
+            fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2)
+            weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype)
+            normalization = fold(weighting).view(1, 1, h // df, w // df)  # normalizes the overlap
+            weighting = weighting.view((1, 1, kernel_size[0] // df, kernel_size[1] // df, Ly * Lx))
+        else:
+            raise NotImplementedError
+        return fold, unfold, normalization, weighting
+    @torch.no_grad()
+    def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False,
+                  cond_key=None, return_original_cond=False, bs=None):
+        """Encode ``batch[k]`` into a latent and assemble the conditioning for it.
+
+        :param batch: mapping holding the input under ``k`` and the conditioning under ``cond_key``.
+        :param k: key of the first-stage input.
+        :param return_first_stage_outputs: also append the input ``x`` and its reconstruction.
+        :param force_c_encode: encode the conditioning even when the cond stage is trainable.
+        :param cond_key: conditioning key; defaults to ``self.cond_stage_key``.
+        :param return_original_cond: also append the un-encoded conditioning ``xc``.
+        :param bs: if given, keep only the first ``bs`` entries of the input and conditioning.
+        :return: list ``[z, c]`` followed by the optional extras in the order
+            ``x, xrec`` (first-stage outputs) and ``xc`` (original conditioning).
+        """
+        x = super().get_input(batch, k)
+        if bs is not None:
+            x = x[:bs]
+        x = x.to(self.device)
+        # encode_first_stage is defined outside this excerpt; its result is scaled into the latent z
+        encoder_posterior = self.encode_first_stage(x)
+        z = self.get_first_stage_encoding(encoder_posterior).detach()
+        if self.model.conditioning_key is not None:
+            if cond_key is None:
+                cond_key = self.cond_stage_key
+            if cond_key != self.first_stage_key:
+                if cond_key in ['caption', 'coordinates_bbox']:
+                    # taken from the batch as-is (no layout conversion)
+                    xc = batch[cond_key]
+                elif cond_key == 'class_label':
+                    # the whole batch is handed to the conditioning model
+                    xc = batch
+                else:
+                    xc = super().get_input(batch, cond_key).to(self.device)
+            else:
+                # conditioning on the input itself
+                xc = x
+            if not self.cond_stage_trainable or force_c_encode:
+                if isinstance(xc, dict) or isinstance(xc, list):
+                    # import pudb; pudb.set_trace()
+                    c = self.get_learned_conditioning(xc)
+                else:
+                    c = self.get_learned_conditioning(xc.to(self.device))
+            else:
+                # trainable cond stage: conditioning is passed on un-encoded
+                c = xc
+            if bs is not None:
+                c = c[:bs]
+            if self.use_positional_encodings:
+                # compute_latent_shifts is defined outside this excerpt
+                pos_x, pos_y = self.compute_latent_shifts(batch)
+                ckey = __conditioning_keys__[self.model.conditioning_key]
+                c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y}
+        else:
+            # unconditional model: no conditioning unless positional encodings are used
+            c = None
+            xc = None
+            if self.use_positional_encodings:
+                pos_x, pos_y = self.compute_latent_shifts(batch)
+                c = {'pos_x': pos_x, 'pos_y': pos_y}
+        out = [z, c]
+        if return_first_stage_outputs:
+            xrec = self.decode_first_stage(z)
+            out.extend([x, xrec])
+        if return_original_cond:
+            out.append(xc)
+        return out
+    @torch.no_grad()
+    def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False):
+        if predict_cids:
+            if z.dim() == 4:
+                z = torch.argmax(z.exp(), dim=1).long()
+            z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None)
+            z = rearrange(z, 'b h w c -> b c h w').contiguous()
+        z = 1. / self.scale_factor * z
+        if hasattr(self, "split_input_params"):
+            if self.split_input_params["patch_distributed_vq"]:
+                ks = self.split_input_params["ks"]  # eg. (128, 128)
+                stride = self.split_input_params["stride"]  # eg. (64, 64)
+                uf = self.split_input_params["vqf"]
+                bs, nc, h, w = z.shape
+                if ks[0] > h or ks[1] > w:
+                    ks = (min(ks[0], h), min(ks[1], w))
+                    print("reducing Kernel")
+                if stride[0] > h or stride[1] > w:
+                    stride = (min(stride[0], h), min(stride[1], w))
+                    print("reducing stride")
+                fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf)
+                z = unfold(z)  # (bn, nc * prod(**ks), L)
+                # 1. Reshape to img shape
+                z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1]))  # (bn, nc, ks[0], ks[1], L )
+                # 2. apply model loop over last dim
+                if isinstance(self.first_stage_model, VQModelInterface):
+                    output_list = [self.first_stage_model.decode(z[:, :, :, :, i],
+                                                                 force_not_quantize=predict_cids or force_not_quantize)
+                                   for i in range(z.shape[-1])]
+                else:
+                    output_list = [self.first_stage_model.decode(z[:, :, :, :, i])
+                                   for i in range(z.shape[-1])]
+                o = torch.stack(output_list, axis=-1)  # # (bn, nc, ks[0], ks[1], L)
+                o = o * weighting
+                # Reverse 1. reshape to img shape
+                o = o.view((o.shape[0], -1, o.shape[-1]))  # (bn, nc * ks[0] * ks[1], L)
+                # stitch crops together
+                decoded = fold(o)
+                decoded = decoded / normalization  # norm is shape (1, 1, h, w)
+                return decoded
+            else:
+                if isinstance(self.first_stage_model, VQModelInterface):
+                    return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize)
+                else:
+                    return self.first_stage_model.decode(z)
+        else:
+            if isinstance(self.first_stage_model, VQModelInterface):
+                return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize)
+            else:
+                return self.first_stage_model.decode(z)
+    # same as above but without decorator
+    def differentiable_decode_first_stage(self, z, predict_cids=False, force_not_quantize=False):
+        if predict_cids:
+            if z.dim() == 4:
+                z = torch.argmax(z.exp(), dim=1).long()
+            z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None)
+            z = rearrange(z, 'b h w c -> b c h w').contiguous()
+        z = 1. / self.scale_factor * z
+        if hasattr(self, "split_input_params"):
+            if self.split_input_params["patch_distributed_vq"]:
+                ks = self.split_input_params["ks"]  # eg. (128, 128)
+                stride = self.split_input_params["stride"]  # eg. (64, 64)
+                uf = self.split_input_params["vqf"]
+                bs, nc, h, w = z.shape
+                if ks[0] > h or ks[1] > w:
+                    ks = (min(ks[0], h), min(ks[1], w))
+                    print("reducing Kernel")
+                if stride[0] > h or stride[1] > w:
+                    stride = (min(stride[0], h), min(stride[1], w))
+                    print("reducing stride")
+                fold, unfold, normalization, weighting = self.get_fold_unfold(z, ks, stride, uf=uf)
+                z = unfold(z)  # (bn, nc * prod(**ks), L)
+                # 1. Reshape to img shape
+                z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1]))  # (bn, nc, ks[0], ks[1], L )
+                # 2. apply model loop over last dim
+                if isinstance(self.first_stage_model, VQModelInterface):
+                    output_list = [self.first_stage_model.decode(z[:, :, :, :, i],
+                                                                 force_not_quantize=predict_cids or force_not_quantize)
+                                   for i in range(z.shape[-1])]
+                else:
+                    output_list = [self.first_stage_model.decode(z[:, :, :, :, i])
+                                   for i in range(z.shape[-1])]
+                o = torch.stack(output_list, axis=-1)  # # (bn, nc, ks[0], ks[1], L)
+                o = o * weighting
+                # Reverse 1. reshape to img shape
+                o = o.view((o.shape[0], -1, o.shape[-1]))  # (bn, nc * ks[0] * ks[1], L)
+                # stitch crops together
+                decoded = fold(o)
+                decoded = decoded / normalization  # norm is shape (1, 1, h, w)
+                return decoded
+            else:
+                if isinstance(self.first_stage_model, VQModelInterface):
+                    return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize)
+                else:
+                    return self.first_stage_model.decode(z)
+        else:
+            if isinstance(self.first_stage_model, VQModelInterface):
+                return self.first_stage_model.decode(z, force_not_quantize=predict_cids or force_not_quantize)
+            else:
+                return self.first_stage_model.decode(z)
+    @torch.no_grad()
+    def encode_first_stage(self, x):
+        if hasattr(self, "split_input_params"):
+            if self.split_input_params["patch_distributed_vq"]:
+                ks = self.split_input_params["ks"]  # eg. (128, 128)
+                stride = self.split_input_params["stride"]  # eg. (64, 64)
+                df = self.split_input_params["vqf"]
+                self.split_input_params['original_image_size'] = x.shape[-2:]
+                bs, nc, h, w = x.shape
+                if ks[0] > h or ks[1] > w:
+                    ks = (min(ks[0], h), min(ks[1], w))
+                    print("reducing Kernel")
+                if stride[0] > h or stride[1] > w:
+                    stride = (min(stride[0], h), min(stride[1], w))
+                    print("reducing stride")
+                fold, unfold, normalization, weighting = self.get_fold_unfold(x, ks, stride, df=df)
+                z = unfold(x)  # (bn, nc * prod(**ks), L)
+                # Reshape to img shape
+                z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1]))  # (bn, nc, ks[0], ks[1], L )
+                output_list = [self.first_stage_model.encode(z[:, :, :, :, i])
+                               for i in range(z.shape[-1])]
+                o = torch.stack(output_list, axis=-1)
+                o = o * weighting
+                # Reverse reshape to img shape
+                o = o.view((o.shape[0], -1, o.shape[-1]))  # (bn, nc * ks[0] * ks[1], L)
+                # stitch crops together
+                decoded = fold(o)
+                decoded = decoded / normalization
+                return decoded
+            else:
+                return self.first_stage_model.encode(x)
+        else:
+            return self.first_stage_model.encode(x)
+    def shared_step(self, batch, **kwargs):
+        x, c = self.get_input(batch, self.first_stage_key)
+        loss = self(x, c)
+        return loss
+    def forward(self, x, c, *args, **kwargs):
+        t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long()
+        if self.model.conditioning_key is not None:
+            assert c is not None
+            if self.cond_stage_trainable:
+                c = self.get_learned_conditioning(c)
+            if self.shorten_cond_schedule:  # TODO: drop this option
+                tc = self.cond_ids[t].to(self.device)
+                c = self.q_sample(x_start=c, t=tc, noise=torch.randn_like(c.float()))
+        return self.p_losses(x, c, t, *args, **kwargs)
+    def apply_model(self, x_noisy, t, cond, return_ids=False):
+        """
+        Run the wrapped model ``self.model`` on ``x_noisy`` at timesteps ``t``.
+        A ``cond`` that is not a dict is wrapped in a list (unless it already is one)
+        and stored under ``'c_concat'`` when ``self.model.conditioning_key == 'concat'``,
+        otherwise under ``'c_crossattn'``. If the instance has ``split_input_params``,
+        ``x_noisy`` is unfolded into crops, the model is applied to each crop, and the
+        weighted outputs are folded back and divided by ``normalization``.
+        :param x_noisy: model input (unfolded into crops in split mode).
+        :param t: timesteps, passed unchanged to ``self.model``.
+        :param cond: conditioning; a dict, a list, or a single object.
+        :param return_ids: when false and the result is a tuple, only its first
+            element is returned; asserted to be false in split mode.
+        :return: the model output, or its first element as described above.
+        """
+        if isinstance(cond, dict):
+            # hybrid case, cond is expected to be a dict
+            pass
+        else:
+            if not isinstance(cond, list):
+                cond = [cond]
+            key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn'
+            cond = {key: cond}
+        if hasattr(self, "split_input_params"):
+            assert len(cond) == 1  # todo can only deal with one conditioning atm
+            assert not return_ids
+            ks = self.split_input_params["ks"]  # eg. (128, 128)
+            stride = self.split_input_params["stride"]  # eg. (64, 64)
+            h, w = x_noisy.shape[-2:]
+            fold, unfold, normalization, weighting = self.get_fold_unfold(x_noisy, ks, stride)
+            z = unfold(x_noisy)  # (bn, nc * prod(**ks), L)
+            # Reshape to img shape
+            z = z.view((z.shape[0], -1, ks[0], ks[1], z.shape[-1]))  # (bn, nc, ks[0], ks[1], L )
+            # one entry per crop (slices along the last dim)
+            z_list = [z[:, :, :, :, i] for i in range(z.shape[-1])]
+            # Build one conditioning dict per crop; how depends on cond_stage_key.
+            if self.cond_stage_key in ["image", "LR_image", "segmentation",
+                                       'bbox_img'] and self.model.conditioning_key:  # todo check for completeness
+                # conditioning is unfolded with the same ``unfold`` as the input
+                c_key = next(iter(cond.keys()))  # get key
+                c = next(iter(cond.values()))  # get value
+                assert (len(c) == 1)  # todo extend to list with more than one elem
+                c = c[0]  # get element
+                c = unfold(c)
+                c = c.view((c.shape[0], -1, ks[0], ks[1], c.shape[-1]))  # (bn, nc, ks[0], ks[1], L )
+                cond_list = [{c_key: [c[:, :, :, :, i]]} for i in range(c.shape[-1])]
+            elif self.cond_stage_key == 'coordinates_bbox':
+                assert 'original_image_size' in self.split_input_params, 'BoudingBoxRescaling is missing original_image_size'
+                # assuming padding of unfold is always 0 and its dilation is always 1
+                # NOTE(review): uses the width ``w`` with ks[0]/stride[0]; equivalent only
+                # for square kernels and strides - confirm the intended indices.
+                n_patches_per_row = int((w - ks[0]) / stride[0] + 1)
+                full_img_h, full_img_w = self.split_input_params['original_image_size']
+                # as we are operating on latents, we need the factor from the original image size to the
+                # spatial latent size to properly rescale the crops for regenerating the bbox annotations
+                num_downs = self.first_stage_model.encoder.num_resolutions - 1
+                rescale_latent = 2 ** (num_downs)
+                # get top left positions of patches as conforming for the bbox tokenizer, therefore we
+                # need to rescale the tl patch coordinates to be in between (0,1)
+                tl_patch_coordinates = [(rescale_latent * stride[0] * (patch_nr % n_patches_per_row) / full_img_w,
+                                         rescale_latent * stride[1] * (patch_nr // n_patches_per_row) / full_img_h)
+                                        for patch_nr in range(z.shape[-1])]
+                # patch_limits are tl_coord, width and height coordinates as (x_tl, y_tl, h, w)
+                # NOTE(review): the third entry is divided by full_img_w and the fourth by
+                # full_img_h, which reads as (w, h) rather than (h, w) - confirm.
+                patch_limits = [(x_tl, y_tl,
+                                 rescale_latent * ks[0] / full_img_w,
+                                 rescale_latent * ks[1] / full_img_h) for x_tl, y_tl in tl_patch_coordinates]
+                # patch_values = [(np.arange(x_tl,min(x_tl+ks, 1.)),np.arange(y_tl,min(y_tl+ks, 1.))) for x_tl, y_tl in tl_patch_coordinates]
+                # tokenize crop coordinates for the bounding boxes of the respective patches
+                patch_limits_tknzd = [torch.LongTensor(self.bbox_tokenizer._crop_encoder(bbox))[None].to(self.device)
+                                      for bbox in patch_limits]  # list of length l with tensors of shape (1, 2)
+                # NOTE(review): this and the following print calls look like leftover debug output.
+                print(patch_limits_tknzd[0].shape)
+                # cut tknzd crop position from conditioning
+                assert isinstance(cond, dict), 'cond must be dict to be fed into model'
+                cut_cond = cond['c_crossattn'][0][..., :-2].to(self.device)
+                print(cut_cond.shape)
+                # append each crop's tokenized limits to the shared conditioning
+                adapted_cond = torch.stack([torch.cat([cut_cond, p], dim=1) for p in patch_limits_tknzd])
+                adapted_cond = rearrange(adapted_cond, 'l b n -> (l b) n')
+                print(adapted_cond.shape)
+                adapted_cond = self.get_learned_conditioning(adapted_cond)
+                print(adapted_cond.shape)
+                adapted_cond = rearrange(adapted_cond, '(l b) n d -> l b n d', l=z.shape[-1])
+                print(adapted_cond.shape)
+                cond_list = [{'c_crossattn': [e]} for e in adapted_cond]
+            else:
+                # every crop gets the same (unsplit) conditioning dict
+                cond_list = [cond for i in range(z.shape[-1])]  # Todo make this more efficient
+            # apply model by loop over crops
+            output_list = [self.model(z_list[i], t, **cond_list[i]) for i in range(z.shape[-1])]
+            assert not isinstance(output_list[0],
+                                  tuple)  # todo cant deal with multiple model outputs check this never happens
+            o = torch.stack(output_list, axis=-1)
+            o = o * weighting
+            # Reverse reshape to img shape
+            o = o.view((o.shape[0], -1, o.shape[-1]))  # (bn, nc * ks[0] * ks[1], L)
+            # stitch crops together
+            x_recon = fold(o) / normalization
+        else:
+            x_recon = self.model(x_noisy, t, **cond)
+        # a tuple result is reduced to its first element unless the caller asked for ids
+        if isinstance(x_recon, tuple) and not return_ids:
+            return x_recon[0]
+        else:
+            return x_recon
+    def _predict_eps_from_xstart(self, x_t, t, pred_xstart):
+        return (extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - pred_xstart) / \
+               extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape)
+    def _prior_bpd(self, x_start):
+        """
+        Get the prior KL term for the variational lower-bound, measured in
+        bits-per-dim.
+        This term can't be optimized, as it only depends on the encoder.
+        :param x_start: the [N x C x ...] tensor of inputs.
+        :return: a batch of [N] KL values (in bits), one per batch element.
+        """
+        batch_size = x_start.shape[0]
+        t = torch.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device)
+        qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t)
+        kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0)
+        return mean_flat(kl_prior) / np.log(2.0)
+    def p_losses(self, x_start, cond, t, noise=None):
+        noise = default(noise, lambda: torch.randn_like(x_start))
+        x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
+        model_output = self.apply_model(x_noisy, t, cond)
+        loss_dict = {}
+        prefix = 'train' if self.training else 'val'
+        if self.parameterization == "x0":
+            target = x_start
+        elif self.parameterization == "eps":
+            target = noise
+        else:
+            raise NotImplementedError()
+        loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3])
+        loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()})
+        logvar_t = self.logvar[t].to(self.device)
+        loss = loss_simple / torch.exp(logvar_t) + logvar_t
+        # loss = loss_simple / torch.exp(self.logvar) + self.logvar
+        if self.learn_logvar:
+            loss_dict.update({f'{prefix}/loss_gamma': loss.mean()})
+            loss_dict.update({'logvar': self.logvar.data.mean()})
+        loss = self.l_simple_weight * loss.mean()
+        loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3))
+        loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean()
+        loss_dict.update({f'{prefix}/loss_vlb': loss_vlb})
+        loss += (self.original_elbo_weight * loss_vlb)
+        loss_dict.update({f'{prefix}/loss': loss})
+        return loss, loss_dict
+    def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=False, quantize_denoised=False,
+                        return_x0=False, score_corrector=None, corrector_kwargs=None):
+        t_in = t
+        model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids)
+        if score_corrector is not None:
+            assert self.parameterization == "eps"
+            model_out = score_corrector.modify_score(self, model_out, x, t, c, **corrector_kwargs)
+        if return_codebook_ids:
+            model_out, logits = model_out
+        if self.parameterization == "eps":
+            x_recon = self.predict_start_from_noise(x, t=t, noise=model_out)
+        elif self.parameterization == "x0":
+            x_recon = model_out
+        else:
+            raise NotImplementedError()
+        if clip_denoised:
+            x_recon.clamp_(-1., 1.)
+        if quantize_denoised:
+            x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon)
+        model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t)
+        if return_codebook_ids:
+            return model_mean, posterior_variance, posterior_log_variance, logits
+        elif return_x0:
+            return model_mean, posterior_variance, posterior_log_variance, x_recon
+        else:
+            return model_mean, posterior_variance, posterior_log_variance
+    @torch.no_grad()
+    def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False,
+                 return_codebook_ids=False, quantize_denoised=False, return_x0=False,
+                 temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None):
+        b, *_, device = *x.shape, x.device
+        outputs = self.p_mean_variance(x=x, c=c, t=t, clip_denoised=clip_denoised,
+                                       return_codebook_ids=return_codebook_ids,
+                                       quantize_denoised=quantize_denoised,
+                                       return_x0=return_x0,
+                                       score_corrector=score_corrector, corrector_kwargs=corrector_kwargs)
+        if return_codebook_ids:
+            raise DeprecationWarning("Support dropped.")
+            model_mean, _, model_log_variance, logits = outputs
+        elif return_x0:
+            model_mean, _, model_log_variance, x0 = outputs
+        else:
+            model_mean, _, model_log_variance = outputs
+        noise = noise_like(x.shape, device, repeat_noise) * temperature
+        if noise_dropout > 0.:
+            noise = torch.nn.functional.dropout(noise, p=noise_dropout)
+        # no noise when t == 0
+        nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))
+        if return_codebook_ids:
+            return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, logits.argmax(dim=1)
+        if return_x0:
+            return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0
+        else:
+            return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise
+    @torch.no_grad()
+    def progressive_denoising(self, cond, shape, verbose=True, callback=None, quantize_denoised=False,
+                              img_callback=None, mask=None, x0=None, temperature=1., noise_dropout=0.,
+                              score_corrector=None, corrector_kwargs=None, batch_size=None, x_T=None, start_T=None,
+                              log_every_t=None):
+        if not log_every_t:
+            log_every_t = self.log_every_t
+        timesteps = self.num_timesteps
+        if batch_size is not None:
+            b = batch_size if batch_size is not None else shape[0]
+            shape = [batch_size] + list(shape)
+        else:
+            b = batch_size = shape[0]
+        if x_T is None:
+            img = torch.randn(shape, device=self.device)
+        else:
+            img = x_T
+        intermediates = []
+        if cond is not None:
+            if isinstance(cond, dict):
+                cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else
+                [x[:batch_size] for x in cond[key]] for key in cond}
+            else:
+                cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size]
+        if start_T is not None:
+            timesteps = min(timesteps, start_T)
+        iterator = tqdm(reversed(range(0, timesteps)), desc='Progressive Generation',
+                        total=timesteps) if verbose else reversed(
+            range(0, timesteps))
+        if type(temperature) == float:
+            temperature = [temperature] * timesteps
+        for i in iterator:
+            ts = torch.full((b,), i, device=self.device, dtype=torch.long)
+            if self.shorten_cond_schedule:
+                assert self.model.conditioning_key != 'hybrid'
+                tc = self.cond_ids[ts].to(cond.device)
+                cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond))
+            img, x0_partial = self.p_sample(img, cond, ts,
+                                            clip_denoised=self.clip_denoised,
+                                            quantize_denoised=quantize_denoised, return_x0=True,
+                                            temperature=temperature[i], noise_dropout=noise_dropout,
+                                            score_corrector=score_corrector, corrector_kwargs=corrector_kwargs)
+            if mask is not None:
+                assert x0 is not None
+                img_orig = self.q_sample(x0, ts)
+                img = img_orig * mask + (1. - mask) * img
+            if i % log_every_t == 0 or i == timesteps - 1:
+                intermediates.append(x0_partial)
+            if callback:
+                callback(i)
+            if img_callback:
+                img_callback(img, i)
+        return img, intermediates
+    @torch.no_grad()
+    def p_sample_loop(self, cond, shape, return_intermediates=False,
+                      x_T=None, verbose=True, callback=None, timesteps=None, quantize_denoised=False,
+                      mask=None, x0=None, img_callback=None, start_T=None,
+                      log_every_t=None):
+        if not log_every_t:
+            log_every_t = self.log_every_t
+        device = self.betas.device
+        b = shape[0]
+        if x_T is None:
+            img = torch.randn(shape, device=device)
+        else:
+            img = x_T
+        intermediates = [img]
+        if timesteps is None:
+            timesteps = self.num_timesteps
+        if start_T is not None:
+            timesteps = min(timesteps, start_T)
+        iterator = tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) if verbose else reversed(
+            range(0, timesteps))
+        if mask is not None:
+            assert x0 is not None
+            assert x0.shape[2:3] == mask.shape[2:3]  # spatial size has to match
+        for i in iterator:
+            ts = torch.full((b,), i, device=device, dtype=torch.long)
+            if self.shorten_cond_schedule:
+                assert self.model.conditioning_key != 'hybrid'
+                tc = self.cond_ids[ts].to(cond.device)
+                cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond))
+            img = self.p_sample(img, cond, ts,
+                                clip_denoised=self.clip_denoised,
+                                quantize_denoised=quantize_denoised)
+            if mask is not None:
+                img_orig = self.q_sample(x0, ts)
+                img = img_orig * mask + (1. - mask) * img
+            if i % log_every_t == 0 or i == timesteps - 1:
+                intermediates.append(img)
+            if callback:
+                callback(i)
+            if img_callback:
+                img_callback(img, i)
+        if return_intermediates:
+            return img, intermediates
+        return img
+    @torch.no_grad()
+    def sample(self, cond, batch_size=16, return_intermediates=False, x_T=None,
+               verbose=True, timesteps=None, quantize_denoised=False,
+               mask=None, x0=None, shape=None,**kwargs):
+        if shape is None:
+            shape = (batch_size, self.channels, self.image_size, self.image_size)
+        if cond is not None:
+            if isinstance(cond, dict):
+                cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else
+                [x[:batch_size] for x in cond[key]] for key in cond}
+            else:
+                cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size]
+        return self.p_sample_loop(cond,
+                                  shape,
+                                  return_intermediates=return_intermediates, x_T=x_T,
+                                  verbose=verbose, timesteps=timesteps, quantize_denoised=quantize_denoised,
+                                  mask=mask, x0=x0)
+    @torch.no_grad()
+    def sample_log(self,cond,batch_size,ddim, ddim_steps,**kwargs):
+        if ddim:
+            ddim_sampler = DDIMSampler(self)
+            shape = (self.channels, self.image_size, self.image_size)
+            samples, intermediates =ddim_sampler.sample(ddim_steps,batch_size,
+                                                        shape,cond,verbose=False,**kwargs)
+        else:
+            samples, intermediates = self.sample(cond=cond, batch_size=batch_size,
+                                                 return_intermediates=True,**kwargs)
+        return samples, intermediates
+    @torch.no_grad()
+    def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta=1., return_keys=None,
+                   quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True,
+                   plot_diffusion_rows=True, **kwargs):
+        use_ddim = ddim_steps is not None
+        log = {}
+        z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key,
+                                           return_first_stage_outputs=True,
+                                           force_c_encode=True,
+                                           return_original_cond=True,
+                                           bs=N)
+        N = min(x.shape[0], N)
+        n_row = min(x.shape[0], n_row)
+        log["inputs"] = x
+        log["reconstruction"] = xrec
+        if self.model.conditioning_key is not None:
+            if hasattr(self.cond_stage_model, "decode"):
+                xc = self.cond_stage_model.decode(c)
+                log["conditioning"] = xc
+            elif self.cond_stage_key in ["caption"]:
+                xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["caption"])
+                log["conditioning"] = xc
+            elif self.cond_stage_key == 'class_label':
+                xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"])
+                log['conditioning'] = xc
+            elif isimage(xc):
+                log["conditioning"] = xc
+            if ismap(xc):
+                log["original_conditioning"] = self.to_rgb(xc)
+        if plot_diffusion_rows:
+            # get diffusion row
+            diffusion_row = []
+            z_start = z[:n_row]
+            for t in range(self.num_timesteps):
+                if t % self.log_every_t == 0 or t == self.num_timesteps - 1:
+                    t = repeat(torch.tensor([t]), '1 -> b', b=n_row)
+                    t = t.to(self.device).long()
+                    noise = torch.randn_like(z_start)
+                    z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise)
+                    diffusion_row.append(self.decode_first_stage(z_noisy))
+            diffusion_row = torch.stack(diffusion_row)  # n_log_step, n_row, C, H, W
+            diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w')
+            diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w')
+            diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0])
+            log["diffusion_row"] = diffusion_grid
+        if sample:
+            # get denoise row
+            with self.ema_scope("Plotting"):
+                samples, z_denoise_row = self.sample_log(cond=c,batch_size=N,ddim=use_ddim,
+                                                         ddim_steps=ddim_steps,eta=ddim_eta)
+                # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True)
+            x_samples = self.decode_first_stage(samples)
+            log["samples"] = x_samples
+            if plot_denoise_rows:
+                denoise_grid = self._get_denoise_row_from_list(z_denoise_row)
+                log["denoise_row"] = denoise_grid
+            if quantize_denoised and not isinstance(self.first_stage_model, AutoencoderKL) and not isinstance(
+                    self.first_stage_model, IdentityFirstStage):
+                # also display when quantizing x0 while sampling
+                with self.ema_scope("Plotting Quantized Denoised"):
+                    samples, z_denoise_row = self.sample_log(cond=c,batch_size=N,ddim=use_ddim,
+                                                             ddim_steps=ddim_steps,eta=ddim_eta,
+                                                             quantize_denoised=True)
+                    # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True,
+                    #                                      quantize_denoised=True)
+                x_samples = self.decode_first_stage(samples.to(self.device))
+                log["samples_x0_quantized"] = x_samples
+            if inpaint:
+                # make a simple center square
+                h, w = z.shape[2], z.shape[3]
+                mask = torch.ones(N, h, w).to(self.device)
+                # zeros will be filled in
+                mask[:, h // 4:3 * h // 4, w // 4:3 * w // 4] = 0.
+                mask = mask[:, None, ...]
+                with self.ema_scope("Plotting Inpaint"):
+                    samples, _ = self.sample_log(cond=c,batch_size=N,ddim=use_ddim, eta=ddim_eta,
+                                                ddim_steps=ddim_steps, x0=z[:N], mask=mask)
+                x_samples = self.decode_first_stage(samples.to(self.device))
+                log["samples_inpainting"] = x_samples
+                log["mask"] = mask
+                # outpaint
+                with self.ema_scope("Plotting Outpaint"):
+                    samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim,eta=ddim_eta,
+                                                ddim_steps=ddim_steps, x0=z[:N], mask=mask)
+                x_samples = self.decode_first_stage(samples.to(self.device))
+                log["samples_outpainting"] = x_samples
+        if plot_progressive_rows:
+            with self.ema_scope("Plotting Progressives"):
+                img, progressives = self.progressive_denoising(c,
+                                                               shape=(self.channels, self.image_size, self.image_size),
+                                                               batch_size=N)
+            prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation")
+            log["progressive_row"] = prog_row
+        if return_keys:
+            if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0:
+                return log
+            else:
+                return {key: log[key] for key in return_keys}
+        return log
+    def configure_optimizers(self):
+        lr = self.learning_rate
+        params = list(self.model.parameters())
+        if self.cond_stage_trainable:
+            print(f"{self.__class__.__name__}: Also optimizing conditioner params!")
+            params = params + list(self.cond_stage_model.parameters())
+        if self.learn_logvar:
+            print('Diffusion model optimizing logvar')
+            params.append(self.logvar)
+        opt = torch.optim.AdamW(params, lr=lr)
+        if self.use_scheduler:
+            assert 'target' in self.scheduler_config
+            scheduler = instantiate_from_config(self.scheduler_config)
+            print("Setting up LambdaLR scheduler...")
+            scheduler = [
+                {
+                    'scheduler': LambdaLR(opt, lr_lambda=scheduler.schedule),
+                    'interval': 'step',
+                    'frequency': 1
+                }]
+            return [opt], scheduler
+        return opt
+    @torch.no_grad()
+    def to_rgb(self, x):
+        x = x.float()
+        if not hasattr(self, "colorize"):
+            self.colorize = torch.randn(3, x.shape[1], 1, 1).to(x)
+        x = nn.functional.conv2d(x, weight=self.colorize)
+        x = 2. * (x - x.min()) / (x.max() - x.min()) - 1.
+        return x
+class DiffusionWrapperV1(pl.LightningModule):
+    def __init__(self, diff_model_config, conditioning_key):
+        super().__init__()
+        self.diffusion_model = instantiate_from_config(diff_model_config)
+        self.conditioning_key = conditioning_key
+        assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm']
+    def forward(self, x, t, c_concat: list = None, c_crossattn: list = None):
+        if self.conditioning_key is None:
+            out = self.diffusion_model(x, t)
+        elif self.conditioning_key == 'concat':
+            xc = torch.cat([x] + c_concat, dim=1)
+            out = self.diffusion_model(xc, t)
+        elif self.conditioning_key == 'crossattn':
+            cc = torch.cat(c_crossattn, 1)
+            out = self.diffusion_model(x, t, context=cc)
+        elif self.conditioning_key == 'hybrid':
+            xc = torch.cat([x] + c_concat, dim=1)
+            cc = torch.cat(c_crossattn, 1)
+            out = self.diffusion_model(xc, t, context=cc)
+        elif self.conditioning_key == 'adm':
+            cc = c_crossattn[0]
+            out = self.diffusion_model(x, t, y=cc)
+        else:
+            raise NotImplementedError()
+        return out
+class Layout2ImgDiffusionV1(LatentDiffusionV1):
+    # TODO: move all layout-specific hacks to this class
+    def __init__(self, cond_stage_key, *args, **kwargs):
+        assert cond_stage_key == 'coordinates_bbox', 'Layout2ImgDiffusion only for cond_stage_key="coordinates_bbox"'
+        super().__init__(*args, cond_stage_key=cond_stage_key, **kwargs)
+    def log_images(self, batch, N=8, *args, **kwargs):
+        logs = super().log_images(*args, batch=batch, N=N, **kwargs)
+        key = 'train' if self.training else 'validation'
+        dset = self.trainer.datamodule.datasets[key]
+        mapper = dset.conditional_builders[self.cond_stage_key]
+        bbox_imgs = []
+        map_fn = lambda catno: dset.get_textual_label(dset.get_category_id(catno))
+        for tknzd_bbox in batch[self.cond_stage_key][:N]:
+            bboximg = mapper.plot(tknzd_bbox.detach().cpu(), map_fn, (256, 256))
+            bbox_imgs.append(bboximg)
+        cond_img = torch.stack(bbox_imgs, dim=0)
+        logs['bbox_image'] = cond_img
+        return logs
+# Attach the V1 classes defined in this file as attributes of the ldm.models.diffusion.ddpm module.
+ldm.models.diffusion.ddpm.DDPMV1 = DDPMV1
+ldm.models.diffusion.ddpm.LatentDiffusionV1 = LatentDiffusionV1
+ldm.models.diffusion.ddpm.DiffusionWrapperV1 = DiffusionWrapperV1
+ldm.models.diffusion.ddpm.Layout2ImgDiffusionV1 = Layout2ImgDiffusionV1

extensions-builtin/LDSR/vqvae_quantize.py ADDED Viewed

	@@ -0,0 +1,147 @@

+# Vendored from https://raw.githubusercontent.com/CompVis/taming-transformers/24268930bf1dce879235a7fddd0b2355b84d7ea6/taming/modules/vqvae/quantize.py,
+# where the license is as follows:
+#
+# Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+# OR OTHER DEALINGS IN THE SOFTWARE.
+import torch
+import torch.nn as nn
+import numpy as np
+from einops import rearrange
+class VectorQuantizer2(nn.Module):
+    """
+    Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly
+    avoids costly matrix multiplications and allows for post-hoc remapping of indices.
+    """
+    # NOTE: due to a bug the beta term was applied to the wrong term. for
+    # backwards compatibility we use the buggy version by default, but you can
+    # specify legacy=False to fix it.
+    def __init__(self, n_e, e_dim, beta, remap=None, unknown_index="random",
+                 sane_index_shape=False, legacy=True):
+        """Create the codebook and optional index remapping.
+        n_e: number of codebook entries; e_dim: size of each entry; beta: weight applied to
+        one of the two loss terms (which one depends on `legacy`, see forward).
+        remap: optional path passed to np.load, giving the list of "used" indices.
+        unknown_index: "random", "extra" or an integer, used for indices absent from `used`.
+        sane_index_shape: when true, forward returns indices shaped (batch, height, width).
+        """
+        super().__init__()
+        self.n_e = n_e
+        self.e_dim = e_dim
+        self.beta = beta
+        self.legacy = legacy
+        self.embedding = nn.Embedding(self.n_e, self.e_dim)
+        self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e)
+        self.remap = remap
+        if self.remap is not None:
+            self.register_buffer("used", torch.tensor(np.load(self.remap)))
+            self.re_embed = self.used.shape[0]
+            self.unknown_index = unknown_index  # "random" or "extra" or integer
+            if self.unknown_index == "extra":
+                # "extra" reserves one additional index, placed right after the used ones.
+                self.unknown_index = self.re_embed
+                self.re_embed = self.re_embed + 1
+            print(f"Remapping {self.n_e} indices to {self.re_embed} indices. "
+                  f"Using {self.unknown_index} for unknown indices.")
+        else:
+            self.re_embed = n_e
+        self.sane_index_shape = sane_index_shape
+    def remap_to_used(self, inds):
+        """Translate codebook indices into positions within `self.used`.
+        Indices that do not occur in `used` become a random value in [0, re_embed) when
+        `unknown_index` is "random", otherwise `unknown_index`. The input shape is preserved.
+        """
+        ishape = inds.shape
+        assert len(ishape) > 1
+        inds = inds.reshape(ishape[0], -1)
+        used = self.used.to(inds)
+        # match[b, i, j] is 1 where inds[b, i] equals used[j].
+        match = (inds[:, :, None] == used[None, None, ...]).long()
+        new = match.argmax(-1)
+        unknown = match.sum(2) < 1
+        if self.unknown_index == "random":
+            new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to(device=new.device)
+        else:
+            new[unknown] = self.unknown_index
+        return new.reshape(ishape)
+    def unmap_to_all(self, inds):
+        """Translate positions within `self.used` back into codebook indices (input shape preserved)."""
+        ishape = inds.shape
+        assert len(ishape) > 1
+        inds = inds.reshape(ishape[0], -1)
+        used = self.used.to(inds)
+        if self.re_embed > self.used.shape[0]:  # extra token
+            # NOTE(review): this writes in place; if reshape returned a view, the caller's tensor changes too.
+            inds[inds >= self.used.shape[0]] = 0  # simply set to zero
+        # Repeat `used` once per batch row, then pick entries by position.
+        back = torch.gather(used[None, :][inds.shape[0] * [0], :], 1, inds)
+        return back.reshape(ishape)
+    def forward(self, z, temp=None, rescale_logits=False, return_logits=False):
+        """Quantize `z` to its nearest codebook entries.
+        `temp`, `rescale_logits` and `return_logits` are accepted only for interface
+        compatibility and must be left at None/1.0, False and False.
+        Returns (z_q, loss, (perplexity, min_encodings, min_encoding_indices)); perplexity and
+        min_encodings are always None here.
+        """
+        assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel"
+        assert rescale_logits is False, "Only for interface compatible with Gumbel"
+        assert return_logits is False, "Only for interface compatible with Gumbel"
+        # reshape z -> (batch, height, width, channel) and flatten
+        z = rearrange(z, 'b c h w -> b h w c').contiguous()
+        z_flattened = z.view(-1, self.e_dim)
+        # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
+        d = torch.sum(z_flattened ** 2, dim=1, keepdim=True) + \
+            torch.sum(self.embedding.weight ** 2, dim=1) - 2 * \
+            torch.einsum('bd,dn->bn', z_flattened, rearrange(self.embedding.weight, 'n d -> d n'))
+        min_encoding_indices = torch.argmin(d, dim=1)
+        z_q = self.embedding(min_encoding_indices).view(z.shape)
+        perplexity = None
+        min_encodings = None
+        # compute loss for embedding
+        if not self.legacy:
+            # beta weights the term in which z_q is detached
+            loss = self.beta * torch.mean((z_q.detach() - z) ** 2) + \
+                   torch.mean((z_q - z.detach()) ** 2)
+        else:
+            # legacy: beta weights the term in which z is detached
+            loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * \
+                   torch.mean((z_q - z.detach()) ** 2)
+        # preserve gradients
+        z_q = z + (z_q - z).detach()
+        # reshape back to match original input shape
+        z_q = rearrange(z_q, 'b h w c -> b c h w').contiguous()
+        if self.remap is not None:
+            min_encoding_indices = min_encoding_indices.reshape(z.shape[0], -1)  # add batch axis
+            min_encoding_indices = self.remap_to_used(min_encoding_indices)
+            min_encoding_indices = min_encoding_indices.reshape(-1, 1)  # flatten
+        if self.sane_index_shape:
+            min_encoding_indices = min_encoding_indices.reshape(
+                z_q.shape[0], z_q.shape[2], z_q.shape[3])
+        return z_q, loss, (perplexity, min_encodings, min_encoding_indices)
+    def get_codebook_entry(self, indices, shape):
+        """Look up codebook vectors for `indices`.
+        When `shape` is given the result is viewed as that shape and permuted with (0, 3, 1, 2).
+        NOTE(review): with remapping enabled `shape[0]` is read unconditionally, so `shape`
+        must not be None in that case.
+        """
+        # shape specifying (batch, height, width, channel)
+        if self.remap is not None:
+            indices = indices.reshape(shape[0], -1)  # add batch axis
+            indices = self.unmap_to_all(indices)
+            indices = indices.reshape(-1)  # flatten again
+        # get quantized latent vectors
+        z_q = self.embedding(indices)
+        if shape is not None:
+            z_q = z_q.view(shape)
+            # reshape back to match original input shape
+            z_q = z_q.permute(0, 3, 1, 2).contiguous()
+        return z_q

extensions-builtin/Lora/__pycache__/extra_networks_lora.cpython-310.pyc ADDED Viewed

Binary file (2.35 kB). View file

extensions-builtin/Lora/__pycache__/lora.cpython-310.pyc ADDED Viewed

Binary file (547 Bytes). View file

extensions-builtin/Lora/__pycache__/lyco_helpers.cpython-310.pyc ADDED Viewed

Binary file (946 Bytes). View file

extensions-builtin/Lora/__pycache__/network.cpython-310.pyc ADDED Viewed

Binary file (5.65 kB). View file

extensions-builtin/Lora/__pycache__/network_full.cpython-310.pyc ADDED Viewed

Binary file (1.51 kB). View file

extensions-builtin/Lora/__pycache__/network_hada.cpython-310.pyc ADDED Viewed

Binary file (2.24 kB). View file

extensions-builtin/Lora/__pycache__/network_ia3.cpython-310.pyc ADDED Viewed

Binary file (1.62 kB). View file

extensions-builtin/Lora/__pycache__/network_lokr.cpython-310.pyc ADDED Viewed

Binary file (2.43 kB). View file

extensions-builtin/Lora/__pycache__/network_lora.cpython-310.pyc ADDED Viewed

Binary file (3.5 kB). View file

extensions-builtin/Lora/__pycache__/networks.cpython-310.pyc ADDED Viewed

Binary file (12.7 kB). View file

extensions-builtin/Lora/__pycache__/preload.cpython-310.pyc ADDED Viewed

Binary file (632 Bytes). View file

extensions-builtin/Lora/__pycache__/ui_edit_user_metadata.cpython-310.pyc ADDED Viewed

Binary file (7.47 kB). View file

extensions-builtin/Lora/__pycache__/ui_extra_networks_lora.cpython-310.pyc ADDED Viewed

Binary file (3.07 kB). View file

extensions-builtin/Lora/extra_networks_lora.py ADDED Viewed

	@@ -0,0 +1,59 @@

+from modules import extra_networks, shared
+import networks
+class ExtraNetworkLora(extra_networks.ExtraNetwork):
+    def __init__(self):
+        super().__init__('lora')
+    def activate(self, p, params_list):
+        additional = shared.opts.sd_lora
+        if additional != "None" and additional in networks.available_networks and not any(x for x in params_list if x.items[0] == additional):
+            p.all_prompts = [x + f"<lora:{additional}:{shared.opts.extra_networks_default_multiplier}>" for x in p.all_prompts]
+            params_list.append(extra_networks.ExtraNetworkParams(items=[additional, shared.opts.extra_networks_default_multiplier]))
+        names = []
+        te_multipliers = []
+        unet_multipliers = []
+        dyn_dims = []
+        for params in params_list:
+            assert params.items
+            names.append(params.positional[0])
+            te_multiplier = float(params.positional[1]) if len(params.positional) > 1 else 1.0
+            te_multiplier = float(params.named.get("te", te_multiplier))
+            unet_multiplier = float(params.positional[2]) if len(params.positional) > 2 else te_multiplier
+            unet_multiplier = float(params.named.get("unet", unet_multiplier))
+            dyn_dim = int(params.positional[3]) if len(params.positional) > 3 else None
+            dyn_dim = int(params.named["dyn"]) if "dyn" in params.named else dyn_dim
+            te_multipliers.append(te_multiplier)
+            unet_multipliers.append(unet_multiplier)
+            dyn_dims.append(dyn_dim)
+        networks.load_networks(names, te_multipliers, unet_multipliers, dyn_dims)
+        if shared.opts.lora_add_hashes_to_infotext:
+            network_hashes = []
+            for item in networks.loaded_networks:
+                shorthash = item.network_on_disk.shorthash
+                if not shorthash:
+                    continue
+                alias = item.mentioned_name
+                if not alias:
+                    continue
+                alias = alias.replace(":", "").replace(",", "")
+                network_hashes.append(f"{alias}: {shorthash}")
+            if network_hashes:
+                p.extra_generation_params["Lora hashes"] = ", ".join(network_hashes)
+    def deactivate(self, p):
+        pass

extensions-builtin/Lora/lora.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import networks
+# Lora-prefixed aliases that re-export the corresponding names from the `networks` module.
+list_available_loras = networks.list_available_networks
+available_loras = networks.available_networks
+available_lora_aliases = networks.available_network_aliases
+available_lora_hash_lookup = networks.available_network_hash_lookup
+forbidden_lora_aliases = networks.forbidden_network_aliases
+loaded_loras = networks.loaded_networks

extensions-builtin/Lora/lyco_helpers.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import torch
+def make_weight_cp(t, wa, wb):
+    temp = torch.einsum('i j k l, j r -> i r k l', t, wb)
+    return torch.einsum('i j k l, i r -> r j k l', temp, wa)
+def rebuild_conventional(up, down, shape, dyn_dim=None):
+    up = up.reshape(up.size(0), -1)
+    down = down.reshape(down.size(0), -1)
+    if dyn_dim is not None:
+        up = up[:, :dyn_dim]
+        down = down[:dyn_dim, :]
+    return (up @ down).reshape(shape)
+def rebuild_cp_decomposition(up, down, mid):
+    up = up.reshape(up.size(0), -1)
+    down = down.reshape(down.size(0), -1)
+    return torch.einsum('n m k l, i n, m j -> i j k l', mid, up, down)

extensions-builtin/Lora/network.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import os
+from collections import namedtuple
+import enum
+from modules import sd_models, cache, errors, hashes, shared
+# Bundle describing one module of a network: its key in the network file, the key and object of the
+# target model module, and `w` (presumably a mapping of tensor name -> tensor; it is read with .get/[]).
+NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module'])
+# Sort priority for known metadata keys (lower sorts first); keys not listed here are given 999.
+metadata_tags_order = {"ss_sd_model_name": 1, "ss_resolution": 2, "ss_clip_skip": 3, "ss_num_train_images": 10, "ss_tag_frequency": 20}
+class SdVersion(enum.Enum):
+    """Model family a network file targets, as determined by NetworkOnDisk.detect_version."""
+    Unknown = 1  # no metadata to decide from
+    SD1 = 2
+    SD2 = 3
+    SDXL = 4
+class NetworkOnDisk:
+    def __init__(self, name, filename):
+        self.name = name
+        self.filename = filename
+        self.metadata = {}
+        self.is_safetensors = os.path.splitext(filename)[1].lower() == ".safetensors"
+        def read_metadata():
+            metadata = sd_models.read_metadata_from_safetensors(filename)
+            metadata.pop('ssmd_cover_images', None)  # those are cover images, and they are too big to display in UI as text
+            return metadata
+        if self.is_safetensors:
+            try:
+                self.metadata = cache.cached_data_for_file('safetensors-metadata', "lora/" + self.name, filename, read_metadata)
+            except Exception as e:
+                errors.display(e, f"reading lora {filename}")
+        if self.metadata:
+            m = {}
+            for k, v in sorted(self.metadata.items(), key=lambda x: metadata_tags_order.get(x[0], 999)):
+                m[k] = v
+            self.metadata = m
+        self.alias = self.metadata.get('ss_output_name', self.name)
+        self.hash = None
+        self.shorthash = None
+        self.set_hash(
+            self.metadata.get('sshs_model_hash') or
+            hashes.sha256_from_cache(self.filename, "lora/" + self.name, use_addnet_hash=self.is_safetensors) or
+            ''
+        )
+        self.sd_version = self.detect_version()
+    def detect_version(self):
+        if str(self.metadata.get('ss_base_model_version', "")).startswith("sdxl_"):
+            return SdVersion.SDXL
+        elif str(self.metadata.get('ss_v2', "")) == "True":
+            return SdVersion.SD2
+        elif len(self.metadata):
+            return SdVersion.SD1
+        return SdVersion.Unknown
+    def set_hash(self, v):
+        self.hash = v
+        self.shorthash = self.hash[0:12]
+        if self.shorthash:
+            import networks
+            networks.available_network_hash_lookup[self.shorthash] = self
+    def read_hash(self):
+        if not self.hash:
+            self.set_hash(hashes.sha256(self.filename, "lora/" + self.name, use_addnet_hash=self.is_safetensors) or '')
+    def get_alias(self):
+        import networks
+        if shared.opts.lora_preferred_name == "Filename" or self.alias.lower() in networks.forbidden_network_aliases:
+            return self.name
+        else:
+            return self.alias
+class Network:  # LoraModule
+    """A network together with its multipliers and the modules created for it."""
+    def __init__(self, name, network_on_disk: NetworkOnDisk):
+        self.name = name
+        self.network_on_disk = network_on_disk
+        # Multipliers returned by NetworkModule.multiplier() for text-encoder and other modules.
+        self.te_multiplier = 1.0
+        self.unet_multiplier = 1.0
+        # Optional rank limit; forwarded to lyco_helpers.rebuild_conventional by NetworkModuleLora.
+        self.dyn_dim = None
+        self.modules = {}
+        self.mtime = None
+        self.mentioned_name = None
+        """the text that was used to add the network to prompt - can be either name or an alias"""
+class ModuleType:
+    """Base factory; subclasses decide from `weights.w` whether they can build a module."""
+    def create_module(self, net: Network, weights: NetworkWeights) -> "NetworkModule | None":
+        """Return a NetworkModule for `weights`, or None when this type does not apply (always None here)."""
+        return None
+class NetworkModule:
+    def __init__(self, net: Network, weights: NetworkWeights):
+        self.network = net
+        self.network_key = weights.network_key
+        self.sd_key = weights.sd_key
+        self.sd_module = weights.sd_module
+        if hasattr(self.sd_module, 'weight'):
+            self.shape = self.sd_module.weight.shape
+        self.dim = None
+        self.bias = weights.w.get("bias")
+        self.alpha = weights.w["alpha"].item() if "alpha" in weights.w else None
+        self.scale = weights.w["scale"].item() if "scale" in weights.w else None
+    def multiplier(self):
+        if 'transformer' in self.sd_key[:20]:
+            return self.network.te_multiplier
+        else:
+            return self.network.unet_multiplier
+    def calc_scale(self):
+        if self.scale is not None:
+            return self.scale
+        if self.dim is not None and self.alpha is not None:
+            return self.alpha / self.dim
+        return 1.0
+    def finalize_updown(self, updown, orig_weight, output_shape):
+        if self.bias is not None:
+            updown = updown.reshape(self.bias.shape)
+            updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
+            updown = updown.reshape(output_shape)
+        if len(output_shape) == 4:
+            updown = updown.reshape(output_shape)
+        if orig_weight.size().numel() == updown.size().numel():
+            updown = updown.reshape(orig_weight.shape)
+        return updown * self.calc_scale() * self.multiplier()
+    def calc_updown(self, target):
+        raise NotImplementedError()
+    def forward(self, x, y):
+        raise NotImplementedError()

extensions-builtin/Lora/network_full.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import network
+class ModuleTypeFull(network.ModuleType):
+    def create_module(self, net: network.Network, weights: network.NetworkWeights):
+        if all(x in weights.w for x in ["diff"]):
+            return NetworkModuleFull(net, weights)
+        return None
+class NetworkModuleFull(network.NetworkModule):
+    def __init__(self,  net: network.Network, weights: network.NetworkWeights):
+        super().__init__(net, weights)
+        self.weight = weights.w.get("diff")
+    def calc_updown(self, orig_weight):
+        output_shape = self.weight.shape
+        updown = self.weight.to(orig_weight.device, dtype=orig_weight.dtype)
+        return self.finalize_updown(updown, orig_weight, output_shape)

extensions-builtin/Lora/network_hada.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import lyco_helpers
+import network
+class ModuleTypeHada(network.ModuleType):
+    def create_module(self, net: network.Network, weights: network.NetworkWeights):
+        if all(x in weights.w for x in ["hada_w1_a", "hada_w1_b", "hada_w2_a", "hada_w2_b"]):
+            return NetworkModuleHada(net, weights)
+        return None
+class NetworkModuleHada(network.NetworkModule):
+    def __init__(self,  net: network.Network, weights: network.NetworkWeights):
+        super().__init__(net, weights)
+        if hasattr(self.sd_module, 'weight'):
+            self.shape = self.sd_module.weight.shape
+        self.w1a = weights.w["hada_w1_a"]
+        self.w1b = weights.w["hada_w1_b"]
+        self.dim = self.w1b.shape[0]
+        self.w2a = weights.w["hada_w2_a"]
+        self.w2b = weights.w["hada_w2_b"]
+        self.t1 = weights.w.get("hada_t1")
+        self.t2 = weights.w.get("hada_t2")
+    def calc_updown(self, orig_weight):
+        w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
+        w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
+        w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
+        w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+        output_shape = [w1a.size(0), w1b.size(1)]
+        if self.t1 is not None:
+            output_shape = [w1a.size(1), w1b.size(1)]
+            t1 = self.t1.to(orig_weight.device, dtype=orig_weight.dtype)
+            updown1 = lyco_helpers.make_weight_cp(t1, w1a, w1b)
+            output_shape += t1.shape[2:]
+        else:
+            if len(w1b.shape) == 4:
+                output_shape += w1b.shape[2:]
+            updown1 = lyco_helpers.rebuild_conventional(w1a, w1b, output_shape)
+        if self.t2 is not None:
+            t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
+            updown2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
+        else:
+            updown2 = lyco_helpers.rebuild_conventional(w2a, w2b, output_shape)
+        updown = updown1 * updown2
+        return self.finalize_updown(updown, orig_weight, output_shape)

extensions-builtin/Lora/network_ia3.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import network
+class ModuleTypeIa3(network.ModuleType):
+    def create_module(self, net: network.Network, weights: network.NetworkWeights):
+        if all(x in weights.w for x in ["weight"]):
+            return NetworkModuleIa3(net, weights)
+        return None
+class NetworkModuleIa3(network.NetworkModule):
+    def __init__(self,  net: network.Network, weights: network.NetworkWeights):
+        super().__init__(net, weights)
+        self.w = weights.w["weight"]
+        self.on_input = weights.w["on_input"].item()
+    def calc_updown(self, orig_weight):
+        w = self.w.to(orig_weight.device, dtype=orig_weight.dtype)
+        output_shape = [w.size(0), orig_weight.size(1)]
+        if self.on_input:
+            output_shape.reverse()
+        else:
+            w = w.reshape(-1, 1)
+        updown = orig_weight * w
+        return self.finalize_updown(updown, orig_weight, output_shape)

extensions-builtin/Lora/network_lokr.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import torch
+import lyco_helpers
+import network
+class ModuleTypeLokr(network.ModuleType):
+    def create_module(self, net: network.Network, weights: network.NetworkWeights):
+        has_1 = "lokr_w1" in weights.w or ("lokr_w1_a" in weights.w and "lokr_w1_b" in weights.w)
+        has_2 = "lokr_w2" in weights.w or ("lokr_w2_a" in weights.w and "lokr_w2_b" in weights.w)
+        if has_1 and has_2:
+            return NetworkModuleLokr(net, weights)
+        return None
+def make_kron(orig_shape, w1, w2):
+    if len(w2.shape) == 4:
+        w1 = w1.unsqueeze(2).unsqueeze(2)
+    w2 = w2.contiguous()
+    return torch.kron(w1, w2).reshape(orig_shape)
+class NetworkModuleLokr(network.NetworkModule):
+    def __init__(self,  net: network.Network, weights: network.NetworkWeights):
+        super().__init__(net, weights)
+        self.w1 = weights.w.get("lokr_w1")
+        self.w1a = weights.w.get("lokr_w1_a")
+        self.w1b = weights.w.get("lokr_w1_b")
+        self.dim = self.w1b.shape[0] if self.w1b is not None else self.dim
+        self.w2 = weights.w.get("lokr_w2")
+        self.w2a = weights.w.get("lokr_w2_a")
+        self.w2b = weights.w.get("lokr_w2_b")
+        self.dim = self.w2b.shape[0] if self.w2b is not None else self.dim
+        self.t2 = weights.w.get("lokr_t2")
+    def calc_updown(self, orig_weight):
+        if self.w1 is not None:
+            w1 = self.w1.to(orig_weight.device, dtype=orig_weight.dtype)
+        else:
+            w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
+            w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w1 = w1a @ w1b
+        if self.w2 is not None:
+            w2 = self.w2.to(orig_weight.device, dtype=orig_weight.dtype)
+        elif self.t2 is None:
+            w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2 = w2a @ w2b
+        else:
+            t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
+            w2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
+        output_shape = [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)]
+        if len(orig_weight.shape) == 4:
+            output_shape = orig_weight.shape
+        updown = make_kron(output_shape, w1, w2)
+        return self.finalize_updown(updown, orig_weight, output_shape)

extensions-builtin/Lora/network_lora.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import torch
+import lyco_helpers
+import network
+from modules import devices
+class ModuleTypeLora(network.ModuleType):
+    def create_module(self, net: network.Network, weights: network.NetworkWeights):
+        if all(x in weights.w for x in ["lora_up.weight", "lora_down.weight"]):
+            return NetworkModuleLora(net, weights)
+        return None
+class NetworkModuleLora(network.NetworkModule):
+    """Module built from up/down (and optional mid) weights wrapped in torch layers."""
+    def __init__(self,  net: network.Network, weights: network.NetworkWeights):
+        super().__init__(net, weights)
+        self.up_model = self.create_module(weights.w, "lora_up.weight")
+        self.down_model = self.create_module(weights.w, "lora_down.weight")
+        # The mid weight is optional; create_module returns None when it is absent.
+        self.mid_model = self.create_module(weights.w, "lora_mid.weight", none_ok=True)
+        self.dim = weights.w["lora_down.weight"].shape[0]
+    def create_module(self, weights, key, none_ok=False):
+        """Wrap `weights[key]` in a bias-free Linear or Conv2d layer chosen from the target module's type.
+        Returns None when the weight is missing and `none_ok` is set. Raises AssertionError when
+        no branch matches the target module type and key.
+        """
+        weight = weights.get(key)
+        if weight is None and none_ok:
+            return None
+        is_linear = type(self.sd_module) in [torch.nn.Linear, torch.nn.modules.linear.NonDynamicallyQuantizableLinear, torch.nn.MultiheadAttention]
+        is_conv = type(self.sd_module) in [torch.nn.Conv2d]
+        if is_linear:
+            weight = weight.reshape(weight.shape[0], -1)
+            module = torch.nn.Linear(weight.shape[1], weight.shape[0], bias=False)
+        # NOTE(review): `and` binds tighter than `or`, so this reads
+        # (is_conv and key == "lora_down.weight") or key == "dyn_up" — confirm that is intended.
+        elif is_conv and key == "lora_down.weight" or key == "dyn_up":
+            if len(weight.shape) == 2:
+                weight = weight.reshape(weight.shape[0], -1, 1, 1)
+            if weight.shape[2] != 1 or weight.shape[3] != 1:
+                # Non-1x1 kernel: reuse the target conv's kernel size, stride and padding.
+                module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
+            else:
+                module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
+        elif is_conv and key == "lora_mid.weight":
+            module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], self.sd_module.kernel_size, self.sd_module.stride, self.sd_module.padding, bias=False)
+        # NOTE(review): same precedence as above: (is_conv and key == "lora_up.weight") or key == "dyn_down".
+        elif is_conv and key == "lora_up.weight" or key == "dyn_down":
+            module = torch.nn.Conv2d(weight.shape[1], weight.shape[0], (1, 1), bias=False)
+        else:
+            raise AssertionError(f'Lora layer {self.network_key} matched a layer with unsupported type: {type(self.sd_module).__name__}')
+        with torch.no_grad():
+            if weight.shape != module.weight.shape:
+                weight = weight.reshape(module.weight.shape)
+            module.weight.copy_(weight)
+        # Stored on devices.cpu; forward() moves the up/down layers to devices.device.
+        module.to(device=devices.cpu, dtype=devices.dtype)
+        module.weight.requires_grad_(False)
+        return module
+    def calc_updown(self, orig_weight):
+        """Rebuild the weight delta from the up/down (and optional mid) weights and finalize it."""
+        up = self.up_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
+        down = self.down_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
+        output_shape = [up.size(0), down.size(1)]
+        if self.mid_model is not None:
+            # cp-decomposition
+            mid = self.mid_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
+            updown = lyco_helpers.rebuild_cp_decomposition(up, down, mid)
+            output_shape += mid.shape[2:]
+        else:
+            if len(down.shape) == 4:
+                output_shape += down.shape[2:]
+            updown = lyco_helpers.rebuild_conventional(up, down, output_shape, self.network.dyn_dim)
+        return self.finalize_updown(updown, orig_weight, output_shape)
+    def forward(self, x, y):
+        """Return `y` plus the up(down(x)) output scaled by multiplier() and calc_scale()."""
+        self.up_model.to(device=devices.device)
+        self.down_model.to(device=devices.device)
+        return y + self.up_model(self.down_model(x)) * self.multiplier() * self.calc_scale()

extensions-builtin/Lora/networks.py ADDED Viewed

	@@ -0,0 +1,468 @@

+import os
+import re
+import network
+import network_lora
+import network_hada
+import network_ia3
+import network_lokr
+import network_full
+import torch
+from typing import Union
+from modules import shared, devices, sd_models, errors, scripts, sd_hijack
+# One factory instance per supported network module format.
+module_types = [
+    network_lora.ModuleTypeLora(),
+    network_hada.ModuleTypeHada(),
+    network_ia3.ModuleTypeIa3(),
+    network_lokr.ModuleTypeLokr(),
+    network_full.ModuleTypeFull(),
+]
+re_digits = re.compile(r"\d+")
+re_x_proj = re.compile(r"(.*)_([qkv]_proj)$")
+# Cache of compiled patterns, keyed by pattern text; filled by convert_diffusers_name_to_compvis.
+re_compiled = {}
+# Per block kind, maps a diffusers layer suffix to the suffix used in the converted key.
+suffix_conversion = {
+    "attentions": {},
+    "resnets": {
+        "conv1": "in_layers_2",
+        "conv2": "out_layers_3",
+        "time_emb_proj": "emb_layers_1",
+        "conv_shortcut": "skip_connection",
+    }
+}
+def convert_diffusers_name_to_compvis(key, is_sd2):
+    """
+    Translates a diffusers-style network key (lora_unet_*, lora_te_*, lora_te2_*) into the
+    underscore-separated module name that load_network() looks up in network_layer_mapping.
+    Patterns are tried in order and the first match decides the result; a key matching none of
+    them is returned unchanged. is_sd2 switches lora_te_* keys to the model_transformer_resblocks naming.
+    """
+    def match(match_list, regex_text):
+        # compile each pattern once and keep it in the re_compiled cache (defined outside this function)
+        regex = re_compiled.get(regex_text)
+        if regex is None:
+            regex = re.compile(regex_text)
+            re_compiled[regex_text] = regex
+        r = re.match(regex, key)
+        if not r:
+            return False
+        # overwrite the caller's list with the captured groups; groups that re_digits matches are stored as ints
+        match_list.clear()
+        match_list.extend([int(x) if re.match(re_digits, x) else x for x in r.groups()])
+        return True
+    m = []
+    if match(m, r"lora_unet_conv_in(.*)"):
+        return f'diffusion_model_input_blocks_0_0{m[0]}'
+    if match(m, r"lora_unet_conv_out(.*)"):
+        return f'diffusion_model_out_2{m[0]}'
+    if match(m, r"lora_unet_time_embedding_linear_(\d+)(.*)"):
+        # linear_1 -> time_embed_0, linear_2 -> time_embed_2
+        return f"diffusion_model_time_embed_{m[0] * 2 - 2}{m[1]}"
+    if match(m, r"lora_unet_down_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
+        suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
+        # input block index = 1 + block * 3 + layer; attentions sit at sub-index 1, resnets at 0
+        return f"diffusion_model_input_blocks_{1 + m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
+    if match(m, r"lora_unet_mid_block_(attentions|resnets)_(\d+)_(.+)"):
+        suffix = suffix_conversion.get(m[0], {}).get(m[2], m[2])
+        # attentions map to middle_block_1, resnet N maps to middle_block_{2N}
+        return f"diffusion_model_middle_block_{1 if m[0] == 'attentions' else m[1] * 2}_{suffix}"
+    if match(m, r"lora_unet_up_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
+        suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
+        return f"diffusion_model_output_blocks_{m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
+    if match(m, r"lora_unet_down_blocks_(\d+)_downsamplers_0_conv"):
+        return f"diffusion_model_input_blocks_{3 + m[0] * 3}_0_op"
+    if match(m, r"lora_unet_up_blocks_(\d+)_upsamplers_0_conv"):
+        # the conv sub-index is 1 for the first up block and 2 for every later one
+        return f"diffusion_model_output_blocks_{2 + m[0] * 3}_{2 if m[0]>0 else 1}_conv"
+    if match(m, r"lora_te_text_model_encoder_layers_(\d+)_(.+)"):
+        if is_sd2:
+            if 'mlp_fc1' in m[1]:
+                return f"model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc1', 'mlp_c_fc')}"
+            elif 'mlp_fc2' in m[1]:
+                return f"model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc2', 'mlp_c_proj')}"
+            else:
+                return f"model_transformer_resblocks_{m[0]}_{m[1].replace('self_attn', 'attn')}"
+        return f"transformer_text_model_encoder_layers_{m[0]}_{m[1]}"
+    if match(m, r"lora_te2_text_model_encoder_layers_(\d+)_(.+)"):
+        # second text encoder: same renames as the is_sd2 branch above, with a "1_" prefix
+        if 'mlp_fc1' in m[1]:
+            return f"1_model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc1', 'mlp_c_fc')}"
+        elif 'mlp_fc2' in m[1]:
+            return f"1_model_transformer_resblocks_{m[0]}_{m[1].replace('mlp_fc2', 'mlp_c_proj')}"
+        else:
+            return f"1_model_transformer_resblocks_{m[0]}_{m[1].replace('self_attn', 'attn')}"
+    return key
+def assign_network_names_to_compvis_modules(sd_model):
+    network_layer_mapping = {}
+    if shared.sd_model.is_sdxl:
+        for i, embedder in enumerate(shared.sd_model.conditioner.embedders):
+            if not hasattr(embedder, 'wrapped'):
+                continue
+            for name, module in embedder.wrapped.named_modules():
+                network_name = f'{i}_{name.replace(".", "_")}'
+                network_layer_mapping[network_name] = module
+                module.network_layer_name = network_name
+    else:
+        for name, module in shared.sd_model.cond_stage_model.wrapped.named_modules():
+            network_name = name.replace(".", "_")
+            network_layer_mapping[network_name] = module
+            module.network_layer_name = network_name
+    for name, module in shared.sd_model.model.named_modules():
+        network_name = name.replace(".", "_")
+        network_layer_mapping[network_name] = module
+        module.network_layer_name = network_name
+    sd_model.network_layer_mapping = network_layer_mapping
+def load_network(name, network_on_disk):
+    net = network.Network(name, network_on_disk)
+    net.mtime = os.path.getmtime(network_on_disk.filename)
+    sd = sd_models.read_state_dict(network_on_disk.filename)
+    # this should not be needed but is here as an emergency fix for an unknown error people are experiencing in 1.2.0
+    if not hasattr(shared.sd_model, 'network_layer_mapping'):
+        assign_network_names_to_compvis_modules(shared.sd_model)
+    keys_failed_to_match = {}
+    is_sd2 = 'model_transformer_resblocks' in shared.sd_model.network_layer_mapping
+    matched_networks = {}
+    for key_network, weight in sd.items():
+        key_network_without_network_parts, network_part = key_network.split(".", 1)
+        key = convert_diffusers_name_to_compvis(key_network_without_network_parts, is_sd2)
+        sd_module = shared.sd_model.network_layer_mapping.get(key, None)
+        if sd_module is None:
+            m = re_x_proj.match(key)
+            if m:
+                sd_module = shared.sd_model.network_layer_mapping.get(m.group(1), None)
+        # SDXL loras seem to already have correct compvis keys, so only need to replace "lora_unet" with "diffusion_model"
+        if sd_module is None and "lora_unet" in key_network_without_network_parts:
+            key = key_network_without_network_parts.replace("lora_unet", "diffusion_model")
+            sd_module = shared.sd_model.network_layer_mapping.get(key, None)
+        elif sd_module is None and "lora_te1_text_model" in key_network_without_network_parts:
+            key = key_network_without_network_parts.replace("lora_te1_text_model", "0_transformer_text_model")
+            sd_module = shared.sd_model.network_layer_mapping.get(key, None)
+            # some SD1 Loras also have correct compvis keys
+            if sd_module is None:
+                key = key_network_without_network_parts.replace("lora_te1_text_model", "transformer_text_model")
+                sd_module = shared.sd_model.network_layer_mapping.get(key, None)
+        if sd_module is None:
+            keys_failed_to_match[key_network] = key
+            continue
+        if key not in matched_networks:
+            matched_networks[key] = network.NetworkWeights(network_key=key_network, sd_key=key, w={}, sd_module=sd_module)
+        matched_networks[key].w[network_part] = weight
+    for key, weights in matched_networks.items():
+        net_module = None
+        for nettype in module_types:
+            net_module = nettype.create_module(net, weights)
+            if net_module is not None:
+                break
+        if net_module is None:
+            raise AssertionError(f"Could not find a module type (out of {', '.join([x.__class__.__name__ for x in module_types])}) that would accept those keys: {', '.join(weights.w)}")
+        net.modules[key] = net_module
+    if keys_failed_to_match:
+        print(f"Failed to match keys when loading network {network_on_disk.filename}: {keys_failed_to_match}")
+    return net
+def load_networks(names, te_multipliers=None, unet_multipliers=None, dyn_dims=None):
+    already_loaded = {}
+    for net in loaded_networks:
+        if net.name in names:
+            already_loaded[net.name] = net
+    loaded_networks.clear()
+    networks_on_disk = [available_network_aliases.get(name, None) for name in names]
+    if any(x is None for x in networks_on_disk):
+        list_available_networks()
+        networks_on_disk = [available_network_aliases.get(name, None) for name in names]
+    failed_to_load_networks = []
+    for i, name in enumerate(names):
+        net = already_loaded.get(name, None)
+        network_on_disk = networks_on_disk[i]
+        if network_on_disk is not None:
+            if net is None or os.path.getmtime(network_on_disk.filename) > net.mtime:
+                try:
+                    net = load_network(name, network_on_disk)
+                except Exception as e:
+                    errors.display(e, f"loading network {network_on_disk.filename}")
+                    continue
+            net.mentioned_name = name
+            network_on_disk.read_hash()
+        if net is None:
+            failed_to_load_networks.append(name)
+            print(f"Couldn't find network with name {name}")
+            continue
+        net.te_multiplier = te_multipliers[i] if te_multipliers else 1.0
+        net.unet_multiplier = unet_multipliers[i] if unet_multipliers else 1.0
+        net.dyn_dim = dyn_dims[i] if dyn_dims else 1.0
+        loaded_networks.append(net)
+    if failed_to_load_networks:
+        sd_hijack.model_hijack.comments.append("Failed to find networks: " + ", ".join(failed_to_load_networks))
+def network_restore_weights_from_backup(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.MultiheadAttention]):
+    weights_backup = getattr(self, "network_weights_backup", None)
+    if weights_backup is None:
+        return
+    if isinstance(self, torch.nn.MultiheadAttention):
+        self.in_proj_weight.copy_(weights_backup[0])
+        self.out_proj.weight.copy_(weights_backup[1])
+    else:
+        self.weight.copy_(weights_backup)
+def network_apply_weights(self: Union[torch.nn.Conv2d, torch.nn.Linear, torch.nn.MultiheadAttention]):
+    """
+    Applies the currently selected set of networks to the weights of torch layer self.
+    If weights already have this particular set of networks applied, does nothing.
+    If not, restores original weights from backup and alters weights according to networks.
+    """
+    network_layer_name = getattr(self, 'network_layer_name', None)
+    if network_layer_name is None:
+        # layer was never registered by assign_network_names_to_compvis_modules, nothing to apply
+        return
+    current_names = getattr(self, "network_current_names", ())
+    # the multipliers are part of the signature, so changing a weight in the prompt triggers a re-apply
+    wanted_names = tuple((x.name, x.te_multiplier, x.unet_multiplier, x.dyn_dim) for x in loaded_networks)
+    weights_backup = getattr(self, "network_weights_backup", None)
+    if weights_backup is None:
+        # first call for this layer: keep a copy of the unmodified weights on devices.cpu
+        if isinstance(self, torch.nn.MultiheadAttention):
+            weights_backup = (self.in_proj_weight.to(devices.cpu, copy=True), self.out_proj.weight.to(devices.cpu, copy=True))
+        else:
+            weights_backup = self.weight.to(devices.cpu, copy=True)
+        self.network_weights_backup = weights_backup
+    if current_names != wanted_names:
+        network_restore_weights_from_backup(self)
+        for net in loaded_networks:
+            module = net.modules.get(network_layer_name, None)
+            if module is not None and hasattr(self, 'weight'):
+                with torch.no_grad():
+                    updown = module.calc_updown(self.weight)
+                    if len(self.weight.shape) == 4 and self.weight.shape[1] == 9:
+                        # inpainting model. zero pad updown to make channel[1]  4 to 9
+                        updown = torch.nn.functional.pad(updown, (0, 0, 0, 0, 0, 5))
+                    self.weight += updown
+                    continue
+            # MultiheadAttention has no single weight: look for separate q/k/v/out modules of this layer
+            module_q = net.modules.get(network_layer_name + "_q_proj", None)
+            module_k = net.modules.get(network_layer_name + "_k_proj", None)
+            module_v = net.modules.get(network_layer_name + "_v_proj", None)
+            module_out = net.modules.get(network_layer_name + "_out_proj", None)
+            if isinstance(self, torch.nn.MultiheadAttention) and module_q and module_k and module_v and module_out:
+                with torch.no_grad():
+                    updown_q = module_q.calc_updown(self.in_proj_weight)
+                    updown_k = module_k.calc_updown(self.in_proj_weight)
+                    updown_v = module_v.calc_updown(self.in_proj_weight)
+                    # the q, k and v updates are stacked in that order before being added to in_proj_weight
+                    updown_qkv = torch.vstack([updown_q, updown_k, updown_v])
+                    updown_out = module_out.calc_updown(self.out_proj.weight)
+                    self.in_proj_weight += updown_qkv
+                    self.out_proj.weight += updown_out
+                    continue
+            if module is None:
+                # this network simply has nothing for this layer
+                continue
+            # reached only when the network has a module for this layer but neither branch above could use it
+            print(f'failed to calculate network weights for layer {network_layer_name}')
+        self.network_current_names = wanted_names
+def network_forward(module, input, original_forward):
+    """
+    Old way of applying Lora by executing operations during layer's forward.
+    Stacking many loras this way results in big performance degradation.
+    """
+    if len(loaded_networks) == 0:
+        return original_forward(module, input)
+    input = devices.cond_cast_unet(input)
+    network_restore_weights_from_backup(module)
+    network_reset_cached_weight(module)
+    y = original_forward(module, input)
+    network_layer_name = getattr(module, 'network_layer_name', None)
+    for lora in loaded_networks:
+        module = lora.modules.get(network_layer_name, None)
+        if module is None:
+            continue
+        y = module.forward(y, input)
+    return y
+def network_reset_cached_weight(self: Union[torch.nn.Conv2d, torch.nn.Linear]):
+    self.network_current_names = ()
+    self.network_weights_backup = None
+def network_Linear_forward(self, input):
+    if shared.opts.lora_functional:
+        return network_forward(self, input, torch.nn.Linear_forward_before_network)
+    network_apply_weights(self)
+    return torch.nn.Linear_forward_before_network(self, input)
+def network_Linear_load_state_dict(self, *args, **kwargs):
+    """Replacement for torch.nn.Linear._load_from_state_dict: clears the network cache before loading."""
+    # the cached backup would no longer match the weights that are about to be loaded
+    network_reset_cached_weight(self)
+    return torch.nn.Linear_load_state_dict_before_network(self, *args, **kwargs)
+def network_Conv2d_forward(self, input):
+    if shared.opts.lora_functional:
+        return network_forward(self, input, torch.nn.Conv2d_forward_before_network)
+    network_apply_weights(self)
+    return torch.nn.Conv2d_forward_before_network(self, input)
+def network_Conv2d_load_state_dict(self, *args, **kwargs):
+    """Replacement for torch.nn.Conv2d._load_from_state_dict: clears the network cache before loading."""
+    # the cached backup would no longer match the weights that are about to be loaded
+    network_reset_cached_weight(self)
+    return torch.nn.Conv2d_load_state_dict_before_network(self, *args, **kwargs)
+def network_MultiheadAttention_forward(self, *args, **kwargs):
+    """Replacement for torch.nn.MultiheadAttention.forward: applies the loaded networks, then runs the saved original forward."""
+    # unlike the Linear and Conv2d wrappers, this one does not check shared.opts.lora_functional
+    network_apply_weights(self)
+    return torch.nn.MultiheadAttention_forward_before_network(self, *args, **kwargs)
+def network_MultiheadAttention_load_state_dict(self, *args, **kwargs):
+    """Replacement for torch.nn.MultiheadAttention._load_from_state_dict: clears the network cache before loading."""
+    # the cached backup would no longer match the weights that are about to be loaded
+    network_reset_cached_weight(self)
+    return torch.nn.MultiheadAttention_load_state_dict_before_network(self, *args, **kwargs)
+def list_available_networks():
+    available_networks.clear()
+    available_network_aliases.clear()
+    forbidden_network_aliases.clear()
+    available_network_hash_lookup.clear()
+    forbidden_network_aliases.update({"none": 1, "Addams": 1})
+    os.makedirs(shared.cmd_opts.lora_dir, exist_ok=True)
+    candidates = list(shared.walk_files(shared.cmd_opts.lora_dir, allowed_extensions=[".pt", ".ckpt", ".safetensors"]))
+    candidates += list(shared.walk_files(shared.cmd_opts.lyco_dir_backcompat, allowed_extensions=[".pt", ".ckpt", ".safetensors"]))
+    for filename in candidates:
+        if os.path.isdir(filename):
+            continue
+        name = os.path.splitext(os.path.basename(filename))[0]
+        try:
+            entry = network.NetworkOnDisk(name, filename)
+        except OSError:  # should catch FileNotFoundError and PermissionError etc.
+            errors.report(f"Failed to load network {name} from {filename}", exc_info=True)
+            continue
+        available_networks[name] = entry
+        if entry.alias in available_network_aliases:
+            forbidden_network_aliases[entry.alias.lower()] = 1
+        available_network_aliases[name] = entry
+        available_network_aliases[entry.alias] = entry
+re_network_name = re.compile(r"(.*)\s*\([0-9a-fA-F]+\)")
+def infotext_pasted(infotext, params):
+    if "AddNet Module 1" in [x[1] for x in scripts.scripts_txt2img.infotext_fields]:
+        return  # if the other extension is active, it will handle those fields, no need to do anything
+    added = []
+    for k in params:
+        if not k.startswith("AddNet Model "):
+            continue
+        num = k[13:]
+        if params.get("AddNet Module " + num) != "LoRA":
+            continue
+        name = params.get("AddNet Model " + num)
+        if name is None:
+            continue
+        m = re_network_name.match(name)
+        if m:
+            name = m.group(1)
+        multiplier = params.get("AddNet Weight A " + num, "1.0")
+        added.append(f"<lora:{name}:{multiplier}>")
+    if added:
+        params["Prompt"] += "\n" + "".join(added)
+# module-level registries shared by the functions above
+# name (file name without extension) -> NetworkOnDisk entry
+available_networks = {}
+# both name and alias -> NetworkOnDisk entry
+available_network_aliases = {}
+# networks currently selected, rebuilt by load_networks()
+loaded_networks = []
+# cleared by list_available_networks(); populated outside this module's visible code
+available_network_hash_lookup = {}
+# lower-cased aliases that are reserved or ambiguous -> 1
+forbidden_network_aliases = {}
+# scan the network directories once at import time
+list_available_networks()

extensions-builtin/Lora/preload.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import os
+from modules import paths
+def preload(parser):
+    parser.add_argument("--lora-dir", type=str, help="Path to directory with Lora networks.", default=os.path.join(paths.models_path, 'Lora'))
+    parser.add_argument("--lyco-dir-backcompat", type=str, help="Path to directory with LyCORIS networks (for backawards compatibility; can also use --lyco-dir).", default=os.path.join(paths.models_path, 'LyCORIS'))

extensions-builtin/Lora/scripts/__pycache__/lora_script.cpython-310.pyc ADDED Viewed

Binary file (5.11 kB). View file

extensions-builtin/Lora/scripts/lora_script.py ADDED Viewed

	@@ -0,0 +1,123 @@

+import re
+import torch
+import gradio as gr
+from fastapi import FastAPI
+import network
+import networks
+import lora  # noqa:F401
+import extra_networks_lora
+import ui_extra_networks_lora
+from modules import script_callbacks, ui_extra_networks, extra_networks, shared
+def unload():
+    torch.nn.Linear.forward = torch.nn.Linear_forward_before_network
+    torch.nn.Linear._load_from_state_dict = torch.nn.Linear_load_state_dict_before_network
+    torch.nn.Conv2d.forward = torch.nn.Conv2d_forward_before_network
+    torch.nn.Conv2d._load_from_state_dict = torch.nn.Conv2d_load_state_dict_before_network
+    torch.nn.MultiheadAttention.forward = torch.nn.MultiheadAttention_forward_before_network
+    torch.nn.MultiheadAttention._load_from_state_dict = torch.nn.MultiheadAttention_load_state_dict_before_network
+def before_ui():
+    ui_extra_networks.register_page(ui_extra_networks_lora.ExtraNetworksPageLora())
+    extra_network = extra_networks_lora.ExtraNetworkLora()
+    extra_networks.register_extra_network(extra_network)
+    extra_networks.register_extra_network_alias(extra_network, "lyco")
+# Save the original torch methods on torch.nn exactly once; the hasattr guards keep a second run of this
+# script from overwriting the saved originals with the already patched functions.
+if not hasattr(torch.nn, 'Linear_forward_before_network'):
+    torch.nn.Linear_forward_before_network = torch.nn.Linear.forward
+if not hasattr(torch.nn, 'Linear_load_state_dict_before_network'):
+    torch.nn.Linear_load_state_dict_before_network = torch.nn.Linear._load_from_state_dict
+if not hasattr(torch.nn, 'Conv2d_forward_before_network'):
+    torch.nn.Conv2d_forward_before_network = torch.nn.Conv2d.forward
+if not hasattr(torch.nn, 'Conv2d_load_state_dict_before_network'):
+    torch.nn.Conv2d_load_state_dict_before_network = torch.nn.Conv2d._load_from_state_dict
+if not hasattr(torch.nn, 'MultiheadAttention_forward_before_network'):
+    torch.nn.MultiheadAttention_forward_before_network = torch.nn.MultiheadAttention.forward
+if not hasattr(torch.nn, 'MultiheadAttention_load_state_dict_before_network'):
+    torch.nn.MultiheadAttention_load_state_dict_before_network = torch.nn.MultiheadAttention._load_from_state_dict
+# Replace the methods class-wide, so every Linear, Conv2d and MultiheadAttention instance goes through networks.*
+torch.nn.Linear.forward = networks.network_Linear_forward
+torch.nn.Linear._load_from_state_dict = networks.network_Linear_load_state_dict
+torch.nn.Conv2d.forward = networks.network_Conv2d_forward
+torch.nn.Conv2d._load_from_state_dict = networks.network_Conv2d_load_state_dict
+torch.nn.MultiheadAttention.forward = networks.network_MultiheadAttention_forward
+torch.nn.MultiheadAttention._load_from_state_dict = networks.network_MultiheadAttention_load_state_dict
+# Hook into the webui lifecycle; unload() undoes the patching above.
+script_callbacks.on_model_loaded(networks.assign_network_names_to_compvis_modules)
+script_callbacks.on_script_unloaded(unload)
+script_callbacks.on_before_ui(before_ui)
+script_callbacks.on_infotext_pasted(networks.infotext_pasted)
+# User-facing settings, added to the "Extra Networks" section.
+shared.options_templates.update(shared.options_section(('extra_networks', "Extra Networks"), {
+    "sd_lora": shared.OptionInfo("None", "Add network to prompt", gr.Dropdown, lambda: {"choices": ["None", *networks.available_networks]}, refresh=networks.list_available_networks),
+    "lora_preferred_name": shared.OptionInfo("Alias from file", "When adding to prompt, refer to Lora by", gr.Radio, {"choices": ["Alias from file", "Filename"]}),
+    "lora_add_hashes_to_infotext": shared.OptionInfo(True, "Add Lora hashes to infotext"),
+    "lora_show_all": shared.OptionInfo(False, "Always show all networks on the Lora page").info("otherwise, those detected as for incompatible version of Stable Diffusion will be hidden"),
+    "lora_hide_unknown_for_versions": shared.OptionInfo([], "Hide networks of unknown versions for model versions", gr.CheckboxGroup, {"choices": ["SD1", "SD2", "SDXL"]}),
+}))
+# lora_functional selects the network_forward() code path in the patched Linear and Conv2d forwards.
+shared.options_templates.update(shared.options_section(('compatibility', "Compatibility"), {
+    "lora_functional": shared.OptionInfo(False, "Lora/Networks: use old method that takes longer when you have multiple Loras active and produces same results as kohya-ss/sd-webui-additional-networks extension"),
+}))
+def create_lora_json(obj: network.NetworkOnDisk):
+    return {
+        "name": obj.name,
+        "alias": obj.alias,
+        "path": obj.filename,
+        "metadata": obj.metadata,
+    }
+def api_networks(_: gr.Blocks, app: FastAPI):
+    """Adds the Lora endpoints to the FastAPI app; the gradio Blocks argument is not used."""
+    # GET: one create_lora_json() dict per known network
+    @app.get("/sdapi/v1/loras")
+    async def get_loras():
+        return [create_lora_json(obj) for obj in networks.available_networks.values()]
+    # POST: rescan the network directories and return whatever list_available_networks() returns
+    @app.post("/sdapi/v1/refresh-loras")
+    async def refresh_loras():
+        return networks.list_available_networks()
+# register the routes once the web app has started
+script_callbacks.on_app_started(api_networks)
+re_lora = re.compile("<lora:([^:]+):")
+def infotext_pasted(infotext, d):
+    hashes = d.get("Lora hashes")
+    if not hashes:
+        return
+    hashes = [x.strip().split(':', 1) for x in hashes.split(",")]
+    hashes = {x[0].strip().replace(",", ""): x[1].strip() for x in hashes}
+    def network_replacement(m):
+        alias = m.group(1)
+        shorthash = hashes.get(alias)
+        if shorthash is None:
+            return m.group(0)
+        network_on_disk = networks.available_network_hash_lookup.get(shorthash)
+        if network_on_disk is None:
+            return m.group(0)
+        return f'<lora:{network_on_disk.get_alias()}:'
+    d["Prompt"] = re.sub(re_lora, network_replacement, d["Prompt"])
+script_callbacks.on_infotext_pasted(infotext_pasted)

extensions-builtin/Lora/ui_edit_user_metadata.py ADDED Viewed

	@@ -0,0 +1,216 @@

+import datetime
+import html
+import random
+import gradio as gr
+import re
+from modules import ui_extra_networks_user_metadata
+def is_non_comma_tagset(tags):
+    average_tag_length = sum(len(x) for x in tags.keys()) / len(tags)
+    return average_tag_length >= 16
+re_word = re.compile(r"[-_\w']+")
+re_comma = re.compile(r" *, *")
+def build_tags(metadata):
+    tags = {}
+    for _, tags_dict in metadata.get("ss_tag_frequency", {}).items():
+        for tag, tag_count in tags_dict.items():
+            tag = tag.strip()
+            tags[tag] = tags.get(tag, 0) + int(tag_count)
+    if tags and is_non_comma_tagset(tags):
+        new_tags = {}
+        for text, text_count in tags.items():
+            for word in re.findall(re_word, text):
+                if len(word) < 3:
+                    continue
+                new_tags[word] = new_tags.get(word, 0) + text_count
+        tags = new_tags
+    ordered_tags = sorted(tags.keys(), key=tags.get, reverse=True)
+    return [(tag, tags[tag]) for tag in ordered_tags]
+class LoraUserMetadataEditor(ui_extra_networks_user_metadata.UserMetadataEditor):
+    def __init__(self, ui, tabname, page):
+        super().__init__(ui, tabname, page)
+        self.select_sd_version = None
+        self.taginfo = None
+        self.edit_activation_text = None
+        self.slider_preferred_weight = None
+        self.edit_notes = None
+    def save_lora_user_metadata(self, name, desc, sd_version, activation_text, preferred_weight, notes):
+        user_metadata = self.get_user_metadata(name)
+        user_metadata["description"] = desc
+        user_metadata["sd version"] = sd_version
+        user_metadata["activation text"] = activation_text
+        user_metadata["preferred weight"] = preferred_weight
+        user_metadata["notes"] = notes
+        self.write_user_metadata(name, user_metadata)
+    def get_metadata_table(self, name):
+        table = super().get_metadata_table(name)
+        item = self.page.items.get(name, {})
+        metadata = item.get("metadata") or {}
+        keys = {
+            'ss_sd_model_name': "Model:",
+            'ss_clip_skip': "Clip skip:",
+            'ss_network_module': "Kohya module:",
+        }
+        for key, label in keys.items():
+            value = metadata.get(key, None)
+            if value is not None and str(value) != "None":
+                table.append((label, html.escape(value)))
+        ss_training_started_at = metadata.get('ss_training_started_at')
+        if ss_training_started_at:
+            table.append(("Date trained:", datetime.datetime.utcfromtimestamp(float(ss_training_started_at)).strftime('%Y-%m-%d %H:%M')))
+        ss_bucket_info = metadata.get("ss_bucket_info")
+        if ss_bucket_info and "buckets" in ss_bucket_info:
+            resolutions = {}
+            for _, bucket in ss_bucket_info["buckets"].items():
+                resolution = bucket["resolution"]
+                resolution = f'{resolution[1]}x{resolution[0]}'
+                resolutions[resolution] = resolutions.get(resolution, 0) + int(bucket["count"])
+            resolutions_list = sorted(resolutions.keys(), key=resolutions.get, reverse=True)
+            resolutions_text = html.escape(", ".join(resolutions_list[0:4]))
+            if len(resolutions) > 4:
+                resolutions_text += ", ..."
+                resolutions_text = f"<span title='{html.escape(', '.join(resolutions_list))}'>{resolutions_text}</span>"
+            table.append(('Resolutions:' if len(resolutions_list) > 1 else 'Resolution:', resolutions_text))
+        image_count = 0
+        for _, params in metadata.get("ss_dataset_dirs", {}).items():
+            image_count += int(params.get("img_count", 0))
+        if image_count:
+            table.append(("Dataset size:", image_count))
+        return table
+    def put_values_into_components(self, name):
+        user_metadata = self.get_user_metadata(name)
+        values = super().put_values_into_components(name)
+        item = self.page.items.get(name, {})
+        metadata = item.get("metadata") or {}
+        tags = build_tags(metadata)
+        gradio_tags = [(tag, str(count)) for tag, count in tags[0:24]]
+        return [
+            *values[0:5],
+            item.get("sd_version", "Unknown"),
+            gr.HighlightedText.update(value=gradio_tags, visible=True if tags else False),
+            user_metadata.get('activation text', ''),
+            float(user_metadata.get('preferred weight', 0.0)),
+            gr.update(visible=True if tags else False),
+            gr.update(value=self.generate_random_prompt_from_tags(tags), visible=True if tags else False),
+        ]
+    def generate_random_prompt(self, name):
+        item = self.page.items.get(name, {})
+        metadata = item.get("metadata") or {}
+        tags = build_tags(metadata)
+        return self.generate_random_prompt_from_tags(tags)
+    def generate_random_prompt_from_tags(self, tags):
+        max_count = None
+        res = []
+        for tag, count in tags:
+            if not max_count:
+                max_count = count
+            v = random.random() * max_count
+            if count > v:
+                res.append(tag)
+        return ", ".join(sorted(res))
+    def create_extra_default_items_in_left_column(self):
+        # this would be a lot better as gr.Radio but I can't make it work
+        self.select_sd_version = gr.Dropdown(['SD1', 'SD2', 'SDXL', 'Unknown'], value='Unknown', label='Stable Diffusion version', interactive=True)
+    def create_editor(self):
+        self.create_default_editor_elems()
+        self.taginfo = gr.HighlightedText(label="Training dataset tags")
+        self.edit_activation_text = gr.Text(label='Activation text', info="Will be added to prompt along with Lora")
+        self.slider_preferred_weight = gr.Slider(label='Preferred weight', info="Set to 0 to disable", minimum=0.0, maximum=2.0, step=0.01)
+        with gr.Row() as row_random_prompt:
+            with gr.Column(scale=8):
+                random_prompt = gr.Textbox(label='Random prompt', lines=4, max_lines=4, interactive=False)
+            with gr.Column(scale=1, min_width=120):
+                generate_random_prompt = gr.Button('Generate').style(full_width=True, size="lg")
+        self.edit_notes = gr.TextArea(label='Notes', lines=4)
+        generate_random_prompt.click(fn=self.generate_random_prompt, inputs=[self.edit_name_input], outputs=[random_prompt], show_progress=False)
+        def select_tag(activation_text, evt: gr.SelectData):
+            tag = evt.value[0]
+            words = re.split(re_comma, activation_text)
+            if tag in words:
+                words = [x for x in words if x != tag and x.strip()]
+                return ", ".join(words)
+            return activation_text + ", " + tag if activation_text else tag
+        self.taginfo.select(fn=select_tag, inputs=[self.edit_activation_text], outputs=[self.edit_activation_text], show_progress=False)
+        self.create_default_buttons()
+        viewed_components = [
+            self.edit_name,
+            self.edit_description,
+            self.html_filedata,
+            self.html_preview,
+            self.edit_notes,
+            self.select_sd_version,
+            self.taginfo,
+            self.edit_activation_text,
+            self.slider_preferred_weight,
+            row_random_prompt,
+            random_prompt,
+        ]
+        self.button_edit\
+            .click(fn=self.put_values_into_components, inputs=[self.edit_name_input], outputs=viewed_components)\
+            .then(fn=lambda: gr.update(visible=True), inputs=[], outputs=[self.box])
+        edited_components = [
+            self.edit_description,
+            self.select_sd_version,
+            self.edit_activation_text,
+            self.slider_preferred_weight,
+            self.edit_notes,
+        ]
+        self.setup_save_handler(self.button_save, self.save_lora_user_metadata, edited_components)

extensions-builtin/Lora/ui_extra_networks_lora.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import os
+import network
+import networks
+from modules import shared, ui_extra_networks
+from modules.ui_extra_networks import quote_js
+from ui_edit_user_metadata import LoraUserMetadataEditor
+class ExtraNetworksPageLora(ui_extra_networks.ExtraNetworksPage):
+    def __init__(self):
+        super().__init__('Lora')
+    def refresh(self):
+        networks.list_available_networks()
+    def create_item(self, name, index=None, enable_filter=True):
+        lora_on_disk = networks.available_networks.get(name)
+        path, ext = os.path.splitext(lora_on_disk.filename)
+        alias = lora_on_disk.get_alias()
+        item = {
+            "name": name,
+            "filename": lora_on_disk.filename,
+            "preview": self.find_preview(path) if self.find_preview(path) else './file=html/card-no-preview.png',
+            "description": self.find_description(path),
+            "search_term": self.search_terms_from_path(lora_on_disk.filename),
+            "local_preview": f"{path}.{shared.opts.samples_format}",
+            "metadata": lora_on_disk.metadata,
+            "sort_keys": {'default': index, **self.get_sort_keys(lora_on_disk.filename)},
+            "sd_version": lora_on_disk.sd_version.name,
+        }
+        self.read_user_metadata(item)
+        activation_text = item["user_metadata"].get("activation text")
+        preferred_weight = item["user_metadata"].get("preferred weight", 0.0)
+        item["prompt"] = quote_js(f"<lora:{alias}:") + " + " + (str(preferred_weight) if preferred_weight else "opts.extra_networks_default_multiplier") + " + " + quote_js(">")
+        if activation_text:
+            item["prompt"] += " + " + quote_js(" " + activation_text)
+        sd_version = item["user_metadata"].get("sd version")
+        if sd_version in network.SdVersion.__members__:
+            item["sd_version"] = sd_version
+            sd_version = network.SdVersion[sd_version]
+        else:
+            sd_version = lora_on_disk.sd_version
+        if shared.opts.lora_show_all or not enable_filter:
+            pass
+        elif sd_version == network.SdVersion.Unknown:
+            model_version = network.SdVersion.SDXL if shared.sd_model.is_sdxl else network.SdVersion.SD2 if shared.sd_model.is_sd2 else network.SdVersion.SD1
+            if model_version.name in shared.opts.lora_hide_unknown_for_versions:
+                return None
+        elif shared.sd_model.is_sdxl and sd_version != network.SdVersion.SDXL:
+            return None
+        elif shared.sd_model.is_sd2 and sd_version != network.SdVersion.SD2:
+            return None
+        elif shared.sd_model.is_sd1 and sd_version != network.SdVersion.SD1:
+            return None
+        return item
+    def list_items(self):
+        for index, name in enumerate(networks.available_networks):
+            item = self.create_item(name, index)
+            if item is not None:
+                yield item
+    def allowed_directories_for_previews(self):
+        return [shared.cmd_opts.lora_dir, shared.cmd_opts.lyco_dir_backcompat]
+    def create_user_metadata_editor(self, ui, tabname):
+        return LoraUserMetadataEditor(ui, tabname, self)

extensions-builtin/ScuNET/__pycache__/preload.cpython-310.pyc ADDED Viewed

Binary file (491 Bytes). View file

extensions/stable-diffusion-webui-images-browser/scripts/wib/__pycache__/wib_db.cpython-310.pyc ADDED Viewed

Binary file (21.8 kB). View file

extensions/stable-diffusion-webui-images-browser/scripts/wib/wib_db.py ADDED Viewed

	@@ -0,0 +1,888 @@

+import hashlib
+import json
+import os
+import sqlite3
+from modules import scripts
+from PIL import Image
+# Schema version that check() writes to db_data and upgrades older databases to.
+version = 6
+# Paths built under scripts.basedir(). path_recorder_file, exif_cache_file and ranking_file
+# are the legacy files imported by the migrate_* functions when the database is created.
+path_recorder_file = os.path.join(scripts.basedir(), "path_recorder.txt")
+aes_cache_file = os.path.join(scripts.basedir(), "aes_scores.json")
+exif_cache_file = os.path.join(scripts.basedir(), "exif_data.json")
+ranking_file = os.path.join(scripts.basedir(), "ranking.json")
+archive = os.path.join(scripts.basedir(), "archive")
+# SQLite database file opened by the functions in this module.
+db_file = os.path.join(scripts.basedir(), "wib.sqlite3")
+# Line prefixes update_exif_data() uses to pick out the negative prompt and the key/value line.
+np = "Negative prompt: "
+st = "Steps: "
+# Passed as the timeout argument of sqlite3.connect().
+timeout = 30
+def create_filehash(cursor):
+    """Create the filehash table (file -> hash) and its update trigger if they do not exist yet.
+    The trigger sets the updated column to CURRENT_TIMESTAMP whenever a row is updated.
+    Both statements use IF NOT EXISTS, so calling this function again on the same
+    database does not raise "trigger filehash_tr already exists".
+    """
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS filehash (
+            file TEXT PRIMARY KEY,
+            hash TEXT,
+            created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+    ''')
+    cursor.execute('''
+        CREATE TRIGGER IF NOT EXISTS filehash_tr
+        AFTER UPDATE ON filehash
+        BEGIN
+            UPDATE filehash SET updated = CURRENT_TIMESTAMP WHERE file = OLD.file;
+        END;
+    ''')
+def create_work_files(cursor):
+    """Ensure the work_files table (a single file TEXT primary-key column) exists."""
+    ddl = '''
+        CREATE TABLE IF NOT EXISTS work_files (
+            file TEXT PRIMARY KEY
+        )
+    '''
+    cursor.execute(ddl)
+def create_db(cursor):
+    """Create the tables, indexes and triggers of the image browser database.
+    Tables: db_data (key/value settings), path_recorder, exif_data (one row per file and key)
+    and ranking; filehash and work_files are created through their own helpers.
+    Each *_tr trigger sets the updated column to CURRENT_TIMESTAMP after a row update.
+    Every statement issued here uses IF NOT EXISTS (the triggers previously did not),
+    so the statements do not fail when the objects already exist.
+    """
+    statements = (
+        '''
+        CREATE TABLE IF NOT EXISTS db_data (
+            key TEXT PRIMARY KEY,
+            value TEXT
+        )
+        ''',
+        '''
+        CREATE TABLE IF NOT EXISTS path_recorder (
+            path TEXT PRIMARY KEY,
+            depth INT,
+            path_display TEXT,
+            created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+        ''',
+        '''
+        CREATE TRIGGER IF NOT EXISTS path_recorder_tr
+        AFTER UPDATE ON path_recorder
+        BEGIN
+            UPDATE path_recorder SET updated = CURRENT_TIMESTAMP WHERE path = OLD.path;
+        END;
+        ''',
+        '''
+        CREATE TABLE IF NOT EXISTS exif_data (
+            file TEXT,
+            key TEXT,
+            value TEXT,
+            created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            PRIMARY KEY (file, key)
+        )
+        ''',
+        '''
+        CREATE INDEX IF NOT EXISTS exif_data_key ON exif_data (key)
+        ''',
+        '''
+        CREATE TRIGGER IF NOT EXISTS exif_data_tr
+        AFTER UPDATE ON exif_data
+        BEGIN
+            UPDATE exif_data SET updated = CURRENT_TIMESTAMP WHERE file = OLD.file AND key = OLD.key;
+        END;
+        ''',
+        '''
+        CREATE TABLE IF NOT EXISTS ranking (
+            file TEXT PRIMARY KEY,
+            name TEXT,
+            ranking TEXT,
+            created TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+        ''',
+        '''
+        CREATE INDEX IF NOT EXISTS ranking_name ON ranking (name)
+        ''',
+        '''
+        CREATE TRIGGER IF NOT EXISTS ranking_tr
+        AFTER UPDATE ON ranking
+        BEGIN
+            UPDATE ranking SET updated = CURRENT_TIMESTAMP WHERE file = OLD.file;
+        END;
+        ''',
+    )
+    # Order matters: each trigger and index follows the table it refers to.
+    for statement in statements:
+        cursor.execute(statement)
+    create_filehash(cursor)
+    create_work_files(cursor)
+def migrate_path_recorder(cursor):
+    """Import the legacy path recorder file, if it exists, into the path_recorder table.
+    Two formats are understood: a JSON object mapping path -> {"depth": ...}, and the older
+    plain-text format with one path per line (imported with depth 0).
+    Paths are normalised with os.path.realpath; INSERT OR REPLACE is used because two legacy
+    notations of the same directory resolve to the same key and a plain INSERT would abort
+    the whole migration with an IntegrityError.
+    """
+    if not os.path.exists(path_recorder_file):
+        return
+    insert_sql = '''
+    INSERT OR REPLACE
+    INTO path_recorder (path, depth, path_display)
+    VALUES (?, ?, ?)
+    '''
+    try:
+        with open(path_recorder_file) as f:
+            # json-version
+            path_recorder = json.load(f)
+    except json.JSONDecodeError:
+        path_recorder = None
+    if path_recorder is not None:
+        for path, values in path_recorder.items():
+            path = os.path.realpath(path)
+            depth = values["depth"]
+            cursor.execute(insert_sql, (path, depth, f"{path} [{depth}]"))
+        return
+    with open(path_recorder_file) as f:
+        # old txt-version: reading stops at the first empty line, as before
+        for line in f:
+            path = line.rstrip("\n")
+            if not path:
+                break
+            path = os.path.realpath(path)
+            cursor.execute(insert_sql, (path, 0, f"{path} [0]"))
+def update_exif_data(cursor, file, info):
+    """Parse the generation-info text of an image and store it as exif_data rows for file.
+    info is either the placeholder "0" (stored as prompt "0", negative_prompt "0" and the
+    pair "0": "0") or a newline-separated text: the line starting with st holds
+    comma-separated "key: value" pairs, the line starting with np holds the negative prompt,
+    and every other line is part of the (possibly multiline) prompt.
+    Nothing is written when no key/value line is present or no pair could be parsed.
+    The database writes now run once after parsing; previously they were nested in the
+    per-character loop, so every parsed pair deleted and re-inserted all rows of the file.
+    """
+    prompt = "0"
+    negative_prompt = "0"
+    key_values = "0: 0"
+    if info != "0":
+        prompt = ""
+        negative_prompt = ""
+        key_values = ""
+        for info_item in info.split("\n"):
+            if info_item.startswith(st):
+                key_values = info_item
+            elif info_item.startswith(np):
+                negative_prompt = info_item.replace(np, "")
+            elif prompt == "":
+                prompt = info_item
+            else:
+                # multiline prompts
+                prompt = f"{prompt}\n{info_item}"
+    if key_values == "":
+        return
+    key_value_pairs = []
+    key_value = ""
+    quote_open = False
+    # Split on commas outside double quotes; the appended "," flushes the last pair.
+    for char in key_values + ",":
+        key_value += char
+        if char == '"':
+            quote_open = not quote_open
+        if char == "," and not quote_open:
+            # partition keeps everything after the first ": " as the value, so values that
+            # themselves contain ": " are no longer replaced by an empty string
+            k, _, v = key_value.strip(" ,").partition(": ")
+            key_value_pairs.append((k, v))
+            key_value = ""
+    if not key_value_pairs:
+        return
+    try:
+        cursor.execute('''
+        INSERT INTO exif_data (file, key, value)
+        VALUES (?, ?, ?)
+        ''', (file, "prompt", prompt))
+    except sqlite3.IntegrityError:
+        # Duplicate, delete all "file" entries and try again
+        cursor.execute('''
+        DELETE FROM exif_data
+        WHERE file = ?
+        ''', (file,))
+        cursor.execute('''
+        INSERT INTO exif_data (file, key, value)
+        VALUES (?, ?, ?)
+        ''', (file, "prompt", prompt))
+    cursor.execute('''
+    INSERT OR REPLACE
+    INTO exif_data (file, key, value)
+    VALUES (?, ?, ?)
+    ''', (file, "negative_prompt", negative_prompt))
+    # The first occurrence of a key wins and parsed pairs never overwrite the prompt rows.
+    seen = {"prompt", "negative_prompt"}
+    for key, value in key_value_pairs:
+        if key in seen:
+            continue
+        seen.add(key)
+        cursor.execute('''
+        INSERT OR REPLACE
+        INTO exif_data (file, key, value)
+        VALUES (?, ?, ?)
+        ''', (file, key, value))
+def migrate_exif_data(cursor):
+    """Load the legacy exif cache JSON, if present, and store each entry through update_exif_data."""
+    if not os.path.exists(exif_cache_file):
+        return
+    with open(exif_cache_file, 'r') as cache_handle:
+        cached_infos = json.load(cache_handle)
+    for cached_path, cached_info in cached_infos.items():
+        # keys are normalised with realpath before they are written to the database
+        update_exif_data(cursor, os.path.realpath(cached_path), cached_info)
+def migrate_ranking(cursor):
+    """Import the legacy ranking JSON file, if it exists, into the ranking table.
+    Entries whose ranking is the string "None" are skipped. File paths are normalised with
+    os.path.realpath; INSERT OR REPLACE is used because two legacy notations of the same file
+    resolve to the same key and a plain INSERT would abort the migration with an IntegrityError.
+    """
+    if not os.path.exists(ranking_file):
+        return
+    with open(ranking_file, 'r') as f:
+        ranking = json.load(f)
+    for file, info in ranking.items():
+        if info == "None":
+            continue
+        file = os.path.realpath(file)
+        cursor.execute('''
+        INSERT OR REPLACE
+        INTO ranking (file, name, ranking)
+        VALUES (?, ?, ?)
+        ''', (file, os.path.basename(file), info))
+def get_hash(file):
+    """Return the SHA-512 hex digest of the image's decoded bytes, or None if it cannot be opened.
+    Hashing image.tobytes() instead of the file content makes the hash independent of exif info.
+    Previously a failed Image.open only printed the error and then crashed with an
+    UnboundLocalError on the unassigned image variable.
+    """
+    # Get filehash without exif info
+    try:
+        image = Image.open(file)
+    except Exception as e:
+        print(e)
+        return None
+    # the with-block closes the image even when tobytes() raises
+    with image:
+        return hashlib.sha512(image.tobytes()).hexdigest()
+def migrate_filehash(cursor, version):
+    """Create the filehash table for databases up to version 4 and hash every ranked file on disk.
+    version is the database version as a string or int. It is compared numerically; the former
+    string comparison treated e.g. "10" as lower than "4".
+    INSERT OR REPLACE keeps the function re-runnable when a hash row already exists.
+    """
+    if int(version) <= 4:
+        create_filehash(cursor)
+    cursor.execute('''
+    SELECT file
+    FROM ranking
+    ''')
+    for (file,) in cursor.fetchall():
+        if os.path.exists(file):
+            cursor.execute('''
+            INSERT OR REPLACE
+            INTO filehash (file, hash)
+            VALUES (?, ?)
+            ''', (file, get_hash(file)))
+def migrate_work_files(cursor):
+    """Migration step that adds the work_files table by delegating to create_work_files."""
+    create_work_files(cursor)
+    return
+def update_db_data(cursor, key, value):
+    """Insert or overwrite the db_data row for key with value."""
+    upsert = '''
+    INSERT OR REPLACE
+    INTO db_data (key, value)
+    VALUES (?, ?)
+    '''
+    cursor.execute(upsert, (key, value))
+def get_version():
+    """Return the db_data row holding the schema version as a 1-tuple, or None if it is missing.
+    The connection is closed explicitly: a sqlite3 connection used as a context manager
+    only commits or rolls back, it does not close the connection.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+        SELECT value
+        FROM db_data
+        WHERE key = 'version'
+        ''')
+        return cursor.fetchone()
+    finally:
+        conn.close()
+def migrate_path_recorder_dirs(cursor):
+    """Rewrite every path_recorder path to its os.path.realpath form.
+    path_display has its leading old path replaced by the new one. If the resolved path
+    already exists as a key, the row with the old notation is deleted instead.
+    """
+    cursor.execute('''
+    SELECT path, path_display
+    FROM path_recorder
+    ''')
+    for (stored_path, _stored_display) in cursor.fetchall():
+        resolved_path = os.path.realpath(stored_path)
+        if stored_path != resolved_path:
+            params = (resolved_path, resolved_path, stored_path, stored_path)
+            try:
+                cursor.execute('''
+                UPDATE path_recorder
+                SET path = ?,
+                    path_display = ? || SUBSTR(path_display, LENGTH(?) + 1)
+                WHERE path = ?
+                ''', params)
+            except sqlite3.IntegrityError as e:
+                # these are double keys, because the same file can be in the db with different path notations
+                (e_msg,) = e.args
+                if e_msg.startswith("UNIQUE constraint"):
+                    cursor.execute('''
+                    DELETE FROM path_recorder
+                    WHERE path = ?
+                    ''', (stored_path,))
+                else:
+                    raise
+def migrate_exif_data_dirs(cursor):
+    """Rewrite the directory part of every exif_data file to its os.path.realpath form.
+    If rows for the resolved file name already exist, the rows with the old notation
+    are deleted instead.
+    """
+    cursor.execute('''
+    SELECT file
+    FROM exif_data
+    ''')
+    for (stored_file,) in cursor.fetchall():
+        (stored_dir, base_name) = os.path.split(stored_file)
+        resolved_dir = os.path.realpath(stored_dir)
+        if stored_dir != resolved_dir:
+            resolved_file = os.path.join(resolved_dir, base_name)
+            try:
+                cursor.execute('''
+                UPDATE exif_data
+                SET file = ?
+                WHERE file = ?
+                ''', (resolved_file, stored_file))
+            except sqlite3.IntegrityError as e:
+                # these are double keys, because the same file can be in the db with different path notations
+                (e_msg,) = e.args
+                if e_msg.startswith("UNIQUE constraint"):
+                    cursor.execute('''
+                    DELETE FROM exif_data
+                    WHERE file = ?
+                    ''', (stored_file,))
+                else:
+                    raise
+def migrate_ranking_dirs(cursor, db_version):
+    """Normalise the ranking table: add the name column for version "1" databases, drop rows
+    with an empty file or the ranking "None", and rewrite every remaining file to its realpath
+    directory while filling name with the base file name. Rows whose resolved file already
+    exists as a key are deleted.
+    """
+    if db_version == "1":
+        cursor.execute('''
+        ALTER TABLE ranking
+        ADD COLUMN name TEXT
+        ''')
+        cursor.execute('''
+            CREATE INDEX IF NOT EXISTS ranking_name ON ranking (name)
+        ''')
+    cursor.execute('''
+    SELECT file, ranking
+    FROM ranking
+    ''')
+    for (stored_file, stored_ranking) in cursor.fetchall():
+        if stored_file == "" or stored_ranking == "None":
+            cursor.execute('''
+            DELETE FROM ranking
+            WHERE file = ?
+            ''', (stored_file,))
+        else:
+            (stored_dir, base_name) = os.path.split(stored_file)
+            resolved_file = os.path.join(os.path.realpath(stored_dir), base_name)
+            try:
+                cursor.execute('''
+                UPDATE ranking
+                SET file = ?,
+                    name = ?
+                WHERE file = ?
+                ''', (resolved_file, base_name, stored_file))
+            except sqlite3.IntegrityError as e:
+                # these are double keys, because the same file can be in the db with different path notations
+                (e_msg,) = e.args
+                if e_msg.startswith("UNIQUE constraint"):
+                    cursor.execute('''
+                    DELETE FROM ranking
+                    WHERE file = ?
+                    ''', (stored_file,))
+                else:
+                    raise
+def check():
+    """Create the database if it does not exist, upgrade an older one, and return the current version.
+    A new database is created with create_db and filled from the legacy files. An existing
+    database is upgraded step by step depending on the version stored in db_data.
+    Versions are compared numerically (the former string comparison would misorder "10" and "4"),
+    and the connection is closed when a step raises, which discards the uncommitted
+    transaction instead of leaving it open.
+    """
+    if not os.path.exists(db_file):
+        conn, cursor = transaction_begin()
+        try:
+            print("Image Browser: Creating database")
+            create_db(cursor)
+            update_db_data(cursor, "version", version)
+            migrate_path_recorder(cursor)
+            migrate_exif_data(cursor)
+            migrate_ranking(cursor)
+            migrate_filehash(cursor, str(version))
+        except Exception:
+            conn.close()
+            raise
+        transaction_end(conn, cursor)
+        print("Image Browser: Database created")
+    db_version = get_version()
+    (stored_version,) = db_version
+    stored_number = int(stored_version)
+    conn, cursor = transaction_begin()
+    try:
+        if stored_number <= 2:
+            # version 1 database had mixed path notations, changed them all to abspath
+            # version 2 database still had mixed path notations, because of windows short name, changed them all to realpath
+            print(f"Image Browser: Upgrading database from version {stored_version} to version {version}")
+            migrate_path_recorder_dirs(cursor)
+            migrate_exif_data_dirs(cursor)
+            migrate_ranking_dirs(cursor, stored_version)
+        if stored_number <= 4:
+            migrate_filehash(cursor, stored_version)
+        if stored_number <= 5:
+            migrate_work_files(cursor)
+            update_db_data(cursor, "version", version)
+            print(f"Image Browser: Database upgraded from version {stored_version} to version {version}")
+    except Exception:
+        conn.close()
+        raise
+    transaction_end(conn, cursor)
+    return version
+def load_path_recorder():
+    """Return all path_recorder rows as {path: {"depth": depth, "path_display": path_display}}.
+    The connection is closed explicitly; the sqlite3 context manager does not close it.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+        SELECT path, depth, path_display
+        FROM path_recorder
+        ''')
+        rows = cursor.fetchall()
+    finally:
+        conn.close()
+    return {path: {"depth": depth, "path_display": path_display} for path, depth, path_display in rows}
+def select_ranking(file):
+    """Return the ranking stored for file, or the string "None" when the file has no ranking row.
+    The connection is closed explicitly; the sqlite3 context manager does not close it.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+        SELECT ranking
+        FROM ranking
+        WHERE file = ?
+        ''', (file,))
+        ranking_value = cursor.fetchone()
+    finally:
+        conn.close()
+    if ranking_value is None:
+        return "None"
+    (return_ranking,) = ranking_value
+    return return_ranking
+def update_ranking(file, ranking):
+    """Store ranking for file, or delete the file's ranking row when ranking is the string "None".
+    When a ranking is stored, the file's hash from get_hash is written to filehash as well.
+    The inner with-block commits on success and rolls back on error; the connection is then
+    closed explicitly because the sqlite3 context manager does not close it.
+    """
+    name = os.path.basename(file)
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        with conn:
+            cursor = conn.cursor()
+            if ranking == "None":
+                cursor.execute('''
+                DELETE FROM ranking
+                WHERE file = ?
+                ''', (file,))
+            else:
+                cursor.execute('''
+                INSERT OR REPLACE
+                INTO ranking (file, name, ranking)
+                VALUES (?, ?, ?)
+                ''', (file, name, ranking))
+                file_hash = get_hash(file)
+                cursor.execute('''
+                INSERT OR REPLACE
+                INTO filehash (file, hash)
+                VALUES (?, ?)
+                ''', (file, file_hash))
+    finally:
+        conn.close()
+def select_image_reward_score(cursor, file):
+    """Return (ImageRewardScore value, prompt value) stored in exif_data for file.
+    Each element is None when the corresponding row does not exist.
+    """
+    cursor.execute('''
+    SELECT value
+    FROM exif_data
+    WHERE file = ?
+    AND key = 'ImageRewardScore'
+    ''', (file,))
+    score_row = cursor.fetchone()
+    score = None if score_row is None else score_row[0]
+    cursor.execute('''
+    SELECT value
+    FROM exif_data
+    WHERE file = ?
+    AND key = 'prompt'
+    ''', (file,))
+    prompt_row = cursor.fetchone()
+    prompt = None if prompt_row is None else prompt_row[0]
+    return score, prompt
+def update_image_reward_score(cursor, file, image_reward_score):
+    """Insert or overwrite the ImageRewardScore row of file in exif_data."""
+    row = (file, "ImageRewardScore", image_reward_score)
+    cursor.execute('''
+    INSERT OR REPLACE
+    INTO exif_data (file, key, value)
+    VALUES (?, ?, ?)
+    ''', row)
+def update_path_recorder(path, depth, path_display):
+    """Insert or overwrite the path_recorder row for path with depth and path_display.
+    This function used to be defined twice with identical bodies; one definition is kept.
+    The inner with-block commits on success and rolls back on error; the connection is then
+    closed explicitly because the sqlite3 context manager does not close it.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        with conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+            INSERT OR REPLACE
+            INTO path_recorder (path, depth, path_display)
+            VALUES (?, ?, ?)
+            ''', (path, depth, path_display))
+    finally:
+        conn.close()
+def delete_path_recorder(path):
+    """Delete the path_recorder row for path.
+    The inner with-block commits on success and rolls back on error; the connection is then
+    closed explicitly because the sqlite3 context manager does not close it.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        with conn:
+            cursor = conn.cursor()
+            cursor.execute('''
+            DELETE FROM path_recorder
+            WHERE path = ?
+            ''', (path,))
+    finally:
+        conn.close()
+def update_path_recorder_mult(cursor, update_from, update_to):
+    """Rename the path_recorder entry update_from to update_to.
+    path_display keeps its tail: its first LENGTH(update_from) characters are replaced by update_to.
+    """
+    rename_sql = '''
+    UPDATE path_recorder
+    SET path = ?,
+        path_display = ? || SUBSTR(path_display, LENGTH(?) + 1)
+    WHERE path = ?
+    '''
+    cursor.execute(rename_sql, (update_to, update_to, update_from, update_from))
+def update_exif_data_mult(cursor, update_from, update_to):
+    """Move every exif_data file located under directory update_from to directory update_to.
+    A path separator is appended to both directories so only files below the directory match.
+    The LIKE wildcards "%" and "_" occurring in update_from are escaped; unescaped, a directory
+    such as "my_dir" also matched "myXdir" and rewrote the paths of unrelated files.
+    """
+    update_from = update_from + os.path.sep
+    update_to = update_to + os.path.sep
+    # "!" is the escape character; it is not a path separator on any platform
+    like_pattern = update_from.replace("!", "!!").replace("%", "!%").replace("_", "!_") + "%"
+    cursor.execute('''
+    UPDATE exif_data
+    SET file = ? || SUBSTR(file, LENGTH(?) + 1)
+    WHERE file LIKE ? ESCAPE '!'
+    ''', (update_to, update_from, like_pattern))
+def update_ranking_mult(cursor, update_from, update_to):
+    """Move every ranking file located under directory update_from to directory update_to.
+    A path separator is appended to both directories so only files below the directory match.
+    The LIKE wildcards "%" and "_" occurring in update_from are escaped; unescaped, a directory
+    such as "my_dir" also matched "myXdir" and rewrote the paths of unrelated files.
+    """
+    update_from = update_from + os.path.sep
+    update_to = update_to + os.path.sep
+    # "!" is the escape character; it is not a path separator on any platform
+    like_pattern = update_from.replace("!", "!!").replace("%", "!%").replace("_", "!_") + "%"
+    cursor.execute('''
+    UPDATE ranking
+    SET file = ? || SUBSTR(file, LENGTH(?) + 1)
+    WHERE file LIKE ? ESCAPE '!'
+    ''', (update_to, update_from, like_pattern))
+def delete_exif_0(cursor):
+    """Delete all exif_data rows of files whose every stored value is the string '0'."""
+    purge_sql = '''
+    DELETE FROM exif_data
+    WHERE file IN (
+        SELECT file FROM exif_data a
+        WHERE value = '0'
+        GROUP BY file
+        HAVING COUNT(*) = (SELECT COUNT(*) FROM exif_data WHERE file = a.file)
+    )
+    '''
+    cursor.execute(purge_sql)
+def get_ranking_by_file(cursor, file):
+    """Return the ranking row of file as a 1-tuple, or None when the file has no ranking row."""
+    lookup_sql = '''
+    SELECT ranking
+    FROM ranking
+    WHERE file = ?
+    '''
+    cursor.execute(lookup_sql, (file,))
+    return cursor.fetchone()
+def get_ranking_by_name(cursor, name):
+    """Look up a ranking by file name and the hash stored for the matched file.
+    Returns (ranking_row, hash_row): ranking_row is a (file, ranking) tuple or None, hash_row is
+    a 1-tuple with the hash or None. Only the first ranking row with that name is considered.
+    """
+    cursor.execute('''
+    SELECT file, ranking
+    FROM ranking
+    WHERE name = ?
+    ''', (name,))
+    ranking_value = cursor.fetchone()
+    if ranking_value is None:
+        return ranking_value, None
+    (matched_file, _) = ranking_value
+    cursor.execute('''
+        SELECT hash
+        FROM filehash
+        WHERE file = ?
+        ''', (matched_file,))
+    return ranking_value, cursor.fetchone()
+def insert_ranking(cursor, file, ranking, hash):
+    """Insert a new ranking row for file (name is the base file name) and store its hash.
+    The ranking insert is a plain INSERT; the filehash row is inserted or overwritten.
+    """
+    ranking_row = (file, os.path.basename(file), ranking)
+    cursor.execute('''
+    INSERT INTO ranking (file, name, ranking)
+    VALUES (?, ?, ?)
+    ''', ranking_row)
+    hash_row = (file, hash)
+    cursor.execute('''
+    INSERT OR REPLACE
+    INTO filehash (file, hash)
+    VALUES (?, ?)
+    ''', hash_row)
+def replace_ranking(cursor, file, alternate_file, hash):
+    """Re-point the ranking row of alternate_file to file and store hash for file.
+    The name column is now updated together with file; the base name was computed before
+    but never written, which could leave name out of sync with the new file path.
+    """
+    name = os.path.basename(file)
+    cursor.execute('''
+    UPDATE ranking
+    SET file = ?,
+        name = ?
+    WHERE file = ?
+    ''', (file, name, alternate_file))
+    cursor.execute('''
+    INSERT OR REPLACE
+    INTO filehash (file, hash)
+    VALUES (?, ?)
+    ''', (file, hash))
+def transaction_begin():
+    """Open a connection to the database, start an explicit transaction and return (conn, cursor).
+    isolation_level is None, so the sqlite3 module does not open transactions implicitly;
+    the transaction is started with an explicit BEGIN.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout, isolation_level=None)
+    cursor = conn.cursor()
+    cursor.execute("BEGIN")
+    return conn, cursor
+def transaction_end(conn, cursor):
+    """Commit the open transaction through cursor and close conn."""
+    cursor.execute("COMMIT")
+    conn.close()
+    return
+def update_exif_data_by_key(cursor, file, key, value):
+    """Insert or overwrite the exif_data row identified by (file, key) with value."""
+    upsert = '''
+    INSERT OR REPLACE
+    INTO exif_data (file, key, value)
+    VALUES (?, ?, ?)
+    '''
+    cursor.execute(upsert, (file, key, value))
+def select_prompts(file):
+    """Return (prompt, negative_prompt) stored in exif_data for file; missing values are "".
+    The connection is closed explicitly; the sqlite3 context manager does not close it.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+        SELECT key, value
+        FROM exif_data
+        WHERE file = ?
+          AND KEY in ('prompt', 'negative_prompt')
+        ''', (file,))
+        rows = cursor.fetchall()
+    finally:
+        conn.close()
+    prompt = ""
+    neg_prompt = ""
+    for key, value in rows:
+        if key == 'prompt':
+            prompt = value
+        elif key == 'negative_prompt':
+            neg_prompt = value
+    return prompt, neg_prompt
+def load_exif_data(exif_cache):
+    """Fill exif_cache with one concatenated "key: value" string per file and return it.
+    The query requests prompt first, negative_prompt second and the remaining keys by name;
+    prompt and negative_prompt entries end with a newline and entries are joined with ", ".
+    The connection is closed explicitly; the sqlite3 context manager does not close it.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+        SELECT file, group_concat(
+            case when key = 'prompt' or key = 'negative_prompt' then key || ': ' || value || '\n'
+            else key || ': ' || value
+            end, ', ') AS string
+        FROM (
+            SELECT *
+            FROM exif_data
+            ORDER BY
+                CASE WHEN key = 'prompt' THEN 0
+                    WHEN key = 'negative_prompt' THEN 1
+                    ELSE 2 END,
+                key
+        )
+        GROUP BY file
+        ''')
+        rows = cursor.fetchall()
+    finally:
+        conn.close()
+    for file, info in rows:
+        exif_cache[file] = info
+    return exif_cache
+def load_exif_data_by_key(cache, key1, key2):
+    """Fill cache with file -> value for every exif_data row whose key is key1 or key2, and return it.
+    When a file has rows for both keys, the row returned later by the query overwrites the earlier one.
+    The connection is closed explicitly; the sqlite3 context manager does not close it.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+        SELECT file, value
+        FROM exif_data
+        WHERE key IN (?, ?)
+        ''', (key1, key2))
+        rows = cursor.fetchall()
+    finally:
+        conn.close()
+    for file, value in rows:
+        cache[file] = value
+    return cache
+def get_exif_dirs():
+    """Return a dict mapping every directory that contains an exif_data file to itself.
+    Rows are fetched before the connection is closed; previously fetchall ran after the
+    with-block and only worked because the sqlite3 context manager does not close the connection.
+    """
+    conn = sqlite3.connect(db_file, timeout=timeout)
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+        SELECT file
+        FROM exif_data
+        ''')
+        rows = cursor.fetchall()
+    finally:
+        conn.close()
+    dirs = {}
+    for (file,) in rows:
+        directory = os.path.dirname(file)
+        dirs[directory] = directory
+    return dirs
+def fill_work_files(cursor, fileinfos):
+    """Replace the contents of work_files with the first element of every entry in fileinfos."""
+    # build the rows first so malformed input fails before the table is emptied
+    new_rows = [(name,) for name in [info[0] for info in fileinfos]]
+    cursor.execute('''
+    DELETE
+    FROM work_files
+    ''')
+    insert_sql = '''
+    INSERT INTO work_files (file)
+    VALUES (?)
+    '''
+    cursor.executemany(insert_sql, new_rows)
+def filter_aes(cursor, fileinfos, aes_filter_min_num, aes_filter_max_num, score_type):
+    """Keep only the work_files whose score lies between the given minimum and maximum.
+    The score is read from the exif_data key "aesthetic_score" when score_type has that value,
+    otherwise from "ImageRewardScore". Returns the (file, info) pairs of fileinfos for the
+    files that remain in work_files and whose info is not None.
+    """
+    key = "aesthetic_score" if score_type == "aesthetic_score" else "ImageRewardScore"
+    cursor.execute('''
+    DELETE
+    FROM work_files
+    WHERE file not in (
+        SELECT file
+        FROM exif_data b
+        WHERE file = b.file
+          AND b.key = ?
+          AND CAST(b.value AS REAL) between ? and ?
+    )
+    ''', (key, aes_filter_min_num, aes_filter_max_num))
+    cursor.execute('''
+    SELECT file
+    FROM work_files
+    ''')
+    remaining = cursor.fetchall()
+    info_by_file = {pair[0]: pair[1] for pair in fileinfos}
+    return [(file, info_by_file[file]) for (file,) in remaining if info_by_file.get(file) is not None]
+def filter_ranking(cursor, fileinfos, ranking_filter, ranking_filter_min_num, ranking_filter_max_num):
+    """Reduce work_files according to ranking_filter and return the matching (file, info) pairs.
+    "None" keeps the files without a ranking row, "Min-max" keeps the files whose ranking lies
+    between the two bounds, and any other value keeps the files with exactly that ranking.
+    Only pairs of fileinfos whose info is not None are returned.
+    """
+    if ranking_filter == "None":
+        cursor.execute('''
+        DELETE
+        FROM work_files
+        WHERE file IN (
+            SELECT file
+            FROM ranking b
+            WHERE file = b.file
+        )
+        ''')
+    elif ranking_filter == "Min-max":
+        bounds = (ranking_filter_min_num, ranking_filter_max_num)
+        cursor.execute('''
+        DELETE
+        FROM work_files
+        WHERE file NOT IN (
+            SELECT file
+            FROM ranking b
+            WHERE file = b.file
+            AND b.ranking BETWEEN ? AND ?
+        )
+        ''', bounds)
+    else:
+        cursor.execute('''
+        DELETE
+        FROM work_files
+        WHERE file NOT IN (
+            SELECT file
+            FROM ranking b
+            WHERE file = b.file
+            AND b.ranking = ?
+        )
+        ''', (ranking_filter,))
+    cursor.execute('''
+    SELECT file
+    FROM work_files
+    ''')
+    remaining = cursor.fetchall()
+    info_by_file = {pair[0]: pair[1] for pair in fileinfos}
+    return [(file, info_by_file[file]) for (file,) in remaining if info_by_file.get(file) is not None]
+def select_x_y(cursor, file):
+    """Return the (x, y) strings parsed from the "Size" value ("<x>x<y>") stored for file.
+    ("?", "?") is returned when the file has no Size row and, instead of raising an IndexError
+    as before, also when the stored value is empty or does not contain an "x".
+    """
+    cursor.execute('''
+    SELECT value
+    FROM exif_data
+    WHERE file = ?
+    AND key = 'Size'
+    ''', (file,))
+    size_value = cursor.fetchone()
+    if size_value is not None:
+        (size,) = size_value
+        parts = size.split("x") if size else []
+        if len(parts) >= 2:
+            return parts[0], parts[1]
+    return "?", "?"

extensions/stable-diffusion-webui-images-browser/style.css ADDED Viewed

	@@ -0,0 +1,23 @@

+/* Reset justify-content to its initial value on the thumbnail containers
+   (two rules, one per generated svelte class name). */
+.thumbnails.svelte-1tkea93.svelte-1tkea93 {
+  justify-content: initial;
+}
+.thumbnails.scroll-hide.svelte-g4rw9 {
+  justify-content: initial;
+}
+/* Hide the matching direct child of an image browser gallery while the gallery
+   carries the hide_loading class. */
+div[id^="image_browser_tab"][id$="image_browser_gallery"].hide_loading > .svelte-gjihhp {
+  display: none;
+}
+/* Gallery images use object-fit: scale-down. */
+.image_browser_gallery img {
+  object-fit: scale-down !important;
+}
+/* Workaround until gradio version is updated to a version that fixes it
+   see https://github.com/gradio-app/gradio/issues/1590
+*/
+#tab_image_browser .thumbnail-item > img {
+  width: auto !important;
+  height: auto !important;
+}

extensions/stable-diffusion-webui-images-browser/wib.sqlite3 ADDED Viewed

Binary file (307 kB). View file

extensions/ultimate-upscale-for-automatic1111/.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .vscode

extensions/ultimate-upscale-for-automatic1111/LICENSE ADDED Viewed

	@@ -0,0 +1,674 @@

+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+                            Preamble
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+  The precise terms and conditions for copying, distribution and
+modification follow.
+                       TERMS AND CONDITIONS
+  0. Definitions.
+  "This License" refers to version 3 of the GNU General Public License.
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+  1. Source Code.
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+  The Corresponding Source for a work in source code form is that
+same work.
+  2. Basic Permissions.
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+  4. Conveying Verbatim Copies.
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+  5. Conveying Modified Source Versions.
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+  6. Conveying Non-Source Forms.
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+  7. Additional Terms.
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+  8. Termination.
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+  9. Acceptance Not Required for Having Copies.
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+  10. Automatic Licensing of Downstream Recipients.
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+  11. Patents.
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+  12. No Surrender of Others' Freedom.
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+  13. Use with the GNU Affero General Public License.
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+  14. Revised Versions of this License.
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+  15. Disclaimer of Warranty.
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+  16. Limitation of Liability.
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+  17. Interpretation of Sections 15 and 16.
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+                     END OF TERMS AND CONDITIONS
+            How to Apply These Terms to Your New Programs
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+    ultimate-upscale-for-automatic1111
+    Copyright (C) 2023  Mirzam
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+Also add information on how to contact you by electronic and paper mail.
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+    <program>  Copyright (C) 2023  Mirzam
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.

extensions/ultimate-upscale-for-automatic1111/README.md ADDED Viewed

	@@ -0,0 +1,43 @@

+# Ultimate SD Upscale extension for [AUTOMATIC1111 Stable Diffusion web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
+With this extension you can use a high denoising strength (0.3-0.5) without producing many artifacts. It works on any video card, because you can use a 512x512 tile size and the image will still converge.
+News channel: https://t.me/usdunews
+# Instructions
+All instructions can be found on the project's [wiki](https://github.com/Coyote-A/ultimate-upscale-for-automatic1111/wiki).
+# Examples
+More examples can be found on the [wiki page](https://github.com/Coyote-A/ultimate-upscale-for-automatic1111/wiki/Examples).
+<details>
+  <summary>E1</summary>
+  Original image
+  ![Original](https://i.imgur.com/J8mRYOD.png)
+  2k upscaled. **Tile size**: 512, **Padding**: 32, **Mask blur**: 16, **Denoise**: 0.4
+  ![2k upscale](https://i.imgur.com/0aKua4r.png)
+</details>
+<details>
+  <summary>E2</summary>
+  Original image
+  ![Original](https://i.imgur.com/aALNI2w.png)
+  2k upscaled. **Tile size**: 768, **Padding**: 55, **Mask blur**: 20, **Denoise**: 0.35
+  ![2k upscale](https://i.imgur.com/B5PHz0J.png)
+  4k upscaled. **Tile size**: 768, **Padding**: 55, **Mask blur**: 20, **Denoise**: 0.35
+  ![4k upscale](https://i.imgur.com/tIUQ7TJ.jpg)
+</details>
+<details>
+  <summary>E3</summary>
+  Original image
+  ![Original](https://i.imgur.com/AGtszA8.png)
+  4k upscaled. **Tile size**: 768, **Padding**: 55, **Mask blur**: 20, **Denoise**: 0.4
+  ![4k upscale](https://i.imgur.com/LCYLfCs.jpg)
+</details>

extensions/ultimate-upscale-for-automatic1111/scripts/__pycache__/ultimate-upscale.cpython-310.pyc ADDED Viewed

Binary file (16.1 kB). View file

extensions/ultimate-upscale-for-automatic1111/scripts/ultimate-upscale.py ADDED Viewed

	@@ -0,0 +1,557 @@

+import math
+import gradio as gr
+from PIL import Image, ImageDraw, ImageOps
+from modules import processing, shared, images, devices, scripts
+from modules.processing import StableDiffusionProcessing
+from modules.processing import Processed
+from modules.shared import opts, state
+from enum import Enum
+class USDUMode(Enum):
+    """Tile redraw order; the values match the indexes of the redraw "Type" dropdown built in ``Script.ui``."""
+    # Tiles are redrawn one after another, row by row (see USDURedraw.linear_process).
+    LINEAR = 0
+    # Tiles are redrawn in two checkerboard passes (see USDURedraw.chess_process).
+    CHESS = 1
+    # Redraw is disabled (USDUpscaler.setup_redraw sets ``redraw.enabled`` to False).
+    NONE = 2
+class USDUSFMode(Enum):
+    """Seams-fix strategy; the values match the indexes of the seams fix "Type" dropdown built in ``Script.ui``."""
+    # Seams fix is disabled (USDUSeamsFix.start returns the image unchanged).
+    NONE = 0
+    # Re-process a band of ``width`` pixels along every seam (USDUSeamsFix.band_pass_process).
+    BAND_PASS = 1
+    # Re-process tile-sized areas shifted by half a tile over each seam (USDUSeamsFix.half_tile_process).
+    HALF_TILE = 2
+    # HALF_TILE followed by an extra pass over the seam intersections (USDUSeamsFix.half_tile_process_corners).
+    HALF_TILE_PLUS_INTERSECTIONS = 3
+class USDUpscaler():
+    def __init__(self, p, image, upscaler_index:int, save_redraw, save_seams_fix, tile_width, tile_height) -> None:
+        self.p:StableDiffusionProcessing = p
+        self.image:Image = image
+        self.scale_factor = math.ceil(max(p.width, p.height) / max(image.width, image.height))
+        self.upscaler = shared.sd_upscalers[upscaler_index]
+        self.redraw = USDURedraw()
+        self.redraw.save = save_redraw
+        self.redraw.tile_width = tile_width if tile_width > 0 else tile_height
+        self.redraw.tile_height = tile_height if tile_height > 0 else tile_width
+        self.seams_fix = USDUSeamsFix()
+        self.seams_fix.save = save_seams_fix
+        self.seams_fix.tile_width = tile_width if tile_width > 0 else tile_height
+        self.seams_fix.tile_height = tile_height if tile_height > 0 else tile_width
+        self.initial_info = None
+        self.rows = math.ceil(self.p.height / self.redraw.tile_height)
+        self.cols = math.ceil(self.p.width / self.redraw.tile_width)
+    def get_factor(self, num):
+        # Its just return, don't need elif
+        if num == 1:
+            return 2
+        if num % 4 == 0:
+            return 4
+        if num % 3 == 0:
+            return 3
+        if num % 2 == 0:
+            return 2
+        return 0
+    def get_factors(self):
+        scales = []
+        current_scale = 1
+        current_scale_factor = self.get_factor(self.scale_factor)
+        while current_scale_factor == 0:
+            self.scale_factor += 1
+            current_scale_factor = self.get_factor(self.scale_factor)
+        while current_scale < self.scale_factor:
+            current_scale_factor = self.get_factor(self.scale_factor // current_scale)
+            scales.append(current_scale_factor)
+            current_scale = current_scale * current_scale_factor
+            if current_scale_factor == 0:
+                break
+        self.scales = enumerate(scales)
+    def upscale(self):
+        # Log info
+        print(f"Canva size: {self.p.width}x{self.p.height}")
+        print(f"Image size: {self.image.width}x{self.image.height}")
+        print(f"Scale factor: {self.scale_factor}")
+        # Check upscaler is not empty
+        if self.upscaler.name == "None":
+            self.image = self.image.resize((self.p.width, self.p.height), resample=Image.LANCZOS)
+            return
+        # Get list with scale factors
+        self.get_factors()
+        # Upscaling image over all factors
+        for index, value in self.scales:
+            print(f"Upscaling iteration {index+1} with scale factor {value}")
+            self.image = self.upscaler.scaler.upscale(self.image, value, self.upscaler.data_path)
+        # Resize image to set values
+        self.image = self.image.resize((self.p.width, self.p.height), resample=Image.LANCZOS)
+    def setup_redraw(self, redraw_mode, padding, mask_blur):
+        self.redraw.mode = USDUMode(redraw_mode)
+        self.redraw.enabled = self.redraw.mode != USDUMode.NONE
+        self.redraw.padding = padding
+        self.p.mask_blur = mask_blur
+    def setup_seams_fix(self, padding, denoise, mask_blur, width, mode):
+        self.seams_fix.padding = padding
+        self.seams_fix.denoise = denoise
+        self.seams_fix.mask_blur = mask_blur
+        self.seams_fix.width = width
+        self.seams_fix.mode = USDUSFMode(mode)
+        self.seams_fix.enabled = self.seams_fix.mode != USDUSFMode.NONE
+    def save_image(self):
+        if type(self.p.prompt) != list:
+            images.save_image(self.image, self.p.outpath_samples, "", self.p.seed, self.p.prompt, opts.samples_format, info=self.initial_info, p=self.p)
+        else:
+            images.save_image(self.image, self.p.outpath_samples, "", self.p.seed, self.p.prompt[0], opts.samples_format, info=self.initial_info, p=self.p)
+    def calc_jobs_count(self):
+        redraw_job_count = (self.rows * self.cols) if self.redraw.enabled else 0
+        seams_job_count = 0
+        if self.seams_fix.mode == USDUSFMode.BAND_PASS:
+            seams_job_count = self.rows + self.cols - 2
+        elif self.seams_fix.mode == USDUSFMode.HALF_TILE:
+            seams_job_count = self.rows * (self.cols - 1) + (self.rows - 1) * self.cols
+        elif self.seams_fix.mode == USDUSFMode.HALF_TILE_PLUS_INTERSECTIONS:
+            seams_job_count = self.rows * (self.cols - 1) + (self.rows - 1) * self.cols + (self.rows - 1) * (self.cols - 1)
+        state.job_count = redraw_job_count + seams_job_count
+    def print_info(self):
+        print(f"Tile size: {self.redraw.tile_width}x{self.redraw.tile_height}")
+        print(f"Tiles amount: {self.rows * self.cols}")
+        print(f"Grid: {self.rows}x{self.cols}")
+        print(f"Redraw enabled: {self.redraw.enabled}")
+        print(f"Seams fix mode: {self.seams_fix.mode.name}")
+    def add_extra_info(self):
+        self.p.extra_generation_params["Ultimate SD upscale upscaler"] = self.upscaler.name
+        self.p.extra_generation_params["Ultimate SD upscale tile_width"] = self.redraw.tile_width
+        self.p.extra_generation_params["Ultimate SD upscale tile_height"] = self.redraw.tile_height
+        self.p.extra_generation_params["Ultimate SD upscale mask_blur"] = self.p.mask_blur
+        self.p.extra_generation_params["Ultimate SD upscale padding"] = self.redraw.padding
+    def process(self):
+        state.begin()
+        self.calc_jobs_count()
+        self.result_images = []
+        if self.redraw.enabled:
+            self.image = self.redraw.start(self.p, self.image, self.rows, self.cols)
+            self.initial_info = self.redraw.initial_info
+        self.result_images.append(self.image)
+        if self.redraw.save:
+            self.save_image()
+        if self.seams_fix.enabled:
+            self.image = self.seams_fix.start(self.p, self.image, self.rows, self.cols)
+            self.initial_info = self.seams_fix.initial_info
+            self.result_images.append(self.image)
+            if self.seams_fix.save:
+                self.save_image()
+        state.end()
+class USDURedraw():
+    def init_draw(self, p, width, height):
+        p.inpaint_full_res = True
+        p.inpaint_full_res_padding = self.padding
+        p.width = math.ceil((self.tile_width+self.padding) / 64) * 64
+        p.height = math.ceil((self.tile_height+self.padding) / 64) * 64
+        mask = Image.new("L", (width, height), "black")
+        draw = ImageDraw.Draw(mask)
+        return mask, draw
+    def calc_rectangle(self, xi, yi):
+        x1 = xi * self.tile_width
+        y1 = yi * self.tile_height
+        x2 = xi * self.tile_width + self.tile_width
+        y2 = yi * self.tile_height + self.tile_height
+        return x1, y1, x2, y2
+    def linear_process(self, p, image, rows, cols):
+        mask, draw = self.init_draw(p, image.width, image.height)
+        for yi in range(rows):
+            for xi in range(cols):
+                if state.interrupted:
+                    break
+                draw.rectangle(self.calc_rectangle(xi, yi), fill="white")
+                p.init_images = [image]
+                p.image_mask = mask
+                processed = processing.process_images(p)
+                draw.rectangle(self.calc_rectangle(xi, yi), fill="black")
+                if (len(processed.images) > 0):
+                    image = processed.images[0]
+        p.width = image.width
+        p.height = image.height
+        self.initial_info = processed.infotext(p, 0)
+        return image
+    def chess_process(self, p, image, rows, cols):
+        mask, draw = self.init_draw(p, image.width, image.height)
+        tiles = []
+        # calc tiles colors
+        for yi in range(rows):
+            for xi in range(cols):
+                if state.interrupted:
+                    break
+                if xi == 0:
+                    tiles.append([])
+                color = xi % 2 == 0
+                if yi > 0 and yi % 2 != 0:
+                    color = not color
+                tiles[yi].append(color)
+        for yi in range(len(tiles)):
+            for xi in range(len(tiles[yi])):
+                if state.interrupted:
+                    break
+                if not tiles[yi][xi]:
+                    tiles[yi][xi] = not tiles[yi][xi]
+                    continue
+                tiles[yi][xi] = not tiles[yi][xi]
+                draw.rectangle(self.calc_rectangle(xi, yi), fill="white")
+                p.init_images = [image]
+                p.image_mask = mask
+                processed = processing.process_images(p)
+                draw.rectangle(self.calc_rectangle(xi, yi), fill="black")
+                if (len(processed.images) > 0):
+                    image = processed.images[0]
+        for yi in range(len(tiles)):
+            for xi in range(len(tiles[yi])):
+                if state.interrupted:
+                    break
+                if not tiles[yi][xi]:
+                    continue
+                draw.rectangle(self.calc_rectangle(xi, yi), fill="white")
+                p.init_images = [image]
+                p.image_mask = mask
+                processed = processing.process_images(p)
+                draw.rectangle(self.calc_rectangle(xi, yi), fill="black")
+                if (len(processed.images) > 0):
+                    image = processed.images[0]
+        p.width = image.width
+        p.height = image.height
+        self.initial_info = processed.infotext(p, 0)
+        return image
+    def start(self, p, image, rows, cols):
+        self.initial_info = None
+        if self.mode == USDUMode.LINEAR:
+            return self.linear_process(p, image, rows, cols)
+        if self.mode == USDUMode.CHESS:
+            return self.chess_process(p, image, rows, cols)
+class USDUSeamsFix():
+    def init_draw(self, p):
+        self.initial_info = None
+        p.width = math.ceil((self.tile_width+self.padding) / 64) * 64
+        p.height = math.ceil((self.tile_height+self.padding) / 64) * 64
+    def half_tile_process(self, p, image, rows, cols):
+        """Re-process tile-sized areas shifted by half a tile so that each covers a seam.
+        The first loop handles the seams between vertically adjacent tiles, the second
+        loop the seams between horizontally adjacent tiles. Every area is inpainted
+        through a gradient mask with ``self.denoise`` and ``self.mask_blur``.
+        Returns the resulting image; ``self.initial_info`` is set from the last
+        processing result when at least one area was processed.
+        """
+        self.init_draw(p)
+        processed = None
+        gradient = Image.linear_gradient("L")
+        # Tile-sized mask for horizontal seams: the linear gradient in the top half and
+        # the same gradient rotated by 180 degrees in the bottom half.
+        row_gradient = Image.new("L", (self.tile_width, self.tile_height), "black")
+        row_gradient.paste(gradient.resize(
+            (self.tile_width, self.tile_height//2), resample=Image.BICUBIC), (0, 0))
+        row_gradient.paste(gradient.rotate(180).resize(
+                (self.tile_width, self.tile_height//2), resample=Image.BICUBIC),
+                (0, self.tile_height//2))
+        # Tile-sized mask for vertical seams: the gradient rotated by 90 degrees in the
+        # left half and by 270 degrees in the right half.
+        col_gradient = Image.new("L", (self.tile_width, self.tile_height), "black")
+        col_gradient.paste(gradient.rotate(90).resize(
+            (self.tile_width//2, self.tile_height), resample=Image.BICUBIC), (0, 0))
+        col_gradient.paste(gradient.rotate(270).resize(
+            (self.tile_width//2, self.tile_height), resample=Image.BICUBIC), (self.tile_width//2, 0))
+        p.denoising_strength = self.denoise
+        p.mask_blur = self.mask_blur
+        # Pass 1: seams between row yi and row yi + 1.
+        for yi in range(rows-1):
+            for xi in range(cols):
+                # Only leaves the inner loop; the next row checks the flag again.
+                if state.interrupted:
+                    break
+                # Overrides the rounded-up size set by init_draw with the exact tile size.
+                p.width = self.tile_width
+                p.height = self.tile_height
+                p.inpaint_full_res = True
+                p.inpaint_full_res_padding = self.padding
+                mask = Image.new("L", (image.width, image.height), "black")
+                # Shift the mask down by half a tile so it straddles the seam.
+                mask.paste(row_gradient, (xi*self.tile_width, yi*self.tile_height + self.tile_height//2))
+                p.init_images = [image]
+                p.image_mask = mask
+                processed = processing.process_images(p)
+                if (len(processed.images) > 0):
+                    image = processed.images[0]
+        # Pass 2: seams between column xi and column xi + 1.
+        for yi in range(rows):
+            for xi in range(cols-1):
+                if state.interrupted:
+                    break
+                p.width = self.tile_width
+                p.height = self.tile_height
+                p.inpaint_full_res = True
+                p.inpaint_full_res_padding = self.padding
+                mask = Image.new("L", (image.width, image.height), "black")
+                # Shift the mask right by half a tile so it straddles the seam.
+                mask.paste(col_gradient, (xi*self.tile_width+self.tile_width//2, yi*self.tile_height))
+                p.init_images = [image]
+                p.image_mask = mask
+                processed = processing.process_images(p)
+                if (len(processed.images) > 0):
+                    image = processed.images[0]
+        # Restore the full canvas size on p.
+        p.width = image.width
+        p.height = image.height
+        # processed is None when nothing was processed (interrupted or a 1x1 grid).
+        if processed is not None:
+            self.initial_info = processed.infotext(p, 0)
+        return image
+    def half_tile_process_corners(self, p, image, rows, cols):
+        """Run ``half_tile_process`` and then re-process the areas around the seam intersections.
+        Each intersection is inpainted through an inverted radial-gradient mask of tile
+        size, shifted by half a tile in both directions.
+        Returns the resulting image; ``self.initial_info`` is overwritten only when at
+        least one intersection was processed in this second stage.
+        """
+        fixed_image = self.half_tile_process(p, image, rows, cols)
+        processed = None
+        # NOTE: init_draw also resets self.initial_info, discarding the value that
+        # half_tile_process stored above.
+        self.init_draw(p)
+        gradient = Image.radial_gradient("L").resize(
+            (self.tile_width, self.tile_height), resample=Image.BICUBIC)
+        gradient = ImageOps.invert(gradient)
+        p.denoising_strength = self.denoise
+        #p.mask_blur = 0
+        p.mask_blur = self.mask_blur
+        # One area per inner grid intersection.
+        for yi in range(rows-1):
+            for xi in range(cols-1):
+                # Only leaves the inner loop; the next row checks the flag again.
+                if state.interrupted:
+                    break
+                p.width = self.tile_width
+                p.height = self.tile_height
+                p.inpaint_full_res = True
+                # Unlike the other passes, no inpaint padding is used here.
+                p.inpaint_full_res_padding = 0
+                mask = Image.new("L", (fixed_image.width, fixed_image.height), "black")
+                # Shift by half a tile in both directions so the mask sits on the intersection.
+                mask.paste(gradient, (xi*self.tile_width + self.tile_width//2,
+                                      yi*self.tile_height + self.tile_height//2))
+                p.init_images = [fixed_image]
+                p.image_mask = mask
+                processed = processing.process_images(p)
+                if (len(processed.images) > 0):
+                    fixed_image = processed.images[0]
+        # Restore the full canvas size on p.
+        p.width = fixed_image.width
+        p.height = fixed_image.height
+        if processed is not None:
+            self.initial_info = processed.infotext(p, 0)
+        return fixed_image
+    def band_pass_process(self, p, image, cols, rows):
+        """Re-process a band of ``self.width`` pixels along every inner grid line.
+        Vertical bands (full image height) are handled first, then horizontal bands
+        (full image width). Each band is inpainted through a mirrored gradient mask
+        with ``self.denoise`` and a mask blur of 0.
+        Returns the resulting image; ``self.initial_info`` is set from the last
+        processing result when at least one band was processed.
+        NOTE(review): the parameters are declared as ``(cols, rows)`` but the only
+        caller visible in this file (``start``) passes ``(rows, cols)`` positionally,
+        so inside this method ``rows`` holds the column count and ``cols`` the row
+        count. The loops below rely on that swap; passing these by keyword would
+        break them.
+        """
+        self.init_draw(p)
+        processed = None
+        p.denoising_strength = self.denoise
+        p.mask_blur = 0
+        gradient = Image.linear_gradient("L")
+        # 256x256 template: the gradient in the top half, rotated by 180 degrees in the bottom half.
+        mirror_gradient = Image.new("L", (256, 256), "black")
+        mirror_gradient.paste(gradient.resize((256, 128), resample=Image.BICUBIC), (0, 0))
+        mirror_gradient.paste(gradient.rotate(180).resize((256, 128), resample=Image.BICUBIC), (0, 128))
+        # Stretch the template into a full-width horizontal band and a full-height vertical band.
+        row_gradient = mirror_gradient.resize((image.width, self.width), resample=Image.BICUBIC)
+        col_gradient = mirror_gradient.rotate(90).resize((self.width, image.height), resample=Image.BICUBIC)
+        # Vertical bands centred on x = xi * tile_width (``rows`` is the column count here, see NOTE).
+        for xi in range(1, rows):
+            if state.interrupted:
+                    break
+            p.width = self.width + self.padding * 2
+            p.height = image.height
+            p.inpaint_full_res = True
+            p.inpaint_full_res_padding = self.padding
+            mask = Image.new("L", (image.width, image.height), "black")
+            mask.paste(col_gradient, (xi * self.tile_width - self.width // 2, 0))
+            p.init_images = [image]
+            p.image_mask = mask
+            processed = processing.process_images(p)
+            if (len(processed.images) > 0):
+                image = processed.images[0]
+        # Horizontal bands centred on y = yi * tile_height (``cols`` is the row count here, see NOTE).
+        for yi in range(1, cols):
+            if state.interrupted:
+                    break
+            p.width = image.width
+            p.height = self.width + self.padding * 2
+            p.inpaint_full_res = True
+            p.inpaint_full_res_padding = self.padding
+            mask = Image.new("L", (image.width, image.height), "black")
+            mask.paste(row_gradient, (0, yi * self.tile_height - self.width // 2))
+            p.init_images = [image]
+            p.image_mask = mask
+            processed = processing.process_images(p)
+            if (len(processed.images) > 0):
+                image = processed.images[0]
+        # Restore the full canvas size on p.
+        p.width = image.width
+        p.height = image.height
+        if processed is not None:
+            self.initial_info = processed.infotext(p, 0)
+        return image
+    def start(self, p, image, rows, cols):
+        if USDUSFMode(self.mode) == USDUSFMode.BAND_PASS:
+            return self.band_pass_process(p, image, rows, cols)
+        elif USDUSFMode(self.mode) == USDUSFMode.HALF_TILE:
+            return self.half_tile_process(p, image, rows, cols)
+        elif USDUSFMode(self.mode) == USDUSFMode.HALF_TILE_PLUS_INTERSECTIONS:
+            return self.half_tile_process_corners(p, image, rows, cols)
+        else:
+            return image
+class Script(scripts.Script):
+    def title(self):
+        """Return the display name of this script."""
+        return "Ultimate SD upscale"
+    def show(self, is_img2img):
+        """Return ``is_img2img`` unchanged (presumably this limits the script to the img2img tab; confirm against the web UI scripts API)."""
+        return is_img2img
+    def ui(self, is_img2img):
+        """Build the Gradio controls of the script and return them as a list.
+        The order of the returned components matches the parameters of ``run`` after
+        ``p``; the leading ``info`` HTML element corresponds to the ignored ``_`` parameter.
+        All dropdowns and the upscaler radio use ``type="index"``, so ``run`` receives
+        the position of the selected choice rather than its label.
+        """
+        # Index 0: keep the size from p, 1: custom width/height, 2: multiple of the image size (see run).
+        target_size_types = [
+            "From img2img2 settings",
+            "Custom size",
+            "Scale from image size"
+        ]
+        # Order matches the values of USDUSFMode.
+        seams_fix_types = [
+            "None",
+            "Band pass",
+            "Half tile offset pass",
+            "Half tile offset pass + intersections"
+        ]
+        # Order matches the values of USDUMode.
+        redrow_modes = [
+            "Linear",
+            "Chess",
+            "None"
+        ]
+        info = gr.HTML(
+            "<p style=\"margin-bottom:0.75em\">Will upscale the image depending on the selected target size type</p>")
+        with gr.Row():
+            target_size_type = gr.Dropdown(label="Target size type", choices=[k for k in target_size_types], type="index",
+                                  value=next(iter(target_size_types)))
+            # Hidden until the matching target size type is selected (see select_scale_type).
+            custom_width = gr.Slider(label='Custom width', minimum=64, maximum=8192, step=64, value=2048, visible=False, interactive=True)
+            custom_height = gr.Slider(label='Custom height', minimum=64, maximum=8192, step=64, value=2048, visible=False, interactive=True)
+            custom_scale = gr.Slider(label='Scale', minimum=1, maximum=16, step=0.01, value=2, visible=False, interactive=True)
+        gr.HTML("<p style=\"margin-bottom:0.75em\">Redraw options:</p>")
+        with gr.Row():
+            upscaler_index = gr.Radio(label='Upscaler', choices=[x.name for x in shared.sd_upscalers],
+                                value=shared.sd_upscalers[0].name, type="index")
+        with gr.Row():
+            redraw_mode = gr.Dropdown(label="Type", choices=[k for k in redrow_modes], type="index", value=next(iter(redrow_modes)))
+            # A tile dimension of 0 makes USDUpscaler fall back to the other dimension.
+            tile_width = gr.Slider(minimum=0, maximum=2048, step=64, label='Tile width', value=512)
+            tile_height = gr.Slider(minimum=0, maximum=2048, step=64, label='Tile height', value=0)
+            mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=8)
+            padding = gr.Slider(label='Padding', minimum=0, maximum=128, step=1, value=32)
+        gr.HTML("<p style=\"margin-bottom:0.75em\">Seams fix:</p>")
+        with gr.Row():
+            seams_fix_type = gr.Dropdown(label="Type", choices=[k for k in seams_fix_types], type="index", value=next(iter(seams_fix_types)))
+            # Hidden until a seams fix type that uses them is selected (see select_fix_type).
+            seams_fix_denoise = gr.Slider(label='Denoise', minimum=0, maximum=1, step=0.01, value=0.35, visible=False, interactive=True)
+            seams_fix_width = gr.Slider(label='Width', minimum=0, maximum=128, step=1, value=64, visible=False, interactive=True)
+            seams_fix_mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, visible=False, interactive=True)
+            seams_fix_padding = gr.Slider(label='Padding', minimum=0, maximum=128, step=1, value=16, visible=False, interactive=True)
+        gr.HTML("<p style=\"margin-bottom:0.75em\">Save options:</p>")
+        with gr.Row():
+            save_upscaled_image = gr.Checkbox(label="Upscaled", value=True)
+            save_seams_fix_image = gr.Checkbox(label="Seams fix", value=False)
+        def select_fix_type(fix_index):
+            # Denoise and padding apply to every fix type except "None"; width only to
+            # "Band pass" (index 1); mask blur only to the two half-tile types (2 and 3).
+            all_visible = fix_index != 0
+            mask_blur_visible = fix_index == 2 or fix_index == 3
+            width_visible = fix_index == 1
+            # Order must match the ``outputs`` list of seams_fix_type.change below.
+            return [gr.update(visible=all_visible),
+                    gr.update(visible=width_visible),
+                    gr.update(visible=mask_blur_visible),
+                    gr.update(visible=all_visible)]
+        seams_fix_type.change(
+            fn=select_fix_type,
+            inputs=seams_fix_type,
+            outputs=[seams_fix_denoise, seams_fix_width, seams_fix_mask_blur, seams_fix_padding]
+        )
+        def select_scale_type(scale_index):
+            # Width/height sliders for "Custom size" (index 1), scale slider for
+            # "Scale from image size" (index 2).
+            is_custom_size = scale_index == 1
+            is_custom_scale = scale_index == 2
+            return [gr.update(visible=is_custom_size),
+                    gr.update(visible=is_custom_size),
+                    gr.update(visible=is_custom_scale),
+                    ]
+        target_size_type.change(
+            fn=select_scale_type,
+            inputs=target_size_type,
+            outputs=[custom_width, custom_height, custom_scale]
+        )
+        # Keep this order in sync with the signature of run().
+        return [info, tile_width, tile_height, mask_blur, padding, seams_fix_width, seams_fix_denoise, seams_fix_padding,
+                upscaler_index, save_upscaled_image, redraw_mode, save_seams_fix_image, seams_fix_mask_blur,
+                seams_fix_type, target_size_type, custom_width, custom_height, custom_scale]
+    def run(self, p, _, tile_width, tile_height, mask_blur, padding, seams_fix_width, seams_fix_denoise, seams_fix_padding,
+            upscaler_index, save_upscaled_image, redraw_mode, save_seams_fix_image, seams_fix_mask_blur,
+            seams_fix_type, target_size_type, custom_width, custom_height, custom_scale):
+        # Init
+        processing.fix_seed(p)
+        devices.torch_gc()
+        p.do_not_save_grid = True
+        p.do_not_save_samples = True
+        p.inpaint_full_res = False
+        p.inpainting_fill = 1
+        p.n_iter = 1
+        p.batch_size = 1
+        seed = p.seed
+        # Init image
+        init_img = p.init_images[0]
+        if init_img == None:
+            return Processed(p, [], seed, "Empty image")
+        init_img = images.flatten(init_img, opts.img2img_background_color)
+        #override size
+        if target_size_type == 1:
+            p.width = custom_width
+            p.height = custom_height
+        if target_size_type == 2:
+            p.width = math.ceil((init_img.width * custom_scale) / 64) * 64
+            p.height = math.ceil((init_img.height * custom_scale) / 64) * 64
+        # Upscaling
+        upscaler = USDUpscaler(p, init_img, upscaler_index, save_upscaled_image, save_seams_fix_image, tile_width, tile_height)
+        upscaler.upscale()
+        # Drawing
+        upscaler.setup_redraw(redraw_mode, padding, mask_blur)
+        upscaler.setup_seams_fix(seams_fix_padding, seams_fix_denoise, seams_fix_mask_blur, seams_fix_width, seams_fix_type)
+        upscaler.print_info()
+        upscaler.add_extra_info()
+        upscaler.process()
+        result_images = upscaler.result_images
+        return Processed(p, result_images, seed, upscaler.initial_info if upscaler.initial_info is not None else "")