adamelliotfields committed
Commit 163a3a9
1 Parent(s): 933318d

Loading and inference improvements

Files changed (4)
  1. lib/config.py +43 -5
  2. lib/inference.py +62 -90
  3. lib/loader.py +30 -28
  4. lib/upscaler.py +5 -7
lib/config.py CHANGED
@@ -1,3 +1,5 @@
+import os
+from importlib import import_module
 from types import SimpleNamespace
 
 from diffusers import (
@@ -10,7 +12,38 @@ from diffusers import (
     StableDiffusionXLPipeline,
 )
 
+# improved GPU handling and progress bars; set before importing spaces
+os.environ["ZEROGPU_V2"] = "true"
+
+_sdxl_refiner_files = [
+    "scheduler/scheduler_config.json",
+    "text_encoder_2/config.json",
+    "text_encoder_2/model.fp16.safetensors",
+    "tokenizer_2/merges.txt",
+    "tokenizer_2/special_tokens_map.json",
+    "tokenizer_2/tokenizer_config.json",
+    "tokenizer_2/vocab.json",
+    "unet/config.json",
+    "unet/diffusion_pytorch_model.fp16.safetensors",
+    "vae/config.json",
+    "vae/diffusion_pytorch_model.fp16.safetensors",
+    "model_index.json",
+]
+
+_sdxl_files = [
+    *_sdxl_refiner_files,
+    "text_encoder/config.json",
+    "text_encoder/model.fp16.safetensors",
+    "tokenizer/merges.txt",
+    "tokenizer/special_tokens_map.json",
+    "tokenizer/tokenizer_config.json",
+    "tokenizer/vocab.json",
+]
+
 Config = SimpleNamespace(
+    HF_TOKEN=os.environ.get("HF_TOKEN", None),
+    CIVIT_TOKEN=os.environ.get("CIVIT_TOKEN", None),
+    ZERO_GPU=import_module("spaces").config.Config.zero_gpu,
     MONO_FONTS=["monospace"],
     SANS_FONTS=[
         "sans-serif",
@@ -23,6 +56,11 @@ Config = SimpleNamespace(
         "txt2img": StableDiffusionXLPipeline,
         "img2img": StableDiffusionXLImg2ImgPipeline,
     },
+    HF_MODELS={
+        "segmind/Segmind-Vega": [*_sdxl_files],
+        "stabilityai/stable-diffusion-xl-base-1.0": [*_sdxl_files, "vae_1_0/config.json"],
+        "stabilityai/stable-diffusion-xl-refiner-1.0": [*_sdxl_refiner_files],
+    },
     MODEL="segmind/Segmind-Vega",
     MODELS=[
         "cagliostrolab/animagine-xl-3.1",
@@ -49,13 +87,13 @@ Config = SimpleNamespace(
         "Euler": EulerDiscreteScheduler,
         "Euler a": EulerAncestralDiscreteScheduler,
     },
-    STYLE="sai-enhance",
-    WIDTH=896,
-    HEIGHT=1152,
+    STYLE="enhance",
+    WIDTH=1024,
+    HEIGHT=1024,
     NUM_IMAGES=1,
     SEED=-1,
-    GUIDANCE_SCALE=6,
-    INFERENCE_STEPS=35,
+    GUIDANCE_SCALE=7.5,
+    INFERENCE_STEPS=40,
     DEEPCACHE_INTERVAL=1,
     SCALE=1,
     SCALES=[1, 2, 4],
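
Note: the new HF_MODELS map lists exactly the fp16 files each supported repo needs, which makes it possible to warm the Hugging Face cache without snapshotting whole repositories. A minimal sketch of that idea, assuming huggingface_hub is installed; the prefetch helper below is hypothetical and not part of this commit:

# Hypothetical warm-up: download only the files listed for each repo in Config.HF_MODELS.
# hf_hub_download caches each file under ~/.cache/huggingface by default.
from huggingface_hub import hf_hub_download

from lib.config import Config


def prefetch(token=None):
    for repo_id, filenames in Config.HF_MODELS.items():
        for filename in filenames:
            hf_hub_download(repo_id, filename, token=token)


prefetch(Config.HF_TOKEN)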
lib/inference.py CHANGED
@@ -1,77 +1,51 @@
-import functools
-import inspect
-import json
 import re
 import time
 from datetime import datetime
 from itertools import product
-from typing import Callable, TypeVar
 
-import anyio
-import spaces
 import torch
-from anyio import Semaphore
 from compel import Compel, ReturnedEmbeddingsType
 from compel.prompt_parser import PromptParser
-from typing_extensions import ParamSpec
+from spaces import GPU
 
+from .config import Config
 from .loader import Loader
+from .utils import load_json
 
-__import__("warnings").filterwarnings("ignore", category=FutureWarning, module="transformers")
-__import__("transformers").logging.set_verbosity_error()
 
-T = TypeVar("T")
-P = ParamSpec("P")
-
-MAX_CONCURRENT_THREADS = 1
-MAX_THREADS_GUARD = Semaphore(MAX_CONCURRENT_THREADS)
-
-with open("./data/styles.json") as f:
-    STYLES = json.load(f)
-
-
-# like the original but supports args and kwargs instead of a dict
-# https://github.com/huggingface/huggingface-inference-toolkit/blob/0.2.0/src/huggingface_inference_toolkit/async_utils.py
-async def async_call(fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
-    async with MAX_THREADS_GUARD:
-        sig = inspect.signature(fn)
-        bound_args = sig.bind(*args, **kwargs)
-        bound_args.apply_defaults()
-        partial_fn = functools.partial(fn, **bound_args.arguments)
-        return await anyio.to_thread.run_sync(partial_fn)
-
-
-# parse prompts with arrays
-def parse_prompt(prompt: str) -> list[str]:
+def parse_prompt_with_arrays(prompt: str) -> list[str]:
     arrays = re.findall(r"\[\[(.*?)\]\]", prompt)
 
     if not arrays:
         return [prompt]
 
-    tokens = [item.split(",") for item in arrays]
-    combinations = list(product(*tokens))
-    prompts = []
+    tokens = [item.split(",") for item in arrays]  # [("a", "b"), (1, 2)]
+    combinations = list(product(*tokens))  # [("a", 1), ("a", 2), ("b", 1), ("b", 2)]
+
+    # find all the arrays in the prompt and replace them with tokens
+    prompts = []
     for combo in combinations:
         current_prompt = prompt
         for i, token in enumerate(combo):
             current_prompt = current_prompt.replace(f"[[{arrays[i]}]]", token.strip(), 1)
         prompts.append(current_prompt)
-
     return prompts
 
 
-def apply_style(prompt, style_id, negative=False):
-    global STYLES
-    if not style_id or style_id == "None":
-        return prompt
-    for style in STYLES:
-        if style["id"] == style_id:
-            if negative:
-                return prompt + " . " + style["negative_prompt"]
-            else:
-                return style["prompt"].format(prompt=prompt)
-    return prompt
+def apply_style(positive_prompt, negative_prompt, style_id="none"):
+    if style_id.lower() == "none":
+        return (positive_prompt, negative_prompt)
+
+    styles = load_json("./data/styles.json")
+    style = styles.get(style_id)
+    if style is None:
+        return (positive_prompt, negative_prompt)
+
+    style_base = style.get("_base", {})
+    return (
+        style.get("positive").format(prompt=positive_prompt, _base=style_base.get("positive")).strip(),
+        style.get("negative").format(prompt=negative_prompt, _base=style_base.get("negative")).strip(),
+    )
 
 
 # max 60s per image
@@ -97,7 +71,7 @@ def gpu_duration(**kwargs):
     return loading + (duration * num_images)
 
 
-@spaces.GPU(duration=gpu_duration)
+@GPU(duration=gpu_duration)
 def generate(
     positive_prompt,
     negative_prompt="",
@@ -114,53 +88,51 @@ def generate(
     num_images=1,
     use_karras=False,
     use_refiner=False,
-    Info: Callable[[str], None] = None,
     Error=Exception,
+    Info=None,
     progress=None,
 ):
     if not torch.cuda.is_available():
-        raise Error("RuntimeError: CUDA not available")
+        raise Error("CUDA not available")
 
     # https://pytorch.org/docs/stable/generated/torch.manual_seed.html
     if seed is None or seed < 0:
-        seed = int(datetime.now().timestamp() * 1_000_000) % (2**64)
+        seed = int(datetime.now().timestamp() * 1e6) % (2**64)
 
     KIND = "txt2img"
     CURRENT_STEP = 0
     CURRENT_IMAGE = 1
     EMBEDDINGS_TYPE = ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED
 
-    if progress is not None:
-        TQDM = False
-        progress((0, inference_steps), desc=f"Generating image 1/{num_images}")
-    else:
-        TQDM = True
-
+    # custom progress bar for multiple images
     def callback_on_step_end(pipeline, step, timestep, latents):
         nonlocal CURRENT_IMAGE, CURRENT_STEP
 
-        if progress is None:
-            return latents
-
-        strength = 1
-        total_steps = min(int(inference_steps * strength), inference_steps)
-
-        # if steps are different we're in the refiner
-        refining = False
-        if CURRENT_STEP == step:
-            CURRENT_STEP = step + 1
-        else:
-            refining = True
-            CURRENT_STEP += 1
-
-        progress(
-            (CURRENT_STEP, total_steps),
-            desc=f"{'Refining' if refining else 'Generating'} image {CURRENT_IMAGE}/{num_images}",
-        )
+        if progress is not None:
+            # calculate total steps for img2img based on denoising strength
+            strength = 1
+            total_steps = min(int(inference_steps * strength), inference_steps)
+
+            # if steps are different we're in the refiner
+            refining = False
+            if CURRENT_STEP == step:
+                CURRENT_STEP = step + 1
+            else:
+                refining = True
+                CURRENT_STEP += 1
+
+            progress(
+                (CURRENT_STEP, total_steps),
+                desc=f"{'Refining' if refining else 'Generating'} image {CURRENT_IMAGE}/{num_images}",
+            )
         return latents
 
     start = time.perf_counter()
+    print(f"Generating {num_images} image{'s' if num_images > 1 else ''}")
+
+    if Config.ZERO_GPU and progress is not None:
+        progress((100, 100), desc="ZeroGPU init")
+
     loader = Loader()
     loader.load(
         KIND,
@@ -170,11 +142,11 @@ def generate(
         scale,
         use_karras,
         use_refiner,
-        TQDM,
+        progress,
     )
 
     if loader.pipe is None:
-        raise Error(f"RuntimeError: Error loading {model}")
+        raise Error(f"Error loading {model}")
 
     pipe = loader.pipe
     refiner = loader.refiner
@@ -205,21 +177,21 @@ def generate(
 
     images = []
     current_seed = seed
-
     for i in range(num_images):
-        # seeded generator for each iteration
         generator = torch.Generator(device=pipe.device).manual_seed(current_seed)
 
         try:
-            styled_negative_prompt = apply_style(negative_prompt, style, negative=True)
-            all_positive_prompts = parse_prompt(positive_prompt)
-            prompt_index = i % len(all_positive_prompts)
-            prompt = all_positive_prompts[prompt_index]
-            styled_prompt = apply_style(prompt, style)
-            conditioning_1, pooled_1 = compel_1([styled_prompt, styled_negative_prompt])
-            conditioning_2, pooled_2 = compel_2([styled_prompt, styled_negative_prompt])
+            positive_prompts = parse_prompt_with_arrays(positive_prompt)
+            index = i % len(positive_prompts)
+            positive_styled, negative_styled = apply_style(positive_prompts[index], negative_prompt, style)
+
+            if negative_styled.startswith("(), "):
+                negative_styled = negative_styled[4:]
+
+            conditioning_1, pooled_1 = compel_1([positive_styled, negative_styled])
+            conditioning_2, pooled_2 = compel_2([positive_styled, negative_styled])
         except PromptParser.ParsingException:
-            raise Error("ValueError: Invalid prompt")
+            raise Error("Invalid prompt")
 
         # refiner expects latents; upscaler expects numpy array
         pipe_output_type = "pil"
@@ -272,12 +244,12 @@ def generate(
             if scale > 1:
                 image = upscaler.predict(image)
             images.append((image, str(current_seed)))
+            current_seed += 1
         except Exception as e:
-            raise Error(f"RuntimeError: {e}")
+            raise Error(f"{e}")
         finally:
             CURRENT_STEP = 0
             CURRENT_IMAGE += 1
-            current_seed += 1
 
     diff = time.perf_counter() - start
     if Info:
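
Note: the [[a, b]] syntax handled by parse_prompt_with_arrays expands into the Cartesian product of the bracketed tokens, and apply_style now reads data/styles.json as a dict keyed by style id. A small sketch of both; the JSON shape is inferred from the .format(prompt=..., _base=...) calls, not copied from the actual data file:

# Expansion follows the function shown above; the last array varies fastest.
prompts = parse_prompt_with_arrays("a [[red, blue]] [[cat, dog]]")
# -> ["a red cat", "a red dog", "a blue cat", "a blue dog"]

# Assumed shape of a data/styles.json entry (keys inferred from apply_style):
# "enhance": {
#     "positive": "{prompt}, {_base}, highly detailed",
#     "negative": "{prompt}, blurry, lowres",
#     "_base": {"positive": "masterpiece", "negative": ""}
# }
positive, negative = apply_style("a red cat", "text, watermark", style_id="enhance")
# with the assumed entry above:
# positive -> "a red cat, masterpiece, highly detailed"
# negative -> "text, watermark, blurry, lowres"

The @GPU(duration=gpu_duration) decorator keeps the dynamic ZeroGPU time budget: when duration is a callable, spaces evaluates it against the call's keyword arguments. A hedged sketch of such a callable, consistent with the visible loading + (duration * num_images) return but with illustrative numbers (the real gpu_duration is only partially shown in this diff):

def gpu_duration(**kwargs):
    loading = 20  # seconds reserved for model loading (illustrative)
    duration = 10  # seconds per image (illustrative)
    num_images = kwargs.get("num_images", 1)
    return loading + (duration * num_images)


@GPU(duration=gpu_duration)
def generate(positive_prompt, negative_prompt="", num_images=1, **kwargs):
    ...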
lib/loader.py CHANGED
@@ -1,6 +1,5 @@
 import gc
 from threading import Lock
-from warnings import filterwarnings
 
 import torch
 from DeepCache import DeepCacheSDHelper
@@ -9,10 +8,6 @@ from diffusers.models import AutoencoderKL
 from .config import Config
 from .upscaler import RealESRGAN
 
-__import__("diffusers").logging.set_verbosity_error()
-filterwarnings("ignore", category=FutureWarning, module="torch")
-filterwarnings("ignore", category=FutureWarning, module="diffusers")
-
 
 class Loader:
     _instance = None
@@ -33,7 +28,6 @@ class Loader:
         gc.collect()
         torch.cuda.empty_cache()
         torch.cuda.ipc_collect()
-        torch.cuda.reset_max_memory_allocated()
         torch.cuda.reset_peak_memory_stats()
         torch.cuda.synchronize()
 
@@ -44,8 +38,18 @@ class Loader:
             return True
         return False
 
-    def _unload(self, model):
+    def _unload_deepcache(self):
+        if self.pipe.deepcache is None:
+            return
+        print("Unloading DeepCache")
+        self.pipe.deepcache.disable()
+        delattr(self.pipe, "deepcache")
+
+    # don't unload refiner
+    def _unload(self, model, deepcache):
         to_unload = []
+        if self._should_unload_deepcache(deepcache):
+            self._unload_deepcache()
         if self._should_unload_pipeline(model):
             to_unload.append("model")
             to_unload.append("pipe")
@@ -55,7 +59,7 @@ class Loader:
         for component in to_unload:
             setattr(self, component, None)
 
-    def _load_pipeline(self, kind, model, tqdm, **kwargs):
+    def _load_pipeline(self, kind, model, progress, **kwargs):
         pipeline = Config.PIPELINES[kind]
         if self.pipe is None:
             try:
@@ -81,9 +85,9 @@ class Loader:
         if not isinstance(self.pipe, pipeline):
             self.pipe = pipeline.from_pipe(self.pipe).to("cuda")
         if self.pipe is not None:
-            self.pipe.set_progress_bar_config(disable=not tqdm)
+            self.pipe.set_progress_bar_config(disable=progress is not None)
 
-    def _load_refiner(self, refiner, tqdm, **kwargs):
+    def _load_refiner(self, refiner, progress, **kwargs):
         if refiner and self.refiner is None:
             model = Config.REFINER_MODEL
             pipeline = Config.PIPELINES["img2img"]
@@ -95,7 +99,7 @@ class Loader:
             self.refiner = None
             return
         if self.refiner is not None:
-            self.refiner.set_progress_bar_config(disable=not tqdm)
+            self.refiner.set_progress_bar_config(disable=progress is not None)
 
     def _load_upscaler(self, scale=1):
         if scale == 2 and self.upscaler_2x is None:
@@ -117,29 +121,27 @@ class Loader:
 
     def _load_deepcache(self, interval=1):
         pipe_has_deepcache = hasattr(self.pipe, "deepcache")
+        if not pipe_has_deepcache and interval == 1:
+            return
         if pipe_has_deepcache and self.pipe.deepcache.params["cache_interval"] == interval:
             return
-        if pipe_has_deepcache:
-            self.pipe.deepcache.disable()
-        else:
-            self.pipe.deepcache = DeepCacheSDHelper(pipe=self.pipe)
+        print("Loading DeepCache")
+        self.pipe.deepcache = DeepCacheSDHelper(pipe=self.pipe)
         self.pipe.deepcache.set_params(cache_interval=interval)
         self.pipe.deepcache.enable()
 
         if self.refiner is not None:
             refiner_has_deepcache = hasattr(self.refiner, "deepcache")
+            if not refiner_has_deepcache and interval == 1:
+                return
             if refiner_has_deepcache and self.refiner.deepcache.params["cache_interval"] == interval:
                 return
-            if refiner_has_deepcache:
-                self.refiner.deepcache.disable()
-            else:
-                self.refiner.deepcache = DeepCacheSDHelper(pipe=self.refiner)
+            print("Loading DeepCache for refiner")
+            self.refiner.deepcache = DeepCacheSDHelper(pipe=self.refiner)
             self.refiner.deepcache.set_params(cache_interval=interval)
             self.refiner.deepcache.enable()
 
-    def load(self, kind, model, scheduler, deepcache, scale, karras, refiner, tqdm):
-        model_lower = model.lower()
-
+    def load(self, kind, model, scheduler, deepcache, scale, karras, refiner, progress):
         scheduler_kwargs = {
             "beta_start": 0.00085,
             "beta_end": 0.012,
@@ -156,7 +158,7 @@ class Loader:
             scheduler_kwargs["clip_sample"] = False
             scheduler_kwargs["set_alpha_to_one"] = False
 
-        if model_lower not in Config.MODEL_CHECKPOINTS.keys():
+        if model.lower() not in Config.MODEL_CHECKPOINTS.keys():
             variant = "fp16"
         else:
             variant = None
@@ -170,8 +172,8 @@ class Loader:
             "vae": AutoencoderKL.from_pretrained(Config.VAE_MODEL, torch_dtype=dtype),
         }
 
-        self._unload(model)
-        self._load_pipeline(kind, model, tqdm, **pipe_kwargs)
+        self._unload(model, deepcache)
+        self._load_pipeline(kind, model, progress, **pipe_kwargs)
 
         # error loading model
         if self.pipe is None:
@@ -184,7 +186,7 @@ class Loader:
         )
 
         # same model, different scheduler
-        if self.model.lower() == model_lower:
+        if self.model.lower() == model.lower():
             if not same_scheduler:
                 print(f"Switching to {scheduler}...")
             if not same_karras:
@@ -207,6 +209,6 @@ class Loader:
             "text_encoder_2": self.pipe.text_encoder_2,
         }
 
-        self._load_refiner(refiner, tqdm, **refiner_kwargs)
-        self._load_upscaler(scale)
+        self._load_refiner(refiner, progress, **refiner_kwargs)
         self._load_deepcache(deepcache)
+        self._load_upscaler(scale)
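
Note: the DeepCache handling is simpler now; instead of toggling an existing helper, the Loader tears it down in _unload_deepcache and attaches a fresh DeepCacheSDHelper whenever a non-default interval is requested. Outside the Loader the helper is driven the same way; a minimal standalone sketch of that lifecycle (model id and prompt are illustrative):

import torch
from DeepCache import DeepCacheSDHelper
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "segmind/Segmind-Vega", torch_dtype=torch.float16, variant="fp16"
).to("cuda")

# attach the helper and reuse cached UNet features every 2 steps
helper = DeepCacheSDHelper(pipe=pipe)
helper.set_params(cache_interval=2, cache_branch_id=0)
helper.enable()

image = pipe("an astronaut riding a horse").images[0]

# disable (and re-create) before changing the interval, as the Loader does
helper.disable()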
lib/upscaler.py CHANGED
@@ -55,17 +55,15 @@ HF_MODELS = {
 
 
 def pad_reflect(image, pad_size):
-    # fmt: off
     image_size = image.shape
     height, width = image_size[:2]
     new_image = np.zeros([height + pad_size * 2, width + pad_size * 2, image_size[2]]).astype(np.uint8)
     new_image[pad_size:-pad_size, pad_size:-pad_size, :] = image
-    new_image[0:pad_size, pad_size:-pad_size, :] = np.flip(image[0:pad_size, :, :], axis=0) # top
-    new_image[-pad_size:, pad_size:-pad_size, :] = np.flip(image[-pad_size:, :, :], axis=0) # bottom
-    new_image[:, 0:pad_size, :] = np.flip(new_image[:, pad_size : pad_size * 2, :], axis=1) # left
+    new_image[0:pad_size, pad_size:-pad_size, :] = np.flip(image[0:pad_size, :, :], axis=0)  # top
+    new_image[-pad_size:, pad_size:-pad_size, :] = np.flip(image[-pad_size:, :, :], axis=0)  # bottom
+    new_image[:, 0:pad_size, :] = np.flip(new_image[:, pad_size : pad_size * 2, :], axis=1)  # left
     new_image[:, -pad_size:, :] = np.flip(new_image[:, -pad_size * 2 : -pad_size, :], axis=1)  # right
     return new_image
-    # fmt: on
 
 
 def unpad_image(image, pad_size):
@@ -279,9 +277,8 @@ class RealESRGAN:
         self.model.load_state_dict(loadnet, strict=True)
         self.model.eval().to(device=self.device)
 
-    @torch.cuda.amp.autocast()
+    @torch.autocast("cuda")
     def predict(self, lr_image, batch_size=4, patches_size=192, padding=24, pad_size=15):
-        scale = self.scale
         if not isinstance(lr_image, np.ndarray):
             lr_image = np.array(lr_image)
         if lr_image.min() < 0.0:
@@ -302,6 +299,7 @@ class RealESRGAN:
         for i in range(batch_size, image.shape[0], batch_size):
             res = torch.cat((res, self.model(image[i : i + batch_size])), 0)
 
+        scale = self.scale
         sr_image = einops.rearrange(res.clamp(0, 1), "b c h w -> b h w c").cpu().numpy()
         padded_size_scaled = tuple(np.multiply(p_shape[0:2], scale)) + (3,)
         scaled_image_shape = tuple(np.multiply(lr_image.shape[0:2], scale)) + (3,)
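
Note: torch.cuda.amp.autocast() is deprecated in recent PyTorch releases in favor of the device-agnostic torch.autocast, which is what predict now uses; both run eligible ops in reduced precision on CUDA. A small sketch comparing the decorator and the equivalent context-manager form (model and input are placeholders):

import torch


@torch.autocast("cuda")
def predict_decorated(model, x):
    # eligible ops run in float16 on CUDA inside the decorated call
    return model(x)


def predict_scoped(model, x):
    # same effect, scoped explicitly
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        return model(x)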