Commit 0826939 by Cropinky
Parent(s): 2a2be11

v0.2 up in dis
.gitignore ADDED
@@ -0,0 +1,5 @@
+out/
+__pycache__/
+hana_env/
+flagged/
+weights/
README.md CHANGED
@@ -1,12 +1,12 @@
 ---
-title: Anti House Generator
-emoji: 📈
-colorFrom: pink
-colorTo: yellow
+title: Test
+emoji: 🐠
+colorFrom: blue
+colorTo: red
 sdk: gradio
-sdk_version: 2.9.4
+sdk_version: 3.33.1
 app_file: app.py
 pinned: false
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -7,11 +7,56 @@ from networks_fastgan import MyGenerator
 import click
 import PIL
 from image_generator import generate_images
+from basicsr.archs.rrdbnet_arch import RRDBNet
+from basicsr.utils.download_util import load_file_from_url
+import cv2
+import sys
+import numpy as np
+#sys.path.append('Real-ESRGAN')
+from realesrgan import RealESRGANer
+import gc
+
+import os
+
 
 def image_generation(model, number_of_images=1):
     img = generate_images(model)
+    #TODO: run this image through the ESRGAN upscaler and return it, simple enough ?
+    #upscaled_img = torchvision.transforms.functional.resize(img, (1024, 1024), interpolation=2)
+    upscale_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+    file_url = ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth']
+    ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
+    #model_path = load_file_from_url(url=file_url, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
+    model_path = os.path.join('weights', 'RealESRGAN_x4plus.pth')
+    upsampler = RealESRGANer(
+        scale=4,
+        model_path=model_path,
+        dni_weight=None,
+        model=upscale_model,
+        tile=0,
+        tile_pad=10,
+        pre_pad=0,
+        half=False,
+        device='cpu'
+    )
+    #TODO: img has to be same as opencv imread format
+    open_cv_image = np.array(img)
+    # Convert RGB to BGR
+    open_cv_image = open_cv_image[:, :, ::-1].copy()
+    #print(type(open_cv_image))
+    #print(type(img))
+    #print(type(upscaled_img))
+    output, _ = upsampler.enhance(open_cv_image, outscale=4)
+    #output2, _ = upsampler.enhance(output , outscale=4)
     #return f"generating {number_of_images} images from {model}"
-    return img
+    #cv2.imwrite('out/output_upscaled.png', output)
+    #cv2.imwrite('out/output_upscaled_dupli.png', output2)
+    #cv2.imwrite('out/output.png', np.array(img)[:, :, ::-1])
+    output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB)
+    gc.collect()
+    torch.cuda.empty_cache()
+    del(upsampler)
+    return PIL.Image.fromarray(output)
 if __name__ == "__main__":
     description = "This is a web demo of a projected GAN trained on photos of thirty paintings from the series of paintings Welcome home. The abstract expressionism and color field models were initially trained on images from their perspective art directions and then transfer learned to Hana's houses."
     inputs = gr.inputs.Radio(["Hana Hanak houses", "Hana Hanak houses - abstract expressionism", "Hana Hanak houses - color field"])
@@ -22,9 +67,8 @@ if __name__ == "__main__":
 
 
 
-    gr.Interface(image_generation, inputs, outputs, title=title, article = article, description = description,
-        analytics_enabled=False).launch(debug=True)
-
-    app, local_url, share_url = iface.launch(share=True)
+    demo = gr.Interface(image_generation, inputs, outputs, title=title, article = article, description = description,
+        analytics_enabled=False)
+    demo.launch()
 
-
+    #app, local_url, share_url = iface.launch(share=True)
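Note on the new `image_generation` path: `generate_images` returns a PIL image in RGB channel order, while `RealESRGANer.enhance` expects a `cv2.imread`-style BGR array, which is why the channels are reversed on the way in and restored with `cv2.cvtColor` on the way out (the `torch.cuda.empty_cache()` call is a harmless no-op on the CPU device used here). A minimal standalone sketch of that round-trip, with illustrative values that are not part of the commit:

import numpy as np
import cv2
from PIL import Image

pil_img = Image.new('RGB', (64, 64), (255, 0, 0))  # a pure red PIL image, RGB order
bgr = np.array(pil_img)[:, :, ::-1].copy()         # reverse channels to BGR for OpenCV-style APIs
# ... the BGR array is what gets handed to upsampler.enhance(...) ...
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)         # convert back to RGB before returning to Gradio
assert Image.fromarray(rgb).getpixel((0, 0)) == (255, 0, 0)  # red survives the round-trip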
image_generator.py CHANGED
@@ -97,7 +97,8 @@ def generate_images(
     seeds=[random.randint(1,230)]
     #seeds =f"{seeds}-{seeds+number_of_images-1}"
     #seeds = parse_range(seeds)
-    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+    #device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+    device = torch.device('cpu')
     G = MyGenerator.from_pretrained(model_path)
     os.makedirs(outdir, exist_ok=True)
     # Labels.
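The only change in this file pins generation to the CPU, presumably to match CPU-only Space hardware (an assumption; the commit message does not say). A hedged sketch of a toggle that keeps both behaviours available:

import torch

def pick_device(force_cpu: bool = True) -> torch.device:
    # force_cpu=True reproduces this commit; False restores the old auto-detection
    if not force_cpu and torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')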
realesrgan/__init__.py ADDED
@@ -0,0 +1,6 @@
+# flake8: noqa
+from .archs import *
+from .data import *
+from .models import *
+from .utils import *
+from .version import *
realesrgan/archs/__init__.py ADDED
@@ -0,0 +1,10 @@
+import importlib
+from basicsr.utils import scandir
+from os import path as osp
+
+# automatically scan and import arch modules for registry
+# scan all the files that end with '_arch.py' under the archs folder
+arch_folder = osp.dirname(osp.abspath(__file__))
+arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')]
+# import all the arch modules
+_arch_modules = [importlib.import_module(f'realesrgan.archs.{file_name}') for file_name in arch_filenames]
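This import-time scan is what makes `from .archs import *` in `realesrgan/__init__.py` useful: merely importing each `*_arch.py` module runs its `@ARCH_REGISTRY.register()` decorator, after which BasicSR can build networks by name from a YAML config. A simplified stand-in for the registry (not BasicSR's actual implementation) shows the mechanism:

class Registry:
    """Toy stand-in for basicsr.utils.registry.Registry."""

    def __init__(self):
        self._classes = {}

    def register(self):
        def deco(cls):
            self._classes[cls.__name__] = cls  # registration happens at import time
            return cls
        return deco

    def get(self, name):
        return self._classes[name]

ARCH_REGISTRY = Registry()

@ARCH_REGISTRY.register()
class ToyArch:
    pass

# a config file can now refer to the class by its string name:
assert ARCH_REGISTRY.get('ToyArch') is ToyArch

The same pattern is repeated below for `realesrgan/data` (`*_dataset.py`) and `realesrgan/models` (`*_model.py`).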
realesrgan/archs/discriminator_arch.py ADDED
@@ -0,0 +1,67 @@
+from basicsr.utils.registry import ARCH_REGISTRY
+from torch import nn as nn
+from torch.nn import functional as F
+from torch.nn.utils import spectral_norm
+
+
+@ARCH_REGISTRY.register()
+class UNetDiscriminatorSN(nn.Module):
+    """Defines a U-Net discriminator with spectral normalization (SN)
+
+    It is used in Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.
+
+    Args:
+        num_in_ch (int): Channel number of inputs. Default: 3.
+        num_feat (int): Channel number of base intermediate features. Default: 64.
+        skip_connection (bool): Whether to use skip connections between U-Net. Default: True.
+    """
+
+    def __init__(self, num_in_ch, num_feat=64, skip_connection=True):
+        super(UNetDiscriminatorSN, self).__init__()
+        self.skip_connection = skip_connection
+        norm = spectral_norm
+        # the first convolution
+        self.conv0 = nn.Conv2d(num_in_ch, num_feat, kernel_size=3, stride=1, padding=1)
+        # downsample
+        self.conv1 = norm(nn.Conv2d(num_feat, num_feat * 2, 4, 2, 1, bias=False))
+        self.conv2 = norm(nn.Conv2d(num_feat * 2, num_feat * 4, 4, 2, 1, bias=False))
+        self.conv3 = norm(nn.Conv2d(num_feat * 4, num_feat * 8, 4, 2, 1, bias=False))
+        # upsample
+        self.conv4 = norm(nn.Conv2d(num_feat * 8, num_feat * 4, 3, 1, 1, bias=False))
+        self.conv5 = norm(nn.Conv2d(num_feat * 4, num_feat * 2, 3, 1, 1, bias=False))
+        self.conv6 = norm(nn.Conv2d(num_feat * 2, num_feat, 3, 1, 1, bias=False))
+        # extra convolutions
+        self.conv7 = norm(nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=False))
+        self.conv8 = norm(nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=False))
+        self.conv9 = nn.Conv2d(num_feat, 1, 3, 1, 1)
+
+    def forward(self, x):
+        # downsample
+        x0 = F.leaky_relu(self.conv0(x), negative_slope=0.2, inplace=True)
+        x1 = F.leaky_relu(self.conv1(x0), negative_slope=0.2, inplace=True)
+        x2 = F.leaky_relu(self.conv2(x1), negative_slope=0.2, inplace=True)
+        x3 = F.leaky_relu(self.conv3(x2), negative_slope=0.2, inplace=True)
+
+        # upsample
+        x3 = F.interpolate(x3, scale_factor=2, mode='bilinear', align_corners=False)
+        x4 = F.leaky_relu(self.conv4(x3), negative_slope=0.2, inplace=True)
+
+        if self.skip_connection:
+            x4 = x4 + x2
+        x4 = F.interpolate(x4, scale_factor=2, mode='bilinear', align_corners=False)
+        x5 = F.leaky_relu(self.conv5(x4), negative_slope=0.2, inplace=True)
+
+        if self.skip_connection:
+            x5 = x5 + x1
+        x5 = F.interpolate(x5, scale_factor=2, mode='bilinear', align_corners=False)
+        x6 = F.leaky_relu(self.conv6(x5), negative_slope=0.2, inplace=True)
+
+        if self.skip_connection:
+            x6 = x6 + x0
+
+        # extra convolutions
+        out = F.leaky_relu(self.conv7(x6), negative_slope=0.2, inplace=True)
+        out = F.leaky_relu(self.conv8(out), negative_slope=0.2, inplace=True)
+        out = self.conv9(out)
+
+        return out
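A quick shape check for the discriminator (assumes `basicsr` and this package are importable): the three stride-2 downsamples are undone by three bilinear ×2 upsamples, so inputs whose height and width are divisible by 8 come back as a same-size, single-channel map of per-pixel realness scores.

import torch
from realesrgan.archs.discriminator_arch import UNetDiscriminatorSN

disc = UNetDiscriminatorSN(num_in_ch=3, num_feat=64)
x = torch.randn(1, 3, 64, 64)        # H and W divisible by 8 so the skips line up
with torch.no_grad():
    print(disc(x).shape)             # torch.Size([1, 1, 64, 64])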
realesrgan/archs/srvgg_arch.py ADDED
@@ -0,0 +1,69 @@
+from basicsr.utils.registry import ARCH_REGISTRY
+from torch import nn as nn
+from torch.nn import functional as F
+
+
+@ARCH_REGISTRY.register()
+class SRVGGNetCompact(nn.Module):
+    """A compact VGG-style network structure for super-resolution.
+
+    It is a compact network structure, which performs upsampling in the last layer and no convolution is
+    conducted on the HR feature space.
+
+    Args:
+        num_in_ch (int): Channel number of inputs. Default: 3.
+        num_out_ch (int): Channel number of outputs. Default: 3.
+        num_feat (int): Channel number of intermediate features. Default: 64.
+        num_conv (int): Number of convolution layers in the body network. Default: 16.
+        upscale (int): Upsampling factor. Default: 4.
+        act_type (str): Activation type, options: 'relu', 'prelu', 'leakyrelu'. Default: prelu.
+    """
+
+    def __init__(self, num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4, act_type='prelu'):
+        super(SRVGGNetCompact, self).__init__()
+        self.num_in_ch = num_in_ch
+        self.num_out_ch = num_out_ch
+        self.num_feat = num_feat
+        self.num_conv = num_conv
+        self.upscale = upscale
+        self.act_type = act_type
+
+        self.body = nn.ModuleList()
+        # the first conv
+        self.body.append(nn.Conv2d(num_in_ch, num_feat, 3, 1, 1))
+        # the first activation
+        if act_type == 'relu':
+            activation = nn.ReLU(inplace=True)
+        elif act_type == 'prelu':
+            activation = nn.PReLU(num_parameters=num_feat)
+        elif act_type == 'leakyrelu':
+            activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+        self.body.append(activation)
+
+        # the body structure
+        for _ in range(num_conv):
+            self.body.append(nn.Conv2d(num_feat, num_feat, 3, 1, 1))
+            # activation
+            if act_type == 'relu':
+                activation = nn.ReLU(inplace=True)
+            elif act_type == 'prelu':
+                activation = nn.PReLU(num_parameters=num_feat)
+            elif act_type == 'leakyrelu':
+                activation = nn.LeakyReLU(negative_slope=0.1, inplace=True)
+            self.body.append(activation)
+
+        # the last conv
+        self.body.append(nn.Conv2d(num_feat, num_out_ch * upscale * upscale, 3, 1, 1))
+        # upsample
+        self.upsampler = nn.PixelShuffle(upscale)
+
+    def forward(self, x):
+        out = x
+        for i in range(0, len(self.body)):
+            out = self.body[i](out)
+
+        out = self.upsampler(out)
+        # add the nearest upsampled image, so that the network learns the residual
+        base = F.interpolate(x, scale_factor=self.upscale, mode='nearest')
+        out += base
+        return out
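The compact network keeps every convolution at the low-resolution size and leaves the ×`upscale` jump to a single `PixelShuffle`, then adds a nearest-neighbour upsampled copy of the input so the body only has to learn a residual. A small shape check (assumes the package is importable):

import torch
from realesrgan.archs.srvgg_arch import SRVGGNetCompact

net = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=16, upscale=4)
lr = torch.rand(1, 3, 32, 32)
with torch.no_grad():
    sr = net(lr)
print(sr.shape)                      # torch.Size([1, 3, 128, 128])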
realesrgan/data/__init__.py ADDED
@@ -0,0 +1,10 @@
+import importlib
+from basicsr.utils import scandir
+from os import path as osp
+
+# automatically scan and import dataset modules for registry
+# scan all the files that end with '_dataset.py' under the data folder
+data_folder = osp.dirname(osp.abspath(__file__))
+dataset_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(data_folder) if v.endswith('_dataset.py')]
+# import all the dataset modules
+_dataset_modules = [importlib.import_module(f'realesrgan.data.{file_name}') for file_name in dataset_filenames]
realesrgan/data/realesrgan_dataset.py ADDED
@@ -0,0 +1,192 @@
+import cv2
+import math
+import numpy as np
+import os
+import os.path as osp
+import random
+import time
+import torch
+from basicsr.data.degradations import circular_lowpass_kernel, random_mixed_kernels
+from basicsr.data.transforms import augment
+from basicsr.utils import FileClient, get_root_logger, imfrombytes, img2tensor
+from basicsr.utils.registry import DATASET_REGISTRY
+from torch.utils import data as data
+
+
+@DATASET_REGISTRY.register()
+class RealESRGANDataset(data.Dataset):
+    """Dataset used for Real-ESRGAN model:
+    Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.
+
+    It loads gt (Ground-Truth) images, and augments them.
+    It also generates blur kernels and sinc kernels for generating low-quality images.
+    Note that the low-quality images are processed in tensors on GPUS for faster processing.
+
+    Args:
+        opt (dict): Config for train datasets. It contains the following keys:
+            dataroot_gt (str): Data root path for gt.
+            meta_info (str): Path for meta information file.
+            io_backend (dict): IO backend type and other kwarg.
+            use_hflip (bool): Use horizontal flips.
+            use_rot (bool): Use rotation (use vertical flip and transposing h and w for implementation).
+            Please see more options in the codes.
+    """
+
+    def __init__(self, opt):
+        super(RealESRGANDataset, self).__init__()
+        self.opt = opt
+        self.file_client = None
+        self.io_backend_opt = opt['io_backend']
+        self.gt_folder = opt['dataroot_gt']
+
+        # file client (lmdb io backend)
+        if self.io_backend_opt['type'] == 'lmdb':
+            self.io_backend_opt['db_paths'] = [self.gt_folder]
+            self.io_backend_opt['client_keys'] = ['gt']
+            if not self.gt_folder.endswith('.lmdb'):
+                raise ValueError(f"'dataroot_gt' should end with '.lmdb', but received {self.gt_folder}")
+            with open(osp.join(self.gt_folder, 'meta_info.txt')) as fin:
+                self.paths = [line.split('.')[0] for line in fin]
+        else:
+            # disk backend with meta_info
+            # Each line in the meta_info describes the relative path to an image
+            with open(self.opt['meta_info']) as fin:
+                paths = [line.strip().split(' ')[0] for line in fin]
+                self.paths = [os.path.join(self.gt_folder, v) for v in paths]
+
+        # blur settings for the first degradation
+        self.blur_kernel_size = opt['blur_kernel_size']
+        self.kernel_list = opt['kernel_list']
+        self.kernel_prob = opt['kernel_prob']  # a list for each kernel probability
+        self.blur_sigma = opt['blur_sigma']
+        self.betag_range = opt['betag_range']  # betag used in generalized Gaussian blur kernels
+        self.betap_range = opt['betap_range']  # betap used in plateau blur kernels
+        self.sinc_prob = opt['sinc_prob']  # the probability for sinc filters
+
+        # blur settings for the second degradation
+        self.blur_kernel_size2 = opt['blur_kernel_size2']
+        self.kernel_list2 = opt['kernel_list2']
+        self.kernel_prob2 = opt['kernel_prob2']
+        self.blur_sigma2 = opt['blur_sigma2']
+        self.betag_range2 = opt['betag_range2']
+        self.betap_range2 = opt['betap_range2']
+        self.sinc_prob2 = opt['sinc_prob2']
+
+        # a final sinc filter
+        self.final_sinc_prob = opt['final_sinc_prob']
+
+        self.kernel_range = [2 * v + 1 for v in range(3, 11)]  # kernel size ranges from 7 to 21
+        # TODO: kernel range is now hard-coded, should be in the configure file
+        self.pulse_tensor = torch.zeros(21, 21).float()  # convolving with pulse tensor brings no blurry effect
+        self.pulse_tensor[10, 10] = 1
+
+    def __getitem__(self, index):
+        if self.file_client is None:
+            self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)
+
+        # -------------------------------- Load gt images -------------------------------- #
+        # Shape: (h, w, c); channel order: BGR; image range: [0, 1], float32.
+        gt_path = self.paths[index]
+        # avoid errors caused by high latency in reading files
+        retry = 3
+        while retry > 0:
+            try:
+                img_bytes = self.file_client.get(gt_path, 'gt')
+            except (IOError, OSError) as e:
+                logger = get_root_logger()
+                logger.warn(f'File client error: {e}, remaining retry times: {retry - 1}')
+                # change another file to read
+                index = random.randint(0, self.__len__())
+                gt_path = self.paths[index]
+                time.sleep(1)  # sleep 1s for occasional server congestion
+            else:
+                break
+            finally:
+                retry -= 1
+        img_gt = imfrombytes(img_bytes, float32=True)
+
+        # -------------------- Do augmentation for training: flip, rotation -------------------- #
+        img_gt = augment(img_gt, self.opt['use_hflip'], self.opt['use_rot'])
+
+        # crop or pad to 400
+        # TODO: 400 is hard-coded. You may change it accordingly
+        h, w = img_gt.shape[0:2]
+        crop_pad_size = 400
+        # pad
+        if h < crop_pad_size or w < crop_pad_size:
+            pad_h = max(0, crop_pad_size - h)
+            pad_w = max(0, crop_pad_size - w)
+            img_gt = cv2.copyMakeBorder(img_gt, 0, pad_h, 0, pad_w, cv2.BORDER_REFLECT_101)
+        # crop
+        if img_gt.shape[0] > crop_pad_size or img_gt.shape[1] > crop_pad_size:
+            h, w = img_gt.shape[0:2]
+            # randomly choose top and left coordinates
+            top = random.randint(0, h - crop_pad_size)
+            left = random.randint(0, w - crop_pad_size)
+            img_gt = img_gt[top:top + crop_pad_size, left:left + crop_pad_size, ...]
+
+        # ------------------------ Generate kernels (used in the first degradation) ------------------------ #
+        kernel_size = random.choice(self.kernel_range)
+        if np.random.uniform() < self.opt['sinc_prob']:
+            # this sinc filter setting is for kernels ranging from [7, 21]
+            if kernel_size < 13:
+                omega_c = np.random.uniform(np.pi / 3, np.pi)
+            else:
+                omega_c = np.random.uniform(np.pi / 5, np.pi)
+            kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=False)
+        else:
+            kernel = random_mixed_kernels(
+                self.kernel_list,
+                self.kernel_prob,
+                kernel_size,
+                self.blur_sigma,
+                self.blur_sigma, [-math.pi, math.pi],
+                self.betag_range,
+                self.betap_range,
+                noise_range=None)
+        # pad kernel
+        pad_size = (21 - kernel_size) // 2
+        kernel = np.pad(kernel, ((pad_size, pad_size), (pad_size, pad_size)))
+
+        # ------------------------ Generate kernels (used in the second degradation) ------------------------ #
+        kernel_size = random.choice(self.kernel_range)
+        if np.random.uniform() < self.opt['sinc_prob2']:
+            if kernel_size < 13:
+                omega_c = np.random.uniform(np.pi / 3, np.pi)
+            else:
+                omega_c = np.random.uniform(np.pi / 5, np.pi)
+            kernel2 = circular_lowpass_kernel(omega_c, kernel_size, pad_to=False)
+        else:
+            kernel2 = random_mixed_kernels(
+                self.kernel_list2,
+                self.kernel_prob2,
+                kernel_size,
+                self.blur_sigma2,
+                self.blur_sigma2, [-math.pi, math.pi],
+                self.betag_range2,
+                self.betap_range2,
+                noise_range=None)
+
+        # pad kernel
+        pad_size = (21 - kernel_size) // 2
+        kernel2 = np.pad(kernel2, ((pad_size, pad_size), (pad_size, pad_size)))
+
+        # ------------------------------------- the final sinc kernel ------------------------------------- #
+        if np.random.uniform() < self.opt['final_sinc_prob']:
+            kernel_size = random.choice(self.kernel_range)
+            omega_c = np.random.uniform(np.pi / 3, np.pi)
+            sinc_kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=21)
+            sinc_kernel = torch.FloatTensor(sinc_kernel)
+        else:
+            sinc_kernel = self.pulse_tensor
+
+        # BGR to RGB, HWC to CHW, numpy to tensor
+        img_gt = img2tensor([img_gt], bgr2rgb=True, float32=True)[0]
+        kernel = torch.FloatTensor(kernel)
+        kernel2 = torch.FloatTensor(kernel2)
+
+        return_d = {'gt': img_gt, 'kernel1': kernel, 'kernel2': kernel2, 'sinc_kernel': sinc_kernel, 'gt_path': gt_path}
+        return return_d
+
+    def __len__(self):
+        return len(self.paths)
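The constructor reads a fairly large set of keys from `opt`. For orientation, an illustrative config covering them; the values echo the publicly released Real-ESRGAN x4plus training YAML as far as I recall them, so treat every path and number as a placeholder rather than part of this commit:

opt = {
    'dataroot_gt': 'datasets/gt',                # placeholder paths
    'meta_info': 'datasets/meta_info.txt',
    'io_backend': {'type': 'disk'},
    'use_hflip': True,
    'use_rot': False,
    # first-degradation blur
    'blur_kernel_size': 21,
    'kernel_list': ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso'],
    'kernel_prob': [0.45, 0.25, 0.12, 0.03, 0.12, 0.03],
    'sinc_prob': 0.1,
    'blur_sigma': [0.2, 3],
    'betag_range': [0.5, 4],
    'betap_range': [1, 2],
    # second-degradation blur
    'blur_kernel_size2': 21,
    'kernel_list2': ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso'],
    'kernel_prob2': [0.45, 0.25, 0.12, 0.03, 0.12, 0.03],
    'sinc_prob2': 0.1,
    'blur_sigma2': [0.2, 1.5],
    'betag_range2': [0.5, 4],
    'betap_range2': [1, 2],
    # final sinc filter
    'final_sinc_prob': 0.8,
}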
realesrgan/data/realesrgan_paired_dataset.py ADDED
@@ -0,0 +1,108 @@
+import os
+from basicsr.data.data_util import paired_paths_from_folder, paired_paths_from_lmdb
+from basicsr.data.transforms import augment, paired_random_crop
+from basicsr.utils import FileClient, imfrombytes, img2tensor
+from basicsr.utils.registry import DATASET_REGISTRY
+from torch.utils import data as data
+from torchvision.transforms.functional import normalize
+
+
+@DATASET_REGISTRY.register()
+class RealESRGANPairedDataset(data.Dataset):
+    """Paired image dataset for image restoration.
+
+    Read LQ (Low Quality, e.g. LR (Low Resolution), blurry, noisy, etc) and GT image pairs.
+
+    There are three modes:
+    1. 'lmdb': Use lmdb files.
+        If opt['io_backend'] == lmdb.
+    2. 'meta_info': Use meta information file to generate paths.
+        If opt['io_backend'] != lmdb and opt['meta_info'] is not None.
+    3. 'folder': Scan folders to generate paths.
+        The rest.
+
+    Args:
+        opt (dict): Config for train datasets. It contains the following keys:
+            dataroot_gt (str): Data root path for gt.
+            dataroot_lq (str): Data root path for lq.
+            meta_info (str): Path for meta information file.
+            io_backend (dict): IO backend type and other kwarg.
+            filename_tmpl (str): Template for each filename. Note that the template excludes the file extension.
+                Default: '{}'.
+            gt_size (int): Cropped patched size for gt patches.
+            use_hflip (bool): Use horizontal flips.
+            use_rot (bool): Use rotation (use vertical flip and transposing h
+                and w for implementation).
+
+            scale (bool): Scale, which will be added automatically.
+            phase (str): 'train' or 'val'.
+    """
+
+    def __init__(self, opt):
+        super(RealESRGANPairedDataset, self).__init__()
+        self.opt = opt
+        self.file_client = None
+        self.io_backend_opt = opt['io_backend']
+        # mean and std for normalizing the input images
+        self.mean = opt['mean'] if 'mean' in opt else None
+        self.std = opt['std'] if 'std' in opt else None
+
+        self.gt_folder, self.lq_folder = opt['dataroot_gt'], opt['dataroot_lq']
+        self.filename_tmpl = opt['filename_tmpl'] if 'filename_tmpl' in opt else '{}'
+
+        # file client (lmdb io backend)
+        if self.io_backend_opt['type'] == 'lmdb':
+            self.io_backend_opt['db_paths'] = [self.lq_folder, self.gt_folder]
+            self.io_backend_opt['client_keys'] = ['lq', 'gt']
+            self.paths = paired_paths_from_lmdb([self.lq_folder, self.gt_folder], ['lq', 'gt'])
+        elif 'meta_info' in self.opt and self.opt['meta_info'] is not None:
+            # disk backend with meta_info
+            # Each line in the meta_info describes the relative path to an image
+            with open(self.opt['meta_info']) as fin:
+                paths = [line.strip() for line in fin]
+            self.paths = []
+            for path in paths:
+                gt_path, lq_path = path.split(', ')
+                gt_path = os.path.join(self.gt_folder, gt_path)
+                lq_path = os.path.join(self.lq_folder, lq_path)
+                self.paths.append(dict([('gt_path', gt_path), ('lq_path', lq_path)]))
+        else:
+            # disk backend
+            # it will scan the whole folder to get meta info
+            # it will be time-consuming for folders with too many files. It is recommended using an extra meta txt file
+            self.paths = paired_paths_from_folder([self.lq_folder, self.gt_folder], ['lq', 'gt'], self.filename_tmpl)
+
+    def __getitem__(self, index):
+        if self.file_client is None:
+            self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)
+
+        scale = self.opt['scale']
+
+        # Load gt and lq images. Dimension order: HWC; channel order: BGR;
+        # image range: [0, 1], float32.
+        gt_path = self.paths[index]['gt_path']
+        img_bytes = self.file_client.get(gt_path, 'gt')
+        img_gt = imfrombytes(img_bytes, float32=True)
+        lq_path = self.paths[index]['lq_path']
+        img_bytes = self.file_client.get(lq_path, 'lq')
+        img_lq = imfrombytes(img_bytes, float32=True)
+
+        # augmentation for training
+        if self.opt['phase'] == 'train':
+            gt_size = self.opt['gt_size']
+            # random crop
+            img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale, gt_path)
+            # flip, rotation
+            img_gt, img_lq = augment([img_gt, img_lq], self.opt['use_hflip'], self.opt['use_rot'])
+
+        # BGR to RGB, HWC to CHW, numpy to tensor
+        img_gt, img_lq = img2tensor([img_gt, img_lq], bgr2rgb=True, float32=True)
+        # normalize
+        if self.mean is not None or self.std is not None:
+            normalize(img_lq, self.mean, self.std, inplace=True)
+            normalize(img_gt, self.mean, self.std, inplace=True)
+
+        return {'lq': img_lq, 'gt': img_gt, 'lq_path': lq_path, 'gt_path': gt_path}
+
+    def __len__(self):
+        return len(self.paths)
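In the `meta_info` branch each line is split on `', '`, so the file is expected to hold comma-separated GT/LQ pairs relative to the two roots. An illustrative two-line file with hypothetical paths, and the exact parsing done in `__init__`:

# meta_info.txt (illustrative):
#   gt/0001.png, lq/0001.png
#   gt/0002.png, lq/0002.png
line = 'gt/0001.png, lq/0001.png'
gt_path, lq_path = line.split(', ')   # same split the dataset performs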
realesrgan/models/__init__.py ADDED
@@ -0,0 +1,10 @@
+import importlib
+from basicsr.utils import scandir
+from os import path as osp
+
+# automatically scan and import model modules for registry
+# scan all the files that end with '_model.py' under the model folder
+model_folder = osp.dirname(osp.abspath(__file__))
+model_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(model_folder) if v.endswith('_model.py')]
+# import all the model modules
+_model_modules = [importlib.import_module(f'realesrgan.models.{file_name}') for file_name in model_filenames]
realesrgan/models/realesrgan_model.py ADDED
@@ -0,0 +1,258 @@
+import numpy as np
+import random
+import torch
+from basicsr.data.degradations import random_add_gaussian_noise_pt, random_add_poisson_noise_pt
+from basicsr.data.transforms import paired_random_crop
+from basicsr.models.srgan_model import SRGANModel
+from basicsr.utils import DiffJPEG, USMSharp
+from basicsr.utils.img_process_util import filter2D
+from basicsr.utils.registry import MODEL_REGISTRY
+from collections import OrderedDict
+from torch.nn import functional as F
+
+
+@MODEL_REGISTRY.register()
+class RealESRGANModel(SRGANModel):
+    """RealESRGAN Model for Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.
+
+    It mainly performs:
+    1. randomly synthesize LQ images in GPU tensors
+    2. optimize the networks with GAN training.
+    """
+
+    def __init__(self, opt):
+        super(RealESRGANModel, self).__init__(opt)
+        self.jpeger = DiffJPEG(differentiable=False).cuda()  # simulate JPEG compression artifacts
+        self.usm_sharpener = USMSharp().cuda()  # do usm sharpening
+        self.queue_size = opt.get('queue_size', 180)
+
+    @torch.no_grad()
+    def _dequeue_and_enqueue(self):
+        """It is the training pair pool for increasing the diversity in a batch.
+
+        Batch processing limits the diversity of synthetic degradations in a batch. For example, samples in a
+        batch could not have different resize scaling factors. Therefore, we employ this training pair pool
+        to increase the degradation diversity in a batch.
+        """
+        # initialize
+        b, c, h, w = self.lq.size()
+        if not hasattr(self, 'queue_lr'):
+            assert self.queue_size % b == 0, f'queue size {self.queue_size} should be divisible by batch size {b}'
+            self.queue_lr = torch.zeros(self.queue_size, c, h, w).cuda()
+            _, c, h, w = self.gt.size()
+            self.queue_gt = torch.zeros(self.queue_size, c, h, w).cuda()
+            self.queue_ptr = 0
+        if self.queue_ptr == self.queue_size:  # the pool is full
+            # do dequeue and enqueue
+            # shuffle
+            idx = torch.randperm(self.queue_size)
+            self.queue_lr = self.queue_lr[idx]
+            self.queue_gt = self.queue_gt[idx]
+            # get first b samples
+            lq_dequeue = self.queue_lr[0:b, :, :, :].clone()
+            gt_dequeue = self.queue_gt[0:b, :, :, :].clone()
+            # update the queue
+            self.queue_lr[0:b, :, :, :] = self.lq.clone()
+            self.queue_gt[0:b, :, :, :] = self.gt.clone()
+
+            self.lq = lq_dequeue
+            self.gt = gt_dequeue
+        else:
+            # only do enqueue
+            self.queue_lr[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.lq.clone()
+            self.queue_gt[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.gt.clone()
+            self.queue_ptr = self.queue_ptr + b
+
+    @torch.no_grad()
+    def feed_data(self, data):
+        """Accept data from dataloader, and then add two-order degradations to obtain LQ images.
+        """
+        if self.is_train and self.opt.get('high_order_degradation', True):
+            # training data synthesis
+            self.gt = data['gt'].to(self.device)
+            self.gt_usm = self.usm_sharpener(self.gt)
+
+            self.kernel1 = data['kernel1'].to(self.device)
+            self.kernel2 = data['kernel2'].to(self.device)
+            self.sinc_kernel = data['sinc_kernel'].to(self.device)
+
+            ori_h, ori_w = self.gt.size()[2:4]
+
+            # ----------------------- The first degradation process ----------------------- #
+            # blur
+            out = filter2D(self.gt_usm, self.kernel1)
+            # random resize
+            updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob'])[0]
+            if updown_type == 'up':
+                scale = np.random.uniform(1, self.opt['resize_range'][1])
+            elif updown_type == 'down':
+                scale = np.random.uniform(self.opt['resize_range'][0], 1)
+            else:
+                scale = 1
+            mode = random.choice(['area', 'bilinear', 'bicubic'])
+            out = F.interpolate(out, scale_factor=scale, mode=mode)
+            # add noise
+            gray_noise_prob = self.opt['gray_noise_prob']
+            if np.random.uniform() < self.opt['gaussian_noise_prob']:
+                out = random_add_gaussian_noise_pt(
+                    out, sigma_range=self.opt['noise_range'], clip=True, rounds=False, gray_prob=gray_noise_prob)
+            else:
+                out = random_add_poisson_noise_pt(
+                    out,
+                    scale_range=self.opt['poisson_scale_range'],
+                    gray_prob=gray_noise_prob,
+                    clip=True,
+                    rounds=False)
+            # JPEG compression
+            jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range'])
+            out = torch.clamp(out, 0, 1)  # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
+            out = self.jpeger(out, quality=jpeg_p)
+
+            # ----------------------- The second degradation process ----------------------- #
+            # blur
+            if np.random.uniform() < self.opt['second_blur_prob']:
+                out = filter2D(out, self.kernel2)
+            # random resize
+            updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob2'])[0]
+            if updown_type == 'up':
+                scale = np.random.uniform(1, self.opt['resize_range2'][1])
+            elif updown_type == 'down':
+                scale = np.random.uniform(self.opt['resize_range2'][0], 1)
+            else:
+                scale = 1
+            mode = random.choice(['area', 'bilinear', 'bicubic'])
+            out = F.interpolate(
+                out, size=(int(ori_h / self.opt['scale'] * scale), int(ori_w / self.opt['scale'] * scale)), mode=mode)
+            # add noise
+            gray_noise_prob = self.opt['gray_noise_prob2']
+            if np.random.uniform() < self.opt['gaussian_noise_prob2']:
+                out = random_add_gaussian_noise_pt(
+                    out, sigma_range=self.opt['noise_range2'], clip=True, rounds=False, gray_prob=gray_noise_prob)
+            else:
+                out = random_add_poisson_noise_pt(
+                    out,
+                    scale_range=self.opt['poisson_scale_range2'],
+                    gray_prob=gray_noise_prob,
+                    clip=True,
+                    rounds=False)
+
+            # JPEG compression + the final sinc filter
+            # We also need to resize images to desired sizes. We group [resize back + sinc filter] together
+            # as one operation.
+            # We consider two orders:
+            # 1. [resize back + sinc filter] + JPEG compression
+            # 2. JPEG compression + [resize back + sinc filter]
+            # Empirically, we find other combinations (sinc + JPEG + Resize) will introduce twisted lines.
+            if np.random.uniform() < 0.5:
+                # resize back + the final sinc filter
+                mode = random.choice(['area', 'bilinear', 'bicubic'])
+                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
+                out = filter2D(out, self.sinc_kernel)
+                # JPEG compression
+                jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
+                out = torch.clamp(out, 0, 1)
+                out = self.jpeger(out, quality=jpeg_p)
+            else:
+                # JPEG compression
+                jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
+                out = torch.clamp(out, 0, 1)
+                out = self.jpeger(out, quality=jpeg_p)
+                # resize back + the final sinc filter
+                mode = random.choice(['area', 'bilinear', 'bicubic'])
+                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
+                out = filter2D(out, self.sinc_kernel)
+
+            # clamp and round
+            self.lq = torch.clamp((out * 255.0).round(), 0, 255) / 255.
+
+            # random crop
+            gt_size = self.opt['gt_size']
+            (self.gt, self.gt_usm), self.lq = paired_random_crop([self.gt, self.gt_usm], self.lq, gt_size,
+                                                                 self.opt['scale'])
+
+            # training pair pool
+            self._dequeue_and_enqueue()
+            # sharpen self.gt again, as we have changed the self.gt with self._dequeue_and_enqueue
+            self.gt_usm = self.usm_sharpener(self.gt)
+            self.lq = self.lq.contiguous()  # for the warning: grad and param do not obey the gradient layout contract
+        else:
+            # for paired training or validation
+            self.lq = data['lq'].to(self.device)
+            if 'gt' in data:
+                self.gt = data['gt'].to(self.device)
+                self.gt_usm = self.usm_sharpener(self.gt)
+
+    def nondist_validation(self, dataloader, current_iter, tb_logger, save_img):
+        # do not use the synthetic process during validation
+        self.is_train = False
+        super(RealESRGANModel, self).nondist_validation(dataloader, current_iter, tb_logger, save_img)
+        self.is_train = True
+
+    def optimize_parameters(self, current_iter):
+        # usm sharpening
+        l1_gt = self.gt_usm
+        percep_gt = self.gt_usm
+        gan_gt = self.gt_usm
+        if self.opt['l1_gt_usm'] is False:
+            l1_gt = self.gt
+        if self.opt['percep_gt_usm'] is False:
+            percep_gt = self.gt
+        if self.opt['gan_gt_usm'] is False:
+            gan_gt = self.gt
+
+        # optimize net_g
+        for p in self.net_d.parameters():
+            p.requires_grad = False
+
+        self.optimizer_g.zero_grad()
+        self.output = self.net_g(self.lq)
+
+        l_g_total = 0
+        loss_dict = OrderedDict()
+        if (current_iter % self.net_d_iters == 0 and current_iter > self.net_d_init_iters):
+            # pixel loss
+            if self.cri_pix:
+                l_g_pix = self.cri_pix(self.output, l1_gt)
+                l_g_total += l_g_pix
+                loss_dict['l_g_pix'] = l_g_pix
+            # perceptual loss
+            if self.cri_perceptual:
+                l_g_percep, l_g_style = self.cri_perceptual(self.output, percep_gt)
+                if l_g_percep is not None:
+                    l_g_total += l_g_percep
+                    loss_dict['l_g_percep'] = l_g_percep
+                if l_g_style is not None:
+                    l_g_total += l_g_style
+                    loss_dict['l_g_style'] = l_g_style
+            # gan loss
+            fake_g_pred = self.net_d(self.output)
+            l_g_gan = self.cri_gan(fake_g_pred, True, is_disc=False)
+            l_g_total += l_g_gan
+            loss_dict['l_g_gan'] = l_g_gan
+
+            l_g_total.backward()
+            self.optimizer_g.step()
+
+        # optimize net_d
+        for p in self.net_d.parameters():
+            p.requires_grad = True
+
+        self.optimizer_d.zero_grad()
+        # real
+        real_d_pred = self.net_d(gan_gt)
+        l_d_real = self.cri_gan(real_d_pred, True, is_disc=True)
+        loss_dict['l_d_real'] = l_d_real
+        loss_dict['out_d_real'] = torch.mean(real_d_pred.detach())
+        l_d_real.backward()
+        # fake
+        fake_d_pred = self.net_d(self.output.detach().clone())  # clone for pt1.9
+        l_d_fake = self.cri_gan(fake_d_pred, False, is_disc=True)
+        loss_dict['l_d_fake'] = l_d_fake
+        loss_dict['out_d_fake'] = torch.mean(fake_d_pred.detach())
+        l_d_fake.backward()
+        self.optimizer_d.step()
+
+        if self.ema_decay > 0:
+            self.model_ema(decay=self.ema_decay)
+
+        self.log_dict = self.reduce_loss_dict(loss_dict)
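The training-pair pool deserves a closer look: batches are enqueued into a fixed-size buffer, and once the buffer is full each new batch is swapped against a random slice of the shuffled buffer, so a single optimizer step mixes degradations synthesized at different iterations. A self-contained sketch of just that mechanism (CPU tensors and toy sizes, not the model's actual buffers):

import torch

queue_size, b = 8, 2
pool = torch.zeros(queue_size, 3, 4, 4)
ptr = 0
for step in range(8):
    batch = torch.full((b, 3, 4, 4), float(step))
    if ptr == queue_size:                 # pool full: dequeue + enqueue
        idx = torch.randperm(queue_size)
        pool = pool[idx]                  # shuffle so old batches mix
        train_batch = pool[:b].clone()    # what the step actually consumes
        pool[:b] = batch
    else:                                 # still filling: train on the fresh batch
        pool[ptr:ptr + b] = batch
        train_batch = batch
        ptr += b
    # train_batch now feeds the optimizer in place of `batch`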
realesrgan/models/realesrnet_model.py ADDED
@@ -0,0 +1,188 @@
+import numpy as np
+import random
+import torch
+from basicsr.data.degradations import random_add_gaussian_noise_pt, random_add_poisson_noise_pt
+from basicsr.data.transforms import paired_random_crop
+from basicsr.models.sr_model import SRModel
+from basicsr.utils import DiffJPEG, USMSharp
+from basicsr.utils.img_process_util import filter2D
+from basicsr.utils.registry import MODEL_REGISTRY
+from torch.nn import functional as F
+
+
+@MODEL_REGISTRY.register()
+class RealESRNetModel(SRModel):
+    """RealESRNet Model for Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.
+
+    It is trained without GAN losses.
+    It mainly performs:
+    1. randomly synthesize LQ images in GPU tensors
+    2. optimize the networks with GAN training.
+    """
+
+    def __init__(self, opt):
+        super(RealESRNetModel, self).__init__(opt)
+        self.jpeger = DiffJPEG(differentiable=False).cuda()  # simulate JPEG compression artifacts
+        self.usm_sharpener = USMSharp().cuda()  # do usm sharpening
+        self.queue_size = opt.get('queue_size', 180)
+
+    @torch.no_grad()
+    def _dequeue_and_enqueue(self):
+        """It is the training pair pool for increasing the diversity in a batch.
+
+        Batch processing limits the diversity of synthetic degradations in a batch. For example, samples in a
+        batch could not have different resize scaling factors. Therefore, we employ this training pair pool
+        to increase the degradation diversity in a batch.
+        """
+        # initialize
+        b, c, h, w = self.lq.size()
+        if not hasattr(self, 'queue_lr'):
+            assert self.queue_size % b == 0, f'queue size {self.queue_size} should be divisible by batch size {b}'
+            self.queue_lr = torch.zeros(self.queue_size, c, h, w).cuda()
+            _, c, h, w = self.gt.size()
+            self.queue_gt = torch.zeros(self.queue_size, c, h, w).cuda()
+            self.queue_ptr = 0
+        if self.queue_ptr == self.queue_size:  # the pool is full
+            # do dequeue and enqueue
+            # shuffle
+            idx = torch.randperm(self.queue_size)
+            self.queue_lr = self.queue_lr[idx]
+            self.queue_gt = self.queue_gt[idx]
+            # get first b samples
+            lq_dequeue = self.queue_lr[0:b, :, :, :].clone()
+            gt_dequeue = self.queue_gt[0:b, :, :, :].clone()
+            # update the queue
+            self.queue_lr[0:b, :, :, :] = self.lq.clone()
+            self.queue_gt[0:b, :, :, :] = self.gt.clone()
+
+            self.lq = lq_dequeue
+            self.gt = gt_dequeue
+        else:
+            # only do enqueue
+            self.queue_lr[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.lq.clone()
+            self.queue_gt[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.gt.clone()
+            self.queue_ptr = self.queue_ptr + b
+
+    @torch.no_grad()
+    def feed_data(self, data):
+        """Accept data from dataloader, and then add two-order degradations to obtain LQ images.
+        """
+        if self.is_train and self.opt.get('high_order_degradation', True):
+            # training data synthesis
+            self.gt = data['gt'].to(self.device)
+            # USM sharpen the GT images
+            if self.opt['gt_usm'] is True:
+                self.gt = self.usm_sharpener(self.gt)
+
+            self.kernel1 = data['kernel1'].to(self.device)
+            self.kernel2 = data['kernel2'].to(self.device)
+            self.sinc_kernel = data['sinc_kernel'].to(self.device)
+
+            ori_h, ori_w = self.gt.size()[2:4]
+
+            # ----------------------- The first degradation process ----------------------- #
+            # blur
+            out = filter2D(self.gt, self.kernel1)
+            # random resize
+            updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob'])[0]
+            if updown_type == 'up':
+                scale = np.random.uniform(1, self.opt['resize_range'][1])
+            elif updown_type == 'down':
+                scale = np.random.uniform(self.opt['resize_range'][0], 1)
+            else:
+                scale = 1
+            mode = random.choice(['area', 'bilinear', 'bicubic'])
+            out = F.interpolate(out, scale_factor=scale, mode=mode)
+            # add noise
+            gray_noise_prob = self.opt['gray_noise_prob']
+            if np.random.uniform() < self.opt['gaussian_noise_prob']:
+                out = random_add_gaussian_noise_pt(
+                    out, sigma_range=self.opt['noise_range'], clip=True, rounds=False, gray_prob=gray_noise_prob)
+            else:
+                out = random_add_poisson_noise_pt(
+                    out,
+                    scale_range=self.opt['poisson_scale_range'],
+                    gray_prob=gray_noise_prob,
+                    clip=True,
+                    rounds=False)
+            # JPEG compression
+            jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range'])
+            out = torch.clamp(out, 0, 1)  # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
+            out = self.jpeger(out, quality=jpeg_p)
+
+            # ----------------------- The second degradation process ----------------------- #
+            # blur
+            if np.random.uniform() < self.opt['second_blur_prob']:
+                out = filter2D(out, self.kernel2)
+            # random resize
+            updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob2'])[0]
+            if updown_type == 'up':
+                scale = np.random.uniform(1, self.opt['resize_range2'][1])
+            elif updown_type == 'down':
+                scale = np.random.uniform(self.opt['resize_range2'][0], 1)
+            else:
+                scale = 1
+            mode = random.choice(['area', 'bilinear', 'bicubic'])
+            out = F.interpolate(
+                out, size=(int(ori_h / self.opt['scale'] * scale), int(ori_w / self.opt['scale'] * scale)), mode=mode)
+            # add noise
+            gray_noise_prob = self.opt['gray_noise_prob2']
+            if np.random.uniform() < self.opt['gaussian_noise_prob2']:
+                out = random_add_gaussian_noise_pt(
+                    out, sigma_range=self.opt['noise_range2'], clip=True, rounds=False, gray_prob=gray_noise_prob)
+            else:
+                out = random_add_poisson_noise_pt(
+                    out,
+                    scale_range=self.opt['poisson_scale_range2'],
+                    gray_prob=gray_noise_prob,
+                    clip=True,
+                    rounds=False)
+
+            # JPEG compression + the final sinc filter
+            # We also need to resize images to desired sizes. We group [resize back + sinc filter] together
+            # as one operation.
+            # We consider two orders:
+            # 1. [resize back + sinc filter] + JPEG compression
+            # 2. JPEG compression + [resize back + sinc filter]
+            # Empirically, we find other combinations (sinc + JPEG + Resize) will introduce twisted lines.
+            if np.random.uniform() < 0.5:
+                # resize back + the final sinc filter
+                mode = random.choice(['area', 'bilinear', 'bicubic'])
+                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
+                out = filter2D(out, self.sinc_kernel)
+                # JPEG compression
+                jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
+                out = torch.clamp(out, 0, 1)
+                out = self.jpeger(out, quality=jpeg_p)
+            else:
+                # JPEG compression
+                jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
+                out = torch.clamp(out, 0, 1)
+                out = self.jpeger(out, quality=jpeg_p)
+                # resize back + the final sinc filter
+                mode = random.choice(['area', 'bilinear', 'bicubic'])
+                out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
+                out = filter2D(out, self.sinc_kernel)
+
+            # clamp and round
+            self.lq = torch.clamp((out * 255.0).round(), 0, 255) / 255.
+
+            # random crop
+            gt_size = self.opt['gt_size']
+            self.gt, self.lq = paired_random_crop(self.gt, self.lq, gt_size, self.opt['scale'])
+
+            # training pair pool
+            self._dequeue_and_enqueue()
+            self.lq = self.lq.contiguous()  # for the warning: grad and param do not obey the gradient layout contract
+        else:
+            # for paired training or validation
+            self.lq = data['lq'].to(self.device)
+            if 'gt' in data:
+                self.gt = data['gt'].to(self.device)
+                self.gt_usm = self.usm_sharpener(self.gt)
+
+    def nondist_validation(self, dataloader, current_iter, tb_logger, save_img):
+        # do not use the synthetic process during validation
+        self.is_train = False
+        super(RealESRNetModel, self).nondist_validation(dataloader, current_iter, tb_logger, save_img)
+        self.is_train = True
realesrgan/train.py ADDED
@@ -0,0 +1,11 @@
+# flake8: noqa
+import os.path as osp
+from basicsr.train import train_pipeline
+
+import realesrgan.archs
+import realesrgan.data
+import realesrgan.models
+
+if __name__ == '__main__':
+    root_path = osp.abspath(osp.join(__file__, osp.pardir, osp.pardir))
+    train_pipeline(root_path)
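`train_pipeline` is BasicSR's standard training entry point; it reads an option file from the command line, and the bare `import realesrgan.*` lines exist purely to trigger the registry imports above. A typical invocation would look like the comment below; the YAML path and flags come from upstream Real-ESRGAN usage and are assumptions, not part of this commit:

# Hypothetical launch command:
#   python realesrgan/train.py -opt options/train_realesrgan_x4plus.yml --auto_resume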
realesrgan/utils.py ADDED
@@ -0,0 +1,313 @@
+import cv2
+import math
+import numpy as np
+import os
+import queue
+import threading
+import torch
+from basicsr.utils.download_util import load_file_from_url
+from torch.nn import functional as F
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+class RealESRGANer():
+    """A helper class for upsampling images with RealESRGAN.
+
+    Args:
+        scale (int): Upsampling scale factor used in the networks. It is usually 2 or 4.
+        model_path (str): The path to the pretrained model. It can be urls (will first download it automatically).
+        model (nn.Module): The defined network. Default: None.
+        tile (int): As too large images result in the out of GPU memory issue, so this tile option will first crop
+            input images into tiles, and then process each of them. Finally, they will be merged into one image.
+            0 denotes for do not use tile. Default: 0.
+        tile_pad (int): The pad size for each tile, to remove border artifacts. Default: 10.
+        pre_pad (int): Pad the input images to avoid border artifacts. Default: 10.
+        half (bool): Whether to use half precision during inference. Default: False.
+    """
+
+    def __init__(self,
+                 scale,
+                 model_path,
+                 dni_weight=None,
+                 model=None,
+                 tile=0,
+                 tile_pad=10,
+                 pre_pad=10,
+                 half=False,
+                 device=None,
+                 gpu_id=None):
+        self.scale = scale
+        self.tile_size = tile
+        self.tile_pad = tile_pad
+        self.pre_pad = pre_pad
+        self.mod_scale = None
+        self.half = half
+
+        # initialize model
+        if gpu_id:
+            self.device = torch.device(
+                f'cuda:{gpu_id}' if torch.cuda.is_available() else 'cpu') if device is None else device
+        else:
+            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') if device is None else device
+
+        if isinstance(model_path, list):
+            # dni
+            assert len(model_path) == len(dni_weight), 'model_path and dni_weight should have the same length.'
+            loadnet = self.dni(model_path[0], model_path[1], dni_weight)
+        else:
+            # if the model_path starts with https, it will first download models to the folder: weights
+            if model_path.startswith('https://'):
+                model_path = load_file_from_url(
+                    url=model_path, model_dir=os.path.join(ROOT_DIR, 'weights'), progress=True, file_name=None)
+            loadnet = torch.load(model_path, map_location=torch.device('cpu'))
+
+        # prefer to use params_ema
+        if 'params_ema' in loadnet:
+            keyname = 'params_ema'
+        else:
+            keyname = 'params'
+        model.load_state_dict(loadnet[keyname], strict=True)
+
+        model.eval()
+        self.model = model.to(self.device)
+        if self.half:
+            self.model = self.model.half()
+
+    def dni(self, net_a, net_b, dni_weight, key='params', loc='cpu'):
+        """Deep network interpolation.
+
+        ``Paper: Deep Network Interpolation for Continuous Imagery Effect Transition``
+        """
+        net_a = torch.load(net_a, map_location=torch.device(loc))
+        net_b = torch.load(net_b, map_location=torch.device(loc))
+        for k, v_a in net_a[key].items():
+            net_a[key][k] = dni_weight[0] * v_a + dni_weight[1] * net_b[key][k]
+        return net_a
+
+    def pre_process(self, img):
+        """Pre-process, such as pre-pad and mod pad, so that the images can be divisible
+        """
+        img = torch.from_numpy(np.transpose(img, (2, 0, 1))).float()
+        self.img = img.unsqueeze(0).to(self.device)
+        if self.half:
+            self.img = self.img.half()
+
+        # pre_pad
+        if self.pre_pad != 0:
+            self.img = F.pad(self.img, (0, self.pre_pad, 0, self.pre_pad), 'reflect')
+        # mod pad for divisible borders
+        if self.scale == 2:
+            self.mod_scale = 2
+        elif self.scale == 1:
+            self.mod_scale = 4
+        if self.mod_scale is not None:
+            self.mod_pad_h, self.mod_pad_w = 0, 0
+            _, _, h, w = self.img.size()
+            if (h % self.mod_scale != 0):
+                self.mod_pad_h = (self.mod_scale - h % self.mod_scale)
+            if (w % self.mod_scale != 0):
+                self.mod_pad_w = (self.mod_scale - w % self.mod_scale)
+            self.img = F.pad(self.img, (0, self.mod_pad_w, 0, self.mod_pad_h), 'reflect')
+
+    def process(self):
+        # model inference
+        self.output = self.model(self.img)
+
+    def tile_process(self):
+        """It will first crop input images to tiles, and then process each tile.
+        Finally, all the processed tiles are merged into one images.
+
+        Modified from: https://github.com/ata4/esrgan-launcher
+        """
+        batch, channel, height, width = self.img.shape
+        output_height = height * self.scale
+        output_width = width * self.scale
+        output_shape = (batch, channel, output_height, output_width)
+
+        # start with black image
+        self.output = self.img.new_zeros(output_shape)
+        tiles_x = math.ceil(width / self.tile_size)
+        tiles_y = math.ceil(height / self.tile_size)
+
+        # loop over all tiles
+        for y in range(tiles_y):
+            for x in range(tiles_x):
+                # extract tile from input image
+                ofs_x = x * self.tile_size
+                ofs_y = y * self.tile_size
+                # input tile area on total image
+                input_start_x = ofs_x
+                input_end_x = min(ofs_x + self.tile_size, width)
+                input_start_y = ofs_y
+                input_end_y = min(ofs_y + self.tile_size, height)
+
+                # input tile area on total image with padding
+                input_start_x_pad = max(input_start_x - self.tile_pad, 0)
+                input_end_x_pad = min(input_end_x + self.tile_pad, width)
+                input_start_y_pad = max(input_start_y - self.tile_pad, 0)
+                input_end_y_pad = min(input_end_y + self.tile_pad, height)
+
+                # input tile dimensions
+                input_tile_width = input_end_x - input_start_x
+                input_tile_height = input_end_y - input_start_y
+                tile_idx = y * tiles_x + x + 1
+                input_tile = self.img[:, :, input_start_y_pad:input_end_y_pad, input_start_x_pad:input_end_x_pad]
+
+                # upscale tile
+                try:
+                    with torch.no_grad():
+                        output_tile = self.model(input_tile)
+                except RuntimeError as error:
+                    print('Error', error)
+                print(f'\tTile {tile_idx}/{tiles_x * tiles_y}')
+
+                # output tile area on total image
+                output_start_x = input_start_x * self.scale
+                output_end_x = input_end_x * self.scale
+                output_start_y = input_start_y * self.scale
+                output_end_y = input_end_y * self.scale
+
+                # output tile area without padding
+                output_start_x_tile = (input_start_x - input_start_x_pad) * self.scale
+                output_end_x_tile = output_start_x_tile + input_tile_width * self.scale
+                output_start_y_tile = (input_start_y - input_start_y_pad) * self.scale
+                output_end_y_tile = output_start_y_tile + input_tile_height * self.scale
+
+                # put tile into output image
+                self.output[:, :, output_start_y:output_end_y,
+                            output_start_x:output_end_x] = output_tile[:, :, output_start_y_tile:output_end_y_tile,
+                                                                       output_start_x_tile:output_end_x_tile]
+
+    def post_process(self):
+        # remove extra pad
+        if self.mod_scale is not None:
+            _, _, h, w = self.output.size()
+            self.output = self.output[:, :, 0:h - self.mod_pad_h * self.scale, 0:w - self.mod_pad_w * self.scale]
+        # remove prepad
+        if self.pre_pad != 0:
+            _, _, h, w = self.output.size()
+            self.output = self.output[:, :, 0:h - self.pre_pad * self.scale, 0:w - self.pre_pad * self.scale]
+        return self.output
+
+    @torch.no_grad()
+    def enhance(self, img, outscale=None, alpha_upsampler='realesrgan'):
+        h_input, w_input = img.shape[0:2]
+        # img: numpy
+        img = img.astype(np.float32)
+        if np.max(img) > 256:  # 16-bit image
+            max_range = 65535
+            print('\tInput is a 16-bit image')
+        else:
+            max_range = 255
+        img = img / max_range
+        if len(img.shape) == 2:  # gray image
+            img_mode = 'L'
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+        elif img.shape[2] == 4:  # RGBA image with alpha channel
+            img_mode = 'RGBA'
+            alpha = img[:, :, 3]
+            img = img[:, :, 0:3]
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            if alpha_upsampler == 'realesrgan':
+                alpha = cv2.cvtColor(alpha, cv2.COLOR_GRAY2RGB)
+        else:
+            img_mode = 'RGB'
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+        # ------------------- process image (without the alpha channel) ------------------- #
+        self.pre_process(img)
+        if self.tile_size > 0:
+            self.tile_process()
+        else:
+            self.process()
+        output_img = self.post_process()
+        output_img = output_img.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+        output_img = np.transpose(output_img[[2, 1, 0], :, :], (1, 2, 0))
+        if img_mode == 'L':
+            output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2GRAY)
+
+        # ------------------- process the alpha channel if necessary ------------------- #
+        if img_mode == 'RGBA':
+            if alpha_upsampler == 'realesrgan':
+                self.pre_process(alpha)
+                if self.tile_size > 0:
+                    self.tile_process()
+                else:
+                    self.process()
+                output_alpha = self.post_process()
+                output_alpha = output_alpha.data.squeeze().float().cpu().clamp_(0, 1).numpy()
+                output_alpha = np.transpose(output_alpha[[2, 1, 0], :, :], (1, 2, 0))
+                output_alpha = cv2.cvtColor(output_alpha, cv2.COLOR_BGR2GRAY)
+            else:  # use the cv2 resize for alpha channel
+                h, w = alpha.shape[0:2]
+                output_alpha = cv2.resize(alpha, (w * self.scale, h * self.scale), interpolation=cv2.INTER_LINEAR)
+
+            # merge the alpha channel
+            output_img = cv2.cvtColor(output_img, cv2.COLOR_BGR2BGRA)
+            output_img[:, :, 3] = output_alpha
+
+        # ------------------------------ return ------------------------------ #
+        if max_range == 65535:  # 16-bit image
+            output = (output_img * 65535.0).round().astype(np.uint16)
+        else:
+            output = (output_img * 255.0).round().astype(np.uint8)
+
+        if outscale is not None and outscale != float(self.scale):
+            output = cv2.resize(
+                output, (
+                    int(w_input * outscale),
+                    int(h_input * outscale),
+                ), interpolation=cv2.INTER_LANCZOS4)
+
+        return output, img_mode
+
+
+class PrefetchReader(threading.Thread):
+    """Prefetch images.
+
+    Args:
+        img_list (list[str]): A image list of image paths to be read.
+        num_prefetch_queue (int): Number of prefetch queue.
+    """
+
+    def __init__(self, img_list, num_prefetch_queue):
+        super().__init__()
+        self.que = queue.Queue(num_prefetch_queue)
+        self.img_list = img_list
+
+    def run(self):
+        for img_path in self.img_list:
+            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
+            self.que.put(img)
+
+        self.que.put(None)
+
+    def __next__(self):
+        next_item = self.que.get()
+        if next_item is None:
+            raise StopIteration
+        return next_item
+
+    def __iter__(self):
+        return self
+
+
+class IOConsumer(threading.Thread):
+
+    def __init__(self, opt, que, qid):
+        super().__init__()
+        self._queue = que
+        self.qid = qid
+        self.opt = opt
+
+    def run(self):
+        while True:
+            msg = self._queue.get()
+            if isinstance(msg, str) and msg == 'quit':
+                break
+
+            output = msg['output']
+            save_path = msg['save_path']
+            cv2.imwrite(save_path, output)
+        print(f'IO worker {self.qid} is done.')
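End to end, `RealESRGANer` is used roughly as follows (a sketch with placeholder file names; the weights must exist locally, or `model_path` can be an `https://` URL so `load_file_from_url` fetches them into `weights/`):

import cv2
from basicsr.archs.rrdbnet_arch import RRDBNet
from realesrgan.utils import RealESRGANer

model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
upsampler = RealESRGANer(scale=4, model_path='weights/RealESRGAN_x4plus.pth',
                         model=model, tile=256, tile_pad=10, pre_pad=0, half=False)
img = cv2.imread('input.png', cv2.IMREAD_UNCHANGED)   # BGR array, as enhance() expects
output, img_mode = upsampler.enhance(img, outscale=4)
cv2.imwrite('output.png', output)

Here `tile=256` bounds peak memory by upscaling 256-pixel tiles with a 10-pixel overlap; `tile=0` processes the whole frame in one pass, which is what app.py does.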
realesrgan/version.py ADDED
@@ -0,0 +1,5 @@
+# GENERATED VERSION FILE
+# TIME: Fri Jun 2 00:17:29 2023
+__version__ = '0.3.0'
+__gitsha__ = '5ca1078'
+version_info = (0, 3, 0)
requirements.txt CHANGED
@@ -1,4 +1,116 @@
-gradio
-torchvision
-matplotlib
-torch
+absl-py==1.4.0
+addict==2.4.0
+aiofiles==23.1.0
+aiohttp==3.8.4
+aiosignal==1.3.1
+altair==5.0.1
+anyio==3.7.0
+async-timeout==4.0.2
+attrs==23.1.0
+basicsr==1.4.2
+cachetools==5.3.1
+certifi==2023.5.7
+charset-normalizer==3.1.0
+click==8.1.3
+cmake==3.26.3
+contourpy==1.0.7
+cycler==0.11.0
+exceptiongroup==1.1.1
+facexlib==0.3.0
+fastapi==0.95.2
+ffmpy==0.3.0
+filelock==3.12.0
+filterpy==1.4.5
+fonttools==4.39.4
+frozenlist==1.3.3
+fsspec==2023.5.0
+future==0.18.3
+gfpgan==1.3.8
+google-auth==2.19.1
+google-auth-oauthlib==1.0.0
+gradio==3.33.1
+gradio_client==0.2.5
+grpcio==1.54.2
+h11==0.14.0
+httpcore==0.17.2
+httpx==0.24.1
+huggingface-hub==0.15.1
+idna==3.4
+imageio==2.30.0
+Jinja2==3.1.2
+jsonschema==4.17.3
+kiwisolver==1.4.4
+lazy_loader==0.2
+linkify-it-py==2.0.2
+lit==16.0.5.post0
+llvmlite==0.40.0
+lmdb==1.4.1
+Markdown==3.4.3
+markdown-it-py==2.2.0
+MarkupSafe==2.1.3
+matplotlib==3.7.1
+mdit-py-plugins==0.3.3
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.4
+networkx==3.1
+numba==0.57.0
+numpy==1.24.3
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-cupti-cu11==11.7.101
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+nvidia-cufft-cu11==10.9.0.58
+nvidia-curand-cu11==10.2.10.91
+nvidia-cusolver-cu11==11.4.0.1
+nvidia-cusparse-cu11==11.7.4.91
+nvidia-nccl-cu11==2.14.3
+nvidia-nvtx-cu11==11.7.91
+oauthlib==3.2.2
+opencv-python==4.7.0.72
+orjson==3.9.0
+packaging==23.1
+pandas==2.0.2
+Pillow==9.5.0
+protobuf==4.23.2
+pyasn1==0.5.0
+pyasn1-modules==0.3.0
+pydantic==1.10.8
+pydub==0.25.1
+Pygments==2.15.1
+pyparsing==3.0.9
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+python-multipart==0.0.6
+pytz==2023.3
+PyWavelets==1.4.1
+PyYAML==6.0
+requests==2.31.0
+requests-oauthlib==1.3.1
+rsa==4.9
+scikit-image==0.21.0
+scipy==1.10.1
+semantic-version==2.10.0
+six==1.16.0
+sniffio==1.3.0
+starlette==0.27.0
+sympy==1.12
+tb-nightly==2.14.0a20230602
+tensorboard-data-server==0.7.0
+tifffile==2023.4.12
+tomli==2.0.1
+toolz==0.12.0
+torch==2.0.1
+torchvision==0.15.2
+tqdm==4.65.0
+triton==2.0.0
+typing_extensions==4.6.3
+tzdata==2023.3
+uc-micro-py==1.0.2
+urllib3==1.26.16
+uvicorn==0.22.0
+websockets==11.0.3
+Werkzeug==2.3.4
+yapf==0.33.0
+yarl==1.9.2