白鹭先生 committed
Commit db5513e
1 Parent(s): 1e05415

Add SwinIR model
.gitignore CHANGED
@@ -1 +1,2 @@
  propress.py
+ __pycache__
app.py CHANGED
@@ -5,18 +5,17 @@ LastEditors: Egrt
  LastEditTime: 2022-01-13 13:48:57
  FilePath: \LicenseGAN\app.py
  '''
- import os
- os.system('pip install -r requirements.txt')
 
  from PIL import Image
+
  from esrgan import ESRGAN
  import gradio as gr
-
+ import os
  esrgan = ESRGAN()
 
  # --------模型推理---------- #
  def inference(img):
- lr_shape = [12, 24]
+ lr_shape = [32, 56]
  img = img.resize((lr_shape[1], lr_shape[0]), Image.BICUBIC)
  r_image = esrgan.generate_1x1_image(img)
  return r_image
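
The updated app.py resizes every upload to the new 32×56 low-resolution input before calling the generator (the previous 12×24 shape is dropped) and no longer shells out to pip at import time. A minimal sketch of how this inference function could be wired into a Gradio demo is shown below; the gr.Interface wiring is an assumption, since the rest of app.py is not part of this diff.

```python
from PIL import Image
from esrgan import ESRGAN
import gradio as gr

esrgan = ESRGAN()

def inference(img):
    # Downscale the upload to the 32x56 LR input the new weights expect.
    lr_shape = [32, 56]  # (height, width)
    img = img.resize((lr_shape[1], lr_shape[0]), Image.BICUBIC)
    return esrgan.generate_1x1_image(img)

# Hypothetical demo wiring (not shown in the diff): PIL image in, PIL image out.
demo = gr.Interface(fn=inference, inputs=gr.Image(type="pil"), outputs=gr.Image(type="pil"))

if __name__ == "__main__":
    demo.launch()
```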
esrgan.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np
  import torch
  import torch.backends.cudnn as cudnn
  from PIL import Image
- from nets.esrgan import Generator
+ from nets.SwinIR import Generator
  from utils.utils import cvtColor, preprocess_input
 
 
@@ -14,11 +14,15 @@ class ESRGAN(object):
  #-----------------------------------------------#
  # model_path指向logs文件夹下的权值文件
  #-----------------------------------------------#
- "model_path" : 'model_data/Generator_ESRGAN6.pth',
+ "model_path" : 'model_data/Generator_SwinIR.pth',
  #-----------------------------------------------#
  # 上采样的倍数,和训练时一样
  #-----------------------------------------------#
- "scale_factor" : 8,
+ "scale_factor" : 4,
+ #-----------------------------------------------#
+ # hr_shape
+ #-----------------------------------------------#
+ "hr_shape" : [128, 224],
  #-------------------------------#
  # 是否使用Cuda
  # 没有GPU可以设置成False
@@ -36,7 +40,10 @@ class ESRGAN(object):
  self.generate()
 
  def generate(self):
- self.net = Generator(self.scale_factor)
+ # self.net = Generator(self.scale_factor)
+ self.net = Generator(upscale=self.scale_factor, img_size=tuple(self.hr_shape),
+ window_size=8, img_range=1., depths=[3, 3, 3, 3],
+ embed_dim=60, num_heads=[3, 3, 3, 3], mlp_ratio=2, upsampler='pixelshuffledirect')
 
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  self.net.load_state_dict(torch.load(self.model_path, map_location=device))
@@ -72,7 +79,7 @@ class ESRGAN(object):
  # 将归一化的结果再转成rgb格式
  #---------------------------------------------------------#
  hr_image = (hr_image.cpu().data.numpy().transpose(1, 2, 0) * 0.5 + 0.5)
- hr_image = (hr_image-np.min(hr_image))/(np.max(hr_image)-np.min(hr_image)) * 255
+ hr_image = np.clip(hr_image * 255, 0, 255)
 
  hr_image = Image.fromarray(np.uint8(hr_image))
  return hr_image
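
The ESRGAN wrapper now builds the lightweight SwinIR configuration (x4 upscaling, pixelshuffledirect reconstruction, embed_dim 60) instead of the old ESRGAN generator, and the post-processing switches from min-max normalization to a simple clip. A quick sketch, assuming the nets.SwinIR module from this commit and its timm dependency are importable, of instantiating the same configuration and checking that a 32×56 input comes back at the 128×224 hr_shape:

```python
import torch
from nets.SwinIR import Generator

# Same lightweight config that ESRGAN.generate() now builds (scale_factor=4, hr_shape=[128, 224]).
net = Generator(upscale=4, img_size=(128, 224),
                window_size=8, img_range=1., depths=[3, 3, 3, 3],
                embed_dim=60, num_heads=[3, 3, 3, 3], mlp_ratio=2,
                upsampler='pixelshuffledirect')
net.eval()

with torch.no_grad():
    lr = torch.randn(1, 3, 32, 56)   # the LR shape app.py now feeds
    sr = net(lr)

print(sr.shape)  # torch.Size([1, 3, 128, 224])
```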
model_data/{Generator_ESRGAN6.pth → Generator_SwinIR.pth} RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e9694a2253dba5c2e0365bd5d90842354d9edd46fa074509621cfcb1a54b34ae
- size 42002303
+ oid sha256:0dbb3371d937501b0fd913053d92a0c358ccbcb240cb133a319d1cd86dcbbfe9
+ size 32036063
nets/SwinIR.py ADDED
@@ -0,0 +1,912 @@
1
+ # -----------------------------------------------------------------------------------
2
+ # SwinIR: Image Restoration Using Swin Transformer, https://arxiv.org/abs/2108.10257
3
+ # Originally Written by Ze Liu, Modified by Jingyun Liang.
4
+ # -----------------------------------------------------------------------------------
5
+
6
+ import math
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+ import torch.utils.checkpoint as checkpoint
12
+ from timm.models.layers import DropPath, to_2tuple, trunc_normal_
13
+
14
+
15
+ class Mlp(nn.Module):
16
+ def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
17
+ super().__init__()
18
+ out_features = out_features or in_features
19
+ hidden_features = hidden_features or in_features
20
+ self.fc1 = nn.Linear(in_features, hidden_features)
21
+ self.act = act_layer()
22
+ self.fc2 = nn.Linear(hidden_features, out_features)
23
+ self.drop = nn.Dropout(drop)
24
+
25
+ def forward(self, x):
26
+ x = self.fc1(x)
27
+ x = self.act(x)
28
+ x = self.drop(x)
29
+ x = self.fc2(x)
30
+ x = self.drop(x)
31
+ return x
32
+
33
+
34
+ def window_partition(x, window_size):
35
+ """
36
+ Args:
37
+ x: (B, H, W, C)
38
+ window_size (int): window size
39
+
40
+ Returns:
41
+ windows: (num_windows*B, window_size, window_size, C)
42
+ """
43
+ B, H, W, C = x.shape
44
+ x = x.view(B, H // window_size, window_size, W // window_size, window_size, C)
45
+ windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C)
46
+ return windows
47
+
48
+
49
+ def window_reverse(windows, window_size, H, W):
50
+ """
51
+ Args:
52
+ windows: (num_windows*B, window_size, window_size, C)
53
+ window_size (int): Window size
54
+ H (int): Height of image
55
+ W (int): Width of image
56
+
57
+ Returns:
58
+ x: (B, H, W, C)
59
+ """
60
+ B = int(windows.shape[0] / (H * W / window_size / window_size))
61
+ x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1)
62
+ x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)
63
+ return x
64
+
65
+
66
+ class WindowAttention(nn.Module):
67
+ r""" Window based multi-head self attention (W-MSA) module with relative position bias.
68
+ It supports both of shifted and non-shifted window.
69
+
70
+ Args:
71
+ dim (int): Number of input channels.
72
+ window_size (tuple[int]): The height and width of the window.
73
+ num_heads (int): Number of attention heads.
74
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
75
+ qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set
76
+ attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0
77
+ proj_drop (float, optional): Dropout ratio of output. Default: 0.0
78
+ """
79
+
80
+ def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.):
81
+
82
+ super().__init__()
83
+ self.dim = dim
84
+ self.window_size = window_size # Wh, Ww
85
+ self.num_heads = num_heads
86
+ head_dim = dim // num_heads
87
+ self.scale = qk_scale or head_dim ** -0.5
88
+
89
+ # define a parameter table of relative position bias
90
+ self.relative_position_bias_table = nn.Parameter(
91
+ torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH
92
+
93
+ # get pair-wise relative position index for each token inside the window
94
+ coords_h = torch.arange(self.window_size[0])
95
+ coords_w = torch.arange(self.window_size[1])
96
+ coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww
97
+ coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww
98
+ relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww
99
+ relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2
100
+ relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0
101
+ relative_coords[:, :, 1] += self.window_size[1] - 1
102
+ relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
103
+ relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww
104
+ self.register_buffer("relative_position_index", relative_position_index)
105
+
106
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
107
+ self.attn_drop = nn.Dropout(attn_drop)
108
+ self.proj = nn.Linear(dim, dim)
109
+
110
+ self.proj_drop = nn.Dropout(proj_drop)
111
+
112
+ trunc_normal_(self.relative_position_bias_table, std=.02)
113
+ self.softmax = nn.Softmax(dim=-1)
114
+
115
+ def forward(self, x, mask=None):
116
+ """
117
+ Args:
118
+ x: input features with shape of (num_windows*B, N, C)
119
+ mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None
120
+ """
121
+ B_, N, C = x.shape
122
+ qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
123
+ q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
124
+
125
+ q = q * self.scale
126
+ attn = (q @ k.transpose(-2, -1))
127
+
128
+ relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view(
129
+ self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH
130
+ relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww
131
+ attn = attn + relative_position_bias.unsqueeze(0)
132
+
133
+ if mask is not None:
134
+ nW = mask.shape[0]
135
+ attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0)
136
+ attn = attn.view(-1, self.num_heads, N, N)
137
+ attn = self.softmax(attn)
138
+ else:
139
+ attn = self.softmax(attn)
140
+
141
+ attn = self.attn_drop(attn)
142
+
143
+ x = (attn @ v).transpose(1, 2).reshape(B_, N, C)
144
+ x = self.proj(x)
145
+ x = self.proj_drop(x)
146
+ return x
147
+
148
+ def extra_repr(self) -> str:
149
+ return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}'
150
+
151
+ def flops(self, N):
152
+ # calculate flops for 1 window with token length of N
153
+ flops = 0
154
+ # qkv = self.qkv(x)
155
+ flops += N * self.dim * 3 * self.dim
156
+ # attn = (q @ k.transpose(-2, -1))
157
+ flops += self.num_heads * N * (self.dim // self.num_heads) * N
158
+ # x = (attn @ v)
159
+ flops += self.num_heads * N * N * (self.dim // self.num_heads)
160
+ # x = self.proj(x)
161
+ flops += N * self.dim * self.dim
162
+ return flops
163
+
164
+
165
+ class SwinTransformerBlock(nn.Module):
166
+ r""" Swin Transformer Block.
167
+
168
+ Args:
169
+ dim (int): Number of input channels.
170
+ input_resolution (tuple[int]): Input resolution.
171
+ num_heads (int): Number of attention heads.
172
+ window_size (int): Window size.
173
+ shift_size (int): Shift size for SW-MSA.
174
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
175
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
176
+ qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
177
+ drop (float, optional): Dropout rate. Default: 0.0
178
+ attn_drop (float, optional): Attention dropout rate. Default: 0.0
179
+ drop_path (float, optional): Stochastic depth rate. Default: 0.0
180
+ act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
181
+ norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
182
+ """
183
+
184
+ def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0,
185
+ mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0.,
186
+ act_layer=nn.GELU, norm_layer=nn.LayerNorm):
187
+ super().__init__()
188
+ self.dim = dim
189
+ self.input_resolution = input_resolution
190
+ self.num_heads = num_heads
191
+ self.window_size = window_size
192
+ self.shift_size = shift_size
193
+ self.mlp_ratio = mlp_ratio
194
+ if min(self.input_resolution) <= self.window_size:
195
+ # if window size is larger than input resolution, we don't partition windows
196
+ self.shift_size = 0
197
+ self.window_size = min(self.input_resolution)
198
+ assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"
199
+
200
+ self.norm1 = norm_layer(dim)
201
+ self.attn = WindowAttention(
202
+ dim, window_size=to_2tuple(self.window_size), num_heads=num_heads,
203
+ qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop)
204
+
205
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
206
+ self.norm2 = norm_layer(dim)
207
+ mlp_hidden_dim = int(dim * mlp_ratio)
208
+ self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
209
+
210
+ if self.shift_size > 0:
211
+ attn_mask = self.calculate_mask(self.input_resolution)
212
+ else:
213
+ attn_mask = None
214
+
215
+ self.register_buffer("attn_mask", attn_mask)
216
+
217
+ def calculate_mask(self, x_size):
218
+ # calculate attention mask for SW-MSA
219
+ H, W = x_size
220
+ img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1
221
+ h_slices = (slice(0, -self.window_size),
222
+ slice(-self.window_size, -self.shift_size),
223
+ slice(-self.shift_size, None))
224
+ w_slices = (slice(0, -self.window_size),
225
+ slice(-self.window_size, -self.shift_size),
226
+ slice(-self.shift_size, None))
227
+ cnt = 0
228
+ for h in h_slices:
229
+ for w in w_slices:
230
+ img_mask[:, h, w, :] = cnt
231
+ cnt += 1
232
+
233
+ mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1
234
+ mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
235
+ attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
236
+ attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
237
+
238
+ return attn_mask
239
+
240
+ def forward(self, x, x_size):
241
+ H, W = x_size
242
+ B, L, C = x.shape
243
+ # assert L == H * W, "input feature has wrong size"
244
+
245
+ shortcut = x
246
+ x = self.norm1(x)
247
+ x = x.view(B, H, W, C)
248
+
249
+ # cyclic shift
250
+ if self.shift_size > 0:
251
+ shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
252
+ else:
253
+ shifted_x = x
254
+
255
+ # partition windows
256
+ x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C
257
+ x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C
258
+
259
+ # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size
260
+ if self.input_resolution == x_size:
261
+ attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C
262
+ else:
263
+ attn_windows = self.attn(x_windows, mask=self.calculate_mask(x_size).to(x.device))
264
+
265
+ # merge windows
266
+ attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C)
267
+ shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C
268
+
269
+ # reverse cyclic shift
270
+ if self.shift_size > 0:
271
+ x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
272
+ else:
273
+ x = shifted_x
274
+ x = x.view(B, H * W, C)
275
+
276
+ # FFN
277
+ x = shortcut + self.drop_path(x)
278
+ x = x + self.drop_path(self.mlp(self.norm2(x)))
279
+
280
+ return x
281
+
282
+ def extra_repr(self) -> str:
283
+ return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \
284
+ f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}"
285
+
286
+ def flops(self):
287
+ flops = 0
288
+ H, W = self.input_resolution
289
+ # norm1
290
+ flops += self.dim * H * W
291
+ # W-MSA/SW-MSA
292
+ nW = H * W / self.window_size / self.window_size
293
+ flops += nW * self.attn.flops(self.window_size * self.window_size)
294
+ # mlp
295
+ flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio
296
+ # norm2
297
+ flops += self.dim * H * W
298
+ return flops
299
+
300
+
301
+ class PatchMerging(nn.Module):
302
+ r""" Patch Merging Layer.
303
+
304
+ Args:
305
+ input_resolution (tuple[int]): Resolution of input feature.
306
+ dim (int): Number of input channels.
307
+ norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
308
+ """
309
+
310
+ def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm):
311
+ super().__init__()
312
+ self.input_resolution = input_resolution
313
+ self.dim = dim
314
+ self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False)
315
+ self.norm = norm_layer(4 * dim)
316
+
317
+ def forward(self, x):
318
+ """
319
+ x: B, H*W, C
320
+ """
321
+ H, W = self.input_resolution
322
+ B, L, C = x.shape
323
+ assert L == H * W, "input feature has wrong size"
324
+ assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even."
325
+
326
+ x = x.view(B, H, W, C)
327
+
328
+ x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C
329
+ x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C
330
+ x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C
331
+ x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C
332
+ x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C
333
+ x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C
334
+
335
+ x = self.norm(x)
336
+ x = self.reduction(x)
337
+
338
+ return x
339
+
340
+ def extra_repr(self) -> str:
341
+ return f"input_resolution={self.input_resolution}, dim={self.dim}"
342
+
343
+ def flops(self):
344
+ H, W = self.input_resolution
345
+ flops = H * W * self.dim
346
+ flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim
347
+ return flops
348
+
349
+
350
+ class BasicLayer(nn.Module):
351
+ """ A basic Swin Transformer layer for one stage.
352
+
353
+ Args:
354
+ dim (int): Number of input channels.
355
+ input_resolution (tuple[int]): Input resolution.
356
+ depth (int): Number of blocks.
357
+ num_heads (int): Number of attention heads.
358
+ window_size (int): Local window size.
359
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
360
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
361
+ qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
362
+ drop (float, optional): Dropout rate. Default: 0.0
363
+ attn_drop (float, optional): Attention dropout rate. Default: 0.0
364
+ drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
365
+ norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
366
+ downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
367
+ use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
368
+ """
369
+
370
+ def __init__(self, dim, input_resolution, depth, num_heads, window_size,
371
+ mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
372
+ drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False):
373
+
374
+ super().__init__()
375
+ self.dim = dim
376
+ self.input_resolution = input_resolution
377
+ self.depth = depth
378
+ self.use_checkpoint = use_checkpoint
379
+
380
+ # build blocks
381
+ self.blocks = nn.ModuleList([
382
+ SwinTransformerBlock(dim=dim, input_resolution=input_resolution,
383
+ num_heads=num_heads, window_size=window_size,
384
+ shift_size=0 if (i % 2 == 0) else window_size // 2,
385
+ mlp_ratio=mlp_ratio,
386
+ qkv_bias=qkv_bias, qk_scale=qk_scale,
387
+ drop=drop, attn_drop=attn_drop,
388
+ drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path,
389
+ norm_layer=norm_layer)
390
+ for i in range(depth)])
391
+
392
+ # patch merging layer
393
+ if downsample is not None:
394
+ self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer)
395
+ else:
396
+ self.downsample = None
397
+
398
+ def forward(self, x, x_size):
399
+ for blk in self.blocks:
400
+ if self.use_checkpoint:
401
+ x = checkpoint.checkpoint(blk, x, x_size)
402
+ else:
403
+ x = blk(x, x_size)
404
+ if self.downsample is not None:
405
+ x = self.downsample(x)
406
+ return x
407
+
408
+ def extra_repr(self) -> str:
409
+ return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}"
410
+
411
+ def flops(self):
412
+ flops = 0
413
+ for blk in self.blocks:
414
+ flops += blk.flops()
415
+ if self.downsample is not None:
416
+ flops += self.downsample.flops()
417
+ return flops
418
+
419
+
420
+ class RSTB(nn.Module):
421
+ """Residual Swin Transformer Block (RSTB).
422
+
423
+ Args:
424
+ dim (int): Number of input channels.
425
+ input_resolution (tuple[int]): Input resolution.
426
+ depth (int): Number of blocks.
427
+ num_heads (int): Number of attention heads.
428
+ window_size (int): Local window size.
429
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
430
+ qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
431
+ qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
432
+ drop (float, optional): Dropout rate. Default: 0.0
433
+ attn_drop (float, optional): Attention dropout rate. Default: 0.0
434
+ drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0
435
+ norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
436
+ downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None
437
+ use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False.
438
+ img_size: Input image size.
439
+ patch_size: Patch size.
440
+ resi_connection: The convolutional block before residual connection.
441
+ """
442
+
443
+ def __init__(self, dim, input_resolution, depth, num_heads, window_size,
444
+ mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0.,
445
+ drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False,
446
+ img_size=224, patch_size=4, resi_connection='1conv'):
447
+ super(RSTB, self).__init__()
448
+
449
+ self.dim = dim
450
+ self.input_resolution = input_resolution
451
+
452
+ self.residual_group = BasicLayer(dim=dim,
453
+ input_resolution=input_resolution,
454
+ depth=depth,
455
+ num_heads=num_heads,
456
+ window_size=window_size,
457
+ mlp_ratio=mlp_ratio,
458
+ qkv_bias=qkv_bias, qk_scale=qk_scale,
459
+ drop=drop, attn_drop=attn_drop,
460
+ drop_path=drop_path,
461
+ norm_layer=norm_layer,
462
+ downsample=downsample,
463
+ use_checkpoint=use_checkpoint)
464
+
465
+ if resi_connection == '1conv':
466
+ self.conv = nn.Conv2d(dim, dim, 3, 1, 1)
467
+ elif resi_connection == '3conv':
468
+ # to save parameters and memory
469
+ self.conv = nn.Sequential(nn.Conv2d(dim, dim // 4, 3, 1, 1), nn.GELU(),
470
+ nn.Conv2d(dim // 4, dim // 4, 1, 1, 0),
471
+ nn.GELU(),
472
+ nn.Conv2d(dim // 4, dim, 3, 1, 1))
473
+
474
+ self.patch_embed = PatchEmbed(
475
+ img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim,
476
+ norm_layer=None)
477
+
478
+ self.patch_unembed = PatchUnEmbed(
479
+ img_size=img_size, patch_size=patch_size, in_chans=0, embed_dim=dim,
480
+ norm_layer=None)
481
+
482
+ def forward(self, x, x_size):
483
+ return self.patch_embed(self.conv(self.patch_unembed(self.residual_group(x, x_size), x_size))) + x
484
+
485
+ def flops(self):
486
+ flops = 0
487
+ flops += self.residual_group.flops()
488
+ H, W = self.input_resolution
489
+ flops += H * W * self.dim * self.dim * 9
490
+ flops += self.patch_embed.flops()
491
+ flops += self.patch_unembed.flops()
492
+
493
+ return flops
494
+
495
+
496
+ class PatchEmbed(nn.Module):
497
+ r""" Image to Patch Embedding
498
+
499
+ Args:
500
+ img_size (int): Image size. Default: 224.
501
+ patch_size (int): Patch token size. Default: 4.
502
+ in_chans (int): Number of input image channels. Default: 3.
503
+ embed_dim (int): Number of linear projection output channels. Default: 96.
504
+ norm_layer (nn.Module, optional): Normalization layer. Default: None
505
+ """
506
+
507
+ def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
508
+ super().__init__()
509
+ img_size = to_2tuple(img_size)
510
+ patch_size = to_2tuple(patch_size)
511
+ patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
512
+ self.img_size = img_size
513
+ self.patch_size = patch_size
514
+ self.patches_resolution = patches_resolution
515
+ self.num_patches = patches_resolution[0] * patches_resolution[1]
516
+
517
+ self.in_chans = in_chans
518
+ self.embed_dim = embed_dim
519
+
520
+ if norm_layer is not None:
521
+ self.norm = norm_layer(embed_dim)
522
+ else:
523
+ self.norm = None
524
+
525
+ def forward(self, x):
526
+ x = x.flatten(2).transpose(1, 2) # B Ph*Pw C
527
+ if self.norm is not None:
528
+ x = self.norm(x)
529
+ return x
530
+
531
+ def flops(self):
532
+ flops = 0
533
+ H, W = self.img_size
534
+ if self.norm is not None:
535
+ flops += H * W * self.embed_dim
536
+ return flops
537
+
538
+
539
+ class PatchUnEmbed(nn.Module):
540
+ r""" Image to Patch Unembedding
541
+
542
+ Args:
543
+ img_size (int): Image size. Default: 224.
544
+ patch_size (int): Patch token size. Default: 4.
545
+ in_chans (int): Number of input image channels. Default: 3.
546
+ embed_dim (int): Number of linear projection output channels. Default: 96.
547
+ norm_layer (nn.Module, optional): Normalization layer. Default: None
548
+ """
549
+
550
+ def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None):
551
+ super().__init__()
552
+ img_size = to_2tuple(img_size)
553
+ patch_size = to_2tuple(patch_size)
554
+ patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]]
555
+ self.img_size = img_size
556
+ self.patch_size = patch_size
557
+ self.patches_resolution = patches_resolution
558
+ self.num_patches = patches_resolution[0] * patches_resolution[1]
559
+
560
+ self.in_chans = in_chans
561
+ self.embed_dim = embed_dim
562
+
563
+ def forward(self, x, x_size):
564
+ B, HW, C = x.shape
565
+ x = x.transpose(1, 2).view(B, self.embed_dim, x_size[0], x_size[1]) # B Ph*Pw C
566
+ return x
567
+
568
+ def flops(self):
569
+ flops = 0
570
+ return flops
571
+
572
+
573
+ class Upsample(nn.Sequential):
574
+ """Upsample module.
575
+
576
+ Args:
577
+ scale (int): Scale factor. Supported scales: 2^n and 3.
578
+ num_feat (int): Channel number of intermediate features.
579
+ """
580
+
581
+ def __init__(self, scale, num_feat):
582
+ m = []
583
+ if (scale & (scale - 1)) == 0: # scale = 2^n
584
+ for _ in range(int(math.log(scale, 2))):
585
+ m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
586
+ m.append(nn.PixelShuffle(2))
587
+ elif scale == 3:
588
+ m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
589
+ m.append(nn.PixelShuffle(3))
590
+ else:
591
+ raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
592
+ super(Upsample, self).__init__(*m)
593
+
594
+
595
+ class UpsampleOneStep(nn.Sequential):
596
+ """UpsampleOneStep module (the difference with Upsample is that it always only has 1conv + 1pixelshuffle)
597
+ Used in lightweight SR to save parameters.
598
+
599
+ Args:
600
+ scale (int): Scale factor. Supported scales: 2^n and 3.
601
+ num_feat (int): Channel number of intermediate features.
602
+
603
+ """
604
+
605
+ def __init__(self, scale, num_feat, num_out_ch, input_resolution=None):
606
+ self.num_feat = num_feat
607
+ self.input_resolution = input_resolution
608
+ m = []
609
+ m.append(nn.Conv2d(num_feat, (scale ** 2) * num_out_ch, 3, 1, 1))
610
+ m.append(nn.PixelShuffle(scale))
611
+ super(UpsampleOneStep, self).__init__(*m)
612
+
613
+ def flops(self):
614
+ H, W = self.input_resolution
615
+ flops = H * W * self.num_feat * 3 * 9
616
+ return flops
617
+
618
+
619
+ class Generator(nn.Module):
620
+ r""" SwinIR
621
+ A PyTorch impl of : `SwinIR: Image Restoration Using Swin Transformer`, based on Swin Transformer.
622
+
623
+ Args:
624
+ img_size (int | tuple(int)): Input image size. Default 64
625
+ patch_size (int | tuple(int)): Patch size. Default: 1
626
+ in_chans (int): Number of input image channels. Default: 3
627
+ embed_dim (int): Patch embedding dimension. Default: 96
628
+ depths (tuple(int)): Depth of each Swin Transformer layer.
629
+ num_heads (tuple(int)): Number of attention heads in different layers.
630
+ window_size (int): Window size. Default: 7
631
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4
632
+ qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True
633
+ qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None
634
+ drop_rate (float): Dropout rate. Default: 0
635
+ attn_drop_rate (float): Attention dropout rate. Default: 0
636
+ drop_path_rate (float): Stochastic depth rate. Default: 0.1
637
+ norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
638
+ ape (bool): If True, add absolute position embedding to the patch embedding. Default: False
639
+ patch_norm (bool): If True, add normalization after patch embedding. Default: True
640
+ use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False
641
+ upscale: Upscale factor. 2/3/4/8 for image SR, 1 for denoising and compress artifact reduction
642
+ img_range: Image range. 1. or 255.
643
+ upsampler: The reconstruction module. 'pixelshuffle'/'pixelshuffledirect'/'nearest+conv'/None
644
+ resi_connection: The convolutional block before residual connection. '1conv'/'3conv'
645
+ """
646
+
647
+ def __init__(self, img_size=64, patch_size=1, in_chans=3,
648
+ embed_dim=96, depths=[6, 6, 6, 6], num_heads=[6, 6, 6, 6],
649
+ window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None,
650
+ drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1,
651
+ norm_layer=nn.LayerNorm, ape=False, patch_norm=True,
652
+ use_checkpoint=False, upscale=2, img_range=1., upsampler='', resi_connection='1conv',
653
+ **kwargs):
654
+ super(Generator, self).__init__()
655
+ num_in_ch = in_chans
656
+ num_out_ch = in_chans
657
+ num_feat = 64
658
+ self.img_range = img_range
659
+ if in_chans == 3:
660
+ rgb_mean = (0.4488, 0.4371, 0.4040)
661
+ self.mean = torch.Tensor(rgb_mean).view(1, 3, 1, 1)
662
+ else:
663
+ self.mean = torch.zeros(1, 1, 1, 1)
664
+ self.upscale = upscale
665
+ self.upsampler = upsampler
666
+ self.window_size = window_size
667
+
668
+ #####################################################################################################
669
+ ################################### 1, shallow feature extraction ###################################
670
+ self.conv_first = nn.Conv2d(num_in_ch, embed_dim, 3, 1, 1)
671
+
672
+ #####################################################################################################
673
+ ################################### 2, deep feature extraction ######################################
674
+ self.num_layers = len(depths)
675
+ self.embed_dim = embed_dim
676
+ self.ape = ape
677
+ self.patch_norm = patch_norm
678
+ self.num_features = embed_dim
679
+ self.mlp_ratio = mlp_ratio
680
+
681
+ # split image into non-overlapping patches
682
+ self.patch_embed = PatchEmbed(
683
+ img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
684
+ norm_layer=norm_layer if self.patch_norm else None)
685
+ num_patches = self.patch_embed.num_patches
686
+ patches_resolution = self.patch_embed.patches_resolution
687
+ self.patches_resolution = patches_resolution
688
+
689
+ # merge non-overlapping patches into image
690
+ self.patch_unembed = PatchUnEmbed(
691
+ img_size=img_size, patch_size=patch_size, in_chans=embed_dim, embed_dim=embed_dim,
692
+ norm_layer=norm_layer if self.patch_norm else None)
693
+
694
+ # absolute position embedding
695
+ if self.ape:
696
+ self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
697
+ trunc_normal_(self.absolute_pos_embed, std=.02)
698
+
699
+ self.pos_drop = nn.Dropout(p=drop_rate)
700
+
701
+ # stochastic depth
702
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule
703
+
704
+ # build Residual Swin Transformer blocks (RSTB)
705
+ self.layers = nn.ModuleList()
706
+ for i_layer in range(self.num_layers):
707
+ layer = RSTB(dim=embed_dim,
708
+ input_resolution=(patches_resolution[0],
709
+ patches_resolution[1]),
710
+ depth=depths[i_layer],
711
+ num_heads=num_heads[i_layer],
712
+ window_size=window_size,
713
+ mlp_ratio=self.mlp_ratio,
714
+ qkv_bias=qkv_bias, qk_scale=qk_scale,
715
+ drop=drop_rate, attn_drop=attn_drop_rate,
716
+ drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], # no impact on SR results
717
+ norm_layer=norm_layer,
718
+ downsample=None,
719
+ use_checkpoint=use_checkpoint,
720
+ img_size=img_size,
721
+ patch_size=patch_size,
722
+ resi_connection=resi_connection
723
+
724
+ )
725
+ self.layers.append(layer)
726
+ self.norm = norm_layer(self.num_features)
727
+
728
+ # build the last conv layer in deep feature extraction
729
+ if resi_connection == '1conv':
730
+ self.conv_after_body = nn.Conv2d(embed_dim, embed_dim, 3, 1, 1)
731
+ elif resi_connection == '3conv':
732
+ # to save parameters and memory
733
+ self.conv_after_body = nn.Sequential(nn.Conv2d(embed_dim, embed_dim // 4, 3, 1, 1),
734
+ nn.GELU(),
735
+ nn.Conv2d(embed_dim // 4, embed_dim // 4, 1, 1, 0),
736
+ nn.GELU(),
737
+ nn.Conv2d(embed_dim // 4, embed_dim, 3, 1, 1))
738
+
739
+ #####################################################################################################
740
+ ################################ 3, high quality image reconstruction ################################
741
+ if self.upsampler == 'pixelshuffle':
742
+ # for classical SR
743
+ self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
744
+ nn.GELU())
745
+ self.upsample = Upsample(upscale, num_feat)
746
+ self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
747
+ elif self.upsampler == 'pixelshuffledirect':
748
+ # for lightweight SR (to save parameters)
749
+ self.upsample = UpsampleOneStep(upscale, embed_dim, num_out_ch,
750
+ (patches_resolution[0], patches_resolution[1]))
751
+ elif self.upsampler == 'nearest+conv':
752
+ # for real-world SR (less artifacts)
753
+ assert self.upscale == 4, 'only support x4 now.'
754
+ self.conv_before_upsample = nn.Sequential(nn.Conv2d(embed_dim, num_feat, 3, 1, 1),
755
+ nn.GELU())
756
+ self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
757
+ self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
758
+ self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
759
+ self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
760
+ self.lrelu = nn.GELU()
761
+ else:
762
+ # for image denoising and JPEG compression artifact reduction
763
+ self.conv_last = nn.Conv2d(embed_dim, num_out_ch, 3, 1, 1)
764
+
765
+ self.apply(self._init_weights)
766
+
767
+ def _init_weights(self, m):
768
+ if isinstance(m, nn.Linear):
769
+ trunc_normal_(m.weight, std=.02)
770
+ if isinstance(m, nn.Linear) and m.bias is not None:
771
+ nn.init.constant_(m.bias, 0)
772
+ elif isinstance(m, nn.LayerNorm):
773
+ nn.init.constant_(m.bias, 0)
774
+ nn.init.constant_(m.weight, 1.0)
775
+
776
+ @torch.jit.ignore
777
+ def no_weight_decay(self):
778
+ return {'absolute_pos_embed'}
779
+
780
+ @torch.jit.ignore
781
+ def no_weight_decay_keywords(self):
782
+ return {'relative_position_bias_table'}
783
+
784
+ def check_image_size(self, x):
785
+ _, _, h, w = x.size()
786
+ mod_pad_h = (self.window_size - h % self.window_size) % self.window_size
787
+ mod_pad_w = (self.window_size - w % self.window_size) % self.window_size
788
+ x = F.pad(x, (0, mod_pad_w, 0, mod_pad_h), 'reflect')
789
+ return x
790
+
791
+ def forward_features(self, x):
792
+ x_size = (x.shape[2], x.shape[3])
793
+ x = self.patch_embed(x)
794
+ if self.ape:
795
+ x = x + self.absolute_pos_embed
796
+ x = self.pos_drop(x)
797
+
798
+ for layer in self.layers:
799
+ x = layer(x, x_size)
800
+
801
+ x = self.norm(x) # B L C
802
+ x = self.patch_unembed(x, x_size)
803
+
804
+ return x
805
+
806
+ def forward(self, x):
807
+ H, W = x.shape[2:]
808
+ x = self.check_image_size(x)
809
+
810
+ self.mean = self.mean.type_as(x)
811
+ x = (x - self.mean) * self.img_range
812
+
813
+ if self.upsampler == 'pixelshuffle':
814
+ # for classical SR
815
+ x = self.conv_first(x)
816
+ x = self.conv_after_body(self.forward_features(x)) + x
817
+ x = self.conv_before_upsample(x)
818
+ x = self.conv_last(self.upsample(x))
819
+ elif self.upsampler == 'pixelshuffledirect':
820
+ # for lightweight SR
821
+ x = self.conv_first(x)
822
+ x = self.conv_after_body(self.forward_features(x)) + x
823
+ x = self.upsample(x)
824
+ elif self.upsampler == 'nearest+conv':
825
+ # for real-world SR
826
+ x = self.conv_first(x)
827
+ x = self.conv_after_body(self.forward_features(x)) + x
828
+ x = self.conv_before_upsample(x)
829
+ x = self.lrelu(self.conv_up1(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
830
+ x = self.lrelu(self.conv_up2(torch.nn.functional.interpolate(x, scale_factor=2, mode='nearest')))
831
+ x = self.conv_last(self.lrelu(self.conv_hr(x)))
832
+ else:
833
+ # for image denoising and JPEG compression artifact reduction
834
+ x_first = self.conv_first(x)
835
+ res = self.conv_after_body(self.forward_features(x_first)) + x_first
836
+ x = x + self.conv_last(res)
837
+
838
+ x = x / self.img_range + self.mean
839
+
840
+ return x[:, :, :H*self.upscale, :W*self.upscale]
841
+
842
+ def flops(self):
843
+ flops = 0
844
+ H, W = self.patches_resolution
845
+ flops += H * W * 3 * self.embed_dim * 9
846
+ flops += self.patch_embed.flops()
847
+ for i, layer in enumerate(self.layers):
848
+ flops += layer.flops()
849
+ flops += H * W * 3 * self.embed_dim * self.embed_dim
850
+ flops += self.upsample.flops()
851
+ return flops
852
+
853
+
854
+ class Discriminator(nn.Module):
855
+ def __init__(self):
856
+ super(Discriminator, self).__init__()
857
+ self.net = nn.Sequential(
858
+ nn.Conv2d(3, 64, kernel_size=3, padding=1),
859
+ nn.GELU(),
860
+
861
+ nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
862
+ nn.BatchNorm2d(64),
863
+ nn.GELU(),
864
+
865
+ nn.Conv2d(64, 128, kernel_size=3, padding=1),
866
+ nn.BatchNorm2d(128),
867
+ nn.GELU(),
868
+
869
+ nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1),
870
+ nn.BatchNorm2d(128),
871
+ nn.GELU(),
872
+
873
+ nn.Conv2d(128, 256, kernel_size=3, padding=1),
874
+ nn.BatchNorm2d(256),
875
+ nn.GELU(),
876
+
877
+ nn.Conv2d(256, 256, kernel_size=3, stride=2, padding=1),
878
+ nn.BatchNorm2d(256),
879
+ nn.GELU(),
880
+
881
+ nn.Conv2d(256, 512, kernel_size=3, padding=1),
882
+ nn.BatchNorm2d(512),
883
+ nn.GELU(),
884
+
885
+ nn.Conv2d(512, 512, kernel_size=3, stride=2, padding=1),
886
+ nn.BatchNorm2d(512),
887
+ nn.GELU(),
888
+
889
+ nn.AdaptiveAvgPool2d(1),
890
+ nn.Conv2d(512, 1024, kernel_size=1),
891
+ nn.GELU(),
892
+ nn.Conv2d(1024, 1, kernel_size=1)
893
+ )
894
+
895
+ def forward(self, x):
896
+ batch_size = x.size(0)
897
+ return torch.sigmoid(self.net(x).view(batch_size))
898
+
899
+ if __name__ == '__main__':
900
+ upscale = 8
901
+ window_size = 8
902
+ height = (96 // upscale // window_size + 1) * window_size
903
+ width = (192 // upscale // window_size + 1) * window_size
904
+ model = Generator(upscale=upscale, img_size=(height, width),
905
+ window_size=window_size, img_range=1., depths=[6, 6, 6, 6],
906
+ embed_dim=60, num_heads=[6, 6, 6, 6], mlp_ratio=2, upsampler='pixelshuffledirect')
907
+ print(model)
908
+ print(height, width, model.flops() / 1e9)
909
+
910
+ x = torch.randn((1, 3, height, width))
911
+ x = model(x)
912
+ print(x.shape)
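
Besides the SwinIR Generator (and the shape self-test under __main__ above), the new file also bundles a small convolutional Discriminator that pools its feature map to a single sigmoid realness score per image, presumably for GAN training. A minimal call sketch:

```python
import torch
from nets.SwinIR import Discriminator

d = Discriminator()
fake_hr = torch.randn(2, 3, 128, 224)  # e.g. generator output at the 128x224 HR shape
with torch.no_grad():
    scores = d(fake_hr)

print(scores.shape)  # torch.Size([2]) -- one score in (0, 1) per image
```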
nets/__pycache__/esrgan.cpython-38.pyc CHANGED
Binary files a/nets/__pycache__/esrgan.cpython-38.pyc and b/nets/__pycache__/esrgan.cpython-38.pyc differ
utils/__pycache__/__init__.cpython-38.pyc CHANGED
Binary files a/utils/__pycache__/__init__.cpython-38.pyc and b/utils/__pycache__/__init__.cpython-38.pyc differ
utils/__pycache__/dataloader.cpython-38.pyc CHANGED
Binary files a/utils/__pycache__/dataloader.cpython-38.pyc and b/utils/__pycache__/dataloader.cpython-38.pyc differ
utils/__pycache__/utils.cpython-38.pyc CHANGED
Binary files a/utils/__pycache__/utils.cpython-38.pyc and b/utils/__pycache__/utils.cpython-38.pyc differ
utils/__pycache__/utils_fit.cpython-38.pyc CHANGED
Binary files a/utils/__pycache__/utils_fit.cpython-38.pyc and b/utils/__pycache__/utils_fit.cpython-38.pyc differ
utils/dataloader.py CHANGED
@@ -1,12 +1,23 @@
1
- from random import randint
 
2
 
3
  import cv2
4
  import numpy as np
5
  from PIL import Image
6
  from torch.utils.data.dataset import Dataset
7
 
8
- from utils import cvtColor, preprocess_input
9
- from torch.utils.data import DataLoader
10
 
11
  def get_new_img_size(width, height, img_min_side=600):
12
  if width <= height:
@@ -29,6 +40,49 @@ class SRGANDataset(Dataset):
29
 
30
  self.lr_shape = lr_shape
31
  self.hr_shape = hr_shape
32
 
33
  def __len__(self):
34
  return self.train_batches
@@ -37,22 +91,20 @@ class SRGANDataset(Dataset):
37
  index = index % self.train_batches
38
 
39
  image_origin = Image.open(self.train_lines[index].split()[0])
40
- if self.rand()<.5:
41
- img_h = self.get_random_data(image_origin, self.hr_shape)
42
- else:
43
- img_h = self.random_crop(image_origin, self.hr_shape[1], self.hr_shape[0])
44
- img_l = img_h.resize((self.lr_shape[1], self.lr_shape[0]), Image.BICUBIC)
45
 
46
- img_h = np.transpose(preprocess_input(np.array(img_h, dtype=np.float32), [0.5,0.5,0.5], [0.5,0.5,0.5]), [2,0,1])
47
- img_l = np.transpose(preprocess_input(np.array(img_l, dtype=np.float32), [0.5,0.5,0.5], [0.5,0.5,0.5]), [2,0,1])
48
- return np.array(img_l), np.array(img_h)
 
49
 
50
  def rand(self, a=0, b=1):
51
  return np.random.rand()*(b-a) + a
52
 
53
- def get_random_data(self, image, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
54
  #------------------------------#
55
  # 读取图像并转换成RGB图像
 
56
  #------------------------------#
57
  image = cvtColor(image)
58
  #------------------------------#
@@ -61,50 +113,19 @@ class SRGANDataset(Dataset):
61
  iw, ih = image.size
62
  h, w = input_shape
63
 
64
- if not random:
65
- scale = min(w/iw, h/ih)
66
- nw = int(iw*scale)
67
- nh = int(ih*scale)
68
- dx = (w-nw)//2
69
- dy = (h-nh)//2
70
-
71
- #---------------------------------#
72
- # 将图像多余的部分加上灰条
73
- #---------------------------------#
74
- image = image.resize((nw,nh), Image.BICUBIC)
75
- new_image = Image.new('RGB', (w,h), (128,128,128))
76
- new_image.paste(image, (dx, dy))
77
- image_data = np.array(new_image, np.float32)
78
-
79
- return image_data
80
-
81
- #------------------------------------------#
82
- # 对图像进行缩放并且进行长和宽的扭曲
83
- #------------------------------------------#
84
- new_ar = w/h * self.rand(1-jitter,1+jitter)/self.rand(1-jitter,1+jitter)
85
- scale = self.rand(1, 1.5)
86
- if new_ar < 1:
87
- nh = int(scale*h)
88
- nw = int(nh*new_ar)
89
- else:
90
- nw = int(scale*w)
91
- nh = int(nw/new_ar)
92
- image = image.resize((nw,nh), Image.BICUBIC)
93
 
94
- #------------------------------------------#
95
  # 将图像多余的部分加上灰条
96
- #------------------------------------------#
97
- dx = int(self.rand(0, w-nw))
98
- dy = int(self.rand(0, h-nh))
99
- new_image = Image.new('RGB', (w,h), (128,128,128))
100
  new_image.paste(image, (dx, dy))
101
- image = new_image
102
-
103
- #------------------------------------------#
104
- # 翻转图像
105
- #------------------------------------------#
106
- flip = self.rand()<.5
107
- if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
108
 
109
  rotate = self.rand()<.5
110
  if rotate:
@@ -113,41 +134,191 @@ class SRGANDataset(Dataset):
113
  M = cv2.getRotationMatrix2D((a,b),angle,1)
114
  image = cv2.warpAffine(np.array(image), M, (w,h), borderValue=[128,128,128])
115
 
116
- #------------------------------------------#
117
- # 色域扭曲
118
- #------------------------------------------#
119
- hue = self.rand(-hue, hue)
120
- sat = self.rand(1, sat) if self.rand()<.5 else 1/self.rand(1, sat)
121
- val = self.rand(1, val) if self.rand()<.5 else 1/self.rand(1, val)
122
- x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
123
- x[..., 1] *= sat
124
- x[..., 2] *= val
125
- x[x[:,:, 0]>360, 0] = 360
126
- x[:, :, 1:][x[:, :, 1:]>1] = 1
127
- x[x<0] = 0
128
- image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255
129
- return Image.fromarray(np.uint8(image_data))
130
-
131
- def random_crop(self, image, width, height):
132
- #--------------------------------------------#
133
- # 如果图像过小无法截取,先对图像进行放大
134
- #--------------------------------------------#
135
- if image.size[0] < self.hr_shape[1] or image.size[1] < self.hr_shape[0]:
136
- resized_width, resized_height = get_new_img_size(width, height, img_min_side=np.max(self.hr_shape))
137
- image = image.resize((resized_width, resized_height), Image.BICUBIC)
138
-
139
- #--------------------------------------------#
140
- # 随机截取一部分
141
- #--------------------------------------------#
142
- width1 = randint(0, image.size[0] - width)
143
- height1 = randint(0, image.size[1] - height)
144
-
145
- width2 = width1 + width
146
- height2 = height1 + height
147
-
148
- image = image.crop((width1, height1, width2, height2))
149
- return image
150
-
151
  def SRGAN_dataset_collate(batch):
152
  images_l = []
153
  images_h = []
1
+ import math
2
+ from random import choice, choices, randint
3
 
4
  import cv2
5
  import numpy as np
6
  from PIL import Image
7
  from torch.utils.data.dataset import Dataset
8
 
9
+ from utils import USMSharp_npy, cvtColor, preprocess_input
10
+
11
+ from .degradations import (circular_lowpass_kernel, random_add_gaussian_noise,
12
+ random_add_poisson_noise, random_mixed_kernels)
13
+ from .transforms import augment, paired_random_crop
14
+
15
+ def cv_show(image):
16
+ image = np.array(image)
17
+ image = cv2.resize(image, (256, 128), interpolation=cv2.INTER_CUBIC)
18
+ cv2.imshow('image', image)
19
+ cv2.waitKey(0)
20
+ cv2.destroyAllWindows()
21
 
22
  def get_new_img_size(width, height, img_min_side=600):
23
  if width <= height:
40
 
41
  self.lr_shape = lr_shape
42
  self.hr_shape = hr_shape
43
+ self.scale = int(hr_shape[0]/lr_shape[0])
44
+ self.usmsharp = USMSharp_npy()
45
+ # 第一次滤波的参数
46
+ self.blur_kernel_size = 21
47
+ self.kernel_list = ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
48
+ self.kernel_prob = [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
49
+ self.sinc_prob = 0.1
50
+ self.blur_sigma = [0.2, 3]
51
+ self.betag_range = [0.5, 4]
52
+ self.betap_range = [1, 2]
53
+ # 第二次滤波的参数
54
+ self.blur_kernel_size2 = 21
55
+ self.kernel_list2 = ['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso']
56
+ self.kernel_prob2 = [0.45, 0.25, 0.12, 0.03, 0.12, 0.03]
57
+ self.sinc_prob2 = 0.1
58
+ self.blur_sigma2 = [0.2, 3]
59
+ self.betag_range2 = [0.5, 4]
60
+ self.betap_range2 = [1, 2]
61
+ # 最后的sinc滤波
62
+ self.final_sinc_prob = 0.8
63
+ # 卷积核大小从7到21分布
64
+ self.kernel_range = [2 * v + 1 for v in range(3, 11)]
65
+ # 使用脉冲张量进行卷积不会产生模糊效果
66
+ self.pulse_tensor = np.zeros(shape=[21, 21], dtype='float32')
67
+ self.pulse_tensor[10, 10] = 1
68
+ # 第一次退化的参数
69
+ self.resize_prob = [0.2, 0.7, 0.1] # up, down, keep
70
+ self.resize_range = [0.15, 1.5]
71
+ self.gaussian_noise_prob = 0.5
72
+ self.noise_range = [1, 30]
73
+ self.poisson_scale_range = [0.05, 3]
74
+ self.gray_noise_prob = 0.4
75
+ self.jpeg_range = [30, 95]
76
+
77
+ # 第二次退化的参数
78
+ self.second_blur_prob = 0.8
79
+ self.resize_prob2 = [0.3, 0.4, 0.3] # up, down, keep
80
+ self.resize_range2 = [0.3, 1.2]
81
+ self.gaussian_noise_prob2= 0.5
82
+ self.noise_range2 = [1, 25]
83
+ self.poisson_scale_range2= [0.05, 2.5]
84
+ self.gray_noise_prob2 = 0.4
85
+ self.jpeg_range2 = [30, 95]
86
 
87
  def __len__(self):
88
  return self.train_batches
91
  index = index % self.train_batches
92
 
93
  image_origin = Image.open(self.train_lines[index].split()[0])
94
+ lq, gt = self.get_random_data(image_origin, self.hr_shape)
95
 
96
+ gt = np.transpose(preprocess_input(np.array(gt, dtype=np.float32), [0.5,0.5,0.5], [0.5,0.5,0.5]), [2,0,1])
97
+ lq = np.transpose(preprocess_input(np.array(lq, dtype=np.float32), [0.5,0.5,0.5], [0.5,0.5,0.5]), [2,0,1])
98
+
99
+ return lq, gt
100
 
101
  def rand(self, a=0, b=1):
102
  return np.random.rand()*(b-a) + a
103
 
104
+ def get_random_data(self, image, input_shape):
105
  #------------------------------#
106
  # 读取图像并转换成RGB图像
107
+ # cvtColor将np转Image
108
  #------------------------------#
109
  image = cvtColor(image)
110
  #------------------------------#
113
  iw, ih = image.size
114
  h, w = input_shape
115
 
116
+ scale = min(w/iw, h/ih)
117
+ nw = int(iw*scale)
118
+ nh = int(ih*scale)
119
+ dx = (w-nw)//2
120
+ dy = (h-nh)//2
121
 
122
+ #---------------------------------#
123
  # 将图像多余的部分加上灰条
124
+ #---------------------------------#
125
+ image = image.resize((nw,nh), Image.BICUBIC)
126
+ new_image = Image.new('RGB', (w,h), (128,128,128))
 
127
  new_image.paste(image, (dx, dy))
128
+ image = np.array(new_image, np.float32)
 
 
130
  rotate = self.rand()<.5
131
  if rotate:
134
  M = cv2.getRotationMatrix2D((a,b),angle,1)
135
  image = cv2.warpAffine(np.array(image), M, (w,h), borderValue=[128,128,128])
136
 
137
+ # ------------------------ 生成卷积核以进行第一次退化处理 ------------------------ #
138
+ kernel_size = choice(self.kernel_range)
139
+ if np.random.uniform() < self.sinc_prob:
140
+ # 此sinc过滤器设置适用于[7,21]范围内的内核
141
+ if kernel_size < 13:
142
+ omega_c = np.random.uniform(np.pi / 3, np.pi)
143
+ else:
144
+ omega_c = np.random.uniform(np.pi / 5, np.pi)
145
+ kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=False)
146
+ else:
147
+ kernel = random_mixed_kernels(
148
+ self.kernel_list,
149
+ self.kernel_prob,
150
+ kernel_size,
151
+ self.blur_sigma,
152
+ self.blur_sigma, [-math.pi, math.pi],
153
+ self.betag_range,
154
+ self.betap_range,
155
+ noise_range=None)
156
+ # pad kernel
157
+ pad_size = (21 - kernel_size) // 2
158
+ kernel = np.pad(kernel, ((pad_size, pad_size), (pad_size, pad_size)))
159
+ kernel = kernel.astype(np.float32)
160
+ # ------------------------ 生成卷积核以进行第二次退化处理 ------------------------ #
161
+ kernel_size = choice(self.kernel_range)
162
+ if np.random.uniform() < self.sinc_prob2:
163
+ if kernel_size < 13:
164
+ omega_c = np.random.uniform(np.pi / 3, np.pi)
165
+ else:
166
+ omega_c = np.random.uniform(np.pi / 5, np.pi)
167
+ kernel2 = circular_lowpass_kernel(omega_c, kernel_size, pad_to=False)
168
+ else:
169
+ kernel2 = random_mixed_kernels(
170
+ self.kernel_list2,
171
+ self.kernel_prob2,
172
+ kernel_size,
173
+ self.blur_sigma2,
174
+ self.blur_sigma2, [-math.pi, math.pi],
175
+ self.betag_range2,
176
+ self.betap_range2,
177
+ noise_range=None)
178
+ # pad kernel
179
+ pad_size = (21 - kernel_size) // 2
180
+ kernel2 = np.pad(kernel2, ((pad_size, pad_size), (pad_size, pad_size)))
181
+ kernel2 = kernel2.astype(np.float32)
182
+ # ----------------------the final sinc kernel ------------------------- #
183
+ if np.random.uniform() < self.final_sinc_prob:
184
+ kernel_size = choice(self.kernel_range)
185
+ omega_c = np.random.uniform(np.pi / 3, np.pi)
186
+ sinc_kernel = circular_lowpass_kernel(omega_c, kernel_size, pad_to=21)
187
+ else:
188
+ sinc_kernel = self.pulse_tensor
189
+ sinc_kernel = sinc_kernel.astype(np.float32)
190
+ lq, gt = self.feed_data(image, kernel, kernel2, sinc_kernel)
191
+
192
+ return lq, gt
193
+
194
+ def feed_data(self, img_gt, kernel1, kernel2, sinc_kernel):
195
+
196
+ img_gt = np.array(img_gt, dtype=np.float32)
197
+ # 对gt进行锐化
198
+ img_gt = np.clip(img_gt / 255, 0, 1)
199
+ gt = self.usmsharp.filt(img_gt)
200
+ [ori_w, ori_h, _] = gt.shape
201
+
202
+ # ---------------------- 根据参数进行第一次退化 -------------------- #
203
+ # 模糊处理
204
+ out = cv2.filter2D(img_gt, -1, kernel1)
205
+ # 随机 resize
206
+ updown_type = choices(['up', 'down', 'keep'], self.resize_prob)[0]
207
+ if updown_type == 'up':
208
+ scale = np.random.uniform(1, self.resize_range[1])
209
+ elif updown_type == 'down':
210
+ scale = np.random.uniform(self.resize_range[0], 1)
211
+ else:
212
+ scale = 1
213
+ mode = choice(['area', 'bilinear', 'bicubic'])
214
+ if mode=='area':
215
+ out = cv2.resize(out, (int(ori_h * scale), int(ori_w * scale)), interpolation=cv2.INTER_AREA)
216
+ elif mode=='bilinear':
217
+ out = cv2.resize(out, (int(ori_h * scale), int(ori_w * scale)), interpolation=cv2.INTER_LINEAR)
218
+ else:
219
+ out = cv2.resize(out, (int(ori_h * scale), int(ori_w * scale)), interpolation=cv2.INTER_CUBIC)
220
+
221
+ # 灰度噪声
222
+ gray_noise_prob = self.gray_noise_prob
223
+ if np.random.uniform() < self.gaussian_noise_prob:
224
+ out = random_add_gaussian_noise(
225
+ out, sigma_range=self.noise_range, clip=True, rounds=False, gray_prob=gray_noise_prob)
226
+ else:
227
+ out = random_add_poisson_noise(
228
+ out,
229
+ scale_range=self.poisson_scale_range,
230
+ gray_prob=gray_noise_prob,
231
+ clip=True,
232
+ rounds=False)
233
+
234
+ # JPEG 压缩
235
+ jpeg_p = np.random.uniform(low=self.jpeg_range[0], high=self.jpeg_range[1])
236
+ jpeg_p = int(jpeg_p)
237
+ out = np.clip(out, 0, 1)
238
+
239
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_p]
240
+ _, encimg = cv2.imencode('.jpg', out * 255., encode_param)
241
+ out = np.float32(cv2.imdecode(encimg, 1))/255
242
+
243
+ # ---------------------- 根据参数进行第二次退化 -------------------- #
244
+ # 模糊
245
+ if np.random.uniform() < self.second_blur_prob:
246
+ out = cv2.filter2D(out, -1, kernel2)
247
+ # 随机 resize
248
+ updown_type = choices(['up', 'down', 'keep'], self.resize_prob2)[0]
249
+ if updown_type == 'up':
250
+ scale = np.random.uniform(1, self.resize_range2[1])
251
+ elif updown_type == 'down':
252
+ scale = np.random.uniform(self.resize_range2[0], 1)
253
+ else:
254
+ scale = 1
255
+ mode = choice(['area', 'bilinear', 'bicubic'])
256
+ if mode == 'area':
257
+ out = cv2.resize(out, (int(ori_h / self.scale * scale), int(ori_w / self.scale * scale)), interpolation=cv2.INTER_AREA)
258
+ elif mode == 'bilinear':
259
+ out = cv2.resize(out, (int(ori_h / self.scale * scale), int(ori_w / self.scale * scale)), interpolation=cv2.INTER_LINEAR)
260
+ else:
261
+ out = cv2.resize(out, (int(ori_h / self.scale * scale), int(ori_w / self.scale * scale)), interpolation=cv2.INTER_CUBIC)
262
+ # 添加噪声
263
+ gray_noise_prob = self.gray_noise_prob2
264
+ if np.random.uniform() < self.gaussian_noise_prob2:
265
+ out = random_add_gaussian_noise(
266
+ out, sigma_range=self.noise_range2, clip=True, rounds=False, gray_prob=gray_noise_prob)
267
+ else:
268
+ out = random_add_poisson_noise(
269
+ out,
270
+ scale_range=self.poisson_scale_range2,
271
+ gray_prob=gray_noise_prob,
272
+ clip=True,
273
+ rounds=False)
274
+
275
+ # JPEG压缩+最后的sinc滤波器
276
+ # 我们还需要将图像的大小调整到所需的尺寸。我们把[调整大小+sinc过滤器]组合在一起
277
+ # 作为一个操作。
278
+ # 我们考虑两个顺序。
279
+ # 1. [调整大小+sinc filter] + JPEG压缩
280
+ # 2. JPEG压缩+[调整大小+sinc滤波器]。
281
+ # 根据经验,我们发现其他组合(sinc + JPEG + Resize)会引入扭曲的线条。
282
+ if np.random.uniform() < 0.5:
283
+ # resize back + the final sinc filter
284
+ mode = choice(['area', 'bilinear', 'bicubic'])
285
+ if mode == 'area':
286
+ out = cv2.resize(out, (ori_h // self.scale, ori_w // self.scale), interpolation=cv2.INTER_AREA)
287
+ elif mode == 'bilinear':
288
+ out = cv2.resize(out, (ori_h // self.scale, ori_w // self.scale), interpolation=cv2.INTER_LINEAR)
289
+ else:
290
+ out = cv2.resize(out, (ori_h // self.scale, ori_w // self.scale), interpolation=cv2.INTER_CUBIC)
291
+
292
+ out = cv2.filter2D(out, -1, sinc_kernel)
293
+ # JPEG 压缩
294
+ jpeg_p = np.random.uniform(low=self.jpeg_range[0], high=self.jpeg_range[1])
295
+ jpeg_p = int(jpeg_p)
296
+ out = np.clip(out, 0, 1)
297
+
298
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_p]
299
+ _, encimg = cv2.imencode('.jpg', out * 255., encode_param)
300
+ out = np.float32(cv2.imdecode(encimg, 1)) / 255
301
+ else:
302
+ # JPEG 压缩
303
+ jpeg_p = np.random.uniform(low=self.jpeg_range[0], high=self.jpeg_range[1])
304
+ jpeg_p = int(jpeg_p)
305
+ out = np.clip(out, 0, 1)
306
+
307
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_p]
308
+ _, encimg = cv2.imencode('.jpg', out * 255., encode_param)
309
+ out = np.float32(cv2.imdecode(encimg, 1)) / 255
310
+ # resize back + the final sinc filter
311
+ mode = choice(['area', 'bilinear', 'bicubic'])
312
+ if mode == 'area':
313
+ out = cv2.resize(out, (ori_h // self.scale, ori_w // self.scale),interpolation=cv2.INTER_AREA)
314
+ elif mode == 'bilinear':
315
+ out = cv2.resize(out, (ori_h // self.scale, ori_w // self.scale),interpolation=cv2.INTER_LINEAR)
316
+ else:
317
+ out = cv2.resize(out, (ori_h // self.scale, ori_w // self.scale),interpolation=cv2.INTER_CUBIC)
318
+ lq = np.clip((out * 255.0), 0, 255)
319
+ gt = np.clip((gt * 255.0), 0, 255)
320
+ return Image.fromarray(np.uint8(lq)), Image.fromarray(np.uint8(gt))
321
+
322
  def SRGAN_dataset_collate(batch):
323
  images_l = []
324
  images_h = []
utils/degradations.py ADDED
@@ -0,0 +1,765 @@
1
+ import cv2
2
+ import math
3
+ import numpy as np
4
+ import random
5
+ import torch
6
+ from scipy import special
7
+ from scipy.stats import multivariate_normal
8
+ from torchvision.transforms.functional_tensor import rgb_to_grayscale
9
+
10
+ # -------------------------------------------------------------------- #
11
+ # --------------------------- blur kernels --------------------------- #
12
+ # -------------------------------------------------------------------- #
13
+
14
+
15
+ # --------------------------- util functions --------------------------- #
16
+ def sigma_matrix2(sig_x, sig_y, theta):
17
+ """Calculate the rotated sigma matrix (two dimensional matrix).
18
+
19
+ Args:
20
+ sig_x (float):
21
+ sig_y (float):
22
+ theta (float): Radian measurement.
23
+
24
+ Returns:
25
+ ndarray: Rotated sigma matrix.
26
+ """
27
+ d_matrix = np.array([[sig_x**2, 0], [0, sig_y**2]])
28
+ u_matrix = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
29
+ return np.dot(u_matrix, np.dot(d_matrix, u_matrix.T))
30
+
31
+
32
+ def mesh_grid(kernel_size):
33
+ """Generate the mesh grid, centering at zero.
34
+
35
+ Args:
36
+ kernel_size (int):
37
+
38
+ Returns:
39
+ xy (ndarray): with the shape (kernel_size, kernel_size, 2)
40
+ xx (ndarray): with the shape (kernel_size, kernel_size)
41
+ yy (ndarray): with the shape (kernel_size, kernel_size)
42
+ """
43
+ ax = np.arange(-kernel_size // 2 + 1., kernel_size // 2 + 1.)
44
+ xx, yy = np.meshgrid(ax, ax)
45
+ xy = np.hstack((xx.reshape((kernel_size * kernel_size, 1)), yy.reshape(kernel_size * kernel_size,
46
+ 1))).reshape(kernel_size, kernel_size, 2)
47
+ return xy, xx, yy
48
+
49
+
50
+ def pdf2(sigma_matrix, grid):
51
+ """Calculate PDF of the bivariate Gaussian distribution.
52
+
53
+ Args:
54
+ sigma_matrix (ndarray): with the shape (2, 2)
55
+ grid (ndarray): generated by :func:`mesh_grid`,
56
+ with the shape (K, K, 2), K is the kernel size.
57
+
58
+ Returns:
59
+ kernel (ndarray): un-normalized kernel.
60
+ """
61
+ inverse_sigma = np.linalg.inv(sigma_matrix)
62
+ kernel = np.exp(-0.5 * np.sum(np.dot(grid, inverse_sigma) * grid, 2))
63
+ return kernel
64
+
65
+
66
+ def cdf2(d_matrix, grid):
67
+ """Calculate the CDF of the standard bivariate Gaussian distribution.
68
+ Used in skewed Gaussian distribution.
69
+
70
+ Args:
71
+ d_matrix (ndarray): skew matrix.
72
+ grid (ndarray): generated by :func:`mesh_grid`,
73
+ with the shape (K, K, 2), K is the kernel size.
74
+
75
+ Returns:
76
+ cdf (ndarray): skewed cdf.
77
+ """
78
+ rv = multivariate_normal([0, 0], [[1, 0], [0, 1]])
79
+ grid = np.dot(grid, d_matrix)
80
+ cdf = rv.cdf(grid)
81
+ return cdf
82
+
83
+
84
+ def bivariate_Gaussian(kernel_size, sig_x, sig_y, theta, grid=None, isotropic=True):
85
+ """Generate a bivariate isotropic or anisotropic Gaussian kernel.
86
+
87
+ In the isotropic mode, only `sig_x` is used. `sig_y` and `theta` are ignored.
88
+
89
+ Args:
90
+ kernel_size (int):
91
+ sig_x (float):
92
+ sig_y (float):
93
+ theta (float): Radian measurement.
94
+ grid (ndarray, optional): generated by :func:`mesh_grid`,
95
+ with the shape (K, K, 2), K is the kernel size. Default: None
96
+ isotropic (bool):
97
+
98
+ Returns:
99
+ kernel (ndarray): normalized kernel.
100
+ """
101
+ if grid is None:
102
+ grid, _, _ = mesh_grid(kernel_size)
103
+ if isotropic:
104
+ sigma_matrix = np.array([[sig_x**2, 0], [0, sig_x**2]])
105
+ else:
106
+ sigma_matrix = sigma_matrix2(sig_x, sig_y, theta)
107
+ kernel = pdf2(sigma_matrix, grid)
108
+ kernel = kernel / np.sum(kernel)
109
+ return kernel
110
+
111
+
112
+ def bivariate_generalized_Gaussian(kernel_size, sig_x, sig_y, theta, beta, grid=None, isotropic=True):
113
+ """Generate a bivariate generalized Gaussian kernel.
114
+ Described in `Parameter Estimation For Multivariate Generalized
115
+ Gaussian Distributions`_
116
+ by Pascal et al. (2013).
117
+
118
+ In the isotropic mode, only `sig_x` is used. `sig_y` and `theta` are ignored.
119
+
120
+ Args:
121
+ kernel_size (int):
122
+ sig_x (float):
123
+ sig_y (float):
124
+ theta (float): Radian measurement.
125
+ beta (float): shape parameter, beta = 1 is the normal distribution.
126
+ grid (ndarray, optional): generated by :func:`mesh_grid`,
127
+ with the shape (K, K, 2), K is the kernel size. Default: None
128
+
129
+ Returns:
130
+ kernel (ndarray): normalized kernel.
131
+
132
+ .. _Parameter Estimation For Multivariate Generalized Gaussian
133
+ Distributions: https://arxiv.org/abs/1302.6498
134
+ """
135
+ if grid is None:
136
+ grid, _, _ = mesh_grid(kernel_size)
137
+ if isotropic:
138
+ sigma_matrix = np.array([[sig_x**2, 0], [0, sig_x**2]])
139
+ else:
140
+ sigma_matrix = sigma_matrix2(sig_x, sig_y, theta)
141
+ inverse_sigma = np.linalg.inv(sigma_matrix)
142
+ kernel = np.exp(-0.5 * np.power(np.sum(np.dot(grid, inverse_sigma) * grid, 2), beta))
143
+ kernel = kernel / np.sum(kernel)
144
+ return kernel
145
+
146
+
147
+ def bivariate_plateau(kernel_size, sig_x, sig_y, theta, beta, grid=None, isotropic=True):
148
+ """Generate a plateau-like anisotropic kernel.
149
+ 1 / (1+x^(beta))
150
+
151
+ Ref: https://stats.stackexchange.com/questions/203629/is-there-a-plateau-shaped-distribution
152
+
153
+ In the isotropic mode, only `sig_x` is used. `sig_y` and `theta` are ignored.
154
+
155
+ Args:
156
+ kernel_size (int):
157
+ sig_x (float):
158
+ sig_y (float):
159
+ theta (float): Radian measurement.
160
+ beta (float): shape parameter, beta = 1 is the normal distribution.
161
+ grid (ndarray, optional): generated by :func:`mesh_grid`,
162
+ with the shape (K, K, 2), K is the kernel size. Default: None
163
+
164
+ Returns:
165
+ kernel (ndarray): normalized kernel.
166
+ """
167
+ if grid is None:
168
+ grid, _, _ = mesh_grid(kernel_size)
169
+ if isotropic:
170
+ sigma_matrix = np.array([[sig_x**2, 0], [0, sig_x**2]])
171
+ else:
172
+ sigma_matrix = sigma_matrix2(sig_x, sig_y, theta)
173
+ inverse_sigma = np.linalg.inv(sigma_matrix)
174
+ kernel = np.reciprocal(np.power(np.sum(np.dot(grid, inverse_sigma) * grid, 2), beta) + 1)
175
+ kernel = kernel / np.sum(kernel)
176
+ return kernel
177
+
178
+
179
+ def random_bivariate_Gaussian(kernel_size,
180
+ sigma_x_range,
181
+ sigma_y_range,
182
+ rotation_range,
183
+ noise_range=None,
184
+ isotropic=True):
185
+ """Randomly generate bivariate isotropic or anisotropic Gaussian kernels.
186
+
187
+ In the isotropic mode, only `sigma_x_range` is used. `sigma_y_range` and `rotation_range` are ignored.
188
+
189
+ Args:
190
+ kernel_size (int):
191
+ sigma_x_range (tuple): [0.6, 5]
192
+ sigma_y_range (tuple): [0.6, 5]
193
+ rotation_range (tuple): [-math.pi, math.pi]
194
+ noise_range(tuple, optional): multiplicative kernel noise,
195
+ [0.75, 1.25]. Default: None
196
+
197
+ Returns:
198
+ kernel (ndarray):
199
+ """
200
+ assert kernel_size % 2 == 1, 'Kernel size must be an odd number.'
201
+ assert sigma_x_range[0] < sigma_x_range[1], 'Wrong sigma_x_range.'
202
+ sigma_x = np.random.uniform(sigma_x_range[0], sigma_x_range[1])
203
+ if isotropic is False:
204
+ assert sigma_y_range[0] < sigma_y_range[1], 'Wrong sigma_y_range.'
205
+ assert rotation_range[0] < rotation_range[1], 'Wrong rotation_range.'
206
+ sigma_y = np.random.uniform(sigma_y_range[0], sigma_y_range[1])
207
+ rotation = np.random.uniform(rotation_range[0], rotation_range[1])
208
+ else:
209
+ sigma_y = sigma_x
210
+ rotation = 0
211
+
212
+ kernel = bivariate_Gaussian(kernel_size, sigma_x, sigma_y, rotation, isotropic=isotropic)
213
+
214
+ # add multiplicative noise
215
+ if noise_range is not None:
216
+ assert noise_range[0] < noise_range[1], 'Wrong noise range.'
217
+ noise = np.random.uniform(noise_range[0], noise_range[1], size=kernel.shape)
218
+ kernel = kernel * noise
219
+ kernel = kernel / np.sum(kernel)
220
+ return kernel
221
+
222
+
223
+ def random_bivariate_generalized_Gaussian(kernel_size,
224
+ sigma_x_range,
225
+ sigma_y_range,
226
+ rotation_range,
227
+ beta_range,
228
+ noise_range=None,
229
+ isotropic=True):
230
+ """Randomly generate bivariate generalized Gaussian kernels.
231
+
232
+ In the isotropic mode, only `sigma_x_range` is used. `sigma_y_range` and `rotation_range` are ignored.
233
+
234
+ Args:
235
+ kernel_size (int):
236
+ sigma_x_range (tuple): [0.6, 5]
237
+ sigma_y_range (tuple): [0.6, 5]
238
+ rotation_range (tuple): [-math.pi, math.pi]
239
+ beta_range (tuple): [0.5, 8]
240
+ noise_range(tuple, optional): multiplicative kernel noise,
241
+ [0.75, 1.25]. Default: None
242
+
243
+ Returns:
244
+ kernel (ndarray):
245
+ """
246
+ assert kernel_size % 2 == 1, 'Kernel size must be an odd number.'
247
+ assert sigma_x_range[0] < sigma_x_range[1], 'Wrong sigma_x_range.'
248
+ sigma_x = np.random.uniform(sigma_x_range[0], sigma_x_range[1])
249
+ if isotropic is False:
250
+ assert sigma_y_range[0] < sigma_y_range[1], 'Wrong sigma_y_range.'
251
+ assert rotation_range[0] < rotation_range[1], 'Wrong rotation_range.'
252
+ sigma_y = np.random.uniform(sigma_y_range[0], sigma_y_range[1])
253
+ rotation = np.random.uniform(rotation_range[0], rotation_range[1])
254
+ else:
255
+ sigma_y = sigma_x
256
+ rotation = 0
257
+
258
+ # assume beta_range[0] < 1 < beta_range[1]
259
+ if np.random.uniform() < 0.5:
260
+ beta = np.random.uniform(beta_range[0], 1)
261
+ else:
262
+ beta = np.random.uniform(1, beta_range[1])
263
+
264
+ kernel = bivariate_generalized_Gaussian(kernel_size, sigma_x, sigma_y, rotation, beta, isotropic=isotropic)
265
+
266
+ # add multiplicative noise
267
+ if noise_range is not None:
268
+ assert noise_range[0] < noise_range[1], 'Wrong noise range.'
269
+ noise = np.random.uniform(noise_range[0], noise_range[1], size=kernel.shape)
270
+ kernel = kernel * noise
271
+ kernel = kernel / np.sum(kernel)
272
+ return kernel
273
+
274
+
275
+ def random_bivariate_plateau(kernel_size,
276
+ sigma_x_range,
277
+ sigma_y_range,
278
+ rotation_range,
279
+ beta_range,
280
+ noise_range=None,
281
+ isotropic=True):
282
+ """Randomly generate bivariate plateau kernels.
283
+
284
+ In the isotropic mode, only `sigma_x_range` is used. `sigma_y_range` and `rotation_range` are ignored.
285
+
286
+ Args:
287
+ kernel_size (int):
288
+ sigma_x_range (tuple): [0.6, 5]
289
+ sigma_y_range (tuple): [0.6, 5]
290
+ rotation_range (tuple): [-math.pi/2, math.pi/2]
291
+ beta_range (tuple): [1, 4]
292
+ noise_range(tuple, optional): multiplicative kernel noise,
293
+ [0.75, 1.25]. Default: None
294
+
295
+ Returns:
296
+ kernel (ndarray):
297
+ """
298
+ assert kernel_size % 2 == 1, 'Kernel size must be an odd number.'
299
+ assert sigma_x_range[0] < sigma_x_range[1], 'Wrong sigma_x_range.'
300
+ sigma_x = np.random.uniform(sigma_x_range[0], sigma_x_range[1])
301
+ if isotropic is False:
302
+ assert sigma_y_range[0] < sigma_y_range[1], 'Wrong sigma_y_range.'
303
+ assert rotation_range[0] < rotation_range[1], 'Wrong rotation_range.'
304
+ sigma_y = np.random.uniform(sigma_y_range[0], sigma_y_range[1])
305
+ rotation = np.random.uniform(rotation_range[0], rotation_range[1])
306
+ else:
307
+ sigma_y = sigma_x
308
+ rotation = 0
309
+
310
+ # TODO: this may be not proper
311
+ if np.random.uniform() < 0.5:
312
+ beta = np.random.uniform(beta_range[0], 1)
313
+ else:
314
+ beta = np.random.uniform(1, beta_range[1])
315
+
316
+ kernel = bivariate_plateau(kernel_size, sigma_x, sigma_y, rotation, beta, isotropic=isotropic)
317
+ # add multiplicative noise
318
+ if noise_range is not None:
319
+ assert noise_range[0] < noise_range[1], 'Wrong noise range.'
320
+ noise = np.random.uniform(noise_range[0], noise_range[1], size=kernel.shape)
321
+ kernel = kernel * noise
322
+ kernel = kernel / np.sum(kernel)
323
+
324
+ return kernel
325
+
326
+
327
+ def random_mixed_kernels(kernel_list,
328
+ kernel_prob,
329
+ kernel_size=21,
330
+ sigma_x_range=(0.6, 5),
331
+ sigma_y_range=(0.6, 5),
332
+ rotation_range=(-math.pi, math.pi),
333
+ betag_range=(0.5, 8),
334
+ betap_range=(0.5, 8),
335
+ noise_range=None):
336
+ """Randomly generate mixed kernels.
337
+
338
+ Args:
339
+ kernel_list (tuple): a list name of kernel types,
340
+ support ['iso', 'aniso', 'skew', 'generalized', 'plateau_iso',
341
+ 'plateau_aniso']
342
+ kernel_prob (tuple): corresponding kernel probability for each
343
+ kernel type
344
+ kernel_size (int):
345
+ sigma_x_range (tuple): [0.6, 5]
346
+ sigma_y_range (tuple): [0.6, 5]
347
+ rotation_range (tuple): [-math.pi, math.pi]
348
+ beta_range (tuple): [0.5, 8]
349
+ noise_range(tuple, optional): multiplicative kernel noise,
350
+ [0.75, 1.25]. Default: None
351
+
352
+ Returns:
353
+ kernel (ndarray):
354
+ """
355
+ kernel_type = random.choices(kernel_list, kernel_prob)[0]
356
+ if kernel_type == 'iso':
357
+ kernel = random_bivariate_Gaussian(
358
+ kernel_size, sigma_x_range, sigma_y_range, rotation_range, noise_range=noise_range, isotropic=True)
359
+ elif kernel_type == 'aniso':
360
+ kernel = random_bivariate_Gaussian(
361
+ kernel_size, sigma_x_range, sigma_y_range, rotation_range, noise_range=noise_range, isotropic=False)
362
+ elif kernel_type == 'generalized_iso':
363
+ kernel = random_bivariate_generalized_Gaussian(
364
+ kernel_size,
365
+ sigma_x_range,
366
+ sigma_y_range,
367
+ rotation_range,
368
+ betag_range,
369
+ noise_range=noise_range,
370
+ isotropic=True)
371
+ elif kernel_type == 'generalized_aniso':
372
+ kernel = random_bivariate_generalized_Gaussian(
373
+ kernel_size,
374
+ sigma_x_range,
375
+ sigma_y_range,
376
+ rotation_range,
377
+ betag_range,
378
+ noise_range=noise_range,
379
+ isotropic=False)
380
+ elif kernel_type == 'plateau_iso':
381
+ kernel = random_bivariate_plateau(
382
+ kernel_size, sigma_x_range, sigma_y_range, rotation_range, betap_range, noise_range=None, isotropic=True)
383
+ elif kernel_type == 'plateau_aniso':
384
+ kernel = random_bivariate_plateau(
385
+ kernel_size, sigma_x_range, sigma_y_range, rotation_range, betap_range, noise_range=None, isotropic=False)
386
+ return kernel
387
+
388
+
389
+ np.seterr(divide='ignore', invalid='ignore')
390
+
391
+
392
+ def circular_lowpass_kernel(cutoff, kernel_size, pad_to=0):
393
+ """2D sinc filter, ref: https://dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter
394
+
395
+ Args:
396
+ cutoff (float): cutoff frequency in radians (pi is max)
397
+ kernel_size (int): horizontal and vertical size, must be odd.
398
+ pad_to (int): pad kernel size to desired size, must be odd or zero.
399
+ """
400
+ assert kernel_size % 2 == 1, 'Kernel size must be an odd number.'
401
+ kernel = np.fromfunction(
402
+ lambda x, y: cutoff * special.j1(cutoff * np.sqrt(
403
+ (x - (kernel_size - 1) / 2)**2 + (y - (kernel_size - 1) / 2)**2)) / (2 * np.pi * np.sqrt(
404
+ (x - (kernel_size - 1) / 2)**2 + (y - (kernel_size - 1) / 2)**2)), [kernel_size, kernel_size])
405
+ kernel[(kernel_size - 1) // 2, (kernel_size - 1) // 2] = cutoff**2 / (4 * np.pi)
406
+ kernel = kernel / np.sum(kernel)
407
+ if pad_to > kernel_size:
408
+ pad_size = (pad_to - kernel_size) // 2
409
+ kernel = np.pad(kernel, ((pad_size, pad_size), (pad_size, pad_size)))
410
+ return kernel
411
+
412
+
413
+ # ------------------------------------------------------------- #
414
+ # --------------------------- noise --------------------------- #
415
+ # ------------------------------------------------------------- #
416
+
417
+ # ----------------------- Gaussian Noise ----------------------- #
418
+
419
+
420
+ def generate_gaussian_noise(img, sigma=10, gray_noise=False):
421
+ """Generate Gaussian noise.
422
+
423
+ Args:
424
+ img (Numpy array): Input image, shape (h, w, c), range [0, 1], float32.
425
+ sigma (float): Noise scale (measured in range 255). Default: 10.
426
+
427
+ Returns:
428
+ (Numpy array): Returned noisy image, shape (h, w, c), range[0, 1],
429
+ float32.
430
+ """
431
+ if gray_noise:
432
+ noise = np.float32(np.random.randn(*(img.shape[0:2]))) * sigma / 255.
433
+ noise = np.expand_dims(noise, axis=2).repeat(3, axis=2)
434
+ else:
435
+ noise = np.float32(np.random.randn(*(img.shape))) * sigma / 255.
436
+ return noise
437
+
438
+
439
+ def add_gaussian_noise(img, sigma=10, clip=True, rounds=False, gray_noise=False):
440
+ """Add Gaussian noise.
441
+
442
+ Args:
443
+ img (Numpy array): Input image, shape (h, w, c), range [0, 1], float32.
444
+ sigma (float): Noise scale (measured in range 255). Default: 10.
445
+
446
+ Returns:
447
+ (Numpy array): Returned noisy image, shape (h, w, c), range[0, 1],
448
+ float32.
449
+ """
450
+ noise = generate_gaussian_noise(img, sigma, gray_noise)
451
+ out = img + noise
452
+ if clip and rounds:
453
+ out = np.clip((out * 255.0).round(), 0, 255) / 255.
454
+ elif clip:
455
+ out = np.clip(out, 0, 1)
456
+ elif rounds:
457
+ out = (out * 255.0).round() / 255.
458
+ return out
459
+
460
+
461
+ def generate_gaussian_noise_pt(img, sigma=10, gray_noise=0):
462
+ """Add Gaussian noise (PyTorch version).
463
+
464
+ Args:
465
+ img (Tensor): Shape (b, c, h, w), range[0, 1], float32.
466
+ sigma (float | Tensor): Noise sigma (measured in range 255). Default: 10.
467
+
468
+ Returns:
469
+ (Tensor): Returned noisy image, shape (b, c, h, w), range[0, 1],
470
+ float32.
471
+ """
472
+ b, _, h, w = img.size()
473
+ if not isinstance(sigma, (float, int)):
474
+ sigma = sigma.view(img.size(0), 1, 1, 1)
475
+ if isinstance(gray_noise, (float, int)):
476
+ cal_gray_noise = gray_noise > 0
477
+ else:
478
+ gray_noise = gray_noise.view(b, 1, 1, 1)
479
+ cal_gray_noise = torch.sum(gray_noise) > 0
480
+
481
+ if cal_gray_noise:
482
+ noise_gray = torch.randn(*img.size()[2:4], dtype=img.dtype, device=img.device) * sigma / 255.
483
+ noise_gray = noise_gray.view(b, 1, h, w)
484
+
485
+ # always calculate color noise
486
+ noise = torch.randn(*img.size(), dtype=img.dtype, device=img.device) * sigma / 255.
487
+
488
+ if cal_gray_noise:
489
+ noise = noise * (1 - gray_noise) + noise_gray * gray_noise
490
+ return noise
491
+
492
+
493
+ def add_gaussian_noise_pt(img, sigma=10, gray_noise=0, clip=True, rounds=False):
494
+ """Add Gaussian noise (PyTorch version).
495
+
496
+ Args:
497
+ img (Tensor): Shape (b, c, h, w), range[0, 1], float32.
498
+ sigma (float | Tensor): Noise sigma (measured in range 255). Default: 10.
499
+
500
+ Returns:
501
+ (Tensor): Returned noisy image, shape (b, c, h, w), range[0, 1],
502
+ float32.
503
+ """
504
+ noise = generate_gaussian_noise_pt(img, sigma, gray_noise)
505
+ out = img + noise
506
+ if clip and rounds:
507
+ out = torch.clamp((out * 255.0).round(), 0, 255) / 255.
508
+ elif clip:
509
+ out = torch.clamp(out, 0, 1)
510
+ elif rounds:
511
+ out = (out * 255.0).round() / 255.
512
+ return out
513
+
514
+
515
+ # ----------------------- Random Gaussian Noise ----------------------- #
516
+ def random_generate_gaussian_noise(img, sigma_range=(0, 10), gray_prob=0):
517
+ sigma = np.random.uniform(sigma_range[0], sigma_range[1])
518
+ if np.random.uniform() < gray_prob:
519
+ gray_noise = True
520
+ else:
521
+ gray_noise = False
522
+ return generate_gaussian_noise(img, sigma, gray_noise)
523
+
524
+
525
+ def random_add_gaussian_noise(img, sigma_range=(0, 1.0), gray_prob=0, clip=True, rounds=False):
526
+ noise = random_generate_gaussian_noise(img, sigma_range, gray_prob)
527
+ out = img + noise
528
+ if clip and rounds:
529
+ out = np.clip((out * 255.0).round(), 0, 255) / 255.
530
+ elif clip:
531
+ out = np.clip(out, 0, 1)
532
+ elif rounds:
533
+ out = (out * 255.0).round() / 255.
534
+ return out
535
+
536
+
537
+ def random_generate_gaussian_noise_pt(img, sigma_range=(0, 10), gray_prob=0):
538
+ sigma = torch.rand(
539
+ img.size(0), dtype=img.dtype, device=img.device) * (sigma_range[1] - sigma_range[0]) + sigma_range[0]
540
+ gray_noise = torch.rand(img.size(0), dtype=img.dtype, device=img.device)
541
+ gray_noise = (gray_noise < gray_prob).float()
542
+ return generate_gaussian_noise_pt(img, sigma, gray_noise)
543
+
544
+
545
+ def random_add_gaussian_noise_pt(img, sigma_range=(0, 1.0), gray_prob=0, clip=True, rounds=False):
546
+ noise = random_generate_gaussian_noise_pt(img, sigma_range, gray_prob)
547
+ out = img + noise
548
+ if clip and rounds:
549
+ out = torch.clamp((out * 255.0).round(), 0, 255) / 255.
550
+ elif clip:
551
+ out = torch.clamp(out, 0, 1)
552
+ elif rounds:
553
+ out = (out * 255.0).round() / 255.
554
+ return out
555
+
556
+
557
+ # ----------------------- Poisson (Shot) Noise ----------------------- #
558
+
559
+
560
+ def generate_poisson_noise(img, scale=1.0, gray_noise=False):
561
+ """Generate poisson noise.
562
+
563
+ Ref: https://github.com/scikit-image/scikit-image/blob/main/skimage/util/noise.py#L37-L219
564
+
565
+ Args:
566
+ img (Numpy array): Input image, shape (h, w, c), range [0, 1], float32.
567
+ scale (float): Noise scale. Default: 1.0.
568
+ gray_noise (bool): Whether generate gray noise. Default: False.
569
+
570
+ Returns:
571
+ (Numpy array): Returned noisy image, shape (h, w, c), range[0, 1],
572
+ float32.
573
+ """
574
+ if gray_noise:
575
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
576
+ # round and clip image for counting vals correctly
577
+ img = np.clip((img * 255.0).round(), 0, 255) / 255.
578
+ vals = len(np.unique(img))
579
+ vals = 2**np.ceil(np.log2(vals))
580
+ out = np.float32(np.random.poisson(img * vals) / float(vals))
581
+ noise = out - img
582
+ if gray_noise:
583
+ noise = np.repeat(noise[:, :, np.newaxis], 3, axis=2)
584
+ return noise * scale
585
+
586
+
587
+ def add_poisson_noise(img, scale=1.0, clip=True, rounds=False, gray_noise=False):
588
+ """Add poisson noise.
589
+
590
+ Args:
591
+ img (Numpy array): Input image, shape (h, w, c), range [0, 1], float32.
592
+ scale (float): Noise scale. Default: 1.0.
593
+ gray_noise (bool): Whether generate gray noise. Default: False.
594
+
595
+ Returns:
596
+ (Numpy array): Returned noisy image, shape (h, w, c), range[0, 1],
597
+ float32.
598
+ """
599
+ noise = generate_poisson_noise(img, scale, gray_noise)
600
+ out = img + noise
601
+ if clip and rounds:
602
+ out = np.clip((out * 255.0).round(), 0, 255) / 255.
603
+ elif clip:
604
+ out = np.clip(out, 0, 1)
605
+ elif rounds:
606
+ out = (out * 255.0).round() / 255.
607
+ return out
608
+
609
+
610
+ def generate_poisson_noise_pt(img, scale=1.0, gray_noise=0):
611
+ """Generate a batch of poisson noise (PyTorch version)
612
+
613
+ Args:
614
+ img (Tensor): Input image, shape (b, c, h, w), range [0, 1], float32.
615
+ scale (float | Tensor): Noise scale. Number or Tensor with shape (b).
616
+ Default: 1.0.
617
+ gray_noise (float | Tensor): 0-1 number or Tensor with shape (b).
618
+ 0 for False, 1 for True. Default: 0.
619
+
620
+ Returns:
621
+ (Tensor): Returned noisy image, shape (b, c, h, w), range[0, 1],
622
+ float32.
623
+ """
624
+ b, _, h, w = img.size()
625
+ if isinstance(gray_noise, (float, int)):
626
+ cal_gray_noise = gray_noise > 0
627
+ else:
628
+ gray_noise = gray_noise.view(b, 1, 1, 1)
629
+ cal_gray_noise = torch.sum(gray_noise) > 0
630
+ if cal_gray_noise:
631
+ img_gray = rgb_to_grayscale(img, num_output_channels=1)
632
+ # round and clip image for counting vals correctly
633
+ img_gray = torch.clamp((img_gray * 255.0).round(), 0, 255) / 255.
634
+ # use for-loop to get the unique values for each sample
635
+ vals_list = [len(torch.unique(img_gray[i, :, :, :])) for i in range(b)]
636
+ vals_list = [2**np.ceil(np.log2(vals)) for vals in vals_list]
637
+ vals = img_gray.new_tensor(vals_list).view(b, 1, 1, 1)
638
+ out = torch.poisson(img_gray * vals) / vals
639
+ noise_gray = out - img_gray
640
+ noise_gray = noise_gray.expand(b, 3, h, w)
641
+
642
+ # always calculate color noise
643
+ # round and clip image for counting vals correctly
644
+ img = torch.clamp((img * 255.0).round(), 0, 255) / 255.
645
+ # use for-loop to get the unique values for each sample
646
+ vals_list = [len(torch.unique(img[i, :, :, :])) for i in range(b)]
647
+ vals_list = [2**np.ceil(np.log2(vals)) for vals in vals_list]
648
+ vals = img.new_tensor(vals_list).view(b, 1, 1, 1)
649
+ out = torch.poisson(img * vals) / vals
650
+ noise = out - img
651
+ if cal_gray_noise:
652
+ noise = noise * (1 - gray_noise) + noise_gray * gray_noise
653
+ if not isinstance(scale, (float, int)):
654
+ scale = scale.view(b, 1, 1, 1)
655
+ return noise * scale
656
+
657
+
658
+ def add_poisson_noise_pt(img, scale=1.0, clip=True, rounds=False, gray_noise=0):
659
+ """Add poisson noise to a batch of images (PyTorch version).
660
+
661
+ Args:
662
+ img (Tensor): Input image, shape (b, c, h, w), range [0, 1], float32.
663
+ scale (float | Tensor): Noise scale. Number or Tensor with shape (b).
664
+ Default: 1.0.
665
+ gray_noise (float | Tensor): 0-1 number or Tensor with shape (b).
666
+ 0 for False, 1 for True. Default: 0.
667
+
668
+ Returns:
669
+ (Tensor): Returned noisy image, shape (b, c, h, w), range[0, 1],
670
+ float32.
671
+ """
672
+ noise = generate_poisson_noise_pt(img, scale, gray_noise)
673
+ out = img + noise
674
+ if clip and rounds:
675
+ out = torch.clamp((out * 255.0).round(), 0, 255) / 255.
676
+ elif clip:
677
+ out = torch.clamp(out, 0, 1)
678
+ elif rounds:
679
+ out = (out * 255.0).round() / 255.
680
+ return out
681
+
682
+
683
+ # ----------------------- Random Poisson (Shot) Noise ----------------------- #
684
+
685
+
686
+ def random_generate_poisson_noise(img, scale_range=(0, 1.0), gray_prob=0):
687
+ scale = np.random.uniform(scale_range[0], scale_range[1])
688
+ if np.random.uniform() < gray_prob:
689
+ gray_noise = True
690
+ else:
691
+ gray_noise = False
692
+ return generate_poisson_noise(img, scale, gray_noise)
693
+
694
+
695
+ def random_add_poisson_noise(img, scale_range=(0, 1.0), gray_prob=0, clip=True, rounds=False):
696
+ noise = random_generate_poisson_noise(img, scale_range, gray_prob)
697
+ out = img + noise
698
+ if clip and rounds:
699
+ out = np.clip((out * 255.0).round(), 0, 255) / 255.
700
+ elif clip:
701
+ out = np.clip(out, 0, 1)
702
+ elif rounds:
703
+ out = (out * 255.0).round() / 255.
704
+ return out
705
+
706
+
707
+ def random_generate_poisson_noise_pt(img, scale_range=(0, 1.0), gray_prob=0):
708
+ scale = torch.rand(
709
+ img.size(0), dtype=img.dtype, device=img.device) * (scale_range[1] - scale_range[0]) + scale_range[0]
710
+ gray_noise = torch.rand(img.size(0), dtype=img.dtype, device=img.device)
711
+ gray_noise = (gray_noise < gray_prob).float()
712
+ return generate_poisson_noise_pt(img, scale, gray_noise)
713
+
714
+
715
+ def random_add_poisson_noise_pt(img, scale_range=(0, 1.0), gray_prob=0, clip=True, rounds=False):
716
+ noise = random_generate_poisson_noise_pt(img, scale_range, gray_prob)
717
+ out = img + noise
718
+ if clip and rounds:
719
+ out = torch.clamp((out * 255.0).round(), 0, 255) / 255.
720
+ elif clip:
721
+ out = torch.clamp(out, 0, 1)
722
+ elif rounds:
723
+ out = (out * 255.0).round() / 255.
724
+ return out
725
+
726
+
727
+ # ------------------------------------------------------------------------ #
728
+ # --------------------------- JPEG compression --------------------------- #
729
+ # ------------------------------------------------------------------------ #
730
+
731
+
732
+ def add_jpg_compression(img, quality=90):
733
+ """Add JPG compression artifacts.
734
+
735
+ Args:
736
+ img (Numpy array): Input image, shape (h, w, c), range [0, 1], float32.
737
+ quality (float): JPG compression quality. 0 for lowest quality, 100 for
738
+ best quality. Default: 90.
739
+
740
+ Returns:
741
+ (Numpy array): Returned image after JPG, shape (h, w, c), range[0, 1],
742
+ float32.
743
+ """
744
+ img = np.clip(img, 0, 1)
745
+ encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]
746
+ _, encimg = cv2.imencode('.jpg', img * 255., encode_param)
747
+ img = np.float32(cv2.imdecode(encimg, 1)) / 255.
748
+ return img
749
+
750
+
751
+ def random_add_jpg_compression(img, quality_range=(90, 100)):
752
+ """Randomly add JPG compression artifacts.
753
+
754
+ Args:
755
+ img (Numpy array): Input image, shape (h, w, c), range [0, 1], float32.
756
+ quality_range (tuple[float] | list[float]): JPG compression quality
757
+ range. 0 for lowest quality, 100 for best quality.
758
+ Default: (90, 100).
759
+
760
+ Returns:
761
+ (Numpy array): Returned image after JPG, shape (h, w, c), range[0, 1],
762
+ float32.
763
+ """
764
+ quality = np.random.uniform(quality_range[0], quality_range[1])
765
+ return add_jpg_compression(img, quality)
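A minimal usage sketch of the kernel utilities above, mirroring how the dataset code builds its blur and sinc kernels (the import path `utils.degradations` and the concrete ranges below are assumptions for illustration, not the training defaults):

    import math
    import numpy as np
    import cv2
    from utils.degradations import random_mixed_kernels, circular_lowpass_kernel

    # 21x21 mixed blur kernel, drawn from the same kernel families as the first degradation
    kernel = random_mixed_kernels(
        kernel_list=['iso', 'aniso', 'generalized_iso', 'generalized_aniso', 'plateau_iso', 'plateau_aniso'],
        kernel_prob=[0.45, 0.25, 0.12, 0.03, 0.12, 0.03],
        kernel_size=21,
        sigma_x_range=(0.2, 3), sigma_y_range=(0.2, 3),
        rotation_range=(-math.pi, math.pi),
        betag_range=(0.5, 4), betap_range=(1, 2),
        noise_range=None)

    # final 2D sinc kernel with a random cutoff frequency, padded to 21x21
    omega_c = np.random.uniform(np.pi / 3, np.pi)
    sinc_kernel = circular_lowpass_kernel(omega_c, kernel_size=13, pad_to=21)

    img = np.random.rand(64, 112, 3).astype(np.float32)  # placeholder HR image in [0, 1]
    out = cv2.filter2D(img, -1, kernel)                   # blur
    out = cv2.filter2D(out, -1, sinc_kernel)              # ringing / overshoot artifacts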
utils/preprocess.py CHANGED
@@ -9,8 +9,56 @@ from dask import bag as dbag
9
  from dask.diagnostics import ProgressBar
10
  from typing import Tuple
11
  from PIL import Image
12
-
13
-
14
 
15
  # Dataset statistics that I gathered in development
16
  #-----------------------------------#
@@ -47,7 +95,9 @@ def parseLabel(label: str) -> Tuple[np.ndarray, np.ndarray]:
47
  #-----------------------------------#
48
  # 根据车牌坐标裁剪出车牌图像
49
  #-----------------------------------#
50
-
 
 
51
 
52
  def cropImage(image: np.ndarray, coor: np.ndarray, center: np.ndarray) -> np.ndarray:
53
  maxW = np.max(coor[:, 0] - center[0]) # max plate width
@@ -63,7 +113,7 @@ def cropImage(image: np.ndarray, coor: np.ndarray, center: np.ndarray) -> np.nda
63
  maxW = w//2
64
  found = True
65
  break
66
- if not found: # 车牌太大则丢弃
67
  return np.array([])
68
  elif center[1]-maxH < 0 or center[1]+maxH >= image.shape[1] or \
69
  center[0]-maxW < 0 or center[0] + maxW >= image.shape[0]:
@@ -107,10 +157,10 @@ def processImage(file: str, inputDir: str, outputDir: str, subFolder: str) -> in
107
  return 0
108
  mean = np.mean(plate/255.0)
109
  std = np.std(plate/255.0)
110
- # 亮度不好的
111
  if mean <= IMAGE_MEAN - 10*IMAGE_MEAN_STD or mean >= IMAGE_MEAN + 10*IMAGE_MEAN_STD:
112
  return 0
113
- # 低对比度的
114
  if std <= IMG_STD - 10*IMG_STD_STD:
115
  return 0
116
  status = saveImage(plate, file, outputDir)
@@ -126,26 +176,27 @@ def main(argv):
126
  for shape in ['64_32', '128_64', '192_96']:
127
  os.mkdir(os.path.join(outputDir, shape))
128
  except OSError:
129
- pass # 地址已经存在
130
- client = LocalCluster(n_workers=jobNum, threads_per_worker=5) # 开启多线程
131
- for subFolder in ['ccpd_base', 'ccpd_db', 'ccpd_fn', 'ccpd_rotate', 'ccpd_tilt', 'ccpd_weather']:
 
132
  fileList = os.listdir(os.path.join(inputDir, subFolder))
133
  print('* {} images found in {}. Start processing ...'.format(len(fileList), subFolder))
134
  toDo = dbag.from_sequence(fileList, npartitions=jobNum*30).persist() # persist the bag in memory
135
  toDo = toDo.map(processImage, inputDir, outputDir, subFolder)
136
  pbar = ProgressBar(minimum=2.0)
137
- pbar.register() # 登记所有的计算,以便更好地跟踪
138
  result = toDo.compute()
139
  print('* image cropped: {}. Done ...'.format(sum(result)))
140
- client.close() # 关闭集群
141
 
142
 
143
  if __name__ == "__main__":
144
  parser = argparse.ArgumentParser(description=__doc__)
145
  add_arg = functools.partial(add_arguments, argparser=parser)
146
  add_arg('jobNum', int, 4, '处理图片的线程数')
147
- add_arg('inputDir', str, 'datasets/CCPD2019', '输入图片目录')
148
- add_arg('outputDir', str, 'datasets/CCPD2019_new', '保存图片目录')
149
  args = parser.parse_args()
150
  print_arguments(args)
151
  main(args)
9
  from dask.diagnostics import ProgressBar
10
  from typing import Tuple
11
  from PIL import Image
12
+ import cv2
13
+ #-----------------------------------#
14
+ # 对四个点坐标排序
15
+ #-----------------------------------#
16
+ def order_points(pts):
17
+ # 一共4个坐标点
18
+ rect = np.zeros((4, 2), dtype = "float32")
19
+
20
+ # 按顺序找到对应坐标0123分别是 左上,右上,右下,左下
21
+ # 计算左上,右下
22
+ s = pts.sum(axis = 1)
23
+ rect[0] = pts[np.argmin(s)]
24
+ rect[2] = pts[np.argmax(s)]
25
+
26
+ # 计算右上和左下
27
+ diff = np.diff(pts, axis = 1)
28
+ rect[1] = pts[np.argmin(diff)]
29
+ rect[3] = pts[np.argmax(diff)]
30
+
31
+ return rect
32
+ #-----------------------------------#
33
+ # 透射变换纠正车牌图片
34
+ #-----------------------------------#
35
+ def four_point_transform(image, pts):
36
+ # 获取输入坐标点
37
+ rect = order_points(pts)
38
+ (tl, tr, br, bl) = rect
39
+
40
+ # 计算输入的w和h值
41
+ widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
42
+ widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
43
+ maxWidth = max(int(widthA), int(widthB))
44
+
45
+ heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
46
+ heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
47
+ maxHeight = max(int(heightA), int(heightB))
48
+
49
+ # 变换后对应坐标位置
50
+ dst = np.array([
51
+ [0, 0],
52
+ [maxWidth - 1, 0],
53
+ [maxWidth - 1, maxHeight - 1],
54
+ [0, maxHeight - 1]], dtype = "float32")
55
+
56
+ # 计算变换矩阵
57
+ M = cv2.getPerspectiveTransform(rect, dst)
58
+ warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
59
+
60
+ # 返回变换后结果
61
+ return warped
62
 
63
  # Dataset statistics that I gathered in development
64
  #-----------------------------------#
95
  #-----------------------------------#
96
  # 根据车牌坐标裁剪出车牌图像
97
  #-----------------------------------#
98
+ # def cropImage(image: np.ndarray, coor: np.ndarray, center: np.ndarray) -> np.ndarray:
99
+ # image = four_point_transform(image, coor)
100
+ # return image
101
 
102
  def cropImage(image: np.ndarray, coor: np.ndarray, center: np.ndarray) -> np.ndarray:
103
  maxW = np.max(coor[:, 0] - center[0]) # max plate width
113
  maxW = w//2
114
  found = True
115
  break
116
+ if not found: # plate too large, discard
117
  return np.array([])
118
  elif center[1]-maxH < 0 or center[1]+maxH >= image.shape[1] or \
119
  center[0]-maxW < 0 or center[0] + maxW >= image.shape[0]:
157
  return 0
158
  mean = np.mean(plate/255.0)
159
  std = np.std(plate/255.0)
160
+ # bad brightness
161
  if mean <= IMAGE_MEAN - 10*IMAGE_MEAN_STD or mean >= IMAGE_MEAN + 10*IMAGE_MEAN_STD:
162
  return 0
163
+ # low contrast
164
  if std <= IMG_STD - 10*IMG_STD_STD:
165
  return 0
166
  status = saveImage(plate, file, outputDir)
176
  for shape in ['64_32', '128_64', '192_96']:
177
  os.mkdir(os.path.join(outputDir, shape))
178
  except OSError:
179
+ pass # path already exists
180
+ client = LocalCluster(n_workers=jobNum, threads_per_worker=5) # IO intensive, more threads
181
+ print('* number of workers:{}, \n* input dir:{}, \n* output dir:{}\n\n'.format(jobNum, inputDir, outputDir))
182
+ for subFolder in ['ccpd_green', 'ccpd_base', 'ccpd_db', 'ccpd_fn', 'ccpd_rotate', 'ccpd_tilt', 'ccpd_weather']:
183
  fileList = os.listdir(os.path.join(inputDir, subFolder))
184
  print('* {} images found in {}. Start processing ...'.format(len(fileList), subFolder))
185
  toDo = dbag.from_sequence(fileList, npartitions=jobNum*30).persist() # persist the bag in memory
186
  toDo = toDo.map(processImage, inputDir, outputDir, subFolder)
187
  pbar = ProgressBar(minimum=2.0)
188
+ pbar.register() # register all computations for better tracking
189
  result = toDo.compute()
190
  print('* image cropped: {}. Done ...'.format(sum(result)))
191
+ client.close() # shut down the cluster
192
 
193
 
194
  if __name__ == "__main__":
195
  parser = argparse.ArgumentParser(description=__doc__)
196
  add_arg = functools.partial(add_arguments, argparser=parser)
197
  add_arg('jobNum', int, 4, '处理图片的线程数')
198
+ add_arg('inputDir', str, 'datasets/CCPD2020', '输入图片目录')
199
+ add_arg('outputDir', str, 'datasets/CCPD2020_new', '保存图片目录')
200
  args = parser.parse_args()
201
  print_arguments(args)
202
  main(args)
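A minimal sketch of the perspective-correction helpers added above (order_points / four_point_transform) rectifying a tilted plate; the file name and corner coordinates are made up for illustration, and the helpers are assumed to be in scope:

    import cv2
    import numpy as np

    image = cv2.imread('ccpd_example.jpg')                       # hypothetical CCPD image
    corners = np.array([[410, 260], [640, 268], [652, 352],
                        [405, 345]], dtype='float32')            # 4 plate corners (x, y), any order
    plate = four_point_transform(image, corners)                 # axis-aligned, perspective-corrected crop
    cv2.imwrite('plate_rectified.jpg', plate)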
utils/transforms.py ADDED
@@ -0,0 +1,179 @@
1
+ import cv2
2
+ import random
3
+ import torch
4
+
5
+
6
+ def mod_crop(img, scale):
7
+ """Mod crop images, used during testing.
8
+
9
+ Args:
10
+ img (ndarray): Input image.
11
+ scale (int): Scale factor.
12
+
13
+ Returns:
14
+ ndarray: Result image.
15
+ """
16
+ img = img.copy()
17
+ if img.ndim in (2, 3):
18
+ h, w = img.shape[0], img.shape[1]
19
+ h_remainder, w_remainder = h % scale, w % scale
20
+ img = img[:h - h_remainder, :w - w_remainder, ...]
21
+ else:
22
+ raise ValueError(f'Wrong img ndim: {img.ndim}.')
23
+ return img
24
+
25
+
26
+ def paired_random_crop(img_gts, img_lqs, gt_patch_size, scale, gt_path=None):
27
+ """Paired random crop. Support Numpy array and Tensor inputs.
28
+
29
+ It crops lists of lq and gt images with corresponding locations.
30
+
31
+ Args:
32
+ img_gts (list[ndarray] | ndarray | list[Tensor] | Tensor): GT images. Note that all images
33
+ should have the same shape. If the input is an ndarray, it will
34
+ be transformed to a list containing itself.
35
+ img_lqs (list[ndarray] | ndarray): LQ images. Note that all images
36
+ should have the same shape. If the input is an ndarray, it will
37
+ be transformed to a list containing itself.
38
+ gt_patch_size (int): GT patch size.
39
+ scale (int): Scale factor.
40
+ gt_path (str): Path to ground-truth. Default: None.
41
+
42
+ Returns:
43
+ list[ndarray] | ndarray: GT images and LQ images. If returned results
44
+ only have one element, just return ndarray.
45
+ """
46
+
47
+ if not isinstance(img_gts, list):
48
+ img_gts = [img_gts]
49
+ if not isinstance(img_lqs, list):
50
+ img_lqs = [img_lqs]
51
+
52
+ # determine input type: Numpy array or Tensor
53
+ input_type = 'Tensor' if torch.is_tensor(img_gts[0]) else 'Numpy'
54
+
55
+ if input_type == 'Tensor':
56
+ h_lq, w_lq = img_lqs[0].size()[-2:]
57
+ h_gt, w_gt = img_gts[0].size()[-2:]
58
+ else:
59
+ h_lq, w_lq = img_lqs[0].shape[0:2]
60
+ h_gt, w_gt = img_gts[0].shape[0:2]
61
+ lq_patch_size = gt_patch_size // scale
62
+
63
+ if h_gt != h_lq * scale or w_gt != w_lq * scale:
64
+ raise ValueError(f'Scale mismatches. GT ({h_gt}, {w_gt}) is not {scale}x ',
65
+ f'multiplication of LQ ({h_lq}, {w_lq}).')
66
+ if h_lq < lq_patch_size or w_lq < lq_patch_size:
67
+ raise ValueError(f'LQ ({h_lq}, {w_lq}) is smaller than patch size '
68
+ f'({lq_patch_size}, {lq_patch_size}). '
69
+ f'Please remove {gt_path}.')
70
+
71
+ # randomly choose top and left coordinates for lq patch
72
+ top = random.randint(0, h_lq - lq_patch_size)
73
+ left = random.randint(0, w_lq - lq_patch_size)
74
+
75
+ # crop lq patch
76
+ if input_type == 'Tensor':
77
+ img_lqs = [v[:, :, top:top + lq_patch_size, left:left + lq_patch_size] for v in img_lqs]
78
+ else:
79
+ img_lqs = [v[top:top + lq_patch_size, left:left + lq_patch_size, ...] for v in img_lqs]
80
+
81
+ # crop corresponding gt patch
82
+ top_gt, left_gt = int(top * scale), int(left * scale)
83
+ if input_type == 'Tensor':
84
+ img_gts = [v[:, :, top_gt:top_gt + gt_patch_size, left_gt:left_gt + gt_patch_size] for v in img_gts]
85
+ else:
86
+ img_gts = [v[top_gt:top_gt + gt_patch_size, left_gt:left_gt + gt_patch_size, ...] for v in img_gts]
87
+ if len(img_gts) == 1:
88
+ img_gts = img_gts[0]
89
+ if len(img_lqs) == 1:
90
+ img_lqs = img_lqs[0]
91
+ return img_gts, img_lqs
92
+
93
+
94
+ def augment(imgs, hflip=True, rotation=True, flows=None, return_status=False):
95
+ """Augment: horizontal flips OR rotate (0, 90, 180, 270 degrees).
96
+
97
+ We use vertical flip and transpose for rotation implementation.
98
+ All the images in the list use the same augmentation.
99
+
100
+ Args:
101
+ imgs (list[ndarray] | ndarray): Images to be augmented. If the input
102
+ is an ndarray, it will be transformed to a list.
103
+ hflip (bool): Horizontal flip. Default: True.
104
+ rotation (bool): Rotation. Default: True.
105
+ flows (list[ndarray]): Flows to be augmented. If the input is an
106
+ ndarray, it will be transformed to a list.
107
+ Dimension is (h, w, 2). Default: None.
108
+ return_status (bool): Return the status of flip and rotation.
109
+ Default: False.
110
+
111
+ Returns:
112
+ list[ndarray] | ndarray: Augmented images and flows. If returned
113
+ results only have one element, just return ndarray.
114
+
115
+ """
116
+ hflip = hflip and random.random() < 0.5
117
+ vflip = rotation and random.random() < 0.5
118
+ rot90 = rotation and random.random() < 0.5
119
+
120
+ def _augment(img):
121
+ if hflip: # horizontal
122
+ cv2.flip(img, 1, img)
123
+ if vflip: # vertical
124
+ cv2.flip(img, 0, img)
125
+ if rot90:
126
+ img = img.transpose(1, 0, 2)
127
+ return img
128
+
129
+ def _augment_flow(flow):
130
+ if hflip: # horizontal
131
+ cv2.flip(flow, 1, flow)
132
+ flow[:, :, 0] *= -1
133
+ if vflip: # vertical
134
+ cv2.flip(flow, 0, flow)
135
+ flow[:, :, 1] *= -1
136
+ if rot90:
137
+ flow = flow.transpose(1, 0, 2)
138
+ flow = flow[:, :, [1, 0]]
139
+ return flow
140
+
141
+ if not isinstance(imgs, list):
142
+ imgs = [imgs]
143
+ imgs = [_augment(img) for img in imgs]
144
+ if len(imgs) == 1:
145
+ imgs = imgs[0]
146
+
147
+ if flows is not None:
148
+ if not isinstance(flows, list):
149
+ flows = [flows]
150
+ flows = [_augment_flow(flow) for flow in flows]
151
+ if len(flows) == 1:
152
+ flows = flows[0]
153
+ return imgs, flows
154
+ else:
155
+ if return_status:
156
+ return imgs, (hflip, vflip, rot90)
157
+ else:
158
+ return imgs
159
+
160
+
161
+ def img_rotate(img, angle, center=None, scale=1.0):
162
+ """Rotate image.
163
+
164
+ Args:
165
+ img (ndarray): Image to be rotated.
166
+ angle (float): Rotation angle in degrees. Positive values mean
167
+ counter-clockwise rotation.
168
+ center (tuple[int]): Rotation center. If the center is None,
169
+ initialize it as the center of the image. Default: None.
170
+ scale (float): Isotropic scale factor. Default: 1.0.
171
+ """
172
+ (h, w) = img.shape[:2]
173
+
174
+ if center is None:
175
+ center = (w // 2, h // 2)
176
+
177
+ matrix = cv2.getRotationMatrix2D(center, angle, scale)
178
+ rotated_img = cv2.warpAffine(img, matrix, (w, h))
179
+ return rotated_img
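A minimal sketch of the cropping and augmentation helpers above as they would be used in a paired SR data pipeline (the module path `utils.transforms`, patch size, and scale below are illustrative assumptions):

    import numpy as np
    from utils.transforms import paired_random_crop, augment

    scale = 4
    img_gt = np.random.rand(128, 224, 3).astype(np.float32)                    # placeholder HR image
    img_lq = np.random.rand(128 // scale, 224 // scale, 3).astype(np.float32)  # placeholder LR image

    # crop matching 64x64 (GT) / 16x16 (LQ) patches at corresponding locations
    gt_patch, lq_patch = paired_random_crop(img_gt, img_lq, gt_patch_size=64, scale=scale)

    # the same random flip / rotation is applied to both patches
    gt_patch, lq_patch = augment([gt_patch, lq_patch], hflip=True, rotation=True)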
utils/utils.py CHANGED
@@ -2,6 +2,8 @@ import itertools
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  import torch
 
 
5
  import distutils.util
6
 
7
  def show_result(num_epoch, G_net, imgs_lr, imgs_hr):
@@ -57,4 +59,104 @@ def add_arguments(argname, type, default, help, argparser, **kwargs):
57
  default=default,
58
  type=type,
59
  help=help + ' 默认: %(default)s.',
60
- **kwargs)
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  import torch
5
+ from torch.nn import functional as F
6
+ import cv2
7
  import distutils.util
8
 
9
  def show_result(num_epoch, G_net, imgs_lr, imgs_hr):
59
  default=default,
60
  type=type,
61
  help=help + ' 默认: %(default)s.',
62
+ **kwargs)
63
+
64
+ def filter2D(img, kernel):
65
+ """PyTorch version of cv2.filter2D
66
+
67
+ Args:
68
+ img (Tensor): (b, c, h, w)
69
+ kernel (Tensor): (b, k, k)
70
+ """
71
+ k = kernel.size(-1)
72
+ b, c, h, w = img.size()
73
+ if k % 2 == 1:
74
+ img = F.pad(img, (k // 2, k // 2, k // 2, k // 2), mode='reflect')
75
+ else:
76
+ raise ValueError('Wrong kernel size')
77
+
78
+ ph, pw = img.size()[-2:]
79
+
80
+ if kernel.size(0) == 1:
81
+ # apply the same kernel to all batch images
82
+ img = img.view(b * c, 1, ph, pw)
83
+ kernel = kernel.view(1, 1, k, k)
84
+ return F.conv2d(img, kernel, padding=0).view(b, c, h, w)
85
+ else:
86
+ img = img.view(1, b * c, ph, pw)
87
+ kernel = kernel.view(b, 1, k, k).repeat(1, c, 1, 1).view(b * c, 1, k, k)
88
+ return F.conv2d(img, kernel, groups=b * c).view(b, c, h, w)
89
+
90
+
91
+ def usm_sharp(img, weight=0.5, radius=50, threshold=10):
92
+ """USM sharpening.
93
+
94
+ Input image: I; Blurry image: B.
95
+ 1. sharp = I + weight * (I - B)
96
+ 2. Mask = 1 if abs(I - B) > threshold, else: 0
97
+ 3. Blur the mask to get a soft mask
98
+ 4. Out = Mask * sharp + (1 - Mask) * I
99
+
100
+
101
+ Args:
102
+ img (Numpy array): Input image, HWC, BGR; float32, [0, 1].
103
+ weight (float): Sharp weight. Default: 0.5.
104
+ radius (float): Kernel size of Gaussian blur. Default: 50.
105
+ threshold (int):
106
+ """
107
+ if radius % 2 == 0:
108
+ radius += 1
109
+ blur = cv2.GaussianBlur(img, (radius, radius), 0)
110
+ residual = img - blur
111
+ mask = np.abs(residual) * 255 > threshold
112
+ mask = mask.astype('float32')
113
+ soft_mask = cv2.GaussianBlur(mask, (radius, radius), 0)
114
+
115
+ sharp = img + weight * residual
116
+ sharp = np.clip(sharp, 0, 1)
117
+ return soft_mask * sharp + (1 - soft_mask) * img
118
+
119
+
120
+ class USMSharp(torch.nn.Module):
121
+
122
+ def __init__(self, radius=50, sigma=0):
123
+ super(USMSharp, self).__init__()
124
+ if radius % 2 == 0:
125
+ radius += 1
126
+ self.radius = radius
127
+ kernel = cv2.getGaussianKernel(radius, sigma)
128
+ kernel = torch.FloatTensor(np.dot(kernel, kernel.transpose())).unsqueeze_(0)
129
+ self.register_buffer('kernel', kernel)
130
+
131
+ def forward(self, img, weight=0.5, threshold=10):
132
+ blur = filter2D(img, self.kernel)
133
+ residual = img - blur
134
+
135
+ mask = torch.abs(residual) * 255 > threshold
136
+ mask = mask.float()
137
+ soft_mask = filter2D(mask, self.kernel)
138
+ sharp = img + weight * residual
139
+ sharp = torch.clip(sharp, 0, 1)
140
+ return soft_mask * sharp + (1 - soft_mask) * img
141
+
142
+ class USMSharp_npy():
143
+
144
+ def __init__(self, radius=50, sigma=0):
145
+ super(USMSharp_npy, self).__init__()
146
+ if radius % 2 == 0:
147
+ radius += 1
148
+ self.radius = radius
149
+ kernel = cv2.getGaussianKernel(radius, sigma)
150
+ self.kernel = np.dot(kernel, kernel.transpose()).astype(np.float32)
151
+
152
+ def filt(self, img, weight=0.5, threshold=10):
153
+ blur = cv2.filter2D(img, -1, self.kernel)
154
+ residual = img - blur
155
+
156
+ mask = np.abs(residual) * 255 > threshold
157
+ mask = mask.astype(np.float32)
158
+ soft_mask = cv2.filter2D(mask, -1, self.kernel)
159
+ sharp = img + weight * residual
160
+ sharp = np.clip(sharp, 0, 1)
161
+ return soft_mask * sharp + (1 - soft_mask) * img
162
+
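A minimal sketch of the USM sharpening utilities above, as applied to a ground-truth image before training (a float32 HWC image in [0, 1] and the import path `utils.utils` are assumed):

    import numpy as np
    from utils.utils import USMSharp_npy

    usmsharp = USMSharp_npy()                                  # radius 50 is bumped to 51 internally
    img_gt = np.random.rand(128, 224, 3).astype(np.float32)   # placeholder HR image
    gt = usmsharp.filt(img_gt, weight=0.5, threshold=10)      # sharpened target, still in [0, 1]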
utils/utils_fit.py CHANGED
@@ -1,11 +1,11 @@
1
  import torch
2
  from tqdm import tqdm
3
 
4
- from .utils import show_result, get_lr
5
  from .utils_metrics import PSNR, SSIM
6
 
7
 
8
- def fit_one_epoch(G_model_train, D_model_train, G_model, D_model, VGG_feature_model, G_optimizer, D_optimizer, BCE_loss, MSE_loss, epoch, epoch_size, gen, Epoch, cuda, batch_size, save_interval):
9
  G_total_loss = 0
10
  D_total_loss = 0
11
  G_total_PSNR = 0
@@ -28,33 +28,38 @@ def fit_one_epoch(G_model_train, D_model_train, G_model, D_model, VGG_feature_mo
28
  #-------------------------------------------------#
29
  D_optimizer.zero_grad()
30
 
31
- D_result = D_model_train(hr_images)
32
- D_real_loss = BCE_loss(D_result, y_real)
33
- D_real_loss.backward()
34
 
35
  G_result = G_model_train(lr_images)
36
- D_result = D_model_train(G_result).squeeze()
37
- D_fake_loss = BCE_loss(D_result, y_fake)
38
- D_fake_loss.backward()
 
 
 
 
39
 
40
  D_optimizer.step()
41
 
42
- D_train_loss = D_real_loss + D_fake_loss
43
-
44
  #-------------------------------------------------#
45
  # 训练生成器
46
  #-------------------------------------------------#
47
  G_optimizer.zero_grad()
48
-
49
  G_result = G_model_train(lr_images)
50
- image_loss = MSE_loss(G_result, hr_images)
51
 
52
- D_result = D_model_train(G_result).squeeze()
53
- adversarial_loss = BCE_loss(D_result, y_real)
 
 
 
 
 
54
 
55
- perception_loss = MSE_loss(VGG_feature_model(G_result), VGG_feature_model(hr_images))
56
 
57
- G_train_loss = image_loss + 1e-3 * adversarial_loss + 2e-6 * perception_loss
58
 
59
  G_train_loss.backward()
60
  G_optimizer.step()
1
  import torch
2
  from tqdm import tqdm
3
 
4
+ from .utils import get_lr, show_result
5
  from .utils_metrics import PSNR, SSIM
6
 
7
 
8
+ def fit_one_epoch(G_model_train, D_model_train, G_model, D_model, VGG_feature_model, G_optimizer, D_optimizer, BCEWithLogits_loss, L1_loss, epoch, epoch_size, gen, Epoch, cuda, batch_size, save_interval):
9
  G_total_loss = 0
10
  D_total_loss = 0
11
  G_total_PSNR = 0
28
  #-------------------------------------------------#
29
  D_optimizer.zero_grad()
30
 
31
+ D_result_r = D_model_train(hr_images)
 
 
32
 
33
  G_result = G_model_train(lr_images)
34
+ D_result_f = D_model_train(G_result).squeeze()
35
+ D_result_rf = D_result_r - D_result_f.mean()
36
+ D_result_fr = D_result_f - D_result_r.mean()
37
+ D_train_loss_rf = BCEWithLogits_loss(D_result_rf, y_real)
38
+ D_train_loss_fr = BCEWithLogits_loss(D_result_fr, y_fake)
39
+ D_train_loss = (D_train_loss_rf + D_train_loss_fr) / 2
40
+ D_train_loss.backward()
41
 
42
  D_optimizer.step()
43
 
 
 
44
  #-------------------------------------------------#
45
  # 训练生成器
46
  #-------------------------------------------------#
47
  G_optimizer.zero_grad()
48
+
49
  G_result = G_model_train(lr_images)
50
+ image_loss = L1_loss(G_result, hr_images)
51
 
52
+ D_result_r = D_model_train(hr_images)
53
+ D_result_f = D_model_train(G_result).squeeze()
54
+ D_result_rf = D_result_r - D_result_f.mean()
55
+ D_result_fr = D_result_f - D_result_r.mean()
56
+ D_train_loss_rf = BCEWithLogits_loss(D_result_rf, y_fake)
57
+ D_train_loss_fr = BCEWithLogits_loss(D_result_fr, y_real)
58
+ adversarial_loss = (D_train_loss_rf + D_train_loss_fr) / 2
59
 
60
+ perception_loss = L1_loss(VGG_feature_model(G_result), VGG_feature_model(hr_images))
61
 
62
+ G_train_loss = image_loss + 1e-1 * adversarial_loss + 1e-1 * perception_loss
63
 
64
  G_train_loss.backward()
65
  G_optimizer.step()
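The discriminator and generator losses above follow the relativistic average GAN formulation used by ESRGAN; below is a standalone sketch of the discriminator side, where the tensor shapes and the criterion instance are placeholders for illustration:

    import torch
    from torch import nn

    bce_logits = nn.BCEWithLogitsLoss()
    d_real = torch.randn(8, 1)                 # discriminator logits for HR images
    d_fake = torch.randn(8, 1)                 # discriminator logits for generated images
    y_real = torch.ones_like(d_real)
    y_fake = torch.zeros_like(d_fake)

    # relativistic average logits: real relative to the mean fake score, and vice versa
    d_rf = d_real - d_fake.mean()
    d_fr = d_fake - d_real.mean()
    d_loss = (bce_logits(d_rf, y_real) + bce_logits(d_fr, y_fake)) / 2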