Upload realesrgan_model.py
Browse files
realesrgan/models/realesrgan_model.py
ADDED
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import random
|
3 |
+
import torch
|
4 |
+
from basicsr.data.degradations import random_add_gaussian_noise_pt, random_add_poisson_noise_pt
|
5 |
+
from basicsr.data.transforms import paired_random_crop
|
6 |
+
from basicsr.models.srgan_model import SRGANModel
|
7 |
+
from basicsr.utils import DiffJPEG, USMSharp
|
8 |
+
from basicsr.utils.img_process_util import filter2D
|
9 |
+
from basicsr.utils.registry import MODEL_REGISTRY
|
10 |
+
from collections import OrderedDict
|
11 |
+
from torch.nn import functional as F
|
12 |
+
|
13 |
+
|
14 |
+
@MODEL_REGISTRY.register()
class RealESRGANModel(SRGANModel):
    """RealESRGAN Model for Real-ESRGAN: Training Real-World Blind Super-Resolution with Pure Synthetic Data.

    It mainly performs:
    1. randomly synthesize LQ images in GPU tensors
    2. optimize the networks with GAN training.
    """

    def __init__(self, opt):
        """Build the model and the helper modules used for on-the-fly degradation.

        Args:
            opt (dict): Option mapping. ``queue_size`` is read here (default 180);
                the remaining degradation options are read in :meth:`feed_data`.
        """
        super(RealESRGANModel, self).__init__(opt)
        # Place the helper modules on the same device that feed_data moves the
        # data to, instead of unconditionally calling .cuda() (which fails on
        # CPU-only runs). NOTE(review): self.device is expected to be set by the
        # base model constructor - feed_data already relies on it.
        self.jpeger = DiffJPEG(differentiable=False).to(self.device)  # simulate JPEG compression artifacts
        self.usm_sharpener = USMSharp().to(self.device)  # do usm sharpening
        self.queue_size = opt.get('queue_size', 180)

    @torch.no_grad()
    def _dequeue_and_enqueue(self):
        """It is the training pair pool for increasing the diversity in a batch.

        Batch processing limits the diversity of synthetic degradations in a batch. For example, samples in a
        batch could not have different resize scaling factors. Therefore, we employ this training pair pool
        to increase the degradation diversity in a batch.

        While the pool is filling up, the current ``self.lq`` / ``self.gt`` are only copied into it.
        Once it is full, the pool is shuffled, its first ``b`` pairs are swapped with the current
        batch, and ``self.lq`` / ``self.gt`` are replaced by the pairs taken out of the pool.
        """
        # initialize
        b, c, h, w = self.lq.size()
        if not hasattr(self, 'queue_lr'):
            assert self.queue_size % b == 0, f'queue size {self.queue_size} should be divisible by batch size {b}'
            # Allocate directly on the model device (was: torch.zeros(...).cuda()).
            self.queue_lr = torch.zeros(self.queue_size, c, h, w, device=self.device)
            _, c, h, w = self.gt.size()
            self.queue_gt = torch.zeros(self.queue_size, c, h, w, device=self.device)
            self.queue_ptr = 0
        if self.queue_ptr == self.queue_size:  # the pool is full
            # do dequeue and enqueue
            # shuffle
            idx = torch.randperm(self.queue_size)
            self.queue_lr = self.queue_lr[idx]
            self.queue_gt = self.queue_gt[idx]
            # get first b samples
            lq_dequeue = self.queue_lr[0:b, :, :, :].clone()
            gt_dequeue = self.queue_gt[0:b, :, :, :].clone()
            # update the queue
            self.queue_lr[0:b, :, :, :] = self.lq.clone()
            self.queue_gt[0:b, :, :, :] = self.gt.clone()

            self.lq = lq_dequeue
            self.gt = gt_dequeue
        else:
            # only do enqueue
            self.queue_lr[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.lq.clone()
            self.queue_gt[self.queue_ptr:self.queue_ptr + b, :, :, :] = self.gt.clone()
            self.queue_ptr = self.queue_ptr + b

    def _sample_resize_scale(self, suffix=''):
        """Draw the random resize factor for one degradation stage.

        Args:
            suffix (str): '' for the first stage, '2' for the second one. It is appended to the
                option names, so the options read are ``resize_prob<suffix>`` and
                ``resize_range<suffix>``.

        Returns:
            A factor drawn uniformly from [1, range[1]] for 'up', from [range[0], 1] for 'down',
            or the constant 1 for 'keep'.
        """
        updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob' + suffix])[0]
        if updown_type == 'up':
            return np.random.uniform(1, self.opt['resize_range' + suffix][1])
        if updown_type == 'down':
            return np.random.uniform(self.opt['resize_range' + suffix][0], 1)
        return 1

    def _add_random_noise(self, img, suffix=''):
        """Add either Gaussian or Poisson noise to ``img`` for one degradation stage.

        Args:
            img (Tensor): Image batch to degrade.
            suffix (str): '' for the first stage, '2' for the second one; appended to the option
                names ``gray_noise_prob``, ``gaussian_noise_prob``, ``noise_range`` and
                ``poisson_scale_range``.

        Returns:
            Tensor: The noisy batch.
        """
        gray_noise_prob = self.opt['gray_noise_prob' + suffix]
        if np.random.uniform() < self.opt['gaussian_noise_prob' + suffix]:
            return random_add_gaussian_noise_pt(
                img, sigma_range=self.opt['noise_range' + suffix], clip=True, rounds=False, gray_prob=gray_noise_prob)
        return random_add_poisson_noise_pt(
            img,
            scale_range=self.opt['poisson_scale_range' + suffix],
            gray_prob=gray_noise_prob,
            clip=True,
            rounds=False)

    def _jpeg_compress(self, img, quality_range):
        """Apply JPEG compression with one random quality per sample.

        Args:
            img (Tensor): Image batch to compress.
            quality_range (sequence): Bounds unpacked into ``uniform_`` to draw the qualities.

        Returns:
            Tensor: Output of ``self.jpeger``.
        """
        jpeg_p = img.new_zeros(img.size(0)).uniform_(*quality_range)
        img = torch.clamp(img, 0, 1)  # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
        return self.jpeger(img, quality=jpeg_p)

    def _resize_back_and_sinc(self, img, size):
        """Resize ``img`` to ``size`` with a random interpolation mode, then apply the sinc filter.

        Args:
            img (Tensor): Image batch to resize.
            size (tuple): Target (height, width).

        Returns:
            Tensor: The resized batch filtered with ``self.sinc_kernel``.
        """
        mode = random.choice(['area', 'bilinear', 'bicubic'])
        img = F.interpolate(img, size=size, mode=mode)
        return filter2D(img, self.sinc_kernel)

    @torch.no_grad()
    def feed_data(self, data):
        """Accept data from dataloader, and then add two-order degradations to obtain LQ images.

        Args:
            data (dict): Batch from the dataloader. In training with ``high_order_degradation``
                enabled (the default) the keys ``gt``, ``kernel1``, ``kernel2`` and ``sinc_kernel``
                are read and ``self.lq`` is synthesized from ``gt``. Otherwise ``lq`` is read
                directly and ``gt`` is optional.
        """
        if self.is_train and self.opt.get('high_order_degradation', True):
            # training data synthesis
            self.gt = data['gt'].to(self.device)
            self.gt_usm = self.usm_sharpener(self.gt)

            self.kernel1 = data['kernel1'].to(self.device)
            self.kernel2 = data['kernel2'].to(self.device)
            self.sinc_kernel = data['sinc_kernel'].to(self.device)

            ori_h, ori_w = self.gt.size()[2:4]

            # ----------------------- The first degradation process ----------------------- #
            # blur
            out = filter2D(self.gt_usm, self.kernel1)
            # random resize
            scale = self._sample_resize_scale()
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(out, scale_factor=scale, mode=mode)
            # add noise
            out = self._add_random_noise(out)
            # JPEG compression
            out = self._jpeg_compress(out, self.opt['jpeg_range'])

            # ----------------------- The second degradation process ----------------------- #
            # blur
            if np.random.uniform() < self.opt['second_blur_prob']:
                out = filter2D(out, self.kernel2)
            # random resize
            scale = self._sample_resize_scale('2')
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(
                out, size=(int(ori_h / self.opt['scale'] * scale), int(ori_w / self.opt['scale'] * scale)), mode=mode)
            # add noise
            out = self._add_random_noise(out, '2')

            # JPEG compression + the final sinc filter
            # We also need to resize images to desired sizes. We group [resize back + sinc filter] together
            # as one operation.
            # We consider two orders:
            #   1. [resize back + sinc filter] + JPEG compression
            #   2. JPEG compression + [resize back + sinc filter]
            # Empirically, we find other combinations (sinc + JPEG + Resize) will introduce twisted lines.
            lq_size = (ori_h // self.opt['scale'], ori_w // self.opt['scale'])
            if np.random.uniform() < 0.5:
                out = self._resize_back_and_sinc(out, lq_size)
                out = self._jpeg_compress(out, self.opt['jpeg_range2'])
            else:
                out = self._jpeg_compress(out, self.opt['jpeg_range2'])
                out = self._resize_back_and_sinc(out, lq_size)

            # clamp and round
            self.lq = torch.clamp((out * 255.0).round(), 0, 255) / 255.

            # random crop
            gt_size = self.opt['gt_size']
            (self.gt, self.gt_usm), self.lq = paired_random_crop([self.gt, self.gt_usm], self.lq, gt_size,
                                                                 self.opt['scale'])

            # training pair pool
            self._dequeue_and_enqueue()
            # sharpen self.gt again, as we have changed the self.gt with self._dequeue_and_enqueue
            self.gt_usm = self.usm_sharpener(self.gt)
            self.lq = self.lq.contiguous()  # for the warning: grad and param do not obey the gradient layout contract
        else:
            # for paired training or validation
            self.lq = data['lq'].to(self.device)
            if 'gt' in data:
                self.gt = data['gt'].to(self.device)
                self.gt_usm = self.usm_sharpener(self.gt)

    def nondist_validation(self, dataloader, current_iter, tb_logger, save_img):
        """Run the parent validation with the synthetic degradation switched off.

        ``is_train`` is forced to ``False`` for the duration of the call so that
        :meth:`feed_data` takes its paired-data branch, and is put back to its previous
        value afterwards, even when validation raises.
        """
        # do not use the synthetic process during validation
        was_training = self.is_train
        self.is_train = False
        try:
            super(RealESRGANModel, self).nondist_validation(dataloader, current_iter, tb_logger, save_img)
        finally:
            self.is_train = was_training

    def optimize_parameters(self, current_iter):
        """Run one generator update (on eligible iterations) and one discriminator update.

        Args:
            current_iter (int): Current training iteration. The generator is only optimized when
                it is a multiple of ``self.net_d_iters`` and greater than ``self.net_d_init_iters``.
        """
        # usm sharpening: each loss uses the sharpened GT unless its option is explicitly False
        l1_gt = self.gt_usm
        percep_gt = self.gt_usm
        gan_gt = self.gt_usm
        if self.opt['l1_gt_usm'] is False:
            l1_gt = self.gt
        if self.opt['percep_gt_usm'] is False:
            percep_gt = self.gt
        if self.opt['gan_gt_usm'] is False:
            gan_gt = self.gt

        # optimize net_g (the discriminator is frozen while the generator is updated)
        for p in self.net_d.parameters():
            p.requires_grad = False

        self.optimizer_g.zero_grad()
        self.output = self.net_g(self.lq)

        l_g_total = 0
        loss_dict = OrderedDict()
        if (current_iter % self.net_d_iters == 0 and current_iter > self.net_d_init_iters):
            # pixel loss
            if self.cri_pix:
                l_g_pix = self.cri_pix(self.output, l1_gt)
                l_g_total += l_g_pix
                loss_dict['l_g_pix'] = l_g_pix
            # perceptual loss
            if self.cri_perceptual:
                l_g_percep, l_g_style = self.cri_perceptual(self.output, percep_gt)
                if l_g_percep is not None:
                    l_g_total += l_g_percep
                    loss_dict['l_g_percep'] = l_g_percep
                if l_g_style is not None:
                    l_g_total += l_g_style
                    loss_dict['l_g_style'] = l_g_style
            # gan loss
            fake_g_pred = self.net_d(self.output)
            l_g_gan = self.cri_gan(fake_g_pred, True, is_disc=False)
            l_g_total += l_g_gan
            loss_dict['l_g_gan'] = l_g_gan

            l_g_total.backward()
            self.optimizer_g.step()

        # optimize net_d
        for p in self.net_d.parameters():
            p.requires_grad = True

        self.optimizer_d.zero_grad()
        # real
        real_d_pred = self.net_d(gan_gt)
        l_d_real = self.cri_gan(real_d_pred, True, is_disc=True)
        loss_dict['l_d_real'] = l_d_real
        loss_dict['out_d_real'] = torch.mean(real_d_pred.detach())
        l_d_real.backward()
        # fake
        fake_d_pred = self.net_d(self.output.detach().clone())  # clone for pt1.9
        l_d_fake = self.cri_gan(fake_d_pred, False, is_disc=True)
        loss_dict['l_d_fake'] = l_d_fake
        loss_dict['out_d_fake'] = torch.mean(fake_d_pred.detach())
        l_d_fake.backward()
        self.optimizer_d.step()

        if self.ema_decay > 0:
            self.model_ema(decay=self.ema_decay)

        self.log_dict = self.reduce_loss_dict(loss_dict)
|