Spaces:

zhangap
/

S3Diff

Runtime error

App Files Files Community

S3Diff / basicsr /data /ffhq_degradation_dataset.py

zhangap

Upload 213 files

36d9761 verified 9 months ago

raw

history blame

10.5 kB

	import cv2
	import math
	import numpy as np
	import os.path as osp
	import torch
	import torch.utils.data as data
	import random
	from basicsr.data import degradations as degradations
	from basicsr.data.data_util import paths_from_folder
	from basicsr.data.transforms import augment
	from basicsr.utils import FileClient, get_root_logger, imfrombytes, img2tensor
	from basicsr.utils.registry import DATASET_REGISTRY
	from pathlib import Path
	from torchvision.transforms.functional import (adjust_brightness, adjust_contrast, adjust_hue, adjust_saturation,
	normalize)

	@DATASET_REGISTRY.register()
	class FFHQDegradationDataset(data.Dataset):
	    """FFHQ dataset for GFPGAN.

	    It reads high resolution images, and then generates low-quality (LQ)
	    images on-the-fly (blur -> downsample -> noise -> JPEG -> resize back,
	    followed by optional color jitter / grayscale conversion).

	    Args:
	        opt (dict): Config for train datasets. Keys read by this class:

	            Required:
	            dataroot_gt (str): Data root path for gt.
	            io_backend (dict): IO backend type and other kwarg.
	            mean (list | tuple): Image mean.
	            std (list | tuple): Image std.
	            out_size (int): Output size, used to mirror component
	                coordinates on horizontal flip.
	            use_hflip (bool): Whether to horizontally flip.
	            blur_kernel_size, kernel_list, kernel_prob, blur_sigma:
	                Passed to ``degradations.random_mixed_kernels``.
	            downsample_range (sequence of 2): Range of the downsample scale.
	            noise_range: Gaussian noise range, or None to skip noise.
	            jpeg_range: JPEG quality range, or None to skip compression.

	            Optional:
	            image_type (str): File extension for the disk backend.
	                Default: 'png' (written back into ``opt`` when missing).
	            crop_components (bool): Whether to return facial component
	                locations. Default: False.
	            component_path (str): Path of the pre-processed component file,
	                used when ``crop_components`` is set.
	            eye_enlarge_ratio (float): Multiplier for eye half-lengths.
	                Default: 1.
	            color_jitter_prob, color_jitter_shift (default 20),
	            color_jitter_pt_prob, gray_prob, gt_gray,
	            brightness, contrast, saturation, hue.
	    """

	    def __init__(self, opt):
	        super().__init__()
	        self.opt = opt
	        # file client (io backend); created lazily on the first __getitem__
	        self.file_client = None
	        self.io_backend_opt = opt['io_backend']
	        # same effect as "if missing, set to 'png'"
	        opt.setdefault('image_type', 'png')

	        self.gt_folder = opt['dataroot_gt']
	        self.mean = opt['mean']
	        self.std = opt['std']
	        self.out_size = opt['out_size']

	        self.crop_components = opt.get('crop_components', False)  # facial components
	        self.eye_enlarge_ratio = opt.get('eye_enlarge_ratio', 1)  # whether enlarge eye regions

	        if self.crop_components:
	            # load component list from a pre-process pth files
	            # NOTE(review): torch.load unpickles the file; only point
	            # 'component_path' at trusted files.
	            self.components_list = torch.load(opt.get('component_path'))

	        # file client (lmdb io backend)
	        if self.io_backend_opt['type'] == 'lmdb':
	            self.io_backend_opt['db_paths'] = self.gt_folder
	            if not self.gt_folder.endswith('.lmdb'):
	                raise ValueError(f"'dataroot_gt' should end with '.lmdb', but received {self.gt_folder}")
	            with open(osp.join(self.gt_folder, 'meta_info.txt')) as fin:
	                # keys are the part of each meta-info line before the first '.'
	                self.paths = [line.split('.')[0] for line in fin]
	        else:
	            # disk backend: scan file list from a folder
	            self.paths = sorted([str(x) for x in Path(self.gt_folder).glob('*.' + opt['image_type'])])

	        # degradation configurations
	        self.blur_kernel_size = opt['blur_kernel_size']
	        self.kernel_list = opt['kernel_list']
	        self.kernel_prob = opt['kernel_prob']
	        self.blur_sigma = opt['blur_sigma']
	        self.downsample_range = opt['downsample_range']
	        self.noise_range = opt['noise_range']
	        self.jpeg_range = opt['jpeg_range']

	        # color jitter
	        self.color_jitter_prob = opt.get('color_jitter_prob')
	        self.color_jitter_pt_prob = opt.get('color_jitter_pt_prob')
	        self.color_jitter_shift = opt.get('color_jitter_shift', 20)
	        # to gray
	        self.gray_prob = opt.get('gray_prob')

	        logger = get_root_logger()
	        logger.info(f'Blur: blur_kernel_size {self.blur_kernel_size}, sigma: [{", ".join(map(str, self.blur_sigma))}]')
	        logger.info(f'Downsample: downsample_range [{", ".join(map(str, self.downsample_range))}]')
	        logger.info(f'Noise: [{", ".join(map(str, self.noise_range))}]')
	        logger.info(f'JPEG compression: [{", ".join(map(str, self.jpeg_range))}]')

	        if self.color_jitter_prob is not None:
	            logger.info(f'Use random color jitter. Prob: {self.color_jitter_prob}, shift: {self.color_jitter_shift}')
	        if self.gray_prob is not None:
	            logger.info(f'Use random gray. Prob: {self.gray_prob}')
	        if self.color_jitter_shift is not None:
	            # the shift is configured on a 0-255 scale; images here are in [0, 1]
	            self.color_jitter_shift /= 255.

	    @staticmethod
	    def color_jitter(img, shift):
	        """jitter color: randomly jitter the RGB values, in numpy formats

	        Draws three offsets uniformly from [-shift, shift], adds them to
	        ``img`` (broadcast over the last axis) and clips the result to [0, 1].
	        """
	        jitter_val = np.random.uniform(-shift, shift, 3).astype(np.float32)
	        img = img + jitter_val
	        img = np.clip(img, 0, 1)
	        return img

	    @staticmethod
	    def color_jitter_pt(img, brightness, contrast, saturation, hue):
	        """jitter color: randomly jitter the brightness, contrast, saturation, and hue, in torch Tensor formats

	        The four adjustments are applied in a random order. Each of
	        ``brightness``/``contrast``/``saturation``/``hue`` is either None
	        (skip that adjustment) or a (low, high) pair from which the factor is
	        drawn uniformly.
	        """
	        fn_idx = torch.randperm(4)
	        for fn_id in fn_idx:
	            if fn_id == 0 and brightness is not None:
	                brightness_factor = torch.tensor(1.0).uniform_(brightness[0], brightness[1]).item()
	                img = adjust_brightness(img, brightness_factor)

	            if fn_id == 1 and contrast is not None:
	                contrast_factor = torch.tensor(1.0).uniform_(contrast[0], contrast[1]).item()
	                img = adjust_contrast(img, contrast_factor)

	            if fn_id == 2 and saturation is not None:
	                saturation_factor = torch.tensor(1.0).uniform_(saturation[0], saturation[1]).item()
	                img = adjust_saturation(img, saturation_factor)

	            if fn_id == 3 and hue is not None:
	                hue_factor = torch.tensor(1.0).uniform_(hue[0], hue[1]).item()
	                img = adjust_hue(img, hue_factor)
	        return img

	    def get_component_coordinates(self, index, status):
	        """Get facial component (left_eye, right_eye, mouth) coordinates from a pre-loaded pth file

	        Args:
	            index (int): Sample index; the entry is looked up under the
	                zero-padded 8-digit key ``f'{index:08d}'``.
	            status: Augmentation status; ``status[0]`` is truthy when the
	                image was horizontally flipped.

	        Returns:
	            list[Tensor]: Three float tensors (left_eye, right_eye, mouth),
	            each the concatenation of ``center - half_len + 1`` and
	            ``center + half_len``.
	        """
	        parts = ('left_eye', 'right_eye', 'mouth')
	        stored = self.components_list[f'{index:08d}']
	        # Work on per-call copies: mutating the cached entries would leave
	        # them flipped for every later access to the same index.
	        # Entries are indexed as [center_x, center_y, half_len].
	        components_bbox = {part: np.array(stored[part], copy=True) for part in parts}

	        if status[0]:  # hflip
	            # exchange right and left eye
	            components_bbox['left_eye'], components_bbox['right_eye'] = (
	                components_bbox['right_eye'], components_bbox['left_eye'])
	            # modify the width coordinate
	            for part in parts:
	                components_bbox[part][0] = self.out_size - components_bbox[part][0]

	        # get coordinates
	        locations = []
	        for part in parts:
	            mean = components_bbox[part][0:2]
	            half_len = components_bbox[part][2]
	            if 'eye' in part:
	                half_len = half_len * self.eye_enlarge_ratio
	            loc = np.hstack((mean - half_len + 1, mean + half_len))
	            locations.append(torch.from_numpy(loc).float())
	        return locations

	    def __getitem__(self, index):
	        """Load one GT image and synthesize its degraded LQ counterpart.

	        Returns:
	            dict: ``{'lq', 'gt', 'gt_path'}``; when ``crop_components`` is
	            set, additionally ``'loc_left_eye'``, ``'loc_right_eye'`` and
	            ``'loc_mouth'``.
	        """
	        if self.file_client is None:
	            # Pop 'type' from a copy so the shared config dict stays intact
	            # (it may be reused to build another dataset).
	            backend_opt = dict(self.io_backend_opt)
	            backend_type = backend_opt.pop('type')
	            self.file_client = FileClient(backend_type, **backend_opt)

	        # load gt image
	        # Shape: (h, w, c); channel order: BGR; image range: [0, 1], float32.
	        gt_path = self.paths[index]
	        img_bytes = self.file_client.get(gt_path)
	        img_gt = imfrombytes(img_bytes, float32=True)

	        # random horizontal flip
	        img_gt, status = augment(img_gt, hflip=self.opt['use_hflip'], rotation=False, return_status=True)
	        h, w, _ = img_gt.shape

	        # get facial component coordinates
	        if self.crop_components:
	            locations = self.get_component_coordinates(index, status)
	            loc_left_eye, loc_right_eye, loc_mouth = locations

	        # ------------------------ generate lq image ------------------------ #
	        # blur
	        kernel = degradations.random_mixed_kernels(
	            self.kernel_list,
	            self.kernel_prob,
	            self.blur_kernel_size,
	            self.blur_sigma,
	            self.blur_sigma, [-math.pi, math.pi],
	            noise_range=None)
	        img_lq = cv2.filter2D(img_gt, -1, kernel)
	        # downsample
	        scale = np.random.uniform(self.downsample_range[0], self.downsample_range[1])
	        img_lq = cv2.resize(img_lq, (int(w // scale), int(h // scale)), interpolation=cv2.INTER_LINEAR)
	        # noise
	        if self.noise_range is not None:
	            img_lq = degradations.random_add_gaussian_noise(img_lq, self.noise_range)
	        # jpeg compression
	        if self.jpeg_range is not None:
	            img_lq = degradations.random_add_jpg_compression(img_lq, self.jpeg_range)

	        # resize to original size
	        img_lq = cv2.resize(img_lq, (w, h), interpolation=cv2.INTER_LINEAR)

	        # random color jitter (only for lq)
	        if self.color_jitter_prob is not None and (np.random.uniform() < self.color_jitter_prob):
	            img_lq = self.color_jitter(img_lq, self.color_jitter_shift)
	        # random to gray (only for lq)
	        if self.gray_prob and np.random.uniform() < self.gray_prob:
	            img_lq = cv2.cvtColor(img_lq, cv2.COLOR_BGR2GRAY)
	            img_lq = np.tile(img_lq[:, :, None], [1, 1, 3])
	            if self.opt.get('gt_gray'):  # whether convert GT to gray images
	                img_gt = cv2.cvtColor(img_gt, cv2.COLOR_BGR2GRAY)
	                img_gt = np.tile(img_gt[:, :, None], [1, 1, 3])  # repeat the color channels

	        # BGR to RGB, HWC to CHW, numpy to tensor
	        img_gt, img_lq = img2tensor([img_gt, img_lq], bgr2rgb=True, float32=True)

	        # random color jitter (pytorch version) (only for lq)
	        if self.color_jitter_pt_prob is not None and (np.random.uniform() < self.color_jitter_pt_prob):
	            brightness = self.opt.get('brightness', (0.5, 1.5))
	            contrast = self.opt.get('contrast', (0.5, 1.5))
	            saturation = self.opt.get('saturation', (0, 1.5))
	            hue = self.opt.get('hue', (-0.1, 0.1))
	            img_lq = self.color_jitter_pt(img_lq, brightness, contrast, saturation, hue)

	        # round and clip: quantize LQ to 8-bit levels while staying in [0, 1]
	        img_lq = torch.clamp((img_lq * 255.0).round(), 0, 255) / 255.

	        # normalize
	        normalize(img_gt, self.mean, self.std, inplace=True)
	        normalize(img_lq, self.mean, self.std, inplace=True)

	        return_dict = {'lq': img_lq, 'gt': img_gt, 'gt_path': gt_path}
	        if self.crop_components:
	            return_dict['loc_left_eye'] = loc_left_eye
	            return_dict['loc_right_eye'] = loc_right_eye
	            return_dict['loc_mouth'] = loc_mouth
	        return return_dict

	    def __len__(self):
	        """Return the number of GT images found at construction time."""
	        return len(self.paths)