""" COCO transforms (quick and dirty) |
|
|
|
Hacked together by Ross Wightman |
|
""" |
|
import random |
|
import math |
|
from copy import deepcopy |
|
|
|
from PIL import Image |
|
import numpy as np |
|
import torch |
|
|
|
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) |
|
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) |
|
IMAGENET_INCEPTION_MEAN = (0.5, 0.5, 0.5) |
|
IMAGENET_INCEPTION_STD = (0.5, 0.5, 0.5) |
|
|
|
|
|
class ImageToNumpy:

    def __call__(self, pil_img, annotations: dict):
        np_img = np.array(pil_img, dtype=np.uint8)
        if np_img.ndim < 3:
            np_img = np.expand_dims(np_img, axis=-1)
        np_img = np.moveaxis(np_img, 2, 0)  # HWC -> CHW
        return np_img, annotations


class ImageToTensor:

    def __init__(self, dtype=torch.float32):
        self.dtype = dtype

    def __call__(self, pil_img, annotations: dict):
        np_img = np.array(pil_img, dtype=np.uint8)
        if np_img.ndim < 3:
            np_img = np.expand_dims(np_img, axis=-1)
        np_img = np.moveaxis(np_img, 2, 0)  # HWC -> CHW
        return torch.from_numpy(np_img).to(dtype=self.dtype), annotations


def _pil_interp(method):
    if method == "bicubic":
        return Image.BICUBIC
    elif method == "lanczos":
        return Image.LANCZOS
    elif method == "hamming":
        return Image.HAMMING
    else:
        # "bilinear" and any unrecognized method fall back to bilinear
        return Image.BILINEAR


_RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC)


def clip_boxes_(boxes, img_size):
    # in-place clip of (y1, x1, y2, x2) boxes to the image bounds
    height, width = img_size
    clip_upper = np.array([height, width] * 2, dtype=boxes.dtype)
    np.clip(boxes, 0, clip_upper, out=boxes)


def clip_boxes(boxes, img_size):
    clipped_boxes = boxes.copy()
    clip_boxes_(clipped_boxes, img_size)
    return clipped_boxes
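

# Illustrative example (added for clarity, not in the original file): boxes are
# an (N, 4) float array in the (y1, x1, y2, x2) layout used throughout this
# file, so the clip upper bound alternates height/width per column, e.g.:
#
#   clip_boxes(np.array([[-5., 10., 120., 300.]]), (100, 200))
#   # -> array([[  0.,  10., 100., 200.]])
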
def _size_tuple(size):
    if isinstance(size, int):
        return size, size
    else:
        assert len(size) == 2
        return size


class ResizePad:

    def __init__(self, target_size: int, interpolation: str = "bilinear", fill_color: tuple = (0, 0, 0)):
        self.target_size = _size_tuple(target_size)
        self.interpolation = interpolation
        self.fill_color = fill_color

    def __call__(self, img, anno: dict):
        width, height = img.size

        # scale so the image fits inside target_size while preserving aspect ratio
        img_scale_y = self.target_size[0] / height
        img_scale_x = self.target_size[1] / width
        img_scale = min(img_scale_y, img_scale_x)
        scaled_h = int(height * img_scale)
        scaled_w = int(width * img_scale)

        new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color)
        interp_method = _pil_interp(self.interpolation)
        img = img.resize((scaled_w, scaled_h), interp_method)
        new_img.paste(img)  # paste at the top-left corner, pad the remainder with fill_color

        if "bbox" in anno:
            bbox = anno["bbox"]
            bbox[:, :4] *= img_scale
            bbox_bound = (min(scaled_h, self.target_size[0]), min(scaled_w, self.target_size[1]))
            clip_boxes_(bbox, bbox_bound)
            valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1)
            anno["bbox"] = bbox[valid_indices, :]
            anno["cls"] = anno["cls"][valid_indices]

        anno["img_scale"] = 1.0 / img_scale  # scale factor back to the original image size

        return new_img, anno
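

# Usage sketch (illustrative, not part of the original file). The annotation
# dict is assumed to carry float (y1, x1, y2, x2) boxes under 'bbox' and class
# ids under 'cls', matching how the transforms above index into it:
#
#   tfm = ResizePad(target_size=512, fill_color=(124, 116, 104))
#   img = Image.new('RGB', (640, 480))
#   anno = {'bbox': np.array([[50., 60., 200., 300.]]), 'cls': np.array([1])}
#   img, anno = tfm(img, anno)
#   # img is now 512x512; boxes were scaled by 512/640 = 0.8 and
#   # anno['img_scale'] == 1.25 maps predictions back to the 640x480 original
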
class RandomResizePad:

    def __init__(
            self, target_size: int, scale: tuple = (0.1, 2.0), interpolation: str = "random", fill_color: tuple = (0, 0, 0)
    ):
        self.target_size = _size_tuple(target_size)
        self.scale = scale
        if interpolation == "random":
            self.interpolation = _RANDOM_INTERPOLATION
        else:
            self.interpolation = _pil_interp(interpolation)
        self.fill_color = fill_color

    def _get_params(self, img):
        # pick a random scale factor and jitter the target size accordingly
        scale_factor = random.uniform(*self.scale)
        scaled_target_height = scale_factor * self.target_size[0]
        scaled_target_width = scale_factor * self.target_size[1]

        # fit the image inside the jittered target while preserving aspect ratio
        width, height = img.size
        img_scale_y = scaled_target_height / height
        img_scale_x = scaled_target_width / width
        img_scale = min(img_scale_y, img_scale_x)

        # random crop offsets within the part that overflows the target size
        scaled_h = int(height * img_scale)
        scaled_w = int(width * img_scale)
        offset_y = scaled_h - self.target_size[0]
        offset_x = scaled_w - self.target_size[1]
        offset_y = int(max(0.0, float(offset_y)) * random.uniform(0, 1))
        offset_x = int(max(0.0, float(offset_x)) * random.uniform(0, 1))
        return scaled_h, scaled_w, offset_y, offset_x, img_scale

    def __call__(self, img, anno: dict):
        scaled_h, scaled_w, offset_y, offset_x, img_scale = self._get_params(img)

        if isinstance(self.interpolation, (tuple, list)):
            interpolation = random.choice(self.interpolation)
        else:
            interpolation = self.interpolation
        img = img.resize((scaled_w, scaled_h), interpolation)

        # crop the region selected by the random offsets, then paste it at a
        # random position inside the padded target canvas
        right, lower = min(scaled_w, offset_x + self.target_size[1]), min(scaled_h, offset_y + self.target_size[0])
        img = img.crop((offset_x, offset_y, right, lower))
        new_img = Image.new("RGB", (self.target_size[1], self.target_size[0]), color=self.fill_color)

        target_height = self.target_size[0]
        target_width = self.target_size[1]
        img_width, img_height = img.size
        offset_x_paste = int(max(0, target_width - img_width) * random.uniform(0, 1))
        offset_y_paste = int(max(0, target_height - img_height) * random.uniform(0, 1))
        right_paste = min(target_width, offset_x_paste + img_width)
        lower_paste = min(target_height, offset_y_paste + img_height)
        new_img.paste(img, (offset_x_paste, offset_y_paste, right_paste, lower_paste))

        if "bbox" in anno:
            bbox = anno["bbox"]
            bbox[:, :4] *= img_scale
            box_offset = np.stack([offset_y, offset_x] * 2)
            bbox -= box_offset
            bbox_bound = (min(scaled_h, self.target_size[0]), min(scaled_w, self.target_size[1]))
            clip_boxes_(bbox, bbox_bound)

            # shift the boxes again to follow the random paste position
            box_offset_paste = np.stack([offset_y_paste, offset_x_paste] * 2)
            bbox += box_offset_paste

            valid_indices = (bbox[:, :2] < bbox[:, 2:4]).all(axis=1)
            anno["bbox"] = bbox[valid_indices, :]
            anno["cls"] = anno["cls"][valid_indices]

        anno["img_scale"] = 1.0 / img_scale  # scale factor back to the original image size

        return new_img, anno
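

# Rough geometry note (added for clarity, not in the original file): with the
# default scale=(0.1, 2.0) and target_size=512, the jittered target spans
# roughly 51 to 1024 pixels, so the image may be heavily down- or up-scaled
# before the random crop/pad brings it back to a 512x512 canvas.
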
class RandomFlip:

    def __init__(self, horizontal=True, vertical=False, prob=0.5):
        self.horizontal = horizontal
        self.vertical = vertical
        self.prob = prob

    def _get_params(self):
        do_horizontal = random.random() < self.prob if self.horizontal else False
        do_vertical = random.random() < self.prob if self.vertical else False
        return do_horizontal, do_vertical

    def __call__(self, img, annotations: dict):
        do_horizontal, do_vertical = self._get_params()
        width, height = img.size

        def _fliph(bbox):
            # boxes are (y1, x1, y2, x2); mirror the x coordinates
            x_max = width - bbox[:, 1]
            x_min = width - bbox[:, 3]
            bbox[:, 1] = x_min
            bbox[:, 3] = x_max

        def _flipv(bbox):
            # mirror the y coordinates
            y_max = height - bbox[:, 0]
            y_min = height - bbox[:, 2]
            bbox[:, 0] = y_min
            bbox[:, 2] = y_max

        if do_horizontal and do_vertical:
            # flipping both axes is equivalent to a 180 degree rotation
            img = img.transpose(Image.ROTATE_180)
            if "bbox" in annotations:
                _fliph(annotations["bbox"])
                _flipv(annotations["bbox"])
        elif do_horizontal:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            if "bbox" in annotations:
                _fliph(annotations["bbox"])
        elif do_vertical:
            img = img.transpose(Image.FLIP_TOP_BOTTOM)
            if "bbox" in annotations:
                _flipv(annotations["bbox"])

        return img, annotations


def resolve_fill_color(fill_color, img_mean=IMAGENET_DEFAULT_MEAN):
    if isinstance(fill_color, tuple):
        assert len(fill_color) == 3
    else:
        try:
            int_color = int(fill_color)
            fill_color = (int_color,) * 3
        except ValueError:
            assert fill_color == "mean"
            fill_color = tuple([int(round(255 * x)) for x in img_mean])
    return fill_color
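

# Examples of accepted fill_color values (illustrative, not in the original file):
#
#   resolve_fill_color(0)             # -> (0, 0, 0)
#   resolve_fill_color((10, 20, 30))  # -> (10, 20, 30)
#   resolve_fill_color('mean')        # -> (124, 116, 104) for IMAGENET_DEFAULT_MEAN
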
class Compose:

    def __init__(self, transforms: list):
        self.transforms = transforms

    def __call__(self, img, annotations: dict):
        for t in self.transforms:
            img, annotations = t(img, annotations)
        return img, annotations


def transforms_coco_eval(
        img_size=224,
        interpolation="bilinear",
        use_prefetcher=False,
        fill_color="mean",
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
):
    fill_color = resolve_fill_color(fill_color, mean)

    image_tfl = [
        ResizePad(target_size=img_size, interpolation=interpolation, fill_color=fill_color),
        ImageToNumpy(),
    ]

    # mean/std are not applied here; normalization is left to the prefetcher
    assert use_prefetcher, "Only supporting prefetcher usage right now"

    image_tf = Compose(image_tfl)
    return image_tf


def transforms_coco_train(
        img_size=224,
        interpolation="random",
        use_prefetcher=False,
        fill_color="mean",
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
):
    fill_color = resolve_fill_color(fill_color, mean)

    image_tfl = [
        RandomFlip(horizontal=True, prob=0.5),
        RandomResizePad(target_size=img_size, interpolation=interpolation, fill_color=fill_color),
        ImageToNumpy(),
    ]

    assert use_prefetcher, "Only supporting prefetcher usage right now"

    image_tf = Compose(image_tfl)
    return image_tf
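

# End-to-end sketch (illustrative, not part of the original file): build the
# training pipeline and push one image/annotation pair through it. The
# annotation keys and dtypes below are assumptions consistent with the
# transforms above; use_prefetcher must be True or the factory will assert.
#
#   train_tf = transforms_coco_train(img_size=512, use_prefetcher=True)
#   img = Image.new('RGB', (640, 480))
#   anno = {'bbox': np.array([[50., 60., 200., 300.]]), 'cls': np.array([1])}
#   np_img, anno = train_tf(img, anno)
#   # np_img is a uint8 CHW array of shape (3, 512, 512); anno['bbox'] holds the
#   # surviving boxes in the 512x512 frame and anno['img_scale'] undoes the resize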