test5

Runtime error

test5 / preprocess /humanparsing /modules /functions.py

IDM-VTON

update IDM-VTON Demo

938e515 8 months ago

8.15 kB

	import pdb
	from os import path
	import torch
	import torch.distributed as dist
	import torch.autograd as autograd
	import torch.cuda.comm as comm
	from torch.autograd.function import once_differentiable
	from torch.utils.cpp_extension import load

	_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
	_backend = load(name="inplace_abn",
	extra_cflags=["-O3"],
	sources=[path.join(_src_path, f) for f in [
	"inplace_abn.cpp",
	"inplace_abn_cpu.cpp",
	"inplace_abn_cuda.cu",
	"inplace_abn_cuda_half.cu"
	]],
	extra_cuda_cflags=["--expt-extended-lambda"])

	# Activation names
	ACT_RELU = "relu"
	ACT_LEAKY_RELU = "leaky_relu"
	ACT_ELU = "elu"
	ACT_NONE = "none"


	def _check(fn, args, *kwargs):
	success = fn(args, *kwargs)
	if not success:
	raise RuntimeError("CUDA Error encountered in {}".format(fn))


	def _broadcast_shape(x):
	out_size = []
	for i, s in enumerate(x.size()):
	if i != 1:
	out_size.append(1)
	else:
	out_size.append(s)
	return out_size


	def _reduce(x):
	if len(x.size()) == 2:
	return x.sum(dim=0)
	else:
	n, c = x.size()[0:2]
	return x.contiguous().view((n, c, -1)).sum(2).sum(0)


	def _count_samples(x):
	count = 1
	for i, s in enumerate(x.size()):
	if i != 1:
	count *= s
	return count


	def _act_forward(ctx, x):
	if ctx.activation == ACT_LEAKY_RELU:
	_backend.leaky_relu_forward(x, ctx.slope)
	elif ctx.activation == ACT_ELU:
	_backend.elu_forward(x)
	elif ctx.activation == ACT_NONE:
	pass


	def _act_backward(ctx, x, dx):
	if ctx.activation == ACT_LEAKY_RELU:
	_backend.leaky_relu_backward(x, dx, ctx.slope)
	elif ctx.activation == ACT_ELU:
	_backend.elu_backward(x, dx)
	elif ctx.activation == ACT_NONE:
	pass


	class InPlaceABN(autograd.Function):
	@staticmethod
	def forward(ctx, x, weight, bias, running_mean, running_var,
	training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01):
	# Save context
	ctx.training = training
	ctx.momentum = momentum
	ctx.eps = eps
	ctx.activation = activation
	ctx.slope = slope
	ctx.affine = weight is not None and bias is not None

	# Prepare inputs
	count = _count_samples(x)
	x = x.contiguous()
	weight = weight.contiguous() if ctx.affine else x.new_empty(0)
	bias = bias.contiguous() if ctx.affine else x.new_empty(0)

	if ctx.training:
	mean, var = _backend.mean_var(x)

	# Update running stats
	running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
	running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * count / (count - 1))

	# Mark in-place modified tensors
	ctx.mark_dirty(x, running_mean, running_var)
	else:
	mean, var = running_mean.contiguous(), running_var.contiguous()
	ctx.mark_dirty(x)

	# BN forward + activation
	_backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
	_act_forward(ctx, x)

	# Output
	ctx.var = var
	ctx.save_for_backward(x, var, weight, bias)
	ctx.mark_non_differentiable(running_mean, running_var)
	return x, running_mean, running_var

	@staticmethod
	@once_differentiable
	def backward(ctx, dz, _drunning_mean, _drunning_var):
	z, var, weight, bias = ctx.saved_tensors
	dz = dz.contiguous()

	# Undo activation
	_act_backward(ctx, z, dz)

	if ctx.training:
	edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
	else:
	# TODO: implement simplified CUDA backward for inference mode
	edz = dz.new_zeros(dz.size(1))
	eydz = dz.new_zeros(dz.size(1))

	dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
	# dweight = eydz * weight.sign() if ctx.affine else None
	dweight = eydz if ctx.affine else None
	if dweight is not None:
	dweight[weight < 0] *= -1
	dbias = edz if ctx.affine else None

	return dx, dweight, dbias, None, None, None, None, None, None, None


	class InPlaceABNSync(autograd.Function):
	@classmethod
	def forward(cls, ctx, x, weight, bias, running_mean, running_var,
	training=True, momentum=0.1, eps=1e-05, activation=ACT_LEAKY_RELU, slope=0.01, equal_batches=True):
	# Save context
	ctx.training = training
	ctx.momentum = momentum
	ctx.eps = eps
	ctx.activation = activation
	ctx.slope = slope
	ctx.affine = weight is not None and bias is not None

	# Prepare inputs
	ctx.world_size = dist.get_world_size() if dist.is_initialized() else 1

	# count = _count_samples(x)
	batch_size = x.new_tensor([x.shape[0]], dtype=torch.long)

	x = x.contiguous()
	weight = weight.contiguous() if ctx.affine else x.new_empty(0)
	bias = bias.contiguous() if ctx.affine else x.new_empty(0)

	if ctx.training:
	mean, var = _backend.mean_var(x)
	if ctx.world_size > 1:
	# get global batch size
	if equal_batches:
	batch_size *= ctx.world_size
	else:
	dist.all_reduce(batch_size, dist.ReduceOp.SUM)

	ctx.factor = x.shape[0] / float(batch_size.item())

	mean_all = mean.clone() * ctx.factor
	dist.all_reduce(mean_all, dist.ReduceOp.SUM)

	var_all = (var + (mean - mean_all) ** 2) * ctx.factor
	dist.all_reduce(var_all, dist.ReduceOp.SUM)

	mean = mean_all
	var = var_all

	# Update running stats
	running_mean.mul_((1 - ctx.momentum)).add_(ctx.momentum * mean)
	count = batch_size.item() * x.view(x.shape[0], x.shape[1], -1).shape[-1]
	running_var.mul_((1 - ctx.momentum)).add_(ctx.momentum * var * (float(count) / (count - 1)))

	# Mark in-place modified tensors
	ctx.mark_dirty(x, running_mean, running_var)
	else:
	mean, var = running_mean.contiguous(), running_var.contiguous()
	ctx.mark_dirty(x)

	# BN forward + activation
	_backend.forward(x, mean, var, weight, bias, ctx.affine, ctx.eps)
	_act_forward(ctx, x)

	# Output
	ctx.var = var
	ctx.save_for_backward(x, var, weight, bias)
	ctx.mark_non_differentiable(running_mean, running_var)
	return x, running_mean, running_var

	@staticmethod
	@once_differentiable
	def backward(ctx, dz, _drunning_mean, _drunning_var):
	z, var, weight, bias = ctx.saved_tensors
	dz = dz.contiguous()

	# Undo activation
	_act_backward(ctx, z, dz)

	if ctx.training:
	edz, eydz = _backend.edz_eydz(z, dz, weight, bias, ctx.affine, ctx.eps)
	edz_local = edz.clone()
	eydz_local = eydz.clone()

	if ctx.world_size > 1:
	edz *= ctx.factor
	dist.all_reduce(edz, dist.ReduceOp.SUM)

	eydz *= ctx.factor
	dist.all_reduce(eydz, dist.ReduceOp.SUM)
	else:
	edz_local = edz = dz.new_zeros(dz.size(1))
	eydz_local = eydz = dz.new_zeros(dz.size(1))

	dx = _backend.backward(z, dz, var, weight, bias, edz, eydz, ctx.affine, ctx.eps)
	# dweight = eydz_local * weight.sign() if ctx.affine else None
	dweight = eydz_local if ctx.affine else None
	if dweight is not None:
	dweight[weight < 0] *= -1
	dbias = edz_local if ctx.affine else None

	return dx, dweight, dbias, None, None, None, None, None, None, None


	inplace_abn = InPlaceABN.apply
	inplace_abn_sync = InPlaceABNSync.apply

	__all__ = ["inplace_abn", "inplace_abn_sync", "ACT_RELU", "ACT_LEAKY_RELU", "ACT_ELU", "ACT_NONE"]