Spaces:

Sparkles-AI
/

sd-automatic111

Paused

App Files Files

sd-automatic111 / extensions /sd-webui-controlnet /annotator /lama /saicinpainting /training /modules /ffc.py

QuintW

Upload 1350 files

3f9c56c about 1 year ago

raw

history blame

20.4 kB

	# Fast Fourier Convolution NeurIPS 2020
	# original implementation https://github.com/pkumivision/FFC/blob/main/model_zoo/ffc.py
	# paper https://proceedings.neurips.cc/paper/2020/file/2fd5d41ec6cfab47e32164d5624269b1-Paper.pdf

	import numpy as np
	import torch
	import torch.nn as nn
	import torch.nn.functional as F

	from annotator.lama.saicinpainting.training.modules.base import get_activation, BaseDiscriminator
	from annotator.lama.saicinpainting.training.modules.spatial_transform import LearnableSpatialTransformWrapper
	from annotator.lama.saicinpainting.training.modules.squeeze_excitation import SELayer
	from annotator.lama.saicinpainting.utils import get_shape


	class FFCSE_block(nn.Module):

	def __init__(self, channels, ratio_g):
	super(FFCSE_block, self).__init__()
	in_cg = int(channels * ratio_g)
	in_cl = channels - in_cg
	r = 16

	self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
	self.conv1 = nn.Conv2d(channels, channels // r,
	kernel_size=1, bias=True)
	self.relu1 = nn.ReLU(inplace=True)
	self.conv_a2l = None if in_cl == 0 else nn.Conv2d(
	channels // r, in_cl, kernel_size=1, bias=True)
	self.conv_a2g = None if in_cg == 0 else nn.Conv2d(
	channels // r, in_cg, kernel_size=1, bias=True)
	self.sigmoid = nn.Sigmoid()

	def forward(self, x):
	x = x if type(x) is tuple else (x, 0)
	id_l, id_g = x

	x = id_l if type(id_g) is int else torch.cat([id_l, id_g], dim=1)
	x = self.avgpool(x)
	x = self.relu1(self.conv1(x))

	x_l = 0 if self.conv_a2l is None else id_l * \
	self.sigmoid(self.conv_a2l(x))
	x_g = 0 if self.conv_a2g is None else id_g * \
	self.sigmoid(self.conv_a2g(x))
	return x_l, x_g


	class FourierUnit(nn.Module):

	def __init__(self, in_channels, out_channels, groups=1, spatial_scale_factor=None, spatial_scale_mode='bilinear',
	spectral_pos_encoding=False, use_se=False, se_kwargs=None, ffc3d=False, fft_norm='ortho'):
	# bn_layer not used
	super(FourierUnit, self).__init__()
	self.groups = groups

	self.conv_layer = torch.nn.Conv2d(in_channels=in_channels * 2 + (2 if spectral_pos_encoding else 0),
	out_channels=out_channels * 2,
	kernel_size=1, stride=1, padding=0, groups=self.groups, bias=False)
	self.bn = torch.nn.BatchNorm2d(out_channels * 2)
	self.relu = torch.nn.ReLU(inplace=True)

	# squeeze and excitation block
	self.use_se = use_se
	if use_se:
	if se_kwargs is None:
	se_kwargs = {}
	self.se = SELayer(self.conv_layer.in_channels, **se_kwargs)

	self.spatial_scale_factor = spatial_scale_factor
	self.spatial_scale_mode = spatial_scale_mode
	self.spectral_pos_encoding = spectral_pos_encoding
	self.ffc3d = ffc3d
	self.fft_norm = fft_norm

	def forward(self, x):
	batch = x.shape[0]

	if self.spatial_scale_factor is not None:
	orig_size = x.shape[-2:]
	x = F.interpolate(x, scale_factor=self.spatial_scale_factor, mode=self.spatial_scale_mode, align_corners=False)

	r_size = x.size()
	# (batch, c, h, w/2+1, 2)
	fft_dim = (-3, -2, -1) if self.ffc3d else (-2, -1)
	ffted = torch.fft.rfftn(x, dim=fft_dim, norm=self.fft_norm)
	ffted = torch.stack((ffted.real, ffted.imag), dim=-1)
	ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1)
	ffted = ffted.view((batch, -1,) + ffted.size()[3:])

	if self.spectral_pos_encoding:
	height, width = ffted.shape[-2:]
	coords_vert = torch.linspace(0, 1, height)[None, None, :, None].expand(batch, 1, height, width).to(ffted)
	coords_hor = torch.linspace(0, 1, width)[None, None, None, :].expand(batch, 1, height, width).to(ffted)
	ffted = torch.cat((coords_vert, coords_hor, ffted), dim=1)

	if self.use_se:
	ffted = self.se(ffted)

	ffted = self.conv_layer(ffted) # (batch, c*2, h, w/2+1)
	ffted = self.relu(self.bn(ffted))

	ffted = ffted.view((batch, -1, 2,) + ffted.size()[2:]).permute(
	0, 1, 3, 4, 2).contiguous() # (batch,c, t, h, w/2+1, 2)
	ffted = torch.complex(ffted[..., 0], ffted[..., 1])

	ifft_shape_slice = x.shape[-3:] if self.ffc3d else x.shape[-2:]
	output = torch.fft.irfftn(ffted, s=ifft_shape_slice, dim=fft_dim, norm=self.fft_norm)

	if self.spatial_scale_factor is not None:
	output = F.interpolate(output, size=orig_size, mode=self.spatial_scale_mode, align_corners=False)

	return output


	class SeparableFourierUnit(nn.Module):

	def __init__(self, in_channels, out_channels, groups=1, kernel_size=3):
	# bn_layer not used
	super(SeparableFourierUnit, self).__init__()
	self.groups = groups
	row_out_channels = out_channels // 2
	col_out_channels = out_channels - row_out_channels
	self.row_conv = torch.nn.Conv2d(in_channels=in_channels * 2,
	out_channels=row_out_channels * 2,
	kernel_size=(kernel_size, 1), # kernel size is always like this, but the data will be transposed
	stride=1, padding=(kernel_size // 2, 0),
	padding_mode='reflect',
	groups=self.groups, bias=False)
	self.col_conv = torch.nn.Conv2d(in_channels=in_channels * 2,
	out_channels=col_out_channels * 2,
	kernel_size=(kernel_size, 1), # kernel size is always like this, but the data will be transposed
	stride=1, padding=(kernel_size // 2, 0),
	padding_mode='reflect',
	groups=self.groups, bias=False)
	self.row_bn = torch.nn.BatchNorm2d(row_out_channels * 2)
	self.col_bn = torch.nn.BatchNorm2d(col_out_channels * 2)
	self.relu = torch.nn.ReLU(inplace=True)

	def process_branch(self, x, conv, bn):
	batch = x.shape[0]

	r_size = x.size()
	# (batch, c, h, w/2+1, 2)
	ffted = torch.fft.rfft(x, norm="ortho")
	ffted = torch.stack((ffted.real, ffted.imag), dim=-1)
	ffted = ffted.permute(0, 1, 4, 2, 3).contiguous() # (batch, c, 2, h, w/2+1)
	ffted = ffted.view((batch, -1,) + ffted.size()[3:])

	ffted = self.relu(bn(conv(ffted)))

	ffted = ffted.view((batch, -1, 2,) + ffted.size()[2:]).permute(
	0, 1, 3, 4, 2).contiguous() # (batch,c, t, h, w/2+1, 2)
	ffted = torch.complex(ffted[..., 0], ffted[..., 1])

	output = torch.fft.irfft(ffted, s=x.shape[-1:], norm="ortho")
	return output


	def forward(self, x):
	rowwise = self.process_branch(x, self.row_conv, self.row_bn)
	colwise = self.process_branch(x.permute(0, 1, 3, 2), self.col_conv, self.col_bn).permute(0, 1, 3, 2)
	out = torch.cat((rowwise, colwise), dim=1)
	return out


	class SpectralTransform(nn.Module):

	def __init__(self, in_channels, out_channels, stride=1, groups=1, enable_lfu=True, separable_fu=False, **fu_kwargs):
	# bn_layer not used
	super(SpectralTransform, self).__init__()
	self.enable_lfu = enable_lfu
	if stride == 2:
	self.downsample = nn.AvgPool2d(kernel_size=(2, 2), stride=2)
	else:
	self.downsample = nn.Identity()

	self.stride = stride
	self.conv1 = nn.Sequential(
	nn.Conv2d(in_channels, out_channels //
	2, kernel_size=1, groups=groups, bias=False),
	nn.BatchNorm2d(out_channels // 2),
	nn.ReLU(inplace=True)
	)
	fu_class = SeparableFourierUnit if separable_fu else FourierUnit
	self.fu = fu_class(
	out_channels // 2, out_channels // 2, groups, **fu_kwargs)
	if self.enable_lfu:
	self.lfu = fu_class(
	out_channels // 2, out_channels // 2, groups)
	self.conv2 = torch.nn.Conv2d(
	out_channels // 2, out_channels, kernel_size=1, groups=groups, bias=False)

	def forward(self, x):

	x = self.downsample(x)
	x = self.conv1(x)
	output = self.fu(x)

	if self.enable_lfu:
	n, c, h, w = x.shape
	split_no = 2
	split_s = h // split_no
	xs = torch.cat(torch.split(
	x[:, :c // 4], split_s, dim=-2), dim=1).contiguous()
	xs = torch.cat(torch.split(xs, split_s, dim=-1),
	dim=1).contiguous()
	xs = self.lfu(xs)
	xs = xs.repeat(1, 1, split_no, split_no).contiguous()
	else:
	xs = 0

	output = self.conv2(x + output + xs)

	return output


	class FFC(nn.Module):

	def __init__(self, in_channels, out_channels, kernel_size,
	ratio_gin, ratio_gout, stride=1, padding=0,
	dilation=1, groups=1, bias=False, enable_lfu=True,
	padding_type='reflect', gated=False, **spectral_kwargs):
	super(FFC, self).__init__()

	assert stride == 1 or stride == 2, "Stride should be 1 or 2."
	self.stride = stride

	in_cg = int(in_channels * ratio_gin)
	in_cl = in_channels - in_cg
	out_cg = int(out_channels * ratio_gout)
	out_cl = out_channels - out_cg
	#groups_g = 1 if groups == 1 else int(groups * ratio_gout)
	#groups_l = 1 if groups == 1 else groups - groups_g

	self.ratio_gin = ratio_gin
	self.ratio_gout = ratio_gout
	self.global_in_num = in_cg

	module = nn.Identity if in_cl == 0 or out_cl == 0 else nn.Conv2d
	self.convl2l = module(in_cl, out_cl, kernel_size,
	stride, padding, dilation, groups, bias, padding_mode=padding_type)
	module = nn.Identity if in_cl == 0 or out_cg == 0 else nn.Conv2d
	self.convl2g = module(in_cl, out_cg, kernel_size,
	stride, padding, dilation, groups, bias, padding_mode=padding_type)
	module = nn.Identity if in_cg == 0 or out_cl == 0 else nn.Conv2d
	self.convg2l = module(in_cg, out_cl, kernel_size,
	stride, padding, dilation, groups, bias, padding_mode=padding_type)
	module = nn.Identity if in_cg == 0 or out_cg == 0 else SpectralTransform
	self.convg2g = module(
	in_cg, out_cg, stride, 1 if groups == 1 else groups // 2, enable_lfu, **spectral_kwargs)

	self.gated = gated
	module = nn.Identity if in_cg == 0 or out_cl == 0 or not self.gated else nn.Conv2d
	self.gate = module(in_channels, 2, 1)

	def forward(self, x):
	x_l, x_g = x if type(x) is tuple else (x, 0)
	out_xl, out_xg = 0, 0

	if self.gated:
	total_input_parts = [x_l]
	if torch.is_tensor(x_g):
	total_input_parts.append(x_g)
	total_input = torch.cat(total_input_parts, dim=1)

	gates = torch.sigmoid(self.gate(total_input))
	g2l_gate, l2g_gate = gates.chunk(2, dim=1)
	else:
	g2l_gate, l2g_gate = 1, 1

	if self.ratio_gout != 1:
	out_xl = self.convl2l(x_l) + self.convg2l(x_g) * g2l_gate
	if self.ratio_gout != 0:
	out_xg = self.convl2g(x_l) * l2g_gate + self.convg2g(x_g)

	return out_xl, out_xg


	class FFC_BN_ACT(nn.Module):

	def __init__(self, in_channels, out_channels,
	kernel_size, ratio_gin, ratio_gout,
	stride=1, padding=0, dilation=1, groups=1, bias=False,
	norm_layer=nn.BatchNorm2d, activation_layer=nn.Identity,
	padding_type='reflect',
	enable_lfu=True, **kwargs):
	super(FFC_BN_ACT, self).__init__()
	self.ffc = FFC(in_channels, out_channels, kernel_size,
	ratio_gin, ratio_gout, stride, padding, dilation,
	groups, bias, enable_lfu, padding_type=padding_type, **kwargs)
	lnorm = nn.Identity if ratio_gout == 1 else norm_layer
	gnorm = nn.Identity if ratio_gout == 0 else norm_layer
	global_channels = int(out_channels * ratio_gout)
	self.bn_l = lnorm(out_channels - global_channels)
	self.bn_g = gnorm(global_channels)

	lact = nn.Identity if ratio_gout == 1 else activation_layer
	gact = nn.Identity if ratio_gout == 0 else activation_layer
	self.act_l = lact(inplace=True)
	self.act_g = gact(inplace=True)

	def forward(self, x):
	x_l, x_g = self.ffc(x)
	x_l = self.act_l(self.bn_l(x_l))
	x_g = self.act_g(self.bn_g(x_g))
	return x_l, x_g


	class FFCResnetBlock(nn.Module):
	def __init__(self, dim, padding_type, norm_layer, activation_layer=nn.ReLU, dilation=1,
	spatial_transform_kwargs=None, inline=False, **conv_kwargs):
	super().__init__()
	self.conv1 = FFC_BN_ACT(dim, dim, kernel_size=3, padding=dilation, dilation=dilation,
	norm_layer=norm_layer,
	activation_layer=activation_layer,
	padding_type=padding_type,
	**conv_kwargs)
	self.conv2 = FFC_BN_ACT(dim, dim, kernel_size=3, padding=dilation, dilation=dilation,
	norm_layer=norm_layer,
	activation_layer=activation_layer,
	padding_type=padding_type,
	**conv_kwargs)
	if spatial_transform_kwargs is not None:
	self.conv1 = LearnableSpatialTransformWrapper(self.conv1, **spatial_transform_kwargs)
	self.conv2 = LearnableSpatialTransformWrapper(self.conv2, **spatial_transform_kwargs)
	self.inline = inline

	def forward(self, x):
	if self.inline:
	x_l, x_g = x[:, :-self.conv1.ffc.global_in_num], x[:, -self.conv1.ffc.global_in_num:]
	else:
	x_l, x_g = x if type(x) is tuple else (x, 0)

	id_l, id_g = x_l, x_g

	x_l, x_g = self.conv1((x_l, x_g))
	x_l, x_g = self.conv2((x_l, x_g))

	x_l, x_g = id_l + x_l, id_g + x_g
	out = x_l, x_g
	if self.inline:
	out = torch.cat(out, dim=1)
	return out


	class ConcatTupleLayer(nn.Module):
	def forward(self, x):
	assert isinstance(x, tuple)
	x_l, x_g = x
	assert torch.is_tensor(x_l) or torch.is_tensor(x_g)
	if not torch.is_tensor(x_g):
	return x_l
	return torch.cat(x, dim=1)


	class FFCResNetGenerator(nn.Module):
	def __init__(self, input_nc, output_nc, ngf=64, n_downsampling=3, n_blocks=9, norm_layer=nn.BatchNorm2d,
	padding_type='reflect', activation_layer=nn.ReLU,
	up_norm_layer=nn.BatchNorm2d, up_activation=nn.ReLU(True),
	init_conv_kwargs={}, downsample_conv_kwargs={}, resnet_conv_kwargs={},
	spatial_transform_layers=None, spatial_transform_kwargs={},
	add_out_act=True, max_features=1024, out_ffc=False, out_ffc_kwargs={}):
	assert (n_blocks >= 0)
	super().__init__()

	model = [nn.ReflectionPad2d(3),
	FFC_BN_ACT(input_nc, ngf, kernel_size=7, padding=0, norm_layer=norm_layer,
	activation_layer=activation_layer, **init_conv_kwargs)]

	### downsample
	for i in range(n_downsampling):
	mult = 2 ** i
	if i == n_downsampling - 1:
	cur_conv_kwargs = dict(downsample_conv_kwargs)
	cur_conv_kwargs['ratio_gout'] = resnet_conv_kwargs.get('ratio_gin', 0)
	else:
	cur_conv_kwargs = downsample_conv_kwargs
	model += [FFC_BN_ACT(min(max_features, ngf * mult),
	min(max_features, ngf * mult * 2),
	kernel_size=3, stride=2, padding=1,
	norm_layer=norm_layer,
	activation_layer=activation_layer,
	**cur_conv_kwargs)]

	mult = 2 ** n_downsampling
	feats_num_bottleneck = min(max_features, ngf * mult)

	### resnet blocks
	for i in range(n_blocks):
	cur_resblock = FFCResnetBlock(feats_num_bottleneck, padding_type=padding_type, activation_layer=activation_layer,
	norm_layer=norm_layer, **resnet_conv_kwargs)
	if spatial_transform_layers is not None and i in spatial_transform_layers:
	cur_resblock = LearnableSpatialTransformWrapper(cur_resblock, **spatial_transform_kwargs)
	model += [cur_resblock]

	model += [ConcatTupleLayer()]

	### upsample
	for i in range(n_downsampling):
	mult = 2 ** (n_downsampling - i)
	model += [nn.ConvTranspose2d(min(max_features, ngf * mult),
	min(max_features, int(ngf * mult / 2)),
	kernel_size=3, stride=2, padding=1, output_padding=1),
	up_norm_layer(min(max_features, int(ngf * mult / 2))),
	up_activation]

	if out_ffc:
	model += [FFCResnetBlock(ngf, padding_type=padding_type, activation_layer=activation_layer,
	norm_layer=norm_layer, inline=True, **out_ffc_kwargs)]

	model += [nn.ReflectionPad2d(3),
	nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
	if add_out_act:
	model.append(get_activation('tanh' if add_out_act is True else add_out_act))
	self.model = nn.Sequential(*model)

	def forward(self, input):
	return self.model(input)


	class FFCNLayerDiscriminator(BaseDiscriminator):
	def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d, max_features=512,
	init_conv_kwargs={}, conv_kwargs={}):
	super().__init__()
	self.n_layers = n_layers

	def _act_ctor(inplace=True):
	return nn.LeakyReLU(negative_slope=0.2, inplace=inplace)

	kw = 3
	padw = int(np.ceil((kw-1.0)/2))
	sequence = [[FFC_BN_ACT(input_nc, ndf, kernel_size=kw, padding=padw, norm_layer=norm_layer,
	activation_layer=_act_ctor, **init_conv_kwargs)]]

	nf = ndf
	for n in range(1, n_layers):
	nf_prev = nf
	nf = min(nf * 2, max_features)

	cur_model = [
	FFC_BN_ACT(nf_prev, nf,
	kernel_size=kw, stride=2, padding=padw,
	norm_layer=norm_layer,
	activation_layer=_act_ctor,
	**conv_kwargs)
	]
	sequence.append(cur_model)

	nf_prev = nf
	nf = min(nf * 2, 512)

	cur_model = [
	FFC_BN_ACT(nf_prev, nf,
	kernel_size=kw, stride=1, padding=padw,
	norm_layer=norm_layer,
	activation_layer=lambda args, kwargs: nn.LeakyReLU(args, negative_slope=0.2, **kwargs),
	**conv_kwargs),
	ConcatTupleLayer()
	]
	sequence.append(cur_model)

	sequence += [[nn.Conv2d(nf, 1, kernel_size=kw, stride=1, padding=padw)]]

	for n in range(len(sequence)):
	setattr(self, 'model'+str(n), nn.Sequential(*sequence[n]))

	def get_all_activations(self, x):
	res = [x]
	for n in range(self.n_layers + 2):
	model = getattr(self, 'model' + str(n))
	res.append(model(res[-1]))
	return res[1:]

	def forward(self, x):
	act = self.get_all_activations(x)
	feats = []
	for out in act[:-1]:
	if isinstance(out, tuple):
	if torch.is_tensor(out[1]):
	out = torch.cat(out, dim=1)
	else:
	out = out[0]
	feats.append(out)
	return act[-1], feats