import functools
import torch
import torch.nn as nn
import numpy as np
from model.pvcnn.modules import SharedMLP, PVConv, PointNetSAModule, PointNetAModule, PointNetFPModule, Swish


def _linear_gn_relu(in_channels, out_channels):
    """Linear -> GroupNorm(8 groups) -> Swish: the 1D counterpart of SharedMLP."""
    return nn.Sequential(nn.Linear(in_channels, out_channels), nn.GroupNorm(8, out_channels), Swish())


def create_mlp_components(in_channels, out_channels, classifier=False, dim=2, width_multiplier=1):
    """Build an MLP head; entries of `out_channels` below 1 are treated as dropout probabilities."""
    r = width_multiplier
if dim == 1:
block = _linear_gn_relu
else:
block = SharedMLP
if not isinstance(out_channels, (list, tuple)):
out_channels = [out_channels]
    if len(out_channels) == 0 or (len(out_channels) == 1 and out_channels[0] is None):
        return nn.Sequential(), in_channels  # nothing to build; keep the two-value return shape of the main path
layers = []
for oc in out_channels[:-1]:
        if oc < 1:
            layers.append(nn.Dropout(oc))  # fractional entries denote dropout probabilities
else:
oc = int(r * oc)
layers.append(block(in_channels, oc))
in_channels = oc
if dim == 1:
if classifier:
layers.append(nn.Linear(in_channels, out_channels[-1]))
else:
layers.append(_linear_gn_relu(in_channels, int(r * out_channels[-1])))
else:
if classifier:
layers.append(nn.Conv1d(in_channels, out_channels[-1], 1))
else:
layers.append(SharedMLP(in_channels, int(r * out_channels[-1])))
    return layers, (out_channels[-1] if classifier else int(r * out_channels[-1]))
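
# Usage sketch (values are illustrative, not from any config in this repo):
#   layers, out_ch = create_mlp_components(in_channels=256, out_channels=[128, 0.5, 64],
#                                          classifier=True, dim=2)
#   head = nn.Sequential(*layers)  # SharedMLP(256, 128) -> Dropout(0.5) -> Conv1d(128, 64, 1)
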
def create_pointnet_components(blocks, in_channels, embed_dim, with_se=False, normalize=True, eps=0,
                               width_multiplier=1, voxel_resolution_multiplier=1):
    """Build conv stages from `blocks`, a sequence of (out_channels, num_blocks, voxel_resolution) tuples."""
    r, vr = width_multiplier, voxel_resolution_multiplier
    layers, concat_channels = [], 0
    c = 0  # global block counter; every block after the first expects an extra embed_dim input channels
for k, (out_channels, num_blocks, voxel_resolution) in enumerate(blocks):
out_channels = int(r * out_channels)
for p in range(num_blocks):
            attention = k % 2 == 0 and k > 0 and p == 0  # attention only in the first block of even-indexed stages after the first
if voxel_resolution is None:
block = SharedMLP
else:
block = functools.partial(PVConv, kernel_size=3, resolution=int(vr * voxel_resolution), attention=attention,
with_se=with_se, normalize=normalize, eps=eps)
if c == 0:
layers.append(block(in_channels, out_channels))
else:
layers.append(block(in_channels+embed_dim, out_channels))
in_channels = out_channels
concat_channels += out_channels
c += 1
return layers, in_channels, concat_channels
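
# Usage sketch (illustrative values): each entry of `blocks` is
# (out_channels, num_blocks, voxel_resolution); a None resolution falls back to SharedMLP.
#   blocks = ((64, 2, 32), (128, 1, 16), (256, 1, None))
#   layers, out_ch, concat_ch = create_pointnet_components(blocks, in_channels=3, embed_dim=64)
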
def create_pointnet2_sa_components(sa_blocks_config, extra_feature_channels, embed_dim=64, use_att=False,
                                   dropout=0.1, with_se=False, normalize=True, eps=0,
                                   width_multiplier=1, voxel_resolution_multiplier=1,
                                   in_ch_multiplier=1, extra_in_channel=0):
    """Build the set-abstraction (SA) stages of the PointNet++-style encoder.

    in_ch_multiplier: scales each stage's input channels, e.g. to account for
        concatenated human + object features.
    extra_in_channel: additional input channels added between stages for cross attention.
    """
r, vr = width_multiplier, voxel_resolution_multiplier
in_channels = extra_feature_channels + 3
sa_layers, sa_in_channels = [], []
block_count = 0
for conv_configs, sa_configs in sa_blocks_config:
k = 0
sa_in_channels.append(in_channels)
sa_blocks = []
if conv_configs is not None:
out_channels, num_blocks, voxel_resolution = conv_configs
out_channels = int(r * out_channels)
            for p in range(num_blocks):  # the conv block is repeated num_blocks times
                attention = (block_count + 1) % 2 == 0 and use_att and p == 0
if voxel_resolution is None:
block = SharedMLP
else:
block = functools.partial(PVConv, kernel_size=3, resolution=int(vr * voxel_resolution), attention=attention,
dropout=dropout,
with_se=with_se, with_se_relu=True,
normalize=normalize, eps=eps)
                if block_count == 0:
                    sa_blocks.append(block(in_channels, out_channels))
                elif k == 0:
                    # from the second stage on, the first conv also consumes the embed_dim-channel embedding
                    sa_blocks.append(block(in_channels + embed_dim, out_channels))
                in_channels = out_channels
                k += 1
extra_feature_channels = in_channels
num_centers, radius, num_neighbors, out_channels = sa_configs
_out_channels = []
for oc in out_channels:
if isinstance(oc, (list, tuple)):
_out_channels.append([int(r * _oc) for _oc in oc])
else:
_out_channels.append(int(r * oc))
out_channels = _out_channels
        if num_centers is None:
            block = PointNetAModule  # global aggregation; the configs used here always set num_centers
else:
block = functools.partial(PointNetSAModule, num_centers=num_centers, radius=radius,
num_neighbors=num_neighbors)
        sa_blocks.append(block(in_channels=extra_feature_channels + (embed_dim if k == 0 else 0),
                               out_channels=out_channels, include_coordinates=True))
block_count += 1
# XH: double the channel for concat, or add additional channel for cross attention
if block_count < len(sa_blocks_config):
in_channels = extra_feature_channels = int(sa_blocks[-1].out_channels * in_ch_multiplier + extra_in_channel)
else:
# no cross attention before the self attention module
in_channels = extra_feature_channels = int(sa_blocks[-1].out_channels * in_ch_multiplier)
        if len(sa_blocks) == 1:
            sa_layers.append(sa_blocks[0])  # stage has only the SA module, no preceding conv blocks
else:
sa_layers.append(nn.Sequential(*sa_blocks))
    # the last return value is the number of points remaining after the final SA stage
    return sa_layers, sa_in_channels, in_channels, 1 if num_centers is None else num_centers
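
# Config sketch (illustrative values, in the spirit of PVD-style configs): each entry of
# sa_blocks_config is a (conv_configs, sa_configs) pair, where
#   conv_configs = (out_channels, num_blocks, voxel_resolution) or None
#   sa_configs   = (num_centers, radius, num_neighbors, out_channels)
#   sa_blocks_config = (
#       ((32, 2, 32), (1024, 0.1, 32, (32, 64))),
#       (None, (16, 0.8, 32, (64, 64, 128))),
#   )
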
def create_pointnet2_fp_modules(fp_blocks, in_channels, sa_in_channels, embed_dim=64, use_att=False,
dropout=0.1,
with_se=False, normalize=True, eps=0,
width_multiplier=1, voxel_resolution_multiplier=1,
in_ch_multiplier=1, extra_in_channel=0):
"""
:param fp_blocks:
:param in_channels:
:param sa_in_channels:
:param embed_dim:
:param use_att:
:param dropout:
:param with_se:
:param normalize:
:param eps:
:param width_multiplier:
:param voxel_resolution_multiplier:
:param in_ch_multiplier: increase the input channel dimension
:return:
"""
r, vr = width_multiplier, voxel_resolution_multiplier
fp_layers = []
    c = 0  # FP stage counter, used to decide where attention could be enabled
    for fp_idx, (fp_configs, conv_configs) in enumerate(fp_blocks):
        fp_modules = []  # modules of the current stage; a distinct name avoids shadowing the fp_blocks argument
out_channels = tuple(int(r * oc) for oc in fp_configs)
        if fp_idx > 0:
            # handle the additional channels from concatenating human + object features
            sa_in_concat = int(in_channels * in_ch_multiplier + extra_in_channel)
        else:
            # for simple-coord3d the first decoder layer also has cross attention
            sa_in_concat = in_channels + extra_in_channel
        fp_modules.append(
            PointNetFPModule(in_channels=sa_in_concat + sa_in_channels[-1 - fp_idx] + embed_dim,
                             out_channels=out_channels)
        )  # interpolation + Conv1d; does not change the number of points
in_channels = out_channels[-1]
if conv_configs is not None:
out_channels, num_blocks, voxel_resolution = conv_configs
out_channels = int(r * out_channels)
for p in range(num_blocks):
                # fp_modules holds a single module when p == 0, so the second clause keeps attention disabled throughout the FP stages
                attention = (c + 1) % 2 == 0 and c < len(fp_modules) - 1 and use_att and p == 0
if voxel_resolution is None:
block = SharedMLP
else:
block = functools.partial(PVConv, kernel_size=3, resolution=int(vr * voxel_resolution), attention=attention,
dropout=dropout,
with_se=with_se, with_se_relu=True,
normalize=normalize, eps=eps)
                fp_modules.append(block(in_channels, out_channels))
                in_channels = out_channels  # constant within the stage, so repeats keep the same width
        if len(fp_modules) == 1:
            fp_layers.append(fp_modules[0])  # stage has only the FP module, no conv blocks after it
        else:
            fp_layers.append(nn.Sequential(*fp_modules))
c += 1
return fp_layers, in_channels
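
# Config sketch (illustrative values): each entry of `fp_blocks` pairs the FP widths with an
# optional conv stage, mirroring the SA config layout:
#   fp_blocks = (
#       ((128, 128), (128, 2, 16)),  # ((fp_out_channels, ...), (out_channels, num_blocks, voxel_resolution))
#       ((128, 64), None),
#   )
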
def get_timestep_embedding(embed_dim, timesteps, device):
    """
    Sinusoidal timestep embedding. Note that this should work just as well for
    continuous values as for discrete values.
    """
    assert len(timesteps.shape) == 1  # expects a 1D batch of timesteps
    half_dim = embed_dim // 2
    emb = np.log(10000) / (half_dim - 1)
    # geometric frequency scale from 1 down to 1/10000, as in the Transformer positional encoding
    emb = torch.from_numpy(np.exp(np.arange(0, half_dim) * -emb)).float().to(device)
    emb = timesteps[:, None] * emb[None, :]
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
if embed_dim % 2 == 1: # zero pad
emb = nn.functional.pad(emb, (0, 1), "constant", 0)
assert emb.shape == torch.Size([timesteps.shape[0], embed_dim])
return emb
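

if __name__ == "__main__":
    # Minimal sanity check for the embedding helper; the dimensions here are illustrative only.
    t = torch.randint(0, 1000, (16,)).float()
    emb = get_timestep_embedding(64, t, t.device)
    print(emb.shape)  # torch.Size([16, 64])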