# File-viewer residue from the upstream repository page, preserved as a comment:
# author Vincentqyw, commit c74a070 ("fix: roma"), file size 23.6 kB.
"""
Implementation of the line segment detection module.
"""
import math
import numpy as np
import torch
class LineSegmentDetectionModule(object):
    """Module extracting line segments from junctions and line heatmaps."""

    def __init__(
        self,
        detect_thresh,
        num_samples=64,
        sampling_method="local_max",
        inlier_thresh=0.0,
        heatmap_low_thresh=0.15,
        heatmap_high_thresh=0.2,
        max_local_patch_radius=3,
        lambda_radius=2.0,
        use_candidate_suppression=False,
        nms_dist_tolerance=3.0,
        use_heatmap_refinement=False,
        heatmap_refine_cfg=None,
        use_junction_refinement=False,
        junction_refine_cfg=None,
    ):
        """
        Parameters:
            detect_thresh: The probability threshold for mean activation (0. ~ 1.)
            num_samples: Number of sampling locations along the line segments.
            sampling_method: Sampling method on locations ("bilinear" or "local_max").
            inlier_thresh: The min inlier ratio to satisfy (0. ~ 1.) => 0. means no threshold.
            heatmap_low_thresh: The lowest threshold for the pixel to be considered as candidate in junction recovery.
            heatmap_high_thresh: The higher threshold for NMS in junction recovery.
            max_local_patch_radius: The max patch to be considered in local maximum search.
            lambda_radius: The lambda factor in linear local maximum search formulation.
            use_candidate_suppression: Apply candidate suppression to break long segments into short sub-segments.
            nms_dist_tolerance: The distance tolerance for nms. Decides whether the junctions are on the line.
            use_heatmap_refinement: Use heatmap refinement method or not.
            heatmap_refine_cfg: The configs for heatmap refinement methods.
            use_junction_refinement: Use junction refinement method or not.
            junction_refine_cfg: The configs for junction refinement methods.

        Raises:
            ValueError: if a refinement flag is enabled but its config dict is None.
        """
        # Line detection parameters
        self.detect_thresh = detect_thresh
        # Line sampling parameters
        self.num_samples = num_samples
        self.sampling_method = sampling_method
        self.inlier_thresh = inlier_thresh
        self.local_patch_radius = max_local_patch_radius
        self.lambda_radius = lambda_radius
        # Detecting junctions on the boundary parameters
        self.low_thresh = heatmap_low_thresh
        self.high_thresh = heatmap_high_thresh
        # Pre-compute the linspace sampler (numpy and torch versions of the
        # same [0, 1] sampling positions along a segment)
        self.sampler = np.linspace(0, 1, self.num_samples)
        self.torch_sampler = torch.linspace(0, 1, self.num_samples)
        # Long line segment suppression configuration
        self.use_candidate_suppression = use_candidate_suppression
        self.nms_dist_tolerance = nms_dist_tolerance
        # Heatmap refinement configuration
        self.use_heatmap_refinement = use_heatmap_refinement
        self.heatmap_refine_cfg = heatmap_refine_cfg
        if self.use_heatmap_refinement and self.heatmap_refine_cfg is None:
            raise ValueError("[Error] Missing heatmap refinement config.")
        # Junction refinement configuration
        self.use_junction_refinement = use_junction_refinement
        self.junction_refine_cfg = junction_refine_cfg
        if self.use_junction_refinement and self.junction_refine_cfg is None:
            raise ValueError("[Error] Missing junction refinement config.")
def convert_inputs(self, inputs, device):
"""Convert inputs to desired torch tensor."""
if isinstance(inputs, np.ndarray):
outputs = torch.tensor(inputs, dtype=torch.float32, device=device)
elif isinstance(inputs, torch.Tensor):
outputs = inputs.to(torch.float32).to(device)
else:
raise ValueError(
"[Error] Inputs must either be torch tensor or numpy ndarray."
)
return outputs
    def detect(self, junctions, heatmap, device=torch.device("cpu")):
        """Main function performing line segment detection.

        Every junction pair is a candidate segment (optionally pruned by
        candidate suppression); a candidate is kept when the mean heatmap
        activation sampled along it exceeds ``detect_thresh`` and, when
        ``inlier_thresh`` > 0, enough individual samples also pass it.

        Parameters:
            junctions: [N, 2] junction coordinates (coordinate 0 indexes the
                heatmap's first axis, coordinate 1 the second).
            heatmap: [H, W] (or [H, W, C]) line activation map.
            device: torch device for all intermediate computation.

        Returns:
            (line_map_pred, junctions, heatmap): symmetric [N, N] 0/1
            adjacency matrix, plus the possibly-refined junctions and heatmap.
        """
        # Convert inputs to torch tensor
        junctions = self.convert_inputs(junctions, device=device)
        heatmap = self.convert_inputs(heatmap, device=device)
        # Perform the heatmap refinement (activation normalization), either
        # globally or per overlapping block
        if self.use_heatmap_refinement:
            if self.heatmap_refine_cfg["mode"] == "global":
                heatmap = self.refine_heatmap(
                    heatmap,
                    self.heatmap_refine_cfg["ratio"],
                    self.heatmap_refine_cfg["valid_thresh"],
                )
            elif self.heatmap_refine_cfg["mode"] == "local":
                heatmap = self.refine_heatmap_local(
                    heatmap,
                    self.heatmap_refine_cfg["num_blocks"],
                    self.heatmap_refine_cfg["overlap_ratio"],
                    self.heatmap_refine_cfg["ratio"],
                    self.heatmap_refine_cfg["valid_thresh"],
                )
        # Initialize empty line map
        num_junctions = junctions.shape[0]
        line_map_pred = torch.zeros(
            [num_junctions, num_junctions], device=device, dtype=torch.int32
        )
        # Stop if there are not enough junctions to form any segment
        if num_junctions < 2:
            return line_map_pred, junctions, heatmap
        # Generate the candidate map: strict upper triangle lists each
        # unordered junction pair exactly once
        candidate_map = torch.triu(
            torch.ones(
                [num_junctions, num_junctions], device=device, dtype=torch.int32
            ),
            diagonal=1,
        )
        # Fetch the image boundary
        if len(heatmap.shape) > 2:
            H, W, _ = heatmap.shape
        else:
            H, W = heatmap.shape
        # Optionally perform candidate filtering (break long overlapping lines)
        if self.use_candidate_suppression:
            candidate_map = self.candidate_suppression(junctions, candidate_map)
        # Fetch the candidates as [num_cand, 2] (start_idx, end_idx) pairs
        candidate_index_map = torch.where(candidate_map)
        candidate_index_map = torch.cat(
            [candidate_index_map[0][..., None], candidate_index_map[1][..., None]],
            dim=-1,
        )
        # Get the corresponding start and end junctions
        candidate_junc_start = junctions[candidate_index_map[:, 0], :]
        candidate_junc_end = junctions[candidate_index_map[:, 1], :]
        # Get the sampling locations (num_cand x num_samples) by linear
        # interpolation between the two endpoints
        sampler = self.torch_sampler.to(device)[None, ...]
        cand_samples_h = candidate_junc_start[:, 0:1] * sampler + candidate_junc_end[
            :, 0:1
        ] * (1 - sampler)
        cand_samples_w = candidate_junc_start[:, 1:2] * sampler + candidate_junc_end[
            :, 1:2
        ] * (1 - sampler)
        # Clip to image boundary
        cand_h = torch.clamp(cand_samples_h, min=0, max=H - 1)
        cand_w = torch.clamp(cand_samples_w, min=0, max=W - 1)
        # Local maximum search
        if self.sampling_method == "local_max":
            # Compute normalized segment lengths (image diagonal = 1)
            segments_length = torch.sqrt(
                torch.sum(
                    (
                        candidate_junc_start.to(torch.float32)
                        - candidate_junc_end.to(torch.float32)
                    )
                    ** 2,
                    dim=-1,
                )
            )
            normalized_seg_length = segments_length / (((H**2) + (W**2)) ** 0.5)
            # Perform local max search in chunks to bound peak memory
            num_cand = cand_h.shape[0]
            group_size = 10000
            if num_cand > group_size:
                num_iter = math.ceil(num_cand / group_size)
                sampled_feat_lst = []
                for iter_idx in range(num_iter):
                    if not iter_idx == num_iter - 1:
                        cand_h_ = cand_h[
                            iter_idx * group_size : (iter_idx + 1) * group_size, :
                        ]
                        cand_w_ = cand_w[
                            iter_idx * group_size : (iter_idx + 1) * group_size, :
                        ]
                        normalized_seg_length_ = normalized_seg_length[
                            iter_idx * group_size : (iter_idx + 1) * group_size
                        ]
                    else:
                        # Last chunk takes whatever remains
                        cand_h_ = cand_h[iter_idx * group_size :, :]
                        cand_w_ = cand_w[iter_idx * group_size :, :]
                        normalized_seg_length_ = normalized_seg_length[
                            iter_idx * group_size :
                        ]
                    sampled_feat_ = self.detect_local_max(
                        heatmap, cand_h_, cand_w_, H, W, normalized_seg_length_, device
                    )
                    sampled_feat_lst.append(sampled_feat_)
                sampled_feat = torch.cat(sampled_feat_lst, dim=0)
            else:
                sampled_feat = self.detect_local_max(
                    heatmap, cand_h, cand_w, H, W, normalized_seg_length, device
                )
        # Bilinear sampling
        elif self.sampling_method == "bilinear":
            # Perform bilinear sampling
            sampled_feat = self.detect_bilinear(heatmap, cand_h, cand_w, H, W, device)
        else:
            raise ValueError("[Error] Unknown sampling method.")
        # [Simple threshold detection]
        # detection_results is a boolean mask over all candidates
        detection_results = torch.mean(sampled_feat, dim=-1) > self.detect_thresh
        # [Inlier threshold detection]: additionally require that a minimum
        # fraction of individual samples clears the detection threshold
        if self.inlier_thresh > 0.0:
            inlier_ratio = (
                torch.sum(sampled_feat > self.detect_thresh, dim=-1).to(torch.float32)
                / self.num_samples
            )
            detection_results_inlier = inlier_ratio >= self.inlier_thresh
            detection_results = detection_results * detection_results_inlier
        # Convert detection results back to a symmetric line_map_pred
        detected_junc_indexes = candidate_index_map[detection_results, :]
        line_map_pred[detected_junc_indexes[:, 0], detected_junc_indexes[:, 1]] = 1
        line_map_pred[detected_junc_indexes[:, 1], detected_junc_indexes[:, 0]] = 1
        # Perform junction refinement (only when something was detected)
        if self.use_junction_refinement and len(detected_junc_indexes) > 0:
            junctions, line_map_pred = self.refine_junction_perturb(
                junctions, line_map_pred, heatmap, H, W, device
            )
        return line_map_pred, junctions, heatmap
def refine_heatmap(self, heatmap, ratio=0.2, valid_thresh=1e-2):
"""Global heatmap refinement method."""
# Grab the top 10% values
heatmap_values = heatmap[heatmap > valid_thresh]
sorted_values = torch.sort(heatmap_values, descending=True)[0]
top10_len = math.ceil(sorted_values.shape[0] * ratio)
max20 = torch.mean(sorted_values[:top10_len])
heatmap = torch.clamp(heatmap / max20, min=0.0, max=1.0)
return heatmap
def refine_heatmap_local(
self, heatmap, num_blocks=5, overlap_ratio=0.5, ratio=0.2, valid_thresh=2e-3
):
"""Local heatmap refinement method."""
# Get the shape of the heatmap
H, W = heatmap.shape
increase_ratio = 1 - overlap_ratio
h_block = round(H / (1 + (num_blocks - 1) * increase_ratio))
w_block = round(W / (1 + (num_blocks - 1) * increase_ratio))
count_map = torch.zeros(heatmap.shape, dtype=torch.int, device=heatmap.device)
heatmap_output = torch.zeros(
heatmap.shape, dtype=torch.float, device=heatmap.device
)
# Iterate through each block
for h_idx in range(num_blocks):
for w_idx in range(num_blocks):
# Fetch the heatmap
h_start = round(h_idx * h_block * increase_ratio)
w_start = round(w_idx * w_block * increase_ratio)
h_end = h_start + h_block if h_idx < num_blocks - 1 else H
w_end = w_start + w_block if w_idx < num_blocks - 1 else W
subheatmap = heatmap[h_start:h_end, w_start:w_end]
if subheatmap.max() > valid_thresh:
subheatmap = self.refine_heatmap(
subheatmap, ratio, valid_thresh=valid_thresh
)
# Aggregate it to the final heatmap
heatmap_output[h_start:h_end, w_start:w_end] += subheatmap
count_map[h_start:h_end, w_start:w_end] += 1
heatmap_output = torch.clamp(heatmap_output / count_map, max=1.0, min=0.0)
return heatmap_output
    def candidate_suppression(self, junctions, candidate_map):
        """Suppress overlapping long lines in the candidate segments.

        A candidate is removed when at least one *other* junction lies close
        to it (within ``nms_dist_tolerance``) and projects inside its extent,
        so long segments are broken into their shorter sub-segments.
        """
        # Define the distance tolerance
        dist_tolerance = self.nms_dist_tolerance
        # Compute distance between junction pairs
        # (num_junc x 1 x 2) - (1 x num_junc x 2) => num_junc x num_junc map
        line_dist_map = (
            torch.sum(
                (torch.unsqueeze(junctions, dim=1) - junctions[None, ...]) ** 2, dim=-1
            )
            ** 0.5
        )
        # Fetch all the "detected lines" (upper triangle => each pair once)
        seg_indexes = torch.where(torch.triu(candidate_map, diagonal=1))
        start_point_idxs = seg_indexes[0]
        end_point_idxs = seg_indexes[1]
        start_points = junctions[start_point_idxs, :]
        end_points = junctions[end_point_idxs, :]
        # Fetch corresponding entries (per-segment lengths)
        line_dists = line_dist_map[start_point_idxs, end_point_idxs]
        # Check whether they are on the line: unit direction of each segment
        dir_vecs = (end_points - start_points) / torch.norm(
            end_points - start_points, dim=-1
        )[..., None]
        # Get the orthogonal distance: offsets of every junction from every
        # segment's start point
        cand_vecs = junctions[None, ...] - start_points.unsqueeze(dim=1)
        cand_vecs_norm = torch.norm(cand_vecs, dim=-1)
        # Check whether they are projected directly onto the segment
        # (projection fraction along the segment must be within [0, 1])
        proj = (
            torch.einsum("bij,bjk->bik", cand_vecs, dir_vecs[..., None])
            / line_dists[..., None, None]
        )
        # proj is num_segs x num_junction x 1
        proj_mask = (proj >= 0) * (proj <= 1)
        # Angle between each offset and the segment direction.
        # NOTE(review): when a junction coincides with a segment's start,
        # cand_vecs_norm is 0 and the division yields NaN; NaN fails the
        # <= tolerance comparison below, so such points are safely excluded.
        cand_angles = torch.acos(
            torch.einsum("bij,bjk->bik", cand_vecs, dir_vecs[..., None])
            / cand_vecs_norm[..., None]
        )
        # Perpendicular distance of each junction to each segment's line
        cand_dists = cand_vecs_norm[..., None] * torch.sin(cand_angles)
        junc_dist_mask = cand_dists <= dist_tolerance
        junc_mask = junc_dist_mask * proj_mask
        # Minus starting points: a segment's own endpoints trivially satisfy
        # both masks, so remove them from the count
        num_segs = start_point_idxs.shape[0]
        junc_counts = torch.sum(junc_mask, dim=[1, 2])
        junc_counts -= junc_mask[..., 0][
            torch.arange(0, num_segs), start_point_idxs
        ].to(torch.int)
        junc_counts -= junc_mask[..., 0][torch.arange(0, num_segs), end_point_idxs].to(
            torch.int
        )
        # Get the invalid candidate mask: any remaining on-segment junction
        # means the candidate spans shorter sub-segments and is suppressed
        final_mask = junc_counts > 0
        candidate_map[start_point_idxs[final_mask], end_point_idxs[final_mask]] = 0
        return candidate_map
    def refine_junction_perturb(self, junctions, line_map_pred, heatmap, H, W, device):
        """Refine the line endpoints in a similar way as in LSD.

        Both endpoints of every detected segment are jointly perturbed over a
        small 4D grid of offsets; the perturbation maximizing the mean
        bilinear heatmap activation along the segment is kept.
        """
        # Get the config
        junction_refine_cfg = self.junction_refine_cfg
        # Fetch refinement parameters
        num_perturbs = junction_refine_cfg["num_perturbs"]
        perturb_interval = junction_refine_cfg["perturb_interval"]
        side_perturbs = (num_perturbs - 1) // 2
        # Fetch the 2D perturb mat: symmetric offsets around 0
        perturb_vec = torch.arange(
            start=-perturb_interval * side_perturbs,
            end=perturb_interval * (side_perturbs + 1),
            step=perturb_interval,
            device=device,
        )
        # NOTE(review): the grid is symmetric and identical in all four
        # dimensions, so the (w, h) naming order does not change the set of
        # candidate perturbations actually tried.
        w1_grid, h1_grid, w2_grid, h2_grid = torch.meshgrid(
            perturb_vec, perturb_vec, perturb_vec, perturb_vec
        )
        perturb_tensor = torch.cat(
            [
                w1_grid[..., None],
                h1_grid[..., None],
                w2_grid[..., None],
                h2_grid[..., None],
            ],
            dim=-1,
        )
        # [num_perturbs^4, 2 endpoints, 2 coordinates]
        perturb_tensor_flat = perturb_tensor.view(-1, 2, 2)
        # Fetch the junctions and line_map
        junctions = junctions.clone()
        line_map = line_map_pred
        # Fetch all the detected lines (upper triangle => each segment once)
        detected_seg_indexes = torch.where(torch.triu(line_map, diagonal=1))
        start_point_idxs = detected_seg_indexes[0]
        end_point_idxs = detected_seg_indexes[1]
        start_points = junctions[start_point_idxs, :]
        end_points = junctions[end_point_idxs, :]
        line_segments = torch.cat(
            [start_points.unsqueeze(dim=1), end_points.unsqueeze(dim=1)], dim=1
        )
        # [num_segments, num_perturbs^4, 2, 2] candidate endpoint positions
        line_segment_candidates = (
            line_segments.unsqueeze(dim=1) + perturb_tensor_flat[None, ...]
        )
        # Clip the boundaries (coordinate 0 is clipped by H, coordinate 1 by W)
        line_segment_candidates[..., 0] = torch.clamp(
            line_segment_candidates[..., 0], min=0, max=H - 1
        )
        line_segment_candidates[..., 1] = torch.clamp(
            line_segment_candidates[..., 1], min=0, max=W - 1
        )
        # Iterate through all the segments
        refined_segment_lst = []
        num_segments = line_segments.shape[0]
        for idx in range(num_segments):
            segment = line_segment_candidates[idx, ...]
            # Get the corresponding start and end junctions
            candidate_junc_start = segment[:, 0, :]
            candidate_junc_end = segment[:, 1, :]
            # Get the sampling locations (num_candidates x num_samples)
            sampler = self.torch_sampler.to(device)[None, ...]
            cand_samples_h = candidate_junc_start[
                :, 0:1
            ] * sampler + candidate_junc_end[:, 0:1] * (1 - sampler)
            cand_samples_w = candidate_junc_start[
                :, 1:2
            ] * sampler + candidate_junc_end[:, 1:2] * (1 - sampler)
            # Clip to image boundary
            cand_h = torch.clamp(cand_samples_h, min=0, max=H - 1)
            cand_w = torch.clamp(cand_samples_w, min=0, max=W - 1)
            # Perform bilinear sampling
            segment_feat = self.detect_bilinear(heatmap, cand_h, cand_w, H, W, device)
            segment_results = torch.mean(segment_feat, dim=-1)
            # Keep the perturbation with the highest mean activation
            max_idx = torch.argmax(segment_results)
            refined_segment_lst.append(segment[max_idx, ...][None, ...])
        # Concatenate back to segments
        refined_segments = torch.cat(refined_segment_lst, dim=0)
        # Convert back to junctions and line_map, deduplicating endpoints
        # shared between refined segments
        junctions_new = torch.cat(
            [refined_segments[:, 0, :], refined_segments[:, 1, :]], dim=0
        )
        junctions_new = torch.unique(junctions_new, dim=0)
        line_map_new = self.segments_to_line_map(junctions_new, refined_segments)
        return junctions_new, line_map_new
def segments_to_line_map(self, junctions, segments):
"""Convert the list of segments to line map."""
# Create empty line map
device = junctions.device
num_junctions = junctions.shape[0]
line_map = torch.zeros([num_junctions, num_junctions], device=device)
# Iterate through every segment
for idx in range(segments.shape[0]):
# Get the junctions from a single segement
seg = segments[idx, ...]
junction1 = seg[0, :]
junction2 = seg[1, :]
# Get index
idx_junction1 = torch.where((junctions == junction1).sum(axis=1) == 2)[0]
idx_junction2 = torch.where((junctions == junction2).sum(axis=1) == 2)[0]
# label the corresponding entries
line_map[idx_junction1, idx_junction2] = 1
line_map[idx_junction2, idx_junction1] = 1
return line_map
def detect_bilinear(self, heatmap, cand_h, cand_w, H, W, device):
"""Detection by bilinear sampling."""
# Get the floor and ceiling locations
cand_h_floor = torch.floor(cand_h).to(torch.long)
cand_h_ceil = torch.ceil(cand_h).to(torch.long)
cand_w_floor = torch.floor(cand_w).to(torch.long)
cand_w_ceil = torch.ceil(cand_w).to(torch.long)
# Perform the bilinear sampling
cand_samples_feat = (
heatmap[cand_h_floor, cand_w_floor]
* (cand_h_ceil - cand_h)
* (cand_w_ceil - cand_w)
+ heatmap[cand_h_floor, cand_w_ceil]
* (cand_h_ceil - cand_h)
* (cand_w - cand_w_floor)
+ heatmap[cand_h_ceil, cand_w_floor]
* (cand_h - cand_h_floor)
* (cand_w_ceil - cand_w)
+ heatmap[cand_h_ceil, cand_w_ceil]
* (cand_h - cand_h_floor)
* (cand_w - cand_w_floor)
)
return cand_samples_feat
def detect_local_max(
self, heatmap, cand_h, cand_w, H, W, normalized_seg_length, device
):
"""Detection by local maximum search."""
# Compute the distance threshold
dist_thresh = 0.5 * (2**0.5) + self.lambda_radius * normalized_seg_length
# Make it N x 64
dist_thresh = torch.repeat_interleave(
dist_thresh[..., None], self.num_samples, dim=-1
)
# Compute the candidate points
cand_points = torch.cat([cand_h[..., None], cand_w[..., None]], dim=-1)
cand_points_round = torch.round(cand_points) # N x 64 x 2
# Construct local patches 9x9 = 81
patch_mask = torch.zeros(
[
int(2 * self.local_patch_radius + 1),
int(2 * self.local_patch_radius + 1),
],
device=device,
)
patch_center = torch.tensor(
[[self.local_patch_radius, self.local_patch_radius]],
device=device,
dtype=torch.float32,
)
H_patch_points, W_patch_points = torch.where(patch_mask >= 0)
patch_points = torch.cat(
[H_patch_points[..., None], W_patch_points[..., None]], dim=-1
)
# Fetch the circle region
patch_center_dist = torch.sqrt(
torch.sum((patch_points - patch_center) ** 2, dim=-1)
)
patch_points = patch_points[patch_center_dist <= self.local_patch_radius, :]
# Shift [0, 0] to the center
patch_points = patch_points - self.local_patch_radius
# Construct local patch mask
patch_points_shifted = (
torch.unsqueeze(cand_points_round, dim=2) + patch_points[None, None, ...]
)
patch_dist = torch.sqrt(
torch.sum(
(torch.unsqueeze(cand_points, dim=2) - patch_points_shifted) ** 2,
dim=-1,
)
)
patch_dist_mask = patch_dist < dist_thresh[..., None]
# Get all points => num_points_center x num_patch_points x 2
points_H = torch.clamp(patch_points_shifted[:, :, :, 0], min=0, max=H - 1).to(
torch.long
)
points_W = torch.clamp(patch_points_shifted[:, :, :, 1], min=0, max=W - 1).to(
torch.long
)
points = torch.cat([points_H[..., None], points_W[..., None]], dim=-1)
# Sample the feature (N x 64 x 81)
sampled_feat = heatmap[points[:, :, :, 0], points[:, :, :, 1]]
# Filtering using the valid mask
sampled_feat = sampled_feat * patch_dist_mask.to(torch.float32)
if len(sampled_feat) == 0:
sampled_feat_lmax = torch.empty(0, 64)
else:
sampled_feat_lmax, _ = torch.max(sampled_feat, dim=-1)
return sampled_feat_lmax