LSM

Runtime error

App Files Files Community

LSM / src /datasets_preprocess /scannet_preprocess.py

kairunwen

Update Code

57746f1 5 months ago

raw

history blame contribute delete

8 kB

	import os
	import numpy as np
	import cv2
	import torch
	import torch.multiprocessing as mp

	def process_scene_on_gpu(gpu_id, scene_names, data_root, output_queue):
	    """Process a batch of scenes in one worker process bound to one GPU.

	    For every scene the image pairs and valid image ids are computed with
	    ``process_scene`` and cached in ``<data_root>/<scene>/scene_data.npz``.
	    A scene that already has a cache file is not recomputed; its cached
	    arrays are loaded instead so that it still shows up in the aggregate.

	    Args:
	        gpu_id: Index of the CUDA device selected for this worker.
	        scene_names: Scene folder names (relative to ``data_root``).
	        data_root: Directory that contains one folder per scene.
	        output_queue: Queue that receives exactly one
	            ``(local_pairs, local_images)`` tuple; both are dicts keyed by
	            scene name.

	    Raises:
	        Whatever ``process_scene`` or the file I/O raises. The partial
	        result gathered so far is still put on ``output_queue`` first.
	    """
	    local_pairs = {}
	    local_images = {}

	    try:
	        torch.cuda.set_device(gpu_id)

	        for scene_name in scene_names:
	            save_path = os.path.join(data_root, scene_name, "scene_data.npz")
	            if os.path.exists(save_path):
	                print(f"Scene {scene_name} already processed, skipping")
	                # Reuse the cached result so the aggregate stays complete
	                # when the script is resumed.
	                with np.load(save_path) as cached:
	                    pairs, images = cached["pairs"], cached["images"]
	            else:
	                pairs, images = process_scene(data_root, scene_name)
	                np.savez_compressed(save_path, pairs=pairs, images=images)

	            local_pairs[scene_name] = pairs
	            local_images[scene_name] = images
	    finally:
	        # The parent waits for one message per worker; always deliver it,
	        # even on failure, so the parent cannot block forever.
	        output_queue.put((local_pairs, local_images))

	def preprocess_scannet(data_root, threads_per_gpu=4):
	    """Generate image pairs for every scene folder under ``data_root``.

	    The scenes are split round-robin over ``num_gpus * threads_per_gpu``
	    worker processes (despite the parameter name these are processes, not
	    threads). The per-worker results are merged and written to
	    ``scannet_image_pairs.npz`` and ``scannet_images.npz`` in ``data_root``.

	    Args:
	        data_root: Directory that contains one sub-directory per scene.
	        threads_per_gpu: Number of worker processes started per CUDA device.

	    Returns:
	        ``(all_pairs, all_images)``: two dicts keyed by scene name.
	    """
	    import queue  # local import: only the queue.Empty exception is needed

	    scene_names = [
	        folder for folder in os.listdir(data_root)
	        if os.path.isdir(os.path.join(data_root, folder))
	    ]
	    num_gpus = torch.cuda.device_count()
	    total_threads = num_gpus * threads_per_gpu

	    # Distribute the scenes evenly (round-robin) over all workers
	    scenes_per_thread = [scene_names[i::total_threads] for i in range(total_threads)]

	    output_queue = mp.Queue()
	    processes = []

	    # Start several worker processes for each GPU
	    for gpu_id in range(num_gpus):
	        for thread_id in range(threads_per_gpu):
	            process_id = gpu_id * threads_per_gpu + thread_id
	            p = mp.Process(
	                target=process_scene_on_gpu,
	                args=(gpu_id, scenes_per_thread[process_id], data_root, output_queue)
	            )
	            p.start()
	            processes.append(p)

	    # Collect one result per worker. Results are read before join() so a
	    # worker is never blocked on a full queue pipe while we wait for it.
	    all_pairs = {}
	    all_images = {}
	    received = 0
	    while received < len(processes):
	        # Sample liveness *before* waiting: on a normal exit a worker
	        # flushes its queued result before terminating, so if nobody was
	        # alive and the wait still times out, nothing more can arrive.
	        any_alive = any(p.is_alive() for p in processes)
	        try:
	            local_pairs, local_images = output_queue.get(timeout=1.0)
	        except queue.Empty:
	            if any_alive:
	                continue
	            print(f"Warning: only {received} of {len(processes)} workers reported results")
	            break
	        all_pairs.update(local_pairs)
	        all_images.update(local_images)
	        received += 1

	    # Wait for all processes to complete
	    for p in processes:
	        p.join()

	    # Save to npz file
	    np.savez_compressed(os.path.join(data_root, "scannet_image_pairs.npz"), **all_pairs)
	    np.savez_compressed(os.path.join(data_root, "scannet_images.npz"), **all_images)

	    # Report the number of image pairs summed over all scenes
	    total_pairs = sum(len(pairs) for pairs in all_pairs.values())
	    print(f"Total number of image pairs: {total_pairs}")
	    return all_pairs, all_images

	def process_scene(data_root, scene_name):
	    """Build the image pairs of one scene.

	    Lists the ``.jpg`` files in ``<data_root>/<scene_name>/images``, drops
	    every image whose camera pose fails ``is_valid_c2w`` and hands the
	    remaining image ids to ``generate_image_pairs``.

	    Args:
	        data_root: Directory that contains the scene folders.
	        scene_name: Name of the scene folder to process.

	    Returns:
	        ``(pairs, valid_images)``: the generated pairs and the sorted list
	        of image ids (file names without extension) with a valid pose.
	    """
	    images_dir = os.path.join(data_root, scene_name, "images")
	    images = sorted(
	        os.path.splitext(entry)[0]
	        for entry in os.listdir(images_dir)
	        if entry.endswith(".jpg")
	    )

	    # Keep only the images whose camera-to-world matrix is usable
	    valid_images = []
	    for image in images:
	        c2w = load_image(data_root, scene_name, image)[1]
	        if not is_valid_c2w(c2w):
	            print(f"Invalid c2w for image {image} in scene {scene_name}")
	            continue
	        valid_images.append(image)

	    # Pair generation settings: window of 50 frames, 5 IoU sub-intervals
	    pairs = generate_image_pairs(
	        data_root,
	        scene_name,
	        valid_images,
	        slide_window=50,
	        num_sub_intervals=5,
	    )
	    print(f"Scene {scene_name} has {len(pairs)} image pairs and {len(valid_images)} valid images out of {len(images)} total images")
	    return pairs, valid_images

	def is_valid_c2w(c2w):
	    """Return True when ``c2w`` contains neither infinities nor NaNs."""
	    # A value is finite exactly when it is neither inf nor NaN.
	    return bool(np.all(np.isfinite(c2w)))

	def generate_image_pairs(data_root, scene_name, images, slide_window, num_sub_intervals=3):
	    """Select image pairs whose mutual overlap spreads over an IoU range.

	    The IoU range [0.3, 0.8] is split into ``num_sub_intervals`` equal
	    sub-intervals. For each image ``i`` the following images inside the
	    sliding window are scanned in order, and at most one partner is kept
	    per sub-interval. The overlap of a pair is the mean of the two
	    directional values returned by ``calculate_iou``.

	    Args:
	        data_root: Directory that contains the scene folders.
	        scene_name: Name of the scene the images belong to.
	        images: Ordered list of image ids accepted by ``load_image``.
	        slide_window: Window size; image ``i`` is compared with images
	            ``i + 1`` .. ``i + slide_window - 1``.
	        num_sub_intervals: Number of equal IoU sub-intervals.

	    Returns:
	        List of ``(i, j, mean_iou)`` tuples where ``i`` and ``j`` index
	        into ``images``.
	    """
	    pairs = []
	    n = len(images)

	    # Define IOU sub-intervals
	    iou_range = (0.3, 0.8)
	    sub_interval_size = (iou_range[1] - iou_range[0]) / num_sub_intervals
	    sub_intervals = [
	        (iou_range[0] + i * sub_interval_size, iou_range[0] + (i + 1) * sub_interval_size)
	        for i in range(num_sub_intervals)
	    ]

	    for i in range(n):
	        # Keep track of whether a pair has been added for each sub-interval
	        interval_selected = [False] * num_sub_intervals
	        # Data of images[i]; loaded once on first use instead of once per
	        # partner, and not at all when there is no partner to compare.
	        reference = None

	        for j in range(i + 1, min(i + slide_window, n)):
	            # Break early if all sub-intervals have been selected
	            if all(interval_selected):
	                break

	            # Load image pair
	            if reference is None:
	                reference = load_image(data_root, scene_name, images[i])
	            depth1, c2w1, K1 = reference
	            depth2, c2w2, K2 = load_image(data_root, scene_name, images[j])

	            # Calculate mean IoU. The averaging stays inside the try so a
	            # non-numeric IoU result is reported and skipped instead of
	            # aborting the whole scene.
	            try:
	                iou_1 = calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2)
	                iou_2 = calculate_iou(depth2, c2w2, K2, depth1, c2w1, K1)
	                mean_iou = (iou_1 + iou_2) / 2
	            except Exception as e:
	                print(f"Error calculating IoU for images {images[i]} and {images[j]} in scene {scene_name}: {str(e)}")
	                continue

	            # Check which sub-interval the mean IoU falls into
	            for idx, (lower, upper) in enumerate(sub_intervals):
	                if lower <= mean_iou <= upper and not interval_selected[idx]:
	                    pairs.append((i, j, mean_iou))
	                    interval_selected[idx] = True  # Mark this interval as selected
	                    break  # At most one sub-interval is filled per pair

	    return pairs


	def load_image(data_root, scene_name, image_id):
	    """Load the depth map and camera parameters of one image.

	    Args:
	        data_root: Directory that contains the scene folders.
	        scene_name: Name of the scene folder.
	        image_id: Image file name without extension.

	    Returns:
	        ``(depth, c2w, K)``: the depth map as float32 divided by 1000
	        (presumably millimetres to metres - confirm against the dataset),
	        the ``camera_pose`` array and the ``camera_intrinsics`` array
	        stored in the image's ``.npz`` file.

	    Raises:
	        OSError: If the depth image cannot be read.
	    """
	    # load depthmap
	    depth_path = f"{data_root}/{scene_name}/depths/{image_id}.png"
	    raw_depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
	    if raw_depth is None:
	        # cv2.imread signals failure by returning None instead of raising
	        raise OSError(f"Failed to read depth map: {depth_path}")
	    depth = raw_depth.astype(np.float32) / 1000.0

	    # load camera parameters; the context manager closes the archive
	    meta_path = f"{data_root}/{scene_name}/images/{image_id}.npz"
	    with np.load(meta_path) as meta:
	        c2w = meta['camera_pose']
	        K = meta['camera_intrinsics']
	    return depth, c2w, K

	# Unproject depthmap to point cloud and project to another camera
	def calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2):
	    """Estimate how much of view 1 is also visible in view 2.

	    Every pixel of ``depth1`` with a positive depth is unprojected to a 3D
	    point, moved to world space with ``c2w1`` and projected into camera 2.
	    A projected point overlaps when it lies in front of camera 2, inside
	    image 2, and its depth agrees with ``depth2`` within 0.1 (10 cm when
	    the depth maps are in metres).

	    Args:
	        depth1: 2D numpy depth map of view 1; values <= 0 mean "no depth".
	        c2w1: Numpy camera-to-world matrix of view 1 (rotation in
	            ``[:3, :3]``, translation in ``[:3, 3]``).
	        K1: 3x3 numpy intrinsics of view 1.
	        depth2: 2D numpy depth map of view 2.
	        c2w2: Numpy camera-to-world matrix of view 2.
	        K2: 3x3 numpy intrinsics of view 2.

	    Returns:
	        The IoU as a Python float. ``0.0`` is returned when the rotation of
	        ``c2w2`` is singular or when the union is empty.
	    """
	    # Run on the GPU when one is available, otherwise fall back to the CPU
	    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	    def to_device(array):
	        # Move data to the compute device and ensure float32 dtype
	        return torch.from_numpy(array).to(device).float()

	    depth1, depth2 = to_device(depth1), to_device(depth2)
	    c2w1, c2w2 = to_device(c2w1), to_device(c2w2)
	    K1, K2 = to_device(K1), to_device(K2)

	    # Image dimensions: view 1 defines the pixel grid, view 2 the bounds
	    h, w = depth1.shape
	    h2, w2 = depth2.shape

	    # Homogeneous pixel coordinates (3, h*w) in row-major order, so that
	    # column k matches depth1.flatten()[k]
	    ys = torch.arange(h, device=device, dtype=torch.float32).repeat_interleave(w)
	    xs = torch.arange(w, device=device, dtype=torch.float32).repeat(h)
	    pixels = torch.stack((xs, ys, torch.ones_like(xs)), dim=0)

	    # Unproject pixels to 3D points in the frame of camera 1
	    depth1_flat = depth1.flatten()
	    pixels_3d = (torch.linalg.inv(K1) @ pixels) * depth1_flat.unsqueeze(0)

	    # Transform 3D points to world coordinates
	    pixels_world = c2w1[:3, :3] @ pixels_3d + c2w1[:3, 3:4]

	    # A singular rotation cannot be inverted; report "no overlap" with the
	    # same float type as every other return path
	    if torch.det(c2w2[:3, :3]) == 0:
	        return 0.0

	    # Project world points to second camera
	    pixels_cam2 = torch.linalg.inv(c2w2[:3, :3]) @ (pixels_world - c2w2[:3, 3:4])
	    pixels_img2 = K2 @ pixels_cam2

	    # Normalize homogeneous coordinates
	    pixels_img2 = (pixels_img2[:2] / pixels_img2[2]).T

	    # Keep points that fall inside image 2, lie in front of camera 2 and
	    # come from a pixel with a measured depth. Without the last two tests,
	    # mirrored projections and depth-less pixels would inflate the union.
	    valid_mask = (
	        (pixels_img2[:, 0] >= 0) & (pixels_img2[:, 0] < w2)
	        & (pixels_img2[:, 1] >= 0) & (pixels_img2[:, 1] < h2)
	        & (pixels_cam2[2] > 0)
	        & (depth1_flat > 0)
	    )

	    pixels_img2 = pixels_img2[valid_mask].long()

	    # Compare depths
	    projected_depth = pixels_cam2[2, valid_mask]
	    actual_depth = depth2[pixels_img2[:, 1], pixels_img2[:, 0]]

	    depth_diff = torch.abs(projected_depth - actual_depth)
	    depth_threshold = 0.1  # 10cm threshold

	    overlap_mask = depth_diff < depth_threshold

	    # Calculate IoU
	    intersection = torch.sum(overlap_mask)
	    union = torch.sum(valid_mask) + torch.sum(depth2 > 0) - intersection

	    if union > 0:
	        return (intersection.float() / union.float()).item()
	    return 0.0

	if __name__ == "__main__":
	    # The number of worker processes per GPU is set through threads_per_gpu
	    preprocess_scannet("data/scannet_processed", threads_per_gpu=12)