# Page-capture residue (not program code): "Spaces: Runtime error"
import os
import queue

import cv2
import numpy as np
import torch
import torch.multiprocessing as mp
def process_scene_on_gpu(gpu_id, scene_names, data_root, output_queue):
    """Worker entry point: process the assigned scenes on one GPU.

    Each scene is processed with ``process_scene`` and its result is cached
    in ``<data_root>/<scene_name>/scene_data.npz``. When that cache file
    already exists the scene is not recomputed; its cached arrays are loaded
    instead so the scene is still part of the reported results.

    Args:
        gpu_id: Index of the CUDA device made current for this process.
        scene_names: Names of the scene directories assigned to this worker.
        data_root: Dataset root directory containing one folder per scene.
        output_queue: Queue that receives exactly one
            ``(pairs_by_scene, images_by_scene)`` tuple from this worker,
            even when the worker fails part-way through.
    """
    local_pairs = {}
    local_images = {}
    try:
        torch.cuda.set_device(gpu_id)
        for scene_name in scene_names:
            save_path = os.path.join(data_root, scene_name, "scene_data.npz")
            try:
                if os.path.exists(save_path):
                    print(f"Scene {scene_name} already processed, skipping")
                    # Reuse the cached result instead of dropping the scene
                    # from the aggregated output.
                    with np.load(save_path) as cached:
                        pairs, images = cached["pairs"], cached["images"]
                else:
                    pairs, images = process_scene(data_root, scene_name)
                    np.savez_compressed(save_path, pairs=pairs, images=images)
            except Exception as e:
                # One broken scene must not take down the remaining scenes
                # assigned to this worker.
                print(f"Failed to process scene {scene_name}: {e}")
                continue
            local_pairs[scene_name] = pairs
            local_images[scene_name] = images
    finally:
        # Always report, otherwise the parent would block forever waiting
        # for this worker's result.
        output_queue.put((local_pairs, local_images))
def preprocess_scannet(data_root, threads_per_gpu=4):
    """Generate image pairs for every scene under ``data_root`` in parallel.

    Every sub-directory of ``data_root`` is treated as a scene. Scenes are
    distributed round-robin over ``num_gpus * threads_per_gpu`` worker
    processes running ``process_scene_on_gpu``. The per-scene results are
    merged and written to ``scannet_image_pairs.npz`` and
    ``scannet_images.npz`` inside ``data_root``.

    Args:
        data_root: Dataset root directory containing one folder per scene.
        threads_per_gpu: Number of worker processes started per GPU.

    Returns:
        ``(all_pairs, all_images)``: two dicts keyed by scene name.

    Raises:
        ValueError: If ``threads_per_gpu`` is smaller than 1.
        RuntimeError: If no CUDA device is available.
    """
    if threads_per_gpu < 1:
        raise ValueError(f"threads_per_gpu must be >= 1, got {threads_per_gpu}")

    # Sorted so that the scene-to-worker assignment is reproducible.
    scene_names = sorted(
        folder for folder in os.listdir(data_root)
        if os.path.isdir(os.path.join(data_root, folder))
    )

    num_gpus = torch.cuda.device_count()
    if num_gpus == 0:
        # Without this check no worker would be started and two empty
        # archives would be written silently.
        raise RuntimeError("preprocess_scannet requires at least one CUDA device")
    total_threads = num_gpus * threads_per_gpu

    # Distribute the scenes evenly (round-robin) over all workers.
    scenes_per_thread = [scene_names[i::total_threads] for i in range(total_threads)]

    # CUDA cannot be used safely in a forked child once the parent has touched
    # the CUDA runtime, so the workers are started with the "spawn" method.
    ctx = mp.get_context("spawn")
    output_queue = ctx.Queue()
    processes = []

    # Start several worker processes for each GPU.
    for gpu_id in range(num_gpus):
        for thread_id in range(threads_per_gpu):
            assigned = scenes_per_thread[gpu_id * threads_per_gpu + thread_id]
            if not assigned:
                continue  # nothing to do for this slot; do not start a worker
            p = ctx.Process(
                target=process_scene_on_gpu,
                args=(gpu_id, assigned, data_root, output_queue),
            )
            p.start()
            processes.append(p)

    # Collect one result per started worker. The queue is polled with a
    # timeout so that a worker that died without reporting cannot block the
    # parent forever.
    all_pairs = {}
    all_images = {}
    pending = len(processes)
    while pending:
        try:
            local_pairs, local_images = output_queue.get(timeout=5)
        except queue.Empty:
            if any(p.is_alive() for p in processes):
                continue
            # Every worker has exited: pick up a result that may have arrived
            # between the timeout and the liveness check, otherwise give up.
            try:
                local_pairs, local_images = output_queue.get(timeout=1)
            except queue.Empty:
                print(f"Warning: {pending} worker(s) exited without reporting results")
                break
        all_pairs.update(local_pairs)
        all_images.update(local_images)
        pending -= 1

    # Wait for all processes to complete (after draining the queue, so that
    # workers are never blocked on an unread result).
    for p in processes:
        p.join()

    # Save to npz files, one array per scene name.
    np.savez_compressed(os.path.join(data_root, "scannet_image_pairs.npz"), **all_pairs)
    np.savez_compressed(os.path.join(data_root, "scannet_images.npz"), **all_images)

    # Report the number of image pairs summed over all scenes.
    total_pairs = sum(len(pairs) for pairs in all_pairs.values())
    print(f"Total number of image pairs: {total_pairs}")
    return all_pairs, all_images
def process_scene(data_root, scene_name):
    """Build the image pairs of a single scene.

    Lists the ``.jpg`` files in ``<data_root>/<scene_name>/images``, keeps
    the images whose camera pose passes ``is_valid_c2w`` and hands them to
    ``generate_image_pairs`` (sliding window of 50, 5 IoU sub-intervals).

    Returns:
        ``(pairs, valid_images)``: the generated pairs and the sorted list of
        image ids (file names without extension) with a valid pose.
    """
    image_dir = os.path.join(data_root, scene_name, "images")
    all_ids = sorted(
        os.path.splitext(file_name)[0]
        for file_name in os.listdir(image_dir)
        if file_name.endswith(".jpg")
    )

    # Keep only the images whose camera-to-world matrix is usable.
    usable_ids = []
    for image_id in all_ids:
        _depth, pose, _intrinsics = load_image(data_root, scene_name, image_id)
        if not is_valid_c2w(pose):
            print(f"Invalid c2w for image {image_id} in scene {scene_name}")
            continue
        usable_ids.append(image_id)

    # Generate the image pairs.
    window, buckets = 50, 5
    scene_pairs = generate_image_pairs(data_root, scene_name, usable_ids, window, buckets)
    print(f"Scene {scene_name} has {len(scene_pairs)} image pairs and {len(usable_ids)} valid images out of {len(all_ids)} total images")
    return scene_pairs, usable_ids
def is_valid_c2w(c2w):
    """Return True when ``c2w`` contains neither infinities nor NaNs."""
    every_entry_finite = np.isfinite(c2w).all()
    return bool(every_entry_finite)
def generate_image_pairs(data_root, scene_name, images, slide_window, num_sub_intervals=3):
    """Select image pairs whose overlap is spread over several IoU ranges.

    The IoU range [0.3, 0.8] is split into ``num_sub_intervals`` equally
    sized sub-intervals. For every image ``i`` the following images ``j``
    with ``i < j < i + slide_window`` are visited in order; the mean of the
    two directional IoUs is computed and the first pair falling into a not
    yet used sub-interval is kept, so each image contributes at most one
    pair per sub-interval.

    Args:
        data_root: Dataset root directory.
        scene_name: Name of the scene directory.
        images: Ordered list of image ids of the scene.
        slide_window: Size of the look-ahead window, counted in images.
        num_sub_intervals: Number of IoU sub-intervals.

    Returns:
        List of ``(i, j, mean_iou)`` tuples; ``i`` and ``j`` are indices into
        ``images``.
    """
    pairs = []
    n = len(images)

    # Define IoU sub-intervals.
    iou_range = (0.3, 0.8)
    sub_interval_size = (iou_range[1] - iou_range[0]) / num_sub_intervals
    sub_intervals = [
        (iou_range[0] + k * sub_interval_size, iou_range[0] + (k + 1) * sub_interval_size)
        for k in range(num_sub_intervals)
    ]

    for i in range(n):
        window_end = min(i + slide_window, n)
        if i + 1 >= window_end:
            continue  # no partner image inside the window

        # The reference image is the same for the whole window, so it is read
        # from disk once instead of once per candidate partner.
        depth1, c2w1, K1 = load_image(data_root, scene_name, images[i])

        # Keep track of whether a pair has been added for each sub-interval.
        interval_selected = [False] * num_sub_intervals

        for j in range(i + 1, window_end):
            # Break early if all sub-intervals have been selected.
            if all(interval_selected):
                break

            depth2, c2w2, K2 = load_image(data_root, scene_name, images[j])

            # Calculate the mean IoU. The averaging is part of the guarded
            # region so that a non-numeric IoU result skips this pair instead
            # of aborting the whole scene.
            try:
                iou_1 = calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2)
                iou_2 = calculate_iou(depth2, c2w2, K2, depth1, c2w1, K1)
                mean_iou = (iou_1 + iou_2) / 2
            except Exception as e:
                print(f"Error calculating IoU for images {images[i]} and {images[j]} in scene {scene_name}: {str(e)}")
                continue

            # Check which sub-interval the mean IoU falls into.
            for idx, (lower, upper) in enumerate(sub_intervals):
                if lower <= mean_iou <= upper and not interval_selected[idx]:
                    pairs.append((i, j, mean_iou))
                    interval_selected[idx] = True  # mark this interval as used
                    break  # at most one sub-interval per pair
    return pairs
def load_image(data_root, scene_name, image_id):
    """Load the depth map and the camera parameters of one image.

    The depth map is read from
    ``<data_root>/<scene_name>/depths/<image_id>.png`` and the camera
    parameters from ``<data_root>/<scene_name>/images/<image_id>.npz``.

    Returns:
        ``(depth, c2w, K)``: ``depth`` is the PNG content as float32 divided
        by 1000 (presumably millimetres converted to metres; confirm against
        the dataset export), ``c2w`` is the ``camera_pose`` entry and ``K``
        the ``camera_intrinsics`` entry of the metadata archive.

    Raises:
        OSError: If the depth map is missing or cannot be decoded.
    """
    # Load the depth map. cv2.imread signals failure by returning None
    # instead of raising, so it is checked explicitly.
    depth_path = f"{data_root}/{scene_name}/depths/{image_id}.png"
    raw_depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED)
    if raw_depth is None:
        raise OSError(f"Depth map is missing or unreadable: {depth_path}")
    depth = raw_depth.astype(np.float32) / 1000.0

    # Load the camera parameters; the context manager closes the archive
    # handle as soon as both arrays have been read.
    meta_path = f"{data_root}/{scene_name}/images/{image_id}.npz"
    with np.load(meta_path) as meta:
        c2w = meta['camera_pose']
        K = meta['camera_intrinsics']
    return depth, c2w, K
# Unproject depthmap to point cloud and project to another camera
def calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2, depth_threshold=0.1):
    """Estimate how much of image 1 is also visible in image 2.

    Every pixel of image 1 with a valid (positive) depth is unprojected with
    ``K1``, moved to world space with ``c2w1``, transformed into the frame of
    camera 2 and projected with ``K2``. A projected point counts as
    overlapping when it lands inside image 2, lies in front of camera 2 and
    its depth agrees with ``depth2`` at that pixel within ``depth_threshold``.

    The result is ``intersection / union`` where ``union`` is the number of
    valid projections plus the number of positive-depth pixels of image 2
    minus the intersection. The measure is directional; callers that need a
    symmetric value average both directions.

    Args:
        depth1, depth2: 2-D NumPy depth maps of image 1 and image 2.
        c2w1, c2w2: NumPy camera-to-world matrices (rows/cols 0-2 are used
            as rotation, column 3 as translation).
        K1, K2: 3x3 NumPy intrinsic matrices.
        depth_threshold: Maximum absolute depth difference for a point to
            count as overlapping (same unit as the depth maps).

    Returns:
        The IoU as a Python float; ``0.0`` when the rotation of ``c2w2`` is
        singular or the union is empty.
    """
    # Use the current CUDA device when there is one, otherwise run on the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def to_tensor(array):
        return torch.from_numpy(array).to(device).float()

    depth1, depth2 = to_tensor(depth1), to_tensor(depth2)
    c2w1, c2w2 = to_tensor(c2w1), to_tensor(c2w2)
    K1, K2 = to_tensor(K1), to_tensor(K2)

    # The two images may differ in size: pixels are generated for image 1 and
    # bounds-checked against image 2.
    h1, w1 = depth1.shape
    h2, w2 = depth2.shape

    # Homogeneous pixel coordinates (x, y, 1) of image 1 in row-major order,
    # shape (3, h1 * w1).
    xs = torch.arange(w1, device=device, dtype=torch.float32).repeat(h1)
    ys = torch.arange(h1, device=device, dtype=torch.float32).repeat_interleave(w1)
    pixels = torch.stack((xs, ys, torch.ones_like(xs)), dim=0)

    # Unproject pixels to 3D points in the frame of camera 1.
    depth1_flat = depth1.flatten()
    points_cam1 = (torch.linalg.inv(K1) @ pixels) * depth1_flat.unsqueeze(0)

    # Transform 3D points to world coordinates.
    points_world = c2w1[:3, :3] @ points_cam1 + c2w1[:3, 3:4]

    # A singular rotation cannot be inverted; report "no overlap" using the
    # same return type as the regular path.
    rotation2 = c2w2[:3, :3]
    if torch.det(rotation2) == 0:
        return 0.0

    # Project world points into the second camera.
    points_cam2 = torch.linalg.inv(rotation2) @ (points_world - c2w2[:3, 3:4])
    depth_in_cam2 = points_cam2[2]
    projected = K2 @ points_cam2

    # Normalize homogeneous coordinates; divisions by zero yield inf/NaN,
    # which fail every bounds comparison below.
    uv = (projected[:2] / projected[2]).T

    # Keep points that come from a valid depth, lie in front of camera 2 and
    # land inside image 2.
    valid_mask = (
        (depth1_flat > 0)
        & (depth_in_cam2 > 0)
        & (uv[:, 0] >= 0) & (uv[:, 0] < w2)
        & (uv[:, 1] >= 0) & (uv[:, 1] < h2)
    )
    uv = uv[valid_mask].long()

    # Compare the projected depth with the depth measured in image 2; pixels
    # of image 2 without a valid depth never count as overlap.
    projected_depth = depth_in_cam2[valid_mask]
    actual_depth = depth2[uv[:, 1], uv[:, 0]]
    depth_diff = torch.abs(projected_depth - actual_depth)
    overlap_mask = (depth_diff < depth_threshold) & (actual_depth > 0)

    # Calculate IoU.
    intersection = torch.sum(overlap_mask)
    union = torch.sum(valid_mask) + torch.sum(depth2 > 0) - intersection
    if union <= 0:
        return 0.0
    return (intersection.float() / union.float()).item()
if __name__ == "__main__":
    data_root = "data/scannet_processed"
    # The number of worker processes per GPU is set through the
    # ``threads_per_gpu`` argument.
    preprocess_scannet(threads_per_gpu=12, data_root=data_root)