Spaces:

Jiahua0
/

vmem

Build error

App Files Files Community

vmem / extern /CUT3R /datasets_preprocess /preprocess_urbansyn.py

Jiahua0

Upload folder using huggingface_hub

ff47419 verified about 1 month ago

raw

history blame contribute delete

7.9 kB

	#!/usr/bin/env python3
	"""
	Preprocess Script for UrbanSyn Dataset

	This script:
	1. Reads RGB, depth (EXR), and semantic segmentation (class) files from an UrbanSyn dataset directory.
	2. Retrieves camera intrinsics from a JSON metadata file.
	3. Rescales images, depth maps, and masks to a fixed resolution (e.g., 640×480).
	4. Saves processed data (RGB, .npy depth, .png sky mask, and .npz intrinsics) in an organized structure.

	Usage:
	python preprocess_urbansyn.py \
	--input_dir /path/to/data_urbansyn \
	--output_dir /path/to/processed_urbansyn
	"""

	import os
	import json
	import argparse
	import shutil
	from concurrent.futures import ProcessPoolExecutor, as_completed
	import cv2
	import numpy as np
	from tqdm import tqdm
	from PIL import Image

	# Ask OpenCV to enable its OpenEXR codec; the depth maps read in
	# process_basename() are .exr files.
	# NOTE(review): this assignment runs after `import cv2` above -- confirm the
	# installed OpenCV build still honors the flag when it is set post-import.
	os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"

	# Custom "cropping" module (ensure cropping.py is available/installed);
	# process_basename() calls its rescale_image_depthmap() helper.
	import cropping


	def _load_depth_exr(depth_file):
	    """
	    Read an EXR depth map, falling back to a '<name>.exr.1' sibling file.

	    If the primary file cannot be read and '<depth_file>.1' exists, that file
	    is renamed to '<name>_tmp.exr' and read from there. A '<name>_tmp.exr'
	    left behind by an earlier, interrupted run is also accepted, so a rerun
	    does not lose track of a depth file that was already renamed.

	    Args:
	        depth_file (str): Expected path of the .exr depth file.

	    Returns:
	        tuple: (depth, None) on success, or (None, error message) on failure.
	    """
	    depth = cv2.imread(depth_file, cv2.IMREAD_UNCHANGED)
	    if depth is not None:
	        return depth, None

	    alt_exr_1 = depth_file + ".1"
	    # Only touch the extension: a plain str.replace(".exr", ...) would also
	    # rewrite any ".exr" that happens to appear in a directory name.
	    root, ext = os.path.splitext(depth_file)
	    temp_exr = f"{root}_tmp{ext}"

	    if os.path.exists(alt_exr_1):
	        os.rename(alt_exr_1, temp_exr)
	    elif not os.path.exists(temp_exr):
	        return None, f"Error reading depth file {depth_file}"

	    depth = cv2.imread(temp_exr, cv2.IMREAD_UNCHANGED)
	    if depth is None:
	        return None, f"Error reading depth file (fallback) {temp_exr}"
	    return depth, None


	def process_basename(
	    basename,
	    rgb_dir,
	    depth_dir,
	    class_dir,
	    cam_info,
	    out_rgb_dir,
	    out_depth_dir,
	    out_mask_dir,
	    out_cam_dir,
	):
	    """
	    Process a single file triplet (RGB, depth, class) for a given basename.

	    The depth and class file names are derived from the RGB basename by
	    replacing "rgb" with "depth" and "ss" respectively. Work is skipped when
	    all four output files already exist.

	    Args:
	        basename (str): Base name without file extension (e.g., 'image_0001').
	        rgb_dir (str): Directory containing RGB .png files.
	        depth_dir (str): Directory containing .exr depth files.
	        class_dir (str): Directory containing class .png files (semantic segmentation).
	        cam_info (dict): Camera metadata; the keys "focalLength_mm",
	            "sensorWidth_mm" and "sensorHeight_mm" are read.
	        out_rgb_dir (str): Output directory for rescaled RGB images.
	        out_depth_dir (str): Output directory for rescaled depth files (.npy).
	        out_mask_dir (str): Output directory for sky masks (.png, 0 or 255).
	        out_cam_dir (str): Output directory for camera intrinsics (.npz).

	    Returns:
	        str or None:
	            - None if the sample was processed or was already complete.
	            - An error message if something fails; exceptions are caught and
	              reported this way rather than raised.
	    """
	    # Construct output file paths
	    out_img_path = os.path.join(out_rgb_dir, f"{basename}.png")
	    out_depth_path = os.path.join(out_depth_dir, f"{basename}.npy")
	    out_mask_path = os.path.join(out_mask_dir, f"{basename}.png")
	    out_cam_path = os.path.join(out_cam_dir, f"{basename}.npz")

	    # Skip if already processed
	    outputs = (out_img_path, out_depth_path, out_mask_path, out_cam_path)
	    if all(os.path.exists(path) for path in outputs):
	        return None

	    try:
	        # Build input file paths
	        img_file = os.path.join(rgb_dir, f"{basename}.png")
	        depth_file = os.path.join(depth_dir, f'{basename.replace("rgb", "depth")}.exr')
	        class_file = os.path.join(class_dir, basename.replace("rgb", "ss") + ".png")

	        # 1. Read RGB image
	        img = cv2.imread(img_file, cv2.IMREAD_UNCHANGED)
	        if img is None:
	            return f"Error: Could not read image file {img_file}"
	        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR -> RGB
	        H, W = img.shape[:2]

	        # 2. Read depth from EXR (with the '.exr.1' fallback)
	        depth, depth_error = _load_depth_exr(depth_file)
	        if depth_error is not None:
	            return depth_error
	        # Raw EXR values are scaled by a fixed dataset-specific factor of 1e5.
	        depth *= 1e5

	        # 3. Read class image, build sky mask
	        cl = cv2.imread(class_file, cv2.IMREAD_UNCHANGED)
	        if cl is None:
	            return f"Error: Could not read class file {class_file}"
	        sky_mask = (cl[..., 0] == 10).astype(np.uint8)  # class ID 10 => sky

	        # 4. Build camera intrinsics: focal length in pixels from the
	        #    focal-length / sensor-size ratio, principal point at the center.
	        f_mm = cam_info["focalLength_mm"]
	        w_mm = cam_info["sensorWidth_mm"]
	        h_mm = cam_info["sensorHeight_mm"]
	        K = np.eye(3, dtype=np.float32)
	        K[0, 0] = f_mm / w_mm * W
	        K[1, 1] = f_mm / h_mm * H
	        K[0, 2] = W / 2
	        K[1, 2] = H / 2

	        # 5. Combine depth + sky_mask in a single array so both go through
	        #    the same rescaling call.
	        depth_with_mask = np.stack([depth, sky_mask], axis=-1)

	        # 6. Rescale to desired size (helper comes from the project-local
	        #    cropping module; it returns the rescaled image, map and K).
	        image_pil = Image.fromarray(img)
	        image_rescaled, depth_with_mask_rescaled, K_rescaled = (
	            cropping.rescale_image_depthmap(
	                image_pil, depth_with_mask, K, output_resolution=(640, 480)
	            )
	        )

	        # Write outputs
	        image_rescaled.save(out_img_path)
	        np.save(out_depth_path, depth_with_mask_rescaled[..., 0])
	        cv2.imwrite(
	            out_mask_path, (depth_with_mask_rescaled[..., 1] * 255).astype(np.uint8)
	        )
	        np.savez(out_cam_path, intrinsics=K_rescaled)

	    except Exception as e:
	        # Runs inside a worker process: report the failure as a message so
	        # one bad sample does not abort the whole batch.
	        return f"Error processing {basename}: {e}"

	    return None


	def main():
	    """
	    Command-line entry point: preprocess every RGB frame of an UrbanSyn dataset.

	    Parses --input_dir / --output_dir, creates the output sub-directories
	    (rgb, depth, sky_mask, cam), collects the basenames of all .png files in
	    <input_dir>/rgb, loads the camera block from
	    <input_dir>/camera_metadata.json and runs process_basename() for each
	    basename in a process pool. Error messages returned by workers are
	    printed as they arrive.

	    Returns:
	        None. Missing inputs (RGB directory, RGB files, metadata file) are
	        reported with a message and an early return instead of an exception.
	    """
	    parser = argparse.ArgumentParser(
	        description="Preprocess UrbanSyn dataset by loading RGB/Depth/Seg "
	        "and rescaling them with camera intrinsics."
	    )
	    parser.add_argument(
	        "--input_dir", required=True, help="Path to the UrbanSyn dataset directory."
	    )
	    parser.add_argument(
	        "--output_dir",
	        required=True,
	        help="Path to the directory where processed data will be stored.",
	    )
	    args = parser.parse_args()

	    input_dir = os.path.abspath(args.input_dir)
	    output_dir = os.path.abspath(args.output_dir)
	    os.makedirs(output_dir, exist_ok=True)

	    # Define input subdirectories
	    rgb_dir = os.path.join(input_dir, "rgb")
	    depth_dir = os.path.join(input_dir, "depth")
	    class_dir = os.path.join(input_dir, "ss")
	    meta_file = os.path.join(input_dir, "camera_metadata.json")

	    # Define output subdirectories
	    out_rgb_dir = os.path.join(output_dir, "rgb")
	    out_depth_dir = os.path.join(output_dir, "depth")
	    out_mask_dir = os.path.join(output_dir, "sky_mask")
	    out_cam_dir = os.path.join(output_dir, "cam")
	    for d in [out_rgb_dir, out_depth_dir, out_mask_dir, out_cam_dir]:
	        os.makedirs(d, exist_ok=True)

	    # Gather basenames from RGB files. A missing RGB directory is reported
	    # the same way as a missing metadata file rather than as a traceback.
	    if not os.path.isdir(rgb_dir):
	        print(f"Error: RGB directory not found at {rgb_dir}. Exiting.")
	        return
	    basenames = sorted(
	        os.path.splitext(fname)[0]
	        for fname in os.listdir(rgb_dir)
	        if fname.endswith(".png")
	    )
	    if not basenames:
	        print(f"No RGB .png files found in {rgb_dir}. Exiting.")
	        return

	    # Load camera metadata
	    if not os.path.isfile(meta_file):
	        print(f"Error: metadata file not found at {meta_file}. Exiting.")
	        return

	    with open(meta_file, "r", encoding="utf-8") as f:
	        cam_info_full = json.load(f)
	    # A single camera block is shared by every frame.
	    cam_info = cam_info_full["parameters"][0]["Camera"]

	    # Process in parallel, one task per basename
	    num_workers = os.cpu_count() or 1
	    with ProcessPoolExecutor(max_workers=num_workers) as executor:
	        futures = {
	            executor.submit(
	                process_basename,
	                basename,
	                rgb_dir,
	                depth_dir,
	                class_dir,
	                cam_info,
	                out_rgb_dir,
	                out_depth_dir,
	                out_mask_dir,
	                out_cam_dir,
	            ): basename
	            for basename in basenames
	        }

	        # Use tqdm for progress
	        for future in tqdm(
	            as_completed(futures), total=len(futures), desc="Processing UrbanSyn"
	        ):
	            error = future.result()
	            if error:
	                # tqdm.write prints without corrupting the live progress bar.
	                tqdm.write(error)


	# Guarded entry point: worker processes started by ProcessPoolExecutor may
	# re-import this module, and that import must not run main() again.
	if __name__ == "__main__":
	    main()