|
|
|
|
|
""" |
|
|
Preprocess Script for UrbanSyn Dataset

This script:
  1. Reads RGB, depth (EXR), and semantic segmentation (class) files from an UrbanSyn dataset directory.
  2. Derives camera intrinsics from the focal length and sensor size stored in a JSON metadata file (camera_metadata.json).
  3. Rescales images, depth maps, and masks to a fixed resolution (640×480).
  4. Saves processed data (RGB, .npy depth, .png sky mask, and .npz intrinsics) in an organized structure.

Usage:
    python preprocess_urbansyn.py \
        --input_dir /path/to/data_urbansyn \
        --output_dir /path/to/processed_urbansyn
|
|
""" |
|
|
|
|
|
import os

# OpenCV ships its EXR codec disabled unless this flag is set. It is exported
# *before* cv2 is imported so the flag is already present whenever OpenCV
# consults it (worker processes inherit the environment of this process).
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"

import argparse  # noqa: E402
import json  # noqa: E402
import shutil  # noqa: E402
from concurrent.futures import ProcessPoolExecutor, as_completed  # noqa: E402

import cv2  # noqa: E402
import numpy as np  # noqa: E402
from PIL import Image  # noqa: E402
from tqdm import tqdm  # noqa: E402

import cropping  # noqa: E402
|
|
|
|
|
|
|
|
def _read_depth_exr(depth_file):
    """Load the EXR depth map for one frame, trying known alternate file names.

    Candidates are tried in this order:
      1. ``depth_file`` itself.
      2. ``<stem>_tmp.exr`` -- the name earlier versions of this script renamed
         the ``.exr.1`` file to, so datasets touched by those runs still load.
      3. ``depth_file + ".1"`` -- read through a temporary copy that carries an
         ``.exr`` suffix. The source file is never renamed or modified, so the
         script can be re-run safely.

    Args:
        depth_file (str): Expected path of the ``.exr`` depth file.

    Returns:
        tuple: ``(depth, None)`` on success, or ``(None, error_message)`` when
        no candidate could be read.
    """
    import tempfile  # local import: only needed for the rare fallback path

    depth = cv2.imread(depth_file, cv2.IMREAD_UNCHANGED)
    if depth is not None:
        return depth, None

    # Only the final extension is touched (a plain str.replace would also
    # rewrite ".exr" occurrences inside directory names).
    stem, ext = os.path.splitext(depth_file)
    legacy_tmp = f"{stem}_tmp{ext}"
    if os.path.exists(legacy_tmp):
        depth = cv2.imread(legacy_tmp, cv2.IMREAD_UNCHANGED)
        if depth is None:
            return None, f"Error reading depth file (fallback) {legacy_tmp}"
        return depth, None

    alt_exr_1 = depth_file + ".1"
    if os.path.exists(alt_exr_1):
        fd, tmp_path = tempfile.mkstemp(suffix=".exr")
        os.close(fd)
        try:
            shutil.copyfile(alt_exr_1, tmp_path)
            depth = cv2.imread(tmp_path, cv2.IMREAD_UNCHANGED)
        finally:
            os.remove(tmp_path)
        if depth is None:
            return None, f"Error reading depth file (fallback) {alt_exr_1}"
        return depth, None

    return None, f"Error reading depth file {depth_file}"


def process_basename(
    basename,
    rgb_dir,
    depth_dir,
    class_dir,
    cam_info,
    out_rgb_dir,
    out_depth_dir,
    out_mask_dir,
    out_cam_dir,
):
    """
    Process a single file triplet (RGB, depth, class) for a given basename.

    The depth and class file names are derived from ``basename`` by replacing
    the substring "rgb" with "depth" and "ss" respectively. If all four output
    files already exist the frame is skipped, which lets an interrupted run be
    resumed.

    Args:
        basename (str): Base name without file extension (e.g., 'image_0001').
        rgb_dir (str): Directory containing RGB .png files.
        depth_dir (str): Directory containing .exr depth files.
        class_dir (str): Directory containing class .png files (semantic segmentation).
        cam_info (dict): Camera metadata; the keys "focalLength_mm",
            "sensorWidth_mm" and "sensorHeight_mm" are read.
        out_rgb_dir (str): Output directory for rescaled RGB images.
        out_depth_dir (str): Output directory for rescaled depth files.
        out_mask_dir (str): Output directory for sky masks.
        out_cam_dir (str): Output directory for camera intrinsics.

    Returns:
        str or None:
            - Returns None if successful (or if the frame was already processed).
            - Returns an error message if something fails; exceptions are
              caught and reported as a message rather than raised.
    """
    out_img_path = os.path.join(out_rgb_dir, f"{basename}.png")
    out_depth_path = os.path.join(out_depth_dir, f"{basename}.npy")
    out_mask_path = os.path.join(out_mask_dir, f"{basename}.png")
    out_cam_path = os.path.join(out_cam_dir, f"{basename}.npz")

    # Skip only when *every* output exists, so partially written frames are redone.
    expected_outputs = (out_img_path, out_depth_path, out_mask_path, out_cam_path)
    if all(os.path.exists(path) for path in expected_outputs):
        return None

    try:
        img_file = os.path.join(rgb_dir, f"{basename}.png")
        depth_file = os.path.join(depth_dir, f'{basename.replace("rgb", "depth")}.exr')
        class_file = os.path.join(class_dir, basename.replace("rgb", "ss") + ".png")

        # RGB image: OpenCV loads BGR, convert to RGB before handing it to PIL.
        img = cv2.imread(img_file, cv2.IMREAD_UNCHANGED)
        if img is None:
            return f"Error: Could not read image file {img_file}"
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        H, W = img.shape[:2]

        # Depth map, with non-destructive fallbacks for alternate file names.
        depth, depth_error = _read_depth_exr(depth_file)
        if depth_error is not None:
            return depth_error
        # NOTE(review): the 1e5 factor presumably converts the stored values
        # to metric depth -- confirm against the UrbanSyn documentation.
        depth *= 1e5

        # Semantic labels: label id 10 is treated as sky.
        cl = cv2.imread(class_file, cv2.IMREAD_UNCHANGED)
        if cl is None:
            return f"Error: Could not read class file {class_file}"
        # A single-channel PNG loads as a 2-D array; otherwise the label id is
        # read from the first channel as loaded by OpenCV.
        labels = cl if cl.ndim == 2 else cl[..., 0]
        sky_mask = (labels == 10).astype(np.uint8)

        # Pinhole intrinsics: focal length in pixels = f_mm / sensor_mm * pixels,
        # principal point at the image centre.
        f_mm = cam_info["focalLength_mm"]
        w_mm = cam_info["sensorWidth_mm"]
        h_mm = cam_info["sensorHeight_mm"]
        K = np.eye(3, dtype=np.float32)
        K[0, 0] = f_mm / w_mm * W
        K[1, 1] = f_mm / h_mm * H
        K[0, 2] = W / 2
        K[1, 2] = H / 2

        # Stack depth and mask so both go through the same rescaling call.
        depth_with_mask = np.stack([depth, sky_mask], axis=-1)

        image_pil = Image.fromarray(img)
        image_rescaled, depth_with_mask_rescaled, K_rescaled = (
            cropping.rescale_image_depthmap(
                image_pil, depth_with_mask, K, output_resolution=(640, 480)
            )
        )

        image_rescaled.save(out_img_path)
        np.save(out_depth_path, depth_with_mask_rescaled[..., 0])
        cv2.imwrite(
            out_mask_path, (depth_with_mask_rescaled[..., 1] * 255).astype(np.uint8)
        )
        np.savez(out_cam_path, intrinsics=K_rescaled)

    except Exception as e:
        # Worker boundary: report the failure as a message so one bad frame
        # does not abort the whole run.
        return f"Error processing {basename}: {e}"

    return None
|
|
|
|
|
|
|
|
def main():
    """
    Command-line entry point: preprocess every frame of an UrbanSyn dataset.

    Expects ``--input_dir`` to contain the sub-directories ``rgb``, ``depth``
    and ``ss`` plus a ``camera_metadata.json`` file, and writes results to the
    sub-directories ``rgb``, ``depth``, ``sky_mask`` and ``cam`` of
    ``--output_dir``. Frames are processed in parallel, one task per RGB file.

    Missing or malformed inputs are reported with a message and the function
    returns without processing. Per-frame failures are printed and do not stop
    the remaining frames.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess UrbanSyn dataset by loading RGB/Depth/Seg "
        "and rescaling them with camera intrinsics."
    )
    parser.add_argument(
        "--input_dir", required=True, help="Path to the UrbanSyn dataset directory."
    )
    parser.add_argument(
        "--output_dir",
        required=True,
        help="Path to the directory where processed data will be stored.",
    )
    args = parser.parse_args()

    input_dir = os.path.abspath(args.input_dir)
    output_dir = os.path.abspath(args.output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # Input layout.
    rgb_dir = os.path.join(input_dir, "rgb")
    depth_dir = os.path.join(input_dir, "depth")
    class_dir = os.path.join(input_dir, "ss")
    meta_file = os.path.join(input_dir, "camera_metadata.json")

    # Output layout.
    out_rgb_dir = os.path.join(output_dir, "rgb")
    out_depth_dir = os.path.join(output_dir, "depth")
    out_mask_dir = os.path.join(output_dir, "sky_mask")
    out_cam_dir = os.path.join(output_dir, "cam")
    for d in [out_rgb_dir, out_depth_dir, out_mask_dir, out_cam_dir]:
        os.makedirs(d, exist_ok=True)

    # A missing rgb/ directory is reported like the other input problems
    # instead of surfacing as an uncaught FileNotFoundError from os.listdir.
    if not os.path.isdir(rgb_dir):
        print(f"Error: RGB directory not found at {rgb_dir}. Exiting.")
        return

    # One task per RGB frame; the depth/class names are derived from it later.
    basenames = sorted(
        os.path.splitext(fname)[0]
        for fname in os.listdir(rgb_dir)
        if fname.endswith(".png")
    )
    if not basenames:
        print(f"No RGB .png files found in {rgb_dir}. Exiting.")
        return

    if not os.path.isfile(meta_file):
        print(f"Error: metadata file not found at {meta_file}. Exiting.")
        return

    with open(meta_file, "r", encoding="utf-8") as f:
        cam_info_full = json.load(f)
    try:
        # The same camera entry is used for every frame.
        cam_info = cam_info_full["parameters"][0]["Camera"]
    except (KeyError, IndexError, TypeError) as exc:
        print(f"Error: unexpected metadata layout in {meta_file} ({exc!r}). Exiting.")
        return

    num_workers = max(1, os.cpu_count() or 1)
    num_failed = 0
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # Map each future back to its basename so crashes can be attributed.
        futures = {
            executor.submit(
                process_basename,
                basename,
                rgb_dir,
                depth_dir,
                class_dir,
                cam_info,
                out_rgb_dir,
                out_depth_dir,
                out_mask_dir,
                out_cam_dir,
            ): basename
            for basename in basenames
        }

        for future in tqdm(
            as_completed(futures), total=len(futures), desc="Processing UrbanSyn"
        ):
            try:
                error = future.result()
            except Exception as exc:
                # result() re-raises anything the worker could not turn into a
                # message (e.g. a worker process dying); keep reporting the rest.
                error = f"Error processing {futures[future]}: {exc!r}"
            if error:
                num_failed += 1
                print(error)

    if num_failed:
        print(f"Finished with {num_failed} failure(s) out of {len(basenames)} file(s).")
|
|
|
|
|
|
|
|
# Run the preprocessing pipeline only when this file is executed as a script;
# importing it (as multiprocessing workers may do) must not start a run.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
|