File size: 5,931 Bytes
2df809d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#!/usr/bin/env python3
"""
Preprocess the Eden dataset.

This script processes the Eden dataset by copying RGB images, converting depth
data from .mat files to .npy format, and saving camera intrinsics from .mat files
into a structured output directory. Files are processed in parallel using
a ProcessPoolExecutor.

Usage:
    python preprocess_eden.py --root /path/to/data_raw_videos/data_eden \
                              --out_dir /path/to/data_raw_videos/processed_eden \
                              [--num_workers N]
"""

import os
import shutil
import scipy.io
import numpy as np
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed
import argparse


def process_basename(args):
    """
    Process one basename of a sequence: copy its RGB image, convert its
    depth .mat file to .npy, and store its camera intrinsics as .npz in
    the corresponding output directories.

    Parameters:
        args (tuple): (seq, basename, rgb_dir, depth_dir, cam_dir,
                       out_rgb_dir, out_depth_dir, out_cam_dir)

    Returns:
        None on success, or a human-readable error string on failure.
    """
    (seq, basename, rgb_dir, depth_dir, cam_dir,
     out_rgb_dir, out_depth_dir, out_cam_dir) = args

    # Destination paths for this basename.
    dst_img = os.path.join(out_rgb_dir, f"{basename}.png")
    dst_depth = os.path.join(out_depth_dir, f"{basename}.npy")
    dst_cam = os.path.join(out_cam_dir, f"{basename}.npz")

    # The camera file is written last, so its presence marks this basename
    # as fully processed by a previous run — safe to skip.
    if os.path.exists(dst_cam):
        return None

    try:
        cam_type = "L"
        src_img = os.path.join(rgb_dir, f"{basename}_{cam_type}.png")
        src_depth = os.path.join(depth_dir, f"{basename}_{cam_type}.mat")
        src_cam = os.path.join(cam_dir, f"{basename}.mat")

        # All three source files must be present.
        for required in (src_img, src_depth, src_cam):
            if not os.path.exists(required):
                return f"Missing files for {basename} in {seq}"

        # Depth: pull the "Depth" array and keep only the first channel.
        depth = scipy.io.loadmat(src_depth).get("Depth")
        if depth is None:
            return f"Depth data missing in {src_depth}"
        depth = depth[..., 0]

        # Intrinsics: stored under "K_<cam_type>" in the camera .mat file.
        intrinsics = scipy.io.loadmat(src_cam).get(f"K_{cam_type}")
        if intrinsics is None:
            return f"Intrinsics data missing in {src_cam}"

        shutil.copyfile(src_img, dst_img)
        np.save(dst_depth, depth)
        # Written last: acts as the completion marker checked above.
        np.savez(dst_cam, intrinsics=intrinsics)

    except Exception as e:
        return f"Error processing {basename} in {seq}: {e}"

    return None  # Success.


def main():
    """
    Entry point: parse CLI options, enumerate every (sequence, basename)
    pair under the raw Eden tree, and dispatch the processing of each
    basename to a pool of worker processes, printing any per-task errors.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess Eden dataset: copy RGB images, process depth maps, and save camera intrinsics."
    )
    parser.add_argument(
        "--root", type=str, default="", help="Root directory of the raw Eden data."
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        default="",
        help="Output directory for processed Eden data.",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=os.cpu_count(),
        help="Number of worker processes to use.",
    )
    opts = parser.parse_args()

    # Weather/lighting modes typically present in the Eden dataset.
    modes = ["clear", "cloudy", "overcast", "sunset", "twilight"]

    rgb_root = os.path.join(opts.root, "RGB")
    depth_root = os.path.join(opts.root, "Depth")
    cam_root = os.path.join(opts.root, "cam_matrix")

    # A sequence is a "<scene>/<mode>" subdirectory under the RGB root.
    seq_dirs = [
        os.path.join(scene, mode)
        for scene in os.listdir(rgb_root)
        for mode in modes
        if os.path.isdir(os.path.join(rgb_root, scene, mode))
    ]

    all_tasks = []
    for seq in seq_dirs:
        rgb_dir = os.path.join(rgb_root, seq)
        depth_dir = os.path.join(depth_root, seq)
        cam_dir = os.path.join(cam_root, seq)

        # Flatten "<scene>/<mode>" into "<scene>_<mode>" for output naming.
        seq_name = seq.replace(os.sep, "_")
        out_rgb_dir = os.path.join(opts.out_dir, seq_name, "rgb")
        out_depth_dir = os.path.join(opts.out_dir, seq_name, "depth")
        out_cam_dir = os.path.join(opts.out_dir, seq_name, "cam")
        for out_path in (out_rgb_dir, out_depth_dir, out_cam_dir):
            os.makedirs(out_path, exist_ok=True)

        # Basenames are the camera .mat filenames with the extension stripped
        # (strip first, then sort, to match the ordering of the basenames).
        basenames = sorted(f[:-4] for f in os.listdir(cam_dir) if f.endswith(".mat"))

        all_tasks.extend(
            (
                seq,
                basename,
                rgb_dir,
                depth_dir,
                cam_dir,
                out_rgb_dir,
                out_depth_dir,
                out_cam_dir,
            )
            for basename in basenames
        )

    print(f"Processing {len(all_tasks)} tasks using {opts.num_workers} workers...")
    with ProcessPoolExecutor(max_workers=opts.num_workers) as executor:
        future_map = {
            executor.submit(process_basename, task): task[1] for task in all_tasks
        }
        for fut in tqdm(
            as_completed(future_map), total=len(future_map), desc="Processing tasks"
        ):
            # A worker returns None on success, or an error string to report.
            err = fut.result()
            if err:
                print(err)


if __name__ == "__main__":
    main()