Spaces:
Running
on
Zero
Running
on
Zero
File size: 13,770 Bytes
0ca05b5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 |
"""
Utility functions for cropping and resizing data while maintaining proper cameras.
References: DUSt3R
"""
import cv2
import numpy as np
import PIL.Image
# Pillow moved its resampling filters into the ``PIL.Image.Resampling`` enum;
# use the enum when it exists, otherwise the module-level constants.
lanczos, bicubic = (
    (PIL.Image.Resampling.LANCZOS, PIL.Image.Resampling.BICUBIC)
    if hasattr(PIL.Image, "Resampling")
    else (PIL.Image.LANCZOS, PIL.Image.BICUBIC)
)
from src.utils.geometry import (
colmap_to_opencv_intrinsics,
opencv_to_colmap_intrinsics,
)
class ImageList:
    """
    Apply one PIL operation uniformly across a group of images.

    The wrapped images are stored as PIL.Image objects in ``self.images``;
    inputs that are not already PIL images are converted with
    PIL.Image.fromarray().
    """

    def __init__(self, images):
        """
        Build the list from one image or a collection of images.

        Args:
            images: A single image, or a tuple/list/set of images. Each entry
                is either a PIL.Image.Image or something PIL.Image.fromarray()
                accepts.
        """
        # A lone image is handled as a one-element collection.
        if not isinstance(images, (tuple, list, set)):
            images = [images]
        self.images = [
            entry if isinstance(entry, PIL.Image.Image) else PIL.Image.fromarray(entry)
            for entry in images
        ]

    def __len__(self):
        """Number of wrapped images."""
        return len(self.images)

    def to_pil(self):
        """
        Unwrap the list back into plain PIL image(s).

        Returns:
            PIL.Image.Image or tuple: The image itself when exactly one image
                is held, otherwise a tuple with all images.
        """
        if len(self.images) > 1:
            return tuple(self.images)
        return self.images[0]

    @property
    def size(self):
        """
        Common size shared by every wrapped image.

        Returns:
            tuple: (width, height) of the images
        Raises:
            AssertionError: If the images do not all have the same size
        """
        all_sizes = [img.size for img in self.images]
        reference = all_sizes[0]
        assert all(reference == current for current in all_sizes), "All images must have the same size"
        return reference

    def resize(self, *args, **kwargs):
        """
        Resize every image with identical arguments.

        Args:
            *args, **kwargs: Forwarded unchanged to PIL.Image.resize()
        Returns:
            ImageList: A new ImageList holding the resized images
        """
        resized = self._dispatch("resize", *args, **kwargs)
        return ImageList(resized)

    def crop(self, *args, **kwargs):
        """
        Crop every image with identical arguments.

        Args:
            *args, **kwargs: Forwarded unchanged to PIL.Image.crop()
        Returns:
            ImageList: A new ImageList holding the cropped images
        """
        cropped = self._dispatch("crop", *args, **kwargs)
        return ImageList(cropped)

    def _dispatch(self, func, *args, **kwargs):
        """
        Call the method named ``func`` on each wrapped image.

        Args:
            func (str): Name of the method to look up on each image
            *args, **kwargs: Arguments handed to that method
        Returns:
            list: One result per image, in the order the images are stored
        """
        results = []
        for img in self.images:
            bound_method = getattr(img, func)
            results.append(bound_method(*args, **kwargs))
        return results
def rescale_image_and_other_optional_info(
    image,
    output_resolution,
    depthmap=None,
    camera_intrinsics=None,
    force=True,
    additional_quantities_to_be_resized_with_nearest=None,
):
    """
    Rescale the image and depthmap so that they cover the output resolution.

    A single isotropic scale factor is used (the larger of the per-axis ratios
    between output and input resolution), so the aspect ratio is preserved and
    the rescaled image is at least as large as ``output_resolution`` on both
    axes; on one axis it may be larger.
    If the image is larger than the output resolution, it is rescaled with lanczos interpolation.
    If force is false and the image is smaller than the output resolution, it is not rescaled.
    If force is true and the image is smaller than the output resolution, it is rescaled with bicubic interpolation.
    Depth and other quantities are rescaled with nearest interpolation.

    Args:
        image (PIL.Image.Image or np.ndarray): The input image to be rescaled.
        output_resolution (tuple): The desired output resolution as a tuple (width, height).
        depthmap (np.ndarray, optional): The depth map associated with the image. Defaults to None.
        camera_intrinsics (np.ndarray, optional): The camera intrinsics matrix. Defaults to None.
        force (bool, optional): If True, force rescaling even if the image is smaller than the output resolution. Defaults to True.
        additional_quantities_to_be_resized_with_nearest (list of np.ndarray, optional): Additional quantities to be rescaled using nearest interpolation. Defaults to None.

    Returns:
        tuple: A tuple containing:
            - The rescaled image (PIL.Image.Image)
            - The rescaled depthmap (numpy.ndarray or None)
            - The updated camera intrinsics (numpy.ndarray or None)
            - The list of rescaled additional quantities (list of numpy.ndarray or None)
        When ``force`` is False and no down-scaling is needed, the inputs are
        returned as they were passed in.

    Raises:
        AssertionError: If the depthmap or an additional quantity does not have
            the same (height, width) as the image, or if ``output_resolution``
            is not a 2-element sequence.
    """
    image = ImageList(image)
    input_resolution = np.array(image.size)  # (W, H)
    output_resolution = np.array(output_resolution)

    # Arrays are indexed (H, W) whereas PIL reports (W, H).
    if depthmap is not None:
        assert tuple(depthmap.shape[:2]) == image.size[::-1]
    if additional_quantities_to_be_resized_with_nearest is not None:
        assert all(
            tuple(additional_quantity.shape[:2]) == image.size[::-1]
            for additional_quantity in additional_quantities_to_be_resized_with_nearest
        )

    # Define output resolution
    assert output_resolution.shape == (2,)
    # The epsilon keeps floor() below from dropping a pixel due to float error.
    scale_final = max(output_resolution / image.size) + 1e-8
    if scale_final >= 1 and not force:  # image is already smaller than what is asked
        return (
            image.to_pil(),
            depthmap,
            camera_intrinsics,
            additional_quantities_to_be_resized_with_nearest,
        )
    output_resolution = np.floor(input_resolution * scale_final).astype(int)
    # Plain Python ints: accepted as a size by both PIL and OpenCV.
    target_size = tuple(int(side) for side in output_resolution)

    def _resize_nearest(array):
        # OpenCV ignores fx/fy whenever an explicit dsize is supplied, so only
        # dsize is passed.
        return cv2.resize(array, target_size, interpolation=cv2.INTER_NEAREST)

    # First rescale the image so that it contains the crop
    image = image.resize(target_size, resample=lanczos if scale_final < 1 else bicubic)
    if depthmap is not None:
        depthmap = _resize_nearest(depthmap)
    if additional_quantities_to_be_resized_with_nearest is not None:
        additional_quantities_to_be_resized_with_nearest = [
            _resize_nearest(quantity)
            for quantity in additional_quantities_to_be_resized_with_nearest
        ]

    # No offset here; simple rescaling
    if camera_intrinsics is not None:
        camera_intrinsics = camera_matrix_of_crop(
            camera_intrinsics, input_resolution, output_resolution, scaling=scale_final
        )

    return (
        image.to_pil(),
        depthmap,
        camera_intrinsics,
        additional_quantities_to_be_resized_with_nearest,
    )
def camera_matrix_of_crop(
    input_camera_matrix,
    input_resolution,
    output_resolution,
    scaling=1,
    offset_factor=0.5,
    offset=None,
):
    """
    Derive the intrinsics of an image that was scaled and then cropped.

    The input resolution is multiplied by ``scaling``; whatever exceeds
    ``output_resolution`` is the margin available for cropping. The first two
    rows of the matrix are scaled and the principal point is shifted by the
    crop offset. The arithmetic is done on the matrix returned by
    opencv_to_colmap_intrinsics() and the result is converted back with
    colmap_to_opencv_intrinsics().

    Args:
        input_camera_matrix (numpy.ndarray): Original camera intrinsics matrix
        input_resolution (tuple or numpy.ndarray): Original image resolution as (width, height)
        output_resolution (tuple or numpy.ndarray): Target image resolution as (width, height)
        scaling (float, optional): Scaling factor for the image. Defaults to 1.
        offset_factor (float, optional): Fraction of the margin used as crop offset. Defaults to 0.5 (centered).
        offset (tuple or numpy.ndarray, optional): Explicit offset to use. If None, it is offset_factor times the margin.

    Returns:
        numpy.ndarray: Updated camera matrix for the cropped image

    Raises:
        AssertionError: If the scaled input is smaller than the output
            resolution on any axis.
    """
    # Space left over on each axis once the scaled image is cut to the target size.
    scaled_resolution = np.asarray(input_resolution) * scaling
    margins = scaled_resolution - output_resolution
    assert (margins >= 0.0).all()

    crop_offset = offset_factor * margins if offset is None else offset

    # NOTE(review): the in-place edits below assume the conversion helper
    # returns a fresh array rather than the caller's matrix - confirm in
    # src.utils.geometry.
    intrinsics_colmap = opencv_to_colmap_intrinsics(input_camera_matrix)
    intrinsics_colmap[:2, :] *= scaling
    intrinsics_colmap[:2, 2] -= crop_offset
    return colmap_to_opencv_intrinsics(intrinsics_colmap)
def crop_image_and_other_optional_info(
    image,
    crop_bbox,
    depthmap=None,
    camera_intrinsics=None,
    additional_quantities=None,
):
    """
    Cut the same rectangle out of an image and its companion data.

    Args:
        image (PIL.Image.Image or numpy.ndarray): The input image to be cropped
        crop_bbox (tuple): Crop bounding box as (left, top, right, bottom)
        depthmap (numpy.ndarray, optional): Depth map associated with the image
        camera_intrinsics (numpy.ndarray, optional): Camera intrinsics matrix
        additional_quantities (list of numpy.ndarray, optional): Additional data arrays to crop

    Returns:
        tuple: A tuple containing:
            - The cropped image
            - The cropped depth map (or None if not provided)
            - A shifted copy of the camera intrinsics (or None if not provided)
            - List of cropped additional quantities (or None if not provided)
    """
    wrapped = ImageList(image)
    left, top, right, bottom = crop_bbox
    cropped_image = wrapped.crop((left, top, right, bottom))

    # Arrays are indexed rows-first, i.e. [top:bottom, left:right].
    rows = slice(top, bottom)
    cols = slice(left, right)

    cropped_depth = None if depthmap is None else depthmap[rows, cols]

    cropped_extras = additional_quantities
    if cropped_extras is not None:
        cropped_extras = [extra[rows, cols] for extra in cropped_extras]

    shifted_intrinsics = camera_intrinsics
    if shifted_intrinsics is not None:
        # Work on a copy so the caller's matrix is untouched; the principal
        # point moves by the crop origin.
        shifted_intrinsics = shifted_intrinsics.copy()
        shifted_intrinsics[0, 2] -= left
        shifted_intrinsics[1, 2] -= top

    return (cropped_image.to_pil(), cropped_depth, shifted_intrinsics, cropped_extras)
def bbox_from_intrinsics_in_out(
    input_camera_matrix, output_camera_matrix, output_resolution
):
    """
    Recover the crop rectangle implied by a change of principal point.

    The top-left corner is the rounded difference between the input and output
    principal points (entries [0, 2] and [1, 2] of each matrix); the rectangle
    then extends by the output resolution.

    Args:
        input_camera_matrix (numpy.ndarray): Original camera intrinsics matrix
        output_camera_matrix (numpy.ndarray): Target camera intrinsics matrix
        output_resolution (tuple): Target resolution as (width, height)

    Returns:
        tuple: Crop bounding box as (left, top, right, bottom)
    """
    width, height = output_resolution
    principal_point_shift = input_camera_matrix[:2, 2] - output_camera_matrix[:2, 2]
    left, top = np.int32(np.round(principal_point_shift))
    right = left + width
    bottom = top + height
    return (left, top, right, bottom)
def crop_resize_if_necessary(
    image,
    resolution,
    depthmap=None,
    intrinsics=None,
    additional_quantities=None,
):
    """
    Rescale the image to cover the target resolution, then center-crop to it.

    The image is first rescaled with rescale_image_and_other_optional_info()
    using its default ``force=True``: LANCZOS when shrinking, BICUBIC when the
    input is smaller than the target. The result is then cropped to exactly
    ``resolution``. Depth map, intrinsics and additional quantities, when
    given, go through the same rescale and crop.

    Args:
        image (PIL.Image.Image or numpy.ndarray): The input image to be processed
        resolution (tuple): Target resolution as (width, height)
        depthmap (numpy.ndarray, optional): Depth map associated with the image
        intrinsics (numpy.ndarray, optional): Camera intrinsics matrix
        additional_quantities (list of numpy.ndarray, optional): Additional data arrays to process

    Returns:
        tuple: Starts with the processed image, followed by the processed
            depthmap, intrinsics and additional_quantities, in that order.
            Entries that are None are omitted, so the tuple length depends on
            which optional inputs were provided.
    """
    # Convert image to PIL.Image.Image if necessary
    if not isinstance(image, PIL.Image.Image):
        image = PIL.Image.fromarray(image)

    # Rescale so that the image covers the target resolution on both axes
    image, depthmap, intrinsics, additional_quantities = (
        rescale_image_and_other_optional_info(
            image=image,
            output_resolution=np.array(resolution),
            depthmap=depthmap,
            camera_intrinsics=intrinsics,
            additional_quantities_to_be_resized_with_nearest=additional_quantities,
        )
    )

    # Actual cropping (if necessary)
    if intrinsics is not None:
        # Intrinsics of an ideally centered crop; used only to derive the
        # integer crop box.
        centered_intrinsics = camera_matrix_of_crop(
            input_camera_matrix=intrinsics,
            input_resolution=image.size,
            output_resolution=resolution,
            offset_factor=0.5,
        )
        crop_bbox = bbox_from_intrinsics_in_out(
            input_camera_matrix=intrinsics,
            output_camera_matrix=centered_intrinsics,
            output_resolution=resolution,
        )
    else:
        # Create a centered crop if no intrinsics are available
        w, h = image.size
        target_w, target_h = resolution
        left = (w - target_w) // 2
        top = (h - target_h) // 2
        crop_bbox = (left, top, left + target_w, top + target_h)

    # The returned intrinsics come from this call, so they match the
    # integer-rounded crop box that is actually applied to the pixels.
    image, depthmap, new_intrinsics, additional_quantities = (
        crop_image_and_other_optional_info(
            image=image,
            crop_bbox=crop_bbox,
            depthmap=depthmap,
            camera_intrinsics=intrinsics,
            additional_quantities=additional_quantities,
        )
    )

    # Return the output, leaving out anything that was not provided
    output = (image,)
    if depthmap is not None:
        output += (depthmap,)
    if new_intrinsics is not None:
        output += (new_intrinsics,)
    if additional_quantities is not None:
        output += (additional_quantities,)
    return output
|