import numpy as np import logging import torch import re def random_per_point_translation_in_place(pcd_data) -> None: """ Jittering the point cloud data by a random value between -0.02 and 0.02 Args: pcd_data: point cloud data in the form x, y, z """ translations = ( np.random.rand(pcd_data.shape[0], 3) - 0.5 ) * 0.04 # Random values between -0.02 and 0.02 pcd_data[:, -3:] += translations def compute_max_extent_and_centroid(pcd_data, epsilon=1e-4) -> tuple[np.ndarray, np.ndarray]: """_summary_ Args: pcd_data : point cloud data in the form x, y, z epsilon (float, optional): buffer for the max_extent. Defaults to 1e-4. Returns: max_extent: maximum extent of the point cloud data in terms of the largest dimension centroid: centroid of the point cloud data """ min_vals = pcd_data.min(axis=0) max_vals = pcd_data.max(axis=0) centroid = (min_vals + max_vals) / 2 max_extent = np.max(max_vals - min_vals) + epsilon return max_extent, centroid def unit_cube_normalization_in_place( pcd_data, max_extent, centroid, ): """ Normalized data point in a unit cube between 0 and 1 for each x, y, z in-place Args: pcd_data: point cloud data in the form x, y, z """ # translate the centroid to the origin pcd_data -= centroid # scale the data to fit within [-0.5, 0.5] pcd_data /= max_extent # translate it back to within [0, 1] pcd_data += 0.5 def point_to_index(point, grid_size): """ Maps a point in the unit cube to a unique index based on the grid size. Args: point (tuple): a tuple of (x, y, z) coordinates of the point. Each coordinate should be in [0, 1]. grid_size (int): the number of divisions along each axis. Returns: int: a unique index for the point. """ xi = int(point[0] * grid_size) yi = int(point[1] * grid_size) zi = int(point[2] * grid_size) # Ensure that the point is inside the unit cube if not (0 <= xi < grid_size) or not (0 <= yi < grid_size) or not (0 <= zi < grid_size): logging.warning( f"The point is outside the unit cube: point: {point}, grid_index: ({xi}, {yi}, {zi})" ) # Clamp the point to be inside the unit cube xi = min(max(xi, 0), grid_size - 1) yi = min(max(yi, 0), grid_size - 1) zi = min(max(zi, 0), grid_size - 1) # Compute the unique voxel ID, row-major order voxel_id = xi + yi * grid_size + zi * grid_size * grid_size return voxel_id def scale_bbox(bbox_str, max_extent, centroid): """ Scale the bounding box to be within a unit cube and output numerically tokenized bounding box. Args: bbox_str (str): A string representing a bounding box, in the format "". max_extent (float): The maximum extent of the bounding box. centroid (np.array): The centroid of the bounding box. Returns: str: A string representing the scaled bounding box, in the same format as the input. """ # Remove < and > from the bounding box string bbox_str = bbox_str.strip("<>") bbox_values = bbox_str.split(",") # Convert each string to a float and store in a list bbox_floats = [float(value) for value in bbox_values] # Convert the list to a numpy array bbox_array = np.array(bbox_floats) bbox_array[:3] -= centroid bbox_array[3:] -= centroid bbox_array /= max_extent bbox_array += 0.5 x_min, y_min, z_min, x_max, y_max, z_max = bbox_array x_min, y_min, z_min, x_max, y_max, z_max = ( x_min.item(), y_min.item(), z_min.item(), x_max.item(), y_max.item(), z_max.item(), ) x_min, y_min, z_min, x_max, y_max, z_max = ( round(x_min, 3), round(y_min, 3), round(z_min, 3), round(x_max, 3), round(y_max, 3), round(z_max, 3), ) new_bbox_str = f"< {x_min}, {y_min}, {z_min}, {x_max}, {y_max}, {z_max}>" # adding space after < because tokenizer will not merge < and first digit or negative sign return new_bbox_str def voxelize_points( xyz_to_be_voxelized: np.array, scene_min_xyz: np.array, scene_max_xyz: np.array, num_voxels_per_axis: int, ): """Convert points to voxel indexes Args: xyz_to_be_voxelized (np.array): shape (num_points, 3) scene_min_xyz (np.array): shape (3,) scene_max_xyz (np.array): shape (3,) num_voxels_per_axis (int): number of voxels per axis Returns: voxel_id (np.array): shape (num_points,) """ voxel_index = np.floor( (xyz_to_be_voxelized - scene_min_xyz) / (scene_max_xyz - scene_min_xyz) * num_voxels_per_axis ).astype( int ) # range after this overations: [0, num_voxels_per_axis] voxel_index = np.clip( voxel_index, 0, num_voxels_per_axis - 1 ) # clamp range to [0, num_voxels_per_axis - 1] # calculate index using row-major order voxel_id = ( voxel_index[:, 0] + voxel_index[:, 1] * num_voxels_per_axis + voxel_index[:, 2] * num_voxels_per_axis * num_voxels_per_axis ) # range after this operation: [0, num_voxels_per_axis ** 3 - 1] return voxel_id def process_one_bbox_minkowski_loc_token( bbox_str, scene_min_xyz, scene_max_xyz, num_voxels_per_axis ): # Remove < and > from the bounding box string bbox_str = bbox_str.strip("<>") bbox_values = bbox_str.split(",") # Convert each string to a float and store in a list bbox_floats = [float(value) for value in bbox_values] # Convert the list to a numpy array bbox_array = np.array(bbox_floats) # shape: (6,) bbox_array = bbox_array.reshape(2, 3) # shape: (2, 3) voxel_indices = voxelize_points( bbox_array, scene_min_xyz, scene_max_xyz, num_voxels_per_axis ) # shape: (2,) new_bbox_str = f"" return new_bbox_str def scale_bbox_special_token(bbox_str, max_extent, centroid, num_grid_cells): """ Special token for the bbox. The bbox is scaled to the unit cube and then converted to a unique index based on the grid size. Args: bbox_str (str): bbox string in the form "" max_extent (float): max extent of the point cloud data in terms of the largest dimension centroid (np.array): centroid of the point cloud data num_grid_cells (int): number of grids along each axis Returns: two unique special tokens for the bbox as string """ # Remove < and > from the bounding box string bbox_str = bbox_str.strip("<>") bbox_values = bbox_str.split(",") # Convert each string to a float and store in a list bbox_floats = [float(value) for value in bbox_values] # Convert the list to a numpy array bbox_floats = np.array(bbox_floats) bbox_floats[:3] -= centroid bbox_floats[3:] -= centroid bbox_floats /= max_extent bbox_floats += 0.5 min_point = bbox_floats[:3] max_point = bbox_floats[3:] index_min = point_to_index(min_point, num_grid_cells) index_max = point_to_index(max_point, num_grid_cells) new_bbox_str = f"" return new_bbox_str def rotate_point_cloud_90_degrees(pcd_data): """ Rotate the point cloud data by 90 degrees in the x-y plane Args: pcd_data: point cloud data in the form x, y, z Returns: pcd_data: rotated point cloud data in the form x, y, z """ # Randomly select among no change, clockwise, and counterclockwise rotation_choices = ["no change", "clockwise", "counterclockwise"] direction = np.random.choice(rotation_choices) if direction == "clockwise": rotation_matrix = torch.tensor([[0, 1], [-1, 0]]) # Apply rotation on x-y plane pcd_data[:, -3:-1] = torch.matmul(pcd_data[:, -3:-1], rotation_matrix) elif direction == "counterclockwise": rotation_matrix = torch.tensor([[0, -1], [1, 0]]) # Apply rotation on x-y plane pcd_data[:, -3:-1] = torch.matmul(pcd_data[:, -3:-1], rotation_matrix) return pcd_data, direction def adjust_bbox_after_rotation(bbox_str, direction): """_summary_ Args: bbox_str (_type_): _description_ direction (_type_): _description_ Returns: _type_: _description_ """ if direction == "no change": return bbox_str values = list(map(float, re.findall(r"[-+]?\d*\.\d+|\d+", bbox_str))) x_min, y_min, z_min, x_max, y_max, z_max = values if direction == "clockwise": # adding space after < because tokenizer will not merge < and first digit or negative sign new_bbox_str = f"< {y_min}, {x_min}, {z_min}, {y_max}, {x_max}, {z_max}>" else: # counterclockwise # adding space after < because tokenizer will not merge < and first digit or negative sign new_bbox_str = f"< {x_max}, {y_min}, {z_min}, {x_min}, {y_max}, {z_max}>" return new_bbox_str