import os
import re
from copy import deepcopy

import imageio
import numpy as np
import open3d as o3d
from PIL import Image, ImageChops

POINT_COLOR = [1, 0, 0]  # red for demonstration
ARROW_COLOR = [0, 1, 0]  # green
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

# Extra rotation (198 degrees) about the x-axis applied to every rendered frame.
_VIEW_ROTATION_ANGLE_X = np.pi * 5.5 / 5
# Below this value of |sin(angle)| two unit vectors are treated as (anti-)parallel.
_PARALLEL_EPS = 1e-12


def _render_frame(
    pcd: o3d.geometry.PointCloud,
    arrow: o3d.geometry.TriangleMesh,
    output_filename: str,
) -> None:
    """
    Render a point cloud and an axis arrow in a hidden window and save a screenshot.

    Both geometries are rotated in place, so callers should pass copies.

    :param pcd: point cloud to render
    :param arrow: arrow mesh to render alongside the point cloud
    :param output_filename: path of the image file to write
    """
    vis = o3d.visualization.Visualizer()
    vis.create_window(visible=False)
    try:
        vis.add_geometry(pcd)
        vis.add_geometry(arrow)

        # NOTE(review): the view rotation is applied after add_geometry, so the
        # camera was presumably fitted to the un-rotated bounds. The ordering is
        # kept as-is to leave the rendered output unchanged; confirm it is intended.
        view_rotation = o3d.geometry.get_rotation_matrix_from_axis_angle(
            np.asarray([1, 0, 0]) * _VIEW_ROTATION_ANGLE_X
        )
        pcd.rotate(view_rotation, center=pcd.get_center())
        arrow.rotate(view_rotation, center=pcd.get_center())

        vis.capture_screen_image(output_filename, do_render=True)
    finally:
        # Always release the window, even if rendering fails.
        vis.destroy_window()


def generate_rotation_visualization(
    pcd: o3d.geometry.PointCloud,
    axis_arrow: o3d.geometry.TriangleMesh,
    mask: np.ndarray,
    axis_vector: np.ndarray,
    origin: np.ndarray,
    range_min: float,
    range_max: float,
    num_samples: int,
    output_dir: str,
) -> None:
    """
    Generate visualization files for a rotation motion of a part.

    One image named ``{idx}.png`` is written to output_dir per sampled angle.

    :param pcd: point cloud object representing 2D image input (RGBD) as a point cloud
    :param axis_arrow: mesh object representing axis arrow of rotation to be rendered in visualization
    :param mask: mask np.array of dimensions (height, width) representing the part to be rotated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of rotation
    :param origin: np.array of dimensions (3, ) representing the origin point of the axis of rotation
    :param range_min: float representing the minimum range of motion in radians
    :param range_max: float representing the maximum range of motion in radians
    :param num_samples: number of sample states to visualize in between range_min and range_max of motion
    :param output_dir: string path to directory in which to save visualization output (created if missing)
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # The range is already in radians, so the samples are used directly.
    for idx, angle_rad in enumerate(np.linspace(range_min, range_max, num_samples)):
        # Work on copies so the caller's geometries are left untouched.
        rotated_pcd = rotate_part(deepcopy(pcd), mask, axis_vector, origin, float(angle_rad))
        rotated_arrow = deepcopy(axis_arrow)
        _render_frame(rotated_pcd, rotated_arrow, f"{output_dir}/{idx}.png")


def generate_translation_visualization(
    pcd: o3d.geometry.PointCloud,
    axis_arrow: o3d.geometry.TriangleMesh,
    mask: np.ndarray,
    end: np.ndarray,
    range_min: float,
    range_max: float,
    num_samples: int,
    output_dir: str,
) -> None:
    """
    Generate visualization files for a translation motion of a part.

    One image named ``{idx}.png`` is written to output_dir per sampled distance.

    :param pcd: point cloud object representing 2D image input (RGBD) as a point cloud
    :param axis_arrow: mesh object representing axis arrow of translation to be rendered in visualization
    :param mask: mask np.array of dimensions (height, width) representing the part to be translated in the image
    :param end: np.array of dimensions (3, ) passed to translate_part as the translation axis vector
    :param range_min: float representing the minimum range of motion
    :param range_max: float representing the maximum range of motion
    :param num_samples: number of sample states to visualize in between range_min and range_max of motion
    :param output_dir: string path to directory in which to save visualization output (created if missing)
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for idx, translate_distance in enumerate(np.linspace(range_min, range_max, num_samples)):
        # Work on copies so the caller's geometries are left untouched.
        translated_pcd = translate_part(deepcopy(pcd), mask, end, float(translate_distance))
        translated_arrow = deepcopy(axis_arrow)
        # TODO: not sure why the extra view rotation is needed for the translation,
        # and when it would be desired.
        _render_frame(translated_pcd, translated_arrow, f"{output_dir}/{idx}.png")


def get_rotation_matrix_from_vectors(vec1: np.ndarray, vec2: np.ndarray) -> np.ndarray:
    """
    Find the rotation matrix that aligns vec1 to vec2.

    Parallel inputs yield the identity and anti-parallel inputs yield a
    180-degree rotation about an axis perpendicular to vec1; the general formula
    divides by sin^2 of the angle and is undefined in those two cases.

    :param vec1: A 3d "source" vector (non-zero)
    :param vec2: A 3d "destination" vector (non-zero)
    :return: A transform matrix (3x3) which when applied to vec1, aligns it with vec2.
    """
    a = (vec1 / np.linalg.norm(vec1)).reshape(3)
    b = (vec2 / np.linalg.norm(vec2)).reshape(3)
    v = np.cross(a, b)
    c = np.dot(a, b)  # cosine of the angle between a and b
    s = np.linalg.norm(v)  # |sine| of the angle between a and b

    if s < _PARALLEL_EPS:
        if c > 0:
            # Same direction: nothing to rotate.
            return np.eye(3)
        # Opposite direction: rotate by pi about any axis perpendicular to a.
        # Crossing with the basis vector least aligned with a avoids a zero result.
        helper = np.eye(3)[np.argmin(np.abs(a))]
        perpendicular = np.cross(a, helper)
        perpendicular /= np.linalg.norm(perpendicular)
        return 2.0 * np.outer(perpendicular, perpendicular) - np.eye(3)

    kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    return np.eye(3) + kmat + kmat.dot(kmat) * ((1 - c) / (s**2))


def draw_line(start_point: np.ndarray, end_point: np.ndarray) -> o3d.geometry.TriangleMesh:
    """
    Generate 3D arrow mesh representing the axis pointing from start_point towards end_point.

    Only the direction of ``end_point - start_point`` is used: the shaft and head
    have fixed sizes, the shaft is placed at start_point and the head at
    ``start_point + 0.4 * direction``.

    :param start_point: np.ndarray of dimensions (3, ) representing the start point of the axis
    :param end_point: np.ndarray of dimensions (3, ) representing the end point of the axis
    :return: mesh object representing the axis arrow
    """
    # Compute direction vector and normalize it
    direction_vector = end_point - start_point
    normalized_vector = direction_vector / np.linalg.norm(direction_vector)

    # Rotation that takes the Z-axis onto the desired direction
    z_axis = np.array([0, 0, 1])
    rot_mat = get_rotation_matrix_from_vectors(z_axis, normalized_vector)

    # Create the cylinder (shaft of the arrow)
    cylinder_length = 0.9  # 90% of the total arrow length, you can adjust as needed
    cylinder_radius = 0.01  # Adjust the thickness of the arrow shaft
    cylinder = o3d.geometry.TriangleMesh.create_cylinder(radius=cylinder_radius, height=cylinder_length)
    # Orient the shaft, then move it to start_point
    cylinder.rotate(rot_mat, center=[0, 0, 0])
    cylinder.translate(start_point)

    # Create the cone (head of the arrow)
    cone_height = 0.1  # 10% of the total arrow length, adjust as needed
    cone_radius = 0.03  # Adjust the size of the arrowhead
    cone = o3d.geometry.TriangleMesh.create_cone(radius=cone_radius, height=cone_height)
    # Orient the head, then move it along the axis direction
    cone.rotate(rot_mat, center=[0, 0, 0])
    cone.translate(start_point + normalized_vector * 0.4)

    return cylinder + cone


def rotate_part(
    pcd: o3d.geometry.PointCloud, mask: np.ndarray, axis_vector: np.ndarray, origin: np.ndarray, angle_rad: float
) -> o3d.geometry.PointCloud:
    """
    Generate rotated point cloud of mask based on provided angle around axis.

    Points are matched to mask pixels in row-major order (index ``i * width + j``).
    The masked points are also painted with POINT_COLOR. pcd is modified in place.

    :param pcd: point cloud object representing points of image
    :param mask: mask np.array of dimensions (height, width) representing the part to be rotated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of rotation;
        it is normalized internally, so any non-zero length is accepted
    :param origin: np.array of dimensions (3, ) representing the origin point of the axis of rotation
    :param angle_rad: angle in radians to rotate mask part
    :return: point cloud object after rotation of masked part
    :raises ValueError: if axis_vector has zero length
    """
    points_np = np.asarray(pcd.points)
    colors_np = np.asarray(pcd.colors)

    axis = np.asarray(axis_vector, dtype=float).reshape(3)
    axis_norm = np.linalg.norm(axis)
    if axis_norm == 0:
        raise ValueError("axis_vector must be non-zero to define a rotation axis")
    # Rodrigues' formula is only a rotation for a unit-length axis.
    axis = axis / axis_norm
    pivot = np.asarray(origin, dtype=float).reshape(3)

    # Skew-symmetric cross-product matrix of the unit axis
    K = np.array(
        [
            [0, -axis[2], axis[1]],
            [axis[2], 0, -axis[0]],
            [-axis[1], axis[0], 0],
        ]
    )
    # Rotation matrix from Rodrigues' formula
    R = np.eye(3) + np.sin(angle_rad) * K + (1 - np.cos(angle_rad)) * np.dot(K, K)

    # Flat (row-major) indices of the pixels that belong to the part
    part_indices = np.flatnonzero(np.asarray(mask) > 0)

    # Shift so the pivot is at the world origin, rotate, then shift back.
    # Row vectors times R.T is equivalent to R times each column vector.
    points_np[part_indices] = (points_np[part_indices] - pivot) @ R.T + pivot
    colors_np[part_indices] = POINT_COLOR

    pcd.points = o3d.utility.Vector3dVector(points_np)
    pcd.colors = o3d.utility.Vector3dVector(colors_np)
    return pcd


def translate_part(
    pcd: o3d.geometry.PointCloud, mask: np.ndarray, axis_vector: np.ndarray, distance: float
) -> o3d.geometry.PointCloud:
    """
    Generate translated point cloud of mask based on provided distance along axis.

    Points are matched to mask pixels in row-major order (index ``i * width + j``).
    The masked points are also painted with POINT_COLOR. pcd is modified in place.

    :param pcd: point cloud object representing points of image
    :param mask: mask np.array of dimensions (height, width) representing the part to be translated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of translation;
        it is normalized internally
    :param distance: distance within coordinate system to translate mask part
    :return: point cloud object after translation of masked part
    :raises ValueError: if axis_vector has zero length
    """
    axis = np.asarray(axis_vector, dtype=float)
    axis_norm = np.linalg.norm(axis)
    if axis_norm == 0:
        raise ValueError("axis_vector must be non-zero to define a translation direction")
    translation_vector = (axis / axis_norm) * distance

    colors_np = np.asarray(pcd.colors)
    points_np = np.asarray(pcd.points)

    # Flat (row-major) indices of the pixels that belong to the part
    part_indices = np.flatnonzero(np.asarray(mask) > 0)
    colors_np[part_indices] = POINT_COLOR
    points_np[part_indices] += translation_vector

    pcd.colors = o3d.utility.Vector3dVector(colors_np)
    pcd.points = o3d.utility.Vector3dVector(points_np)
    return pcd


def _natural_sort_key(filename: str) -> list:
    """Sort key that orders embedded integers numerically, so "2.png" precedes "10.png"."""
    return [int(chunk) if chunk.isdigit() else chunk for chunk in re.split(r"(\d+)", filename)]


def _get_trim_bbox(im):
    """
    Return the bounding box of the non-background region of an image.

    The colour of the top-left pixel is taken as the background.

    :param im: PIL image to inspect
    :return: (left, upper, right, lower) box, or None if nothing differs from the background
    """
    bg = Image.new(im.mode, im.size, im.getpixel((0, 0)))
    diff = ImageChops.difference(im, bg)
    diff = ImageChops.add(diff, diff, 2.0, -100)
    return diff.getbbox()


def batch_trim(images_path: str, save_path: str, identical: bool = False) -> None:
    """
    Trim white spaces from all images in the given path and save new images to folder.

    :param images_path: local path to folder containing all images. Images must have the extension ".png", ".jpg",
        or ".jpeg".
    :param save_path: local path to folder in which to save trimmed images (created if missing)
    :param identical: if True, will apply same crop to all images and save them as ``{idx}.png`` in natural
        filename order, else each image will have its whitespace trimmed independently and keep its filename.
        Note that in the latter case, each image may have a slightly different size.
    """
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    # Natural ordering keeps "10.png" after "2.png", so the {idx}.png names written
    # in identical mode preserve the original frame order.
    image_files = sorted(
        (name for name in os.listdir(images_path) if name.endswith(IMAGE_EXTENSIONS)),
        key=_natural_sort_key,
    )

    if not identical:
        # trim each image separately
        for image_file in image_files:
            with Image.open(os.path.join(images_path, image_file)) as im:
                bbox = _get_trim_bbox(im)
                trimmed = im.crop(bbox) if bbox else im
                trimmed.save(os.path.join(save_path, image_file))
        return

    # First pass: find the smallest box that contains every image's content.
    optimal_box = None
    for image_file in image_files:
        with Image.open(os.path.join(images_path, image_file)) as im:
            bbox = _get_trim_bbox(im)
            if bbox is None:
                bbox = (0, 0, im.size[0], im.size[1])  # bound entire image
        if optimal_box is None:
            optimal_box = bbox
        else:
            optimal_box = (
                min(optimal_box[0], bbox[0]),
                min(optimal_box[1], bbox[1]),
                max(optimal_box[2], bbox[2]),
                max(optimal_box[3], bbox[3]),
            )

    # Second pass: apply the shared crop. Image.crop returns a new image, so its
    # result (not the source image) is what must be saved.
    for idx, image_file in enumerate(image_files):
        with Image.open(os.path.join(images_path, image_file)) as im:
            cropped = im.crop(optimal_box)
        cropped.save(os.path.join(save_path, f"{idx}.png"))


def create_gif(image_folder_path: str, num_samples: int, gif_filename: str = "output.gif") -> None:
    """
    Create gif out of folder of images and save to file.

    :param image_folder_path: path to folder containing images (non-recursive). Assumes images are named as {i}.png
        for each of i from 0 to num_samples - 1. The gif is written to this folder as well.
    :param num_samples: number of sampled images to compile into gif.
    :param gif_filename: filename for gif, defaults to "output.gif"
    :raises AssertionError: if the images do not all have the same shape
    """
    # Generate a list of image filenames (assuming the images are saved as 0.png, 1.png, etc.)
    image_files = [f"{image_folder_path}/{i}.png" for i in range(num_samples)]

    # Read the images using imageio
    images = [imageio.imread(image_file) for image_file in image_files]

    # Raised explicitly (rather than via `assert`) so the check still runs under
    # `python -O`; the exception type is kept for backward compatibility.
    if not all(images[0].shape == im.shape for im in images):
        raise AssertionError(f"Found some images with a different shape: {[im.shape for im in images]}")

    # Save images as a gif
    gif_output_path = f"{image_folder_path}/{gif_filename}"
    imageio.mimsave(gif_output_path, images, duration=0.1)