# Spaces: 3dlg-hcvc / opdmulti-demo
# Sleeping
# File size: 15,015 Bytes
# 6d737eb

import os
from copy import deepcopy

import imageio
import open3d as o3d
import numpy as np
from PIL import Image, ImageChops

# RGB triple written into the point cloud colors for every point selected by the part mask
# (see rotate_part / translate_part).
POINT_COLOR = [1, 0, 0]  # red for demonstration
# RGB triple for the axis arrow. NOTE(review): not referenced by any function visible in this
# file section - presumably applied by the caller that builds the arrow; confirm.
ARROW_COLOR = [0, 1, 0]  # green
# Filename suffixes batch_trim accepts as images. Matched with str.endswith, so the check is
# case-sensitive (".PNG" is skipped).
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")


def generate_rotation_visualization(
    pcd: o3d.geometry.PointCloud,
    axis_arrow: o3d.geometry.TriangleMesh,
    mask: np.ndarray,
    axis_vector: np.ndarray,
    origin: np.ndarray,
    range_min: float,
    range_max: float,
    num_samples: int,
    output_dir: str,
) -> None:
    """
    Generate visualization files for a rotation motion of a part.

    For each of num_samples angles evenly spaced over [range_min, range_max], a copy of the point cloud has its
    masked part rotated with rotate_part, is rendered off-screen together with a copy of the axis arrow, and is
    saved as "{output_dir}/{idx}.png". The inputs pcd and axis_arrow are not modified. The output directory is not
    created here, so it must already exist.

    :param pcd: point cloud object representing 2D image input (RGBD) as a point cloud
    :param axis_arrow: mesh object representing axis arrow of rotation to be rendered in visualization
    :param mask: mask np.array of dimensions (height, width) representing the part to be rotated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of rotation
    :param origin: np.array of dimensions (3, ) representing the origin point of the axis of rotation
    :param range_min: float representing the minimum range of motion in radians
    :param range_max: float representing the maximum range of motion in radians
    :param num_samples: number of sample states to visualize in between range_min and range_max of motion
    :param output_dir: string path to directory in which to save visualization output
    """
    # Fixed display rotation about the x-axis applied to every frame; it does not depend on the
    # sampled angle, so it is computed once instead of once per frame.
    view_angle = np.pi * 5.5 / 5  # 198 degrees
    view_rotation = o3d.geometry.get_rotation_matrix_from_axis_angle(np.asarray([1, 0, 0]) * view_angle)

    # The range is already in radians, so the sampled angles are used directly.
    for idx, angle_rad in enumerate(np.linspace(range_min, range_max, num_samples)):
        # Work on copies so the caller's geometries stay untouched between frames
        frame_pcd = rotate_part(deepcopy(pcd), mask, axis_vector, origin, angle_rad)
        frame_arrow = deepcopy(axis_arrow)

        # Create a Visualizer object for each frame
        vis = o3d.visualization.Visualizer()
        vis.create_window(visible=False)
        try:
            vis.add_geometry(frame_pcd)
            vis.add_geometry(frame_arrow)

            # NOTE(review): the display rotation is deliberately applied AFTER add_geometry, as in the
            # original code; the visualizer may fit its view when geometry is added, so reordering
            # could change the framing - confirm before moving this.
            # Both geometries turn about the point cloud's center so the arrow stays aligned with it.
            center = frame_pcd.get_center()
            frame_pcd.rotate(view_rotation, center=center)
            frame_arrow.rotate(view_rotation, center=center)

            # Capture and save the image
            vis.capture_screen_image(f"{output_dir}/{idx}.png", do_render=True)
        finally:
            # Always release the off-screen window, even if rendering raised
            vis.destroy_window()


def generate_translation_visualization(
    pcd: o3d.geometry.PointCloud,
    axis_arrow: o3d.geometry.TriangleMesh,
    mask: np.ndarray,
    end: np.ndarray,
    range_min: float,
    range_max: float,
    num_samples: int,
    output_dir: str,
) -> None:
    """
    Generate visualization files for a translation motion of a part.

    For each of num_samples distances evenly spaced over [range_min, range_max], a copy of the point cloud has its
    masked part shifted with translate_part, is rendered off-screen together with a copy of the axis arrow, and is
    saved as "{output_dir}/{idx}.png". The inputs pcd and axis_arrow are not modified. The output directory is not
    created here, so it must already exist.

    :param pcd: point cloud object representing 2D image input (RGBD) as a point cloud
    :param axis_arrow: mesh object representing axis arrow of translation to be rendered in visualization
    :param mask: mask np.array of dimensions (height, width) representing the part to be translated in the image
    :param end: np.array of dimensions (3, ) passed to translate_part as the axis of translation; only its direction
    matters because translate_part normalizes it
    :param range_min: float representing the minimum range of motion
    :param range_max: float representing the maximum range of motion
    :param num_samples: number of sample states to visualize in between range_min and range_max of motion
    :param output_dir: string path to directory in which to save visualization output
    """
    # Fixed display rotation about the x-axis applied to every frame; it does not depend on the
    # sampled distance, so it is computed once instead of once per frame.
    # TODO: not sure why we need this rotation for the translation, and when it would be desired
    view_angle = np.pi * 5.5 / 5  # 198 degrees
    view_rotation = o3d.geometry.get_rotation_matrix_from_axis_angle(np.asarray([1, 0, 0]) * view_angle)

    for idx, translate_distance in enumerate(np.linspace(range_min, range_max, num_samples)):
        # Work on copies so the caller's geometries stay untouched between frames
        frame_pcd = translate_part(deepcopy(pcd), mask, end, translate_distance.item())
        frame_arrow = deepcopy(axis_arrow)

        # Create a Visualizer object for each frame
        vis = o3d.visualization.Visualizer()
        vis.create_window(visible=False)
        try:
            vis.add_geometry(frame_pcd)
            vis.add_geometry(frame_arrow)

            # NOTE(review): the display rotation is deliberately applied AFTER add_geometry, as in the
            # original code; the visualizer may fit its view when geometry is added, so reordering
            # could change the framing - confirm before moving this.
            # Both geometries turn about the point cloud's center so the arrow stays aligned with it.
            center = frame_pcd.get_center()
            frame_pcd.rotate(view_rotation, center=center)
            frame_arrow.rotate(view_rotation, center=center)

            # Capture and save the image
            vis.capture_screen_image(f"{output_dir}/{idx}.png", do_render=True)
        finally:
            # Always release the off-screen window, even if rendering raised
            vis.destroy_window()


def get_rotation_matrix_from_vectors(vec1: np.ndarray, vec2: np.ndarray) -> np.ndarray:
    """
    Find the rotation matrix that aligns vec1 to vec2.

    Only the directions of the inputs matter; both are normalized first. Parallel inputs yield the identity and
    antiparallel inputs yield a half-turn about an axis perpendicular to vec1 (the rotation is not unique in that
    case, so one valid choice is returned). Zero-length inputs are not supported.

    :param vec1: A 3d "source" vector
    :param vec2: A 3d "destination" vector
    :return: A transform matrix (3x3) which when applied to vec1, aligns it with vec2.
    """
    a = (vec1 / np.linalg.norm(vec1)).reshape(3)
    b = (vec2 / np.linalg.norm(vec2)).reshape(3)
    v = np.cross(a, b)  # rotation axis scaled by sin(angle)
    c = np.dot(a, b)  # cos(angle)

    if 1.0 + c < 1e-12:
        # Antiparallel: the cross product vanishes and gives no axis. Build one by crossing `a`
        # with the coordinate axis it is least aligned with, then rotate by pi about it
        # (a half-turn about unit axis u is 2*u*u^T - I).
        least_aligned = np.zeros(3)
        least_aligned[np.argmin(np.abs(a))] = 1.0
        axis = np.cross(a, least_aligned)
        axis = axis / np.linalg.norm(axis)
        return 2.0 * np.outer(axis, axis) - np.eye(3)

    kmat = np.array([[0, -v[2], v[1]], [v[2], 0, -v[0]], [-v[1], v[0], 0]])
    # (1 - c) / s**2 equals 1 / (1 + c) because s**2 = 1 - c**2; this form stays finite when the
    # vectors are parallel (s == 0), where the original expression divided by zero.
    rotation_matrix = np.eye(3) + kmat + kmat.dot(kmat) / (1.0 + c)
    return rotation_matrix


def draw_line(start_point: np.ndarray, end_point: np.ndarray) -> o3d.geometry.TriangleMesh:
    """
    Generate 3D arrow mesh representing the axis that runs from start_point towards end_point.

    Only the direction of (end_point - start_point) is used: the arrow has a fixed size (0.9 shaft plus 0.1 head)
    regardless of the distance between the two points. start_point and end_point must differ.

    :param start_point: np.ndarray of dimensions (3, ) representing the start point of the axis
    :param end_point: np.ndarray of dimensions (3, ) representing the end point of the axis
    :return: mesh object (cylinder shaft merged with cone head) oriented along the axis
    """
    # Coerce to float arrays so lists/tuples and integer arrays are accepted as well
    start = np.asarray(start_point, dtype=float)

    # Compute direction vector and normalize it
    direction_vector = np.asarray(end_point, dtype=float) - start
    normalized_vector = direction_vector / np.linalg.norm(direction_vector)

    # Rotation taking the Z-axis (the axis the primitives are created along) onto the direction
    rot_mat = get_rotation_matrix_from_vectors(np.array([0, 0, 1]), normalized_vector)

    # Create the cylinder (shaft of the arrow)
    cylinder_length = 0.9  # fixed shaft length, you can adjust as needed
    cylinder_radius = 0.01  # Adjust the thickness of the arrow shaft
    cylinder = o3d.geometry.TriangleMesh.create_cylinder(radius=cylinder_radius, height=cylinder_length)

    # Orient the shaft, then move it to start_point.
    # NOTE(review): per the Open3D docs create_cylinder is centered on the origin, so the shaft ends
    # up centered ON start_point (extending both ways along the axis) rather than starting there -
    # confirm this is the intended look.
    cylinder.rotate(rot_mat, center=[0, 0, 0])
    cylinder.translate(start)

    # Create the cone (head of the arrow)
    cone_height = 0.1  # fixed head length, adjust as needed
    cone_radius = 0.03  # Adjust the size of the arrowhead
    cone = o3d.geometry.TriangleMesh.create_cone(radius=cone_radius, height=cone_height)

    # Orient the head, then place it 0.4 along the axis from start_point, near the front of the shaft
    cone.rotate(rot_mat, center=[0, 0, 0])
    cone.translate(start + normalized_vector * 0.4)

    return cylinder + cone


def rotate_part(
    pcd: o3d.geometry.PointCloud, mask: np.ndarray, axis_vector: np.ndarray, origin: np.ndarray, angle_rad: float
) -> o3d.geometry.PointCloud:
    """
    Generate rotated point cloud of mask based on provided angle around axis.

    Points are matched to mask pixels in row-major order (point index = row * width + column). Every point whose
    mask value is > 0 is rotated about the axis through origin and recolored with POINT_COLOR. The point cloud is
    modified in place and also returned.

    :param pcd: point cloud object representing points of image
    :param mask: mask np.array of dimensions (height, width) representing the part to be rotated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of rotation; it is
    normalized before use, so only its direction matters
    :param origin: np.array of dimensions (3, ) representing the origin point of the axis of rotation
    :param angle_rad: angle in radians to rotate mask part
    :return: point cloud object after rotation of masked part
    """
    # Get the coordinates and colors of the point cloud as numpy arrays
    points_np = np.asarray(pcd.points)
    colors_np = np.asarray(pcd.colors)

    # Rodrigues' formula is only a rotation for a UNIT axis, so normalize first (translate_part
    # normalizes its axis the same way). A zero axis is left as is, which yields the identity.
    axis = np.asarray(axis_vector, dtype=float).reshape(3)
    axis_norm = np.linalg.norm(axis)
    if axis_norm > 0:
        axis = axis / axis_norm
    pivot = np.asarray(origin, dtype=float).reshape(3)

    # Create skew-symmetric (cross-product) matrix of the axis
    K = np.array(
        [
            [0, -axis[2], axis[1]],
            [axis[2], 0, -axis[0]],
            [-axis[1], axis[0], 0],
        ]
    )

    # Compute rotation matrix using Rodrigues' formula
    R = np.eye(3) + np.sin(angle_rad) * K + (1 - np.cos(angle_rad)) * np.dot(K, K)

    # Flat indices of the pixels that belong to the part; flattening in row-major order gives
    # exactly row * width + column.
    part_idx = np.flatnonzero(np.asarray(mask).reshape(-1) > 0)

    # Shift so the pivot is at the world origin, rotate every row vector at once (p @ R.T is R @ p
    # per point), then shift back.
    points_np[part_idx] = (points_np[part_idx] - pivot) @ R.T + pivot
    colors_np[part_idx] = POINT_COLOR

    # Update the point cloud's coordinates
    pcd.points = o3d.utility.Vector3dVector(points_np)

    # Update point cloud colors
    pcd.colors = o3d.utility.Vector3dVector(colors_np)

    return pcd


def translate_part(
    pcd: "o3d.geometry.PointCloud", mask: np.ndarray, axis_vector: np.ndarray, distance: float
) -> "o3d.geometry.PointCloud":
    """
    Generate translated point cloud of mask based on provided distance along axis.

    Points are matched to mask pixels in row-major order (point index = row * width + column). Every point whose
    mask value is > 0 is shifted by distance along the normalized axis and recolored with POINT_COLOR. The point
    cloud is modified in place and also returned.

    :param pcd: point cloud object representing points of image
    :param mask: mask np.array of dimensions (height, width) representing the part to be translated in the image
    :param axis_vector: np.array of dimensions (3, ) representing the vector of the axis of translation; it is
    normalized before use, so it must be non-zero and only its direction matters
    :param distance: distance within coordinate system to translate mask part
    :return: point cloud object after translation of masked part
    """
    axis = np.asarray(axis_vector, dtype=float)
    translation_vector = axis / np.linalg.norm(axis) * distance

    # Get the colors and coordinates of the point cloud as numpy arrays
    colors_np = np.asarray(pcd.colors)
    points_np = np.asarray(pcd.points)

    # Flat indices of the pixels that belong to the part; flattening in row-major order gives
    # exactly row * width + column.
    part_idx = np.flatnonzero(np.asarray(mask).reshape(-1) > 0)

    # Recolor and shift all part points at once
    colors_np[part_idx] = POINT_COLOR
    points_np[part_idx] += translation_vector

    # Update point cloud colors
    pcd.colors = o3d.utility.Vector3dVector(colors_np)

    # Update the point cloud's coordinates
    pcd.points = o3d.utility.Vector3dVector(points_np)

    return pcd


def batch_trim(images_path: str, save_path: str, identical: bool = False) -> None:
    """
    Trim the uniform border from all images in the given path and save new images to folder.

    The border color is taken from each image's top-left pixel, so "whitespace" means whatever background color
    the image has in that corner.

    :param images_path: local path to folder containing all images. Images must have the extension ".png", ".jpg", or
    ".jpeg".
    :param save_path: local path to folder in which to save trimmed images. The folder must already exist.
    :param identical: if True, will apply same crop to all images, else each image will have its whitespace trimmed
    independently. Note that in the latter case, each image may have a slightly different size. With identical=True
    the results are written as "{idx}.png" in frame order (numeric file names are ordered numerically); otherwise
    each result keeps its original file name.
    """

    def get_trim(im):
        """Return the bounding box of the non-background content of an image, or None if nothing stands out."""
        bg = Image.new(im.mode, im.size, im.getpixel((0, 0)))
        diff = ImageChops.difference(im, bg)
        # (diff + diff) / 2.0 - 100: subtracts 100 from the difference so pixels that are only
        # slightly off the background color do not count as content.
        diff = ImageChops.add(diff, diff, 2.0, -100)
        return diff.getbbox()

    def frame_order(file_name):
        """Sort key placing numerically named files (e.g. "2.png" before "10.png") in numeric order."""
        stem = os.path.splitext(file_name)[0]
        if stem.isdecimal():
            return (0, int(stem), file_name)
        return (1, 0, file_name)

    image_files = [name for name in os.listdir(images_path) if name.endswith(IMAGE_EXTENSIONS)]

    if identical:
        # A plain lexicographic sort would put "10.png" before "2.png"; since outputs are renumbered
        # by position, that would scramble the frame order for 11 or more frames.
        image_files.sort(key=frame_order)

        images = []
        try:
            # load all images
            for image_file in image_files:
                images.append(Image.open(os.path.join(images_path, image_file)))

            # find the smallest box that contains the content of every image
            optimal_box = None
            for im in images:
                bbox = get_trim(im)
                if bbox is None:
                    bbox = (0, 0, im.size[0], im.size[1])  # bound entire image

                if optimal_box is None:
                    optimal_box = bbox
                else:
                    optimal_box = (
                        min(optimal_box[0], bbox[0]),
                        min(optimal_box[1], bbox[1]),
                        max(optimal_box[2], bbox[2]),
                        max(optimal_box[3], bbox[3]),
                    )

            # Image.crop returns a NEW image and leaves the original untouched, so the cropped
            # result is what has to be saved.
            for idx, im in enumerate(images):
                im.crop(optimal_box).save(os.path.join(save_path, f"{idx}.png"))
        finally:
            # Release every opened image, even if trimming or saving failed part-way
            for im in images:
                im.close()

    else:  # trim each image separately
        for image_file in image_files:
            with Image.open(os.path.join(images_path, image_file)) as im:
                bbox = get_trim(im)
                trimmed = im.crop(bbox) if bbox else im
                trimmed.save(os.path.join(save_path, image_file))


def create_gif(image_folder_path: str, num_samples: int, gif_filename: str = "output.gif") -> None:
    """
    Compile the numbered frames of a folder into a gif saved in that same folder.

    :param image_folder_path: path to folder containing images (non-recursive). Frames are expected to be named
    {i}.png for every i in range(num_samples), i.e. 0.png up to {num_samples - 1}.png.
    :param num_samples: number of sampled images to compile into gif.
    :param gif_filename: filename for gif, defaults to "output.gif"
    """
    # Load the frames in index order (0.png, 1.png, ...) using imageio
    images = []
    for i in range(num_samples):
        images.append(imageio.imread(f"{image_folder_path}/{i}.png"))

    # Every frame must have the same shape as the first one
    shapes_match = all(images[0].shape == im.shape for im in images)
    assert shapes_match, f"Found some images with a different shape: {[im.shape for im in images]}"

    # Write the gif next to the frames; duration=0.1 is handed to imageio unchanged
    imageio.mimsave(f"{image_folder_path}/{gif_filename}", images, duration=0.1)