JensParslov committed commit b3da277 (0 parents)

Duplicate from NN-BRD/hackathon_depth_segment

Files changed (11)
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +309 -0
  4. app_legacy.py +48 -0
  5. inference.py +448 -0
  6. packages.txt +1 -0
  7. requirements.txt +12 -0
  8. sam_vit_b_01ec64.pth +3 -0
  9. sam_vit_h_4b8939.pth +3 -0
  10. tests.py +0 -0
  11. utils.py +231 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
---
title: Hackathon
emoji: 👁
colorFrom: purple
colorTo: green
sdk: gradio
sdk_version: 3.39.0
app_file: app.py
pinned: false
duplicated_from: NN-BRD/hackathon_depth_segment
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
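Note: `app_file: app.py` is the entry point Spaces runs with Gradio 3.39.0. A minimal sketch for launching it outside Spaces (assumes `pip install -r requirements.txt` has been run and the Git LFS checkpoint sam_vit_h_4b8939.pth has been pulled; importing `app` builds the Blocks UI and loads the SAM and DPT models):

# hypothetical local launch, not part of the commit
from app import block

block.queue()
block.launch()  # app.py itself launches with auth=("novouser", "bstad2023")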
app.py ADDED
@@ -0,0 +1,309 @@
import os
import gradio as gr
import numpy as np
import cv2
from PIL import Image, ImageOps
import torch
from inference import SegmentPredictor, DepthPredictor
from utils import generate_PCL, PCL3, point_cloud


sam = SegmentPredictor()
sam_cpu = SegmentPredictor(device="cpu")
dpt = DepthPredictor()
red = (255, 0, 0)
blue = (0, 0, 255)
annos = []


block = gr.Blocks()
with block:
    # States
    def point_coords_empty():
        return []

    def point_labels_empty():
        return []

    image_edit_trigger = gr.State(True)
    point_coords = gr.State(point_coords_empty)
    point_labels = gr.State(point_labels_empty)
    masks = gr.State([])
    cutout_idx = gr.State(set())
    pred_masks = gr.State([])
    prompt_masks = gr.State([])
    embedding = gr.State()

    # UI
    with gr.Column():
        gr.Markdown(
            """# Segment Anything Model (SAM)
## a new AI model from Meta AI that can "cut out" any object, in any image, with a single click 🚀
SAM is a promptable segmentation system with zero-shot generalization to unfamiliar objects and images, without the need for additional training. [**Official Project**](https://segment-anything.com/) [**Code**](https://github.com/facebookresearch/segment-anything).
"""
        )
        with gr.Row():
            with gr.Column():
                with gr.Tab("Upload Image"):
                    # mirror_webcam = False
                    upload_image = gr.Image(label="Input", type="pil", tool=None)
                with gr.Tab("Webcam"):
                    # mirror_webcam = False
                    input_image = gr.Image(
                        label="Input", type="pil", tool=None, source="webcam"
                    )
                with gr.Row():
                    sam_encode_btn = gr.Button("Encode", variant="primary")
                    sam_sgmt_everything_btn = gr.Button(
                        "Segment Everything!", variant="primary"
                    )
                    # sam_encode_status = gr.Label('Not encoded yet')
                with gr.Row():
                    prompt_image = gr.Image(label="Segments")
                    # prompt_lbl_image = gr.AnnotatedImage(label='Segment Labels')
                    lbl_image = gr.AnnotatedImage(label="Everything")
                with gr.Row():
                    point_label_radio = gr.Radio(
                        label="Point Label", choices=[1, 0], value=1
                    )
                    text = gr.Textbox(label="Mask Name")
                    reset_btn = gr.Button("New Mask")
                selected_masks_image = gr.AnnotatedImage(label="Selected Masks")
            with gr.Row():
                with gr.Column():
                    pcl_figure = gr.Model3D(
                        label="3-D Reconstruction", clear_color=[1.0, 1.0, 1.0, 1.0]
                    )
                    with gr.Row():
                        max_depth = gr.Slider(
                            minimum=0, maximum=10, value=3, step=0.01, label="Max Depth"
                        )
                        min_depth = gr.Slider(
                            minimum=0, maximum=10, step=0.01, value=1, label="Min Depth"
                        )
                        n_samples = gr.Slider(
                            minimum=1e3,
                            maximum=1e6,
                            step=1e3,
                            value=1e5,
                            label="Number of Samples",
                        )
                        cube_size = gr.Slider(
                            minimum=0.00001,
                            maximum=0.001,
                            step=0.000001,
                            value=0.00001,
                            label="Cube size",
                        )
                    depth_reconstruction_btn = gr.Button(
                        "3D Reconstruction", variant="primary"
                    )
                    depth_reconstruction_mask_btn = gr.Button(
                        "Mask Reconstruction", variant="primary"
                    )

        sam_decode_btn = gr.Button("Predict using points!", variant="primary")

    # components
    components = {
        point_coords,
        point_labels,
        image_edit_trigger,
        masks,
        cutout_idx,
        input_image,
        embedding,
        point_label_radio,
        text,
        reset_btn,
        sam_sgmt_everything_btn,
        sam_decode_btn,
        depth_reconstruction_btn,
        prompt_image,
        lbl_image,
        n_samples,
        max_depth,
        min_depth,
        cube_size,
        selected_masks_image,
    }

    def on_upload_image(input_image, upload_image):
        # Mirror because the gradio webcam Image has mirror_webcam=True
        upload_image_mirror = ImageOps.mirror(upload_image)
        return [upload_image_mirror, upload_image]

    upload_image.upload(
        on_upload_image, [input_image, upload_image], [input_image, upload_image]
    )

    # event - init coords
    def on_reset_btn_click(input_image):
        # reset the prompt image to the raw input and clear accumulated points
        return input_image, point_coords_empty(), point_labels_empty()

    reset_btn.click(
        on_reset_btn_click,
        [input_image],
        [prompt_image, point_coords, point_labels],
        queue=False,
    )

    def on_prompt_image_select(
        input_image,
        prompt_image,
        point_coords,
        point_labels,
        point_label_radio,
        text,
        pred_masks,
        embedding,
        evt: gr.SelectData,
    ):
        sam_cpu.dummy_encode(input_image)
        x, y = evt.index
        color = red if point_label_radio == 0 else blue
        if prompt_image is None:
            prompt_image = np.array(input_image.copy())

        cv2.circle(prompt_image, (x, y), 5, color, -1)
        point_coords.append([x, y])
        point_labels.append(point_label_radio)
        sam_masks = sam_cpu.cond_pred(
            pts=np.array(point_coords), lbls=np.array(point_labels), embedding=embedding
        )
        return [
            prompt_image,
            (input_image, sam_masks),
            point_coords,
            point_labels,
            sam_masks,
        ]

    prompt_image.select(
        on_prompt_image_select,
        [
            input_image,
            prompt_image,
            point_coords,
            point_labels,
            point_label_radio,
            text,
            pred_masks,
            embedding,
        ],
        [prompt_image, lbl_image, point_coords, point_labels, pred_masks],
        queue=True,
    )

    def on_everything_image_select(
        input_image, pred_masks, masks, text, evt: gr.SelectData
    ):
        i = evt.index
        mask = pred_masks[i][0]
        print(mask)
        print(type(mask))
        masks.append((mask, text))
        anno = (input_image, masks)
        return [masks, anno]

    lbl_image.select(
        on_everything_image_select,
        [input_image, pred_masks, masks, text],
        [masks, selected_masks_image],
        queue=False,
    )

    def on_selected_masks_image_select(input_image, masks, evt: gr.SelectData):
        i = evt.index
        del masks[i]
        anno = (input_image, masks)
        return [masks, anno]

    selected_masks_image.select(
        on_selected_masks_image_select,
        [input_image, masks],
        [masks, selected_masks_image],
        queue=False,
    )
    # prompt_lbl_image.select(on_everything_image_select,
    #                         [input_image, prompt_masks, masks, text],
    #                         [masks, selected_masks_image], queue=False)

    def on_click_sam_encode_btn(inputs):
        print("encoding")
        # encode image on click
        embedding = sam.encode(inputs[input_image]).cpu()
        sam_cpu.dummy_encode(inputs[input_image])
        print("encoding done")
        return [inputs[input_image], embedding]

    sam_encode_btn.click(
        on_click_sam_encode_btn, components, [prompt_image, embedding], queue=False
    )

    def on_click_sam_decode_btn(inputs):
        print("inferencing")
        image = inputs[input_image]
        # decode on CPU against the cached (CPU) embedding; cond_pred returns
        # (mask, label) pairs sorted by area, so keep the largest mask
        sam_masks = sam_cpu.cond_pred(
            pts=np.array(inputs[point_coords]),
            lbls=np.array(inputs[point_labels]),
            embedding=inputs[embedding],
        )
        inputs[masks].append((sam_masks[0][0], inputs[text]))
        print(inputs[masks][0])
        return {prompt_image: (image, inputs[masks])}

    sam_decode_btn.click(
        on_click_sam_decode_btn,
        components,
        [prompt_image, masks, cutout_idx],
        queue=True,
    )

    def on_depth_reconstruction_btn_click(inputs):
        print("depth reconstruction")
        path = dpt.generate_obj_rgb(
            image=inputs[input_image],
            cube_size=inputs[cube_size],
            n_samples=inputs[n_samples],
            # masks=inputs[masks],
            min_depth=inputs[min_depth],
            max_depth=inputs[max_depth],
        )
        return {pcl_figure: path}

    depth_reconstruction_btn.click(
        on_depth_reconstruction_btn_click, components, [pcl_figure], queue=False
    )

    def on_depth_reconstruction_mask_btn_click(inputs):
        print("depth reconstruction")
        path = dpt.generate_obj_masks2(
            image=inputs[input_image],
            cube_size=inputs[cube_size],
            n_samples=inputs[n_samples],
            masks=inputs[masks],
            min_depth=inputs[min_depth],
            max_depth=inputs[max_depth],
        )
        return {pcl_figure: path}

    depth_reconstruction_mask_btn.click(
        on_depth_reconstruction_mask_btn_click, components, [pcl_figure], queue=False
    )

    def on_sam_sgmt_everything_btn_click(inputs):
        print("segmenting everything")
        image = inputs[input_image]
        sam_masks = sam.segment_everything(image)
        print(image)
        print(sam_masks)
        return [(image, sam_masks), sam_masks]

    sam_sgmt_everything_btn.click(
        on_sam_sgmt_everything_btn_click,
        components,
        [lbl_image, pred_masks],
        queue=True,
    )


if __name__ == "__main__":
    block.queue()
    block.launch(auth=("novouser", "bstad2023"))
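A note on the structure above: the app instantiates SAM twice, once on the default device (`sam`) and once pinned to CPU (`sam_cpu`). The Encode button runs the heavy image encoder once and caches the embedding in a `gr.State`; each point click then only runs the lightweight decoder on CPU against that cached embedding. A minimal sketch of that flow outside Gradio (assumes `img` is a PIL image, the checkpoint is present, and the click coordinates are made up):

import numpy as np
from inference import SegmentPredictor

sam = SegmentPredictor()           # encoder + decoder, GPU if available
sam_cpu = SegmentPredictor("cpu")  # decoder-only use on CPU

emb = sam.encode(img).cpu()        # heavy ViT encoder, run once
sam_cpu.dummy_encode(img)          # records image size without re-encoding
masks = sam_cpu.cond_pred(
    pts=np.array([[100, 150]]),    # hypothetical click (x, y)
    lbls=np.array([1]),            # 1 = foreground point
    embedding=emb,
)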
app_legacy.py ADDED
@@ -0,0 +1,48 @@
import gradio as gr
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
import supervision as sv
from inference import DepthPredictor, SegmentPredictor
from utils import create_3d_obj, create_3d_pc, point_cloud, generate_PCL
import numpy as np


def produce_depth_map(image):
    depth_predictor = DepthPredictor()
    depth_result = depth_predictor.predict(image)
    return depth_result


def produce_segmentation_map(image):
    segment_predictor = SegmentPredictor()
    sam_result = segment_predictor.predict(image)
    return sam_result


def produce_3d_reconstruction(image):
    depth_predictor = DepthPredictor()
    depth_result = depth_predictor.predict(image)
    rgb_gltf_path = create_3d_obj(np.array(image), depth_result, path="./rgb.gltf")
    return rgb_gltf_path


def produce_point_cloud(depth_map, segmentation_map):
    return point_cloud(np.array(segmentation_map), depth_map)


def snap(image, depth_map, segmentation_map):
    depth_result = produce_depth_map(image) if depth_map else None
    sam_result = produce_segmentation_map(image) if segmentation_map else None
    rgb_gltf_path = produce_3d_reconstruction(image) if depth_map else None
    point_cloud_fig = (
        produce_point_cloud(depth_result, sam_result)
        if (segmentation_map and depth_map)
        else None
    )

    return [image, depth_result, sam_result, rgb_gltf_path, point_cloud_fig]


demo = gr.Interface(
    snap,
    inputs=[
        gr.Image(source="webcam", tool=None, label="Input Image", type="pil"),
        "checkbox",
        "checkbox",
    ],
    outputs=[
        gr.Image(label="RGB"),
        gr.Image(label="predicted depth"),
        gr.Image(label="predicted segmentation"),
        gr.Model3D(
            label="3D mesh reconstruction - RGB", clear_color=[1.0, 1.0, 1.0, 1.0]
        ),
        gr.Plot(),
    ],
)

if __name__ == "__main__":
    demo.launch()
inference.py ADDED
@@ -0,0 +1,448 @@
from transformers import DPTImageProcessor, DPTForDepthEstimation
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry, SamPredictor
import gradio as gr
import supervision as sv
import torch
import numpy as np
from PIL import Image
import requests
import open3d as o3d
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
from typing import Optional, Tuple


def remove_outliers(point_cloud, threshold=3.0):
    # Calculate mean and standard deviation along each dimension
    mean = np.mean(point_cloud, axis=0)
    std = np.std(point_cloud, axis=0)

    # Define lower and upper bounds for each dimension
    lower_bounds = mean - threshold * std
    upper_bounds = mean + threshold * std

    # Create a boolean mask for points within the bounds
    mask = np.all((point_cloud >= lower_bounds) & (point_cloud <= upper_bounds), axis=1)

    # Filter out outlier points
    filtered_point_cloud = point_cloud[mask]

    return filtered_point_cloud


def map_image_range(depth, min_value, max_value):
    """
    Maps the values of a numpy depth array to a specified range.

    Args:
        depth (numpy.ndarray): Input depth array.
        min_value (float): Minimum value of the new range.
        max_value (float): Maximum value of the new range.

    Returns:
        numpy.ndarray: Depth array with values mapped to the specified range.
    """
    # Ensure the input is a numpy array
    print(np.min(depth))
    print(np.max(depth))
    depth = np.array(depth)
    # normalise the depth values to [0, 1]
    depth = (depth - depth.min()) / (depth.max() - depth.min())
    # invert
    depth = 1 - depth
    print(np.min(depth))
    print(np.max(depth))
    # Map the values to the specified range
    mapped_image = (depth - 0) * (max_value - min_value) / (1 - 0) + min_value
    print(np.min(mapped_image))
    print(np.max(mapped_image))
    return mapped_image


def PCL(mask, depth):
    assert mask.shape == depth.shape
    assert type(mask) == np.ndarray
    assert type(depth) == np.ndarray
    rgb_mask = np.zeros((mask.shape[0], mask.shape[1], 3)).astype("uint8")
    rgb_mask[mask] = (255, 0, 0)
    print(np.unique(rgb_mask))
    depth_o3d = o3d.geometry.Image(depth)
    image_o3d = o3d.geometry.Image(rgb_mask)
    # print(len(depth_o3d))
    # print(len(image_o3d))
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    # Step 3: Create a PointCloud from the RGBD image
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
        ),
    )
    # Step 4: Convert PointCloud data to a NumPy array
    # print(len(pcd))
    points = np.asarray(pcd.points)
    colors = np.asarray(pcd.colors)
    print(np.unique(colors, axis=0))
    print(np.unique(colors, axis=1))
    print(np.unique(colors))
    mask = colors[:, 0] == 1.0
    print(mask.sum())
    print(colors.shape)
    points = points[mask]
    colors = colors[mask]
    return points, colors


def PCL_rgb(rgb, depth):
    # assert rgb.shape == depth.shape
    assert type(rgb) == np.ndarray
    assert type(depth) == np.ndarray
    depth_o3d = o3d.geometry.Image(depth)
    image_o3d = o3d.geometry.Image(rgb)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    # Step 3: Create a PointCloud from the RGBD image
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
        ),
    )
    # Step 4: Convert PointCloud data to a NumPy array
    points = np.asarray(pcd.points)
    colors = np.asarray(pcd.colors)
    return points, colors


class DepthPredictor:
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
        self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
        self.model.eval()

    def predict(self, image):
        # prepare image for the model
        encoding = self.feature_extractor(image, return_tensors="pt")
        # forward pass
        with torch.no_grad():
            outputs = self.model(**encoding)
            predicted_depth = outputs.predicted_depth
        # interpolate to original size
        prediction = torch.nn.functional.interpolate(
            predicted_depth.unsqueeze(1),
            size=image.size[::-1],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

        output = prediction.cpu().numpy()
        # output = 1 - (output/np.max(output))
        return output

    def generate_pcl(self, image):
        print(np.array(image).shape)
        depth = self.predict(image)
        print(depth.shape)
        # Step 2: Create an RGBD image from the RGB and depth image
        depth_o3d = o3d.geometry.Image(depth)
        image_o3d = o3d.geometry.Image(np.array(image))
        rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
            image_o3d, depth_o3d, convert_rgb_to_intensity=False
        )
        # Step 3: Create a PointCloud from the RGBD image
        pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
            rgbd_image,
            o3d.camera.PinholeCameraIntrinsic(
                o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
            ),
        )
        # Step 4: Convert PointCloud data to a NumPy array
        points = np.asarray(pcd.points)
        colors = np.asarray(pcd.colors)
        print(points.shape, colors.shape)
        return points, colors

    def generate_fig(self, image):
        points, colors = self.generate_pcl(image)
        data = {
            "x": points[:, 0],
            "y": points[:, 1],
            "z": points[:, 2],
            "red": colors[:, 0],
            "green": colors[:, 1],
            "blue": colors[:, 2],
        }
        df = pd.DataFrame(data)
        size = np.zeros(len(df))
        size[:] = 0.01
        # Step 6: Create a 3D scatter plot using Plotly Express
        fig = px.scatter_3d(df, x="x", y="y", z="z", color="red", size=size)
        return fig

    def generate_fig2(self, image):
        points, colors = self.generate_pcl(image)
        # Step 6: Create a 3D scatter plot using matplotlib
        fig = plt.figure()
        ax = fig.add_subplot(111, projection="3d")
        ax.scatter(
            points[:, 0], points[:, 1], points[:, 2], s=0.01, c=colors, marker="o"
        )
        return fig

    def generate_obj_rgb(self, image, n_samples, cube_size, max_depth, min_depth):
        # Step 1: Create a point cloud
        depth = self.predict(image)
        image = np.array(image)
        depth = map_image_range(depth, min_depth, max_depth)
        point_cloud, color_array = PCL_rgb(image, depth)
        idxs = np.random.choice(len(point_cloud), int(n_samples))
        point_cloud = point_cloud[idxs]
        color_array = color_array[idxs]
        # Create a mesh to hold the colored cubes
        mesh = o3d.geometry.TriangleMesh()
        # Create cubes and add them to the mesh
        for point, color in zip(point_cloud, color_array):
            cube = o3d.geometry.TriangleMesh.create_box(
                width=cube_size, height=cube_size, depth=cube_size
            )
            cube.translate(-point)
            cube.paint_uniform_color(color)
            mesh += cube
        # Save the mesh to an .obj file
        output_file = "./cloud.obj"
        o3d.io.write_triangle_mesh(output_file, mesh)
        return output_file

    def generate_obj_masks(self, image, n_samples, masks, cube_size):
        # Generate a point cloud
        point_cloud, color_array = self.generate_pcl(image)
        print(point_cloud.shape)
        mesh = o3d.geometry.TriangleMesh()
        # Create cubes and add them to the mesh
        cs = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]  # RGB in [0, 1] as open3d expects
        for c, (mask, _) in zip(cs, masks):
            mask = mask.ravel()
            point_cloud_subset, color_array_subset = (
                point_cloud[mask],
                color_array[mask],
            )
            idxs = np.random.choice(len(point_cloud_subset), int(n_samples))
            point_cloud_subset = point_cloud_subset[idxs]
            for point in point_cloud_subset:
                cube = o3d.geometry.TriangleMesh.create_box(
                    width=cube_size, height=cube_size, depth=cube_size
                )
                cube.translate(-point)
                cube.paint_uniform_color(c)
                mesh += cube
        # Save the mesh to an .obj file
        output_file = "./cloud.obj"
        o3d.io.write_triangle_mesh(output_file, mesh)
        return output_file

    def generate_obj_masks2(
        self, image, masks, cube_size, n_samples, min_depth, max_depth
    ):
        # Generate a point cloud
        depth = self.predict(image)
        depth = map_image_range(depth, min_depth, max_depth)
        image = np.array(image)
        mesh = o3d.geometry.TriangleMesh()
        # Create cubes and add them to the mesh
        print(len(masks))
        cs = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]
        for c, (mask, _) in zip(cs, masks):
            points, _ = PCL(mask, depth)
            idxs = np.random.choice(len(points), int(n_samples))
            points = points[idxs]
            points = remove_outliers(points)
            for point in points:
                cube = o3d.geometry.TriangleMesh.create_box(
                    width=cube_size, height=cube_size, depth=cube_size
                )
                cube.translate(-point)
                cube.paint_uniform_color(c)
                mesh += cube
        # Save the mesh to an .obj file
        output_file = "./cloud.obj"
        o3d.io.write_triangle_mesh(output_file, mesh)
        return output_file


class CustomSamPredictor(SamPredictor):
    def __init__(
        self,
        sam_model,
    ) -> None:
        super().__init__(sam_model)

    def encode_image(
        self,
        image: np.ndarray,
        image_format: str = "RGB",
    ) -> None:
        """
        Calculates the image embeddings for the provided image, allowing
        masks to be predicted with the 'predict' method.

        Arguments:
          image (np.ndarray): The image for calculating masks. Expects an
            image in HWC uint8 format, with pixel values in [0, 255].
          image_format (str): The color format of the image, in ['RGB', 'BGR'].
        """
        assert image_format in [
            "RGB",
            "BGR",
        ], f"image_format must be in ['RGB', 'BGR'], is {image_format}."
        if image_format != self.model.image_format:
            image = image[..., ::-1]

        # Transform the image to the form expected by the model
        input_image = self.transform.apply_image(image)
        input_image_torch = torch.as_tensor(input_image, device=self.device)
        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[
            None, :, :, :
        ]
        self.set_torch_image(input_image_torch, image.shape[:2])
        return self.get_image_embedding()

    def decode_and_predict(
        self,
        embedding: torch.Tensor,
        point_coords: Optional[np.ndarray] = None,
        point_labels: Optional[np.ndarray] = None,
        box: Optional[np.ndarray] = None,
        mask_input: Optional[np.ndarray] = None,
        multimask_output: bool = True,
        return_logits: bool = False,
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
        """
        Decodes the provided image embedding and makes mask predictions based on prompts.

        Arguments:
          embedding (torch.Tensor): The image embedding to decode.
          ... (other arguments from the predict function)

        Returns:
          (np.ndarray): The output masks in CxHxW format.
          (np.ndarray): An array of quality predictions for each mask.
          (np.ndarray): Low resolution mask logits for subsequent iterations.
        """
        self.features = embedding
        self.is_image_set = True
        return self.predict(
            point_coords=point_coords,
            point_labels=point_labels,
            box=box,
            mask_input=mask_input,
            multimask_output=multimask_output,
            return_logits=return_logits,
        )

    def dummy_set_torch_image(
        self,
        transformed_image: torch.Tensor,
        original_image_size: Tuple[int, ...],
    ) -> None:
        """
        Calculates the image embeddings for the provided image, allowing
        masks to be predicted with the 'predict' method. Expects the input
        image to be already transformed to the format expected by the model.

        Arguments:
          transformed_image (torch.Tensor): The input image, with shape
            1x3xHxW, which has been transformed with ResizeLongestSide.
          original_image_size (tuple(int, int)): The size of the image
            before transformation, in (H, W) format.
        """
        assert (
            len(transformed_image.shape) == 4
            and transformed_image.shape[1] == 3
            and max(*transformed_image.shape[2:]) == self.model.image_encoder.img_size
        ), f"set_torch_image input must be BCHW with long side {self.model.image_encoder.img_size}."
        self.reset_image()

        self.original_size = original_image_size
        self.input_size = tuple(transformed_image.shape[-2:])
        input_image = self.model.preprocess(transformed_image)
        # The following line is commented out to avoid encoding on cpu
        # self.features = self.model.image_encoder(input_image)
        self.is_image_set = True

    def dummy_set_image(
        self,
        image: np.ndarray,
        image_format: str = "RGB",
    ) -> None:
        """
        Calculates the image embeddings for the provided image, allowing
        masks to be predicted with the 'predict' method.

        Arguments:
          image (np.ndarray): The image for calculating masks. Expects an
            image in HWC uint8 format, with pixel values in [0, 255].
          image_format (str): The color format of the image, in ['RGB', 'BGR'].
        """
        assert image_format in [
            "RGB",
            "BGR",
        ], f"image_format must be in ['RGB', 'BGR'], is {image_format}."
        if image_format != self.model.image_format:
            image = image[..., ::-1]

        # Transform the image to the form expected by the model
        input_image = self.transform.apply_image(image)
        input_image_torch = torch.as_tensor(input_image, device=self.device)
        input_image_torch = input_image_torch.permute(2, 0, 1).contiguous()[
            None, :, :, :
        ]

        self.dummy_set_torch_image(input_image_torch, image.shape[:2])


class SegmentPredictor:
    def __init__(self, device=None):
        MODEL_TYPE = "vit_h"
        checkpoint = "sam_vit_h_4b8939.pth"
        sam = sam_model_registry[MODEL_TYPE](checkpoint=checkpoint)
        # Select device
        if device is None:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
        else:
            self.device = device
        sam.to(device=self.device)
        self.mask_generator = SamAutomaticMaskGenerator(sam)
        self.conditioned_pred = CustomSamPredictor(sam)

    def encode(self, image):
        image = np.array(image)
        return self.conditioned_pred.encode_image(image)

    def dummy_encode(self, image):
        image = np.array(image)
        self.conditioned_pred.dummy_set_image(image)

    def cond_pred(self, embedding, pts, lbls):
        lbls = np.array(lbls)
        pts = np.array(pts)
        masks, _, _ = self.conditioned_pred.decode_and_predict(
            embedding, point_coords=pts, point_labels=lbls, multimask_output=True
        )
        idxs = np.argsort(-masks.sum(axis=(1, 2)))
        sam_masks = []
        for n, i in enumerate(idxs):
            sam_masks.append((masks[i], str(n)))
        return sam_masks

    def segment_everything(self, image):
        image = np.array(image)
        sam_result = self.mask_generator.generate(image)
        sam_masks = []
        for i, mask in enumerate(sam_result):
            sam_masks.append((mask["segmentation"], str(i)))
        return sam_masks
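For reference, a short sketch of how these predictors can be used on their own; the file name and parameter values are assumptions, but the methods mirror what app.py calls:

from PIL import Image
from inference import DepthPredictor, SegmentPredictor

img = Image.open("example.jpg")      # hypothetical test image
dpt = DepthPredictor()
depth = dpt.predict(img)             # HxW numpy array from Intel/dpt-large
sam = SegmentPredictor()
masks = sam.segment_everything(img)  # list of (bool mask, label) tuples
obj_path = dpt.generate_obj_rgb(     # writes a cube-per-point mesh to ./cloud.obj
    img, n_samples=1e4, cube_size=0.0005, min_depth=1, max_depth=3
)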
packages.txt ADDED
@@ -0,0 +1 @@
libgl1-mesa-glx
requirements.txt ADDED
@@ -0,0 +1,12 @@
gradio
huggingface_hub
segment-anything
supervision
torch
torchvision
opencv-python
transformers
open3d
plotly
pandas
numpy
sam_vit_b_01ec64.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ec2df62732614e57411cdcf32a23ffdf28910380d03139ee0f4fcbe91eb8c912
size 375042383
sam_vit_h_4b8939.pth ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e
size 2564550879
tests.py ADDED
File without changes
utils.py ADDED
@@ -0,0 +1,231 @@
import numpy as np
import open3d as o3d
import plotly.express as px
import pandas as pd
from inference import DepthPredictor
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


def create_3d_obj(rgb_image, depth_image, depth=10, path="./image.gltf"):
    depth_o3d = o3d.geometry.Image(depth_image)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    w = int(depth_image.shape[1])
    h = int(depth_image.shape[0])

    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
    camera_intrinsic.set_intrinsics(w, h, 500, 500, w / 2, h / 2)

    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

    print("normals")
    pcd.normals = o3d.utility.Vector3dVector(
        np.zeros((1, 3))
    )  # invalidate existing normals
    pcd.estimate_normals(
        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
    )
    pcd.orient_normals_towards_camera_location(
        camera_location=np.array([0.0, 0.0, 1000.0])
    )
    pcd.transform([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
    pcd.transform([[-1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])

    print("run Poisson surface reconstruction")
    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug) as cm:
        mesh_raw, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=depth, width=0, scale=1.1, linear_fit=True
        )

    voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 256
    print(f"voxel_size = {voxel_size:e}")
    mesh = mesh_raw.simplify_vertex_clustering(
        voxel_size=voxel_size,
        contraction=o3d.geometry.SimplificationContraction.Average,
    )

    # vertices_to_remove = densities < np.quantile(densities, 0.001)
    # mesh.remove_vertices_by_mask(vertices_to_remove)
    bbox = pcd.get_axis_aligned_bounding_box()
    mesh_crop = mesh.crop(bbox)
    gltf_path = path
    o3d.io.write_triangle_mesh(gltf_path, mesh_crop, write_triangle_uvs=True)
    return gltf_path


def create_3d_pc(rgb_image, depth_image, depth=10):
    depth_image = depth_image.astype(np.float32)  # Convert depth map to float32
    depth_o3d = o3d.geometry.Image(depth_image)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )

    w = int(depth_image.shape[1])
    h = int(depth_image.shape[0])

    # Specify camera intrinsic parameters (modify based on actual camera)
    fx = 500
    fy = 500
    cx = w / 2
    cy = h / 2

    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic(w, h, fx, fy, cx, cy)

    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)

    print("Estimating normals...")
    pcd.estimate_normals(
        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30)
    )
    pcd.orient_normals_towards_camera_location(
        camera_location=np.array([0.0, 0.0, 1000.0])
    )

    # Save the point cloud as a PCD file
    filename = "pc.pcd"
    o3d.io.write_point_cloud(filename, pcd)

    return filename  # Return the file path where the point cloud is saved


def point_cloud(rgb_image):
    depth_predictor = DepthPredictor()
    depth_result = depth_predictor.predict(rgb_image)
    # Step 2: Create an RGBD image from the RGB and depth image
    depth_o3d = o3d.geometry.Image(depth_result)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    # Step 3: Create a PointCloud from the RGBD image
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
        ),
    )
    # Step 4: Convert PointCloud data to a NumPy array
    points = np.asarray(pcd.points)
    colors = np.asarray(pcd.colors)
    # Step 5: Create a DataFrame from the NumPy arrays
    data = {
        "x": points[:, 0],
        "y": points[:, 1],
        "z": points[:, 2],
        "red": colors[:, 0],
        "green": colors[:, 1],
        "blue": colors[:, 2],
    }
    df = pd.DataFrame(data)
    size = np.zeros(len(df))
    size[:] = 0.01
    # Step 6: Create a 3D scatter plot using Plotly Express
    fig = px.scatter_3d(df, x="x", y="y", z="z", color="red", size=size)

    return fig


def array_PCL(rgb_image, depth_image):
    FX_RGB = 5.1885790117450188e02
    FY_RGB = 5.1946961112127485e02
    CX_RGB = 3.2558244941119034e02
    CY_RGB = 2.5373616633400465e02
    FX_DEPTH = FX_RGB
    FY_DEPTH = FY_RGB
    CX_DEPTH = CX_RGB
    CY_DEPTH = CY_RGB
    height = depth_image.shape[0]
    width = depth_image.shape[1]
    # compute indices:
    jj = np.tile(range(width), height)
    ii = np.repeat(range(height), width)

    # Compute constants:
    xx = (jj - CX_DEPTH) / FX_DEPTH
    yy = (ii - CY_DEPTH) / FY_DEPTH

    # transform depth image to vector of z:
    length = height * width
    z = depth_image.reshape(length)

    # compute point cloud
    pcd = np.dstack((xx * z, yy * z, z)).reshape((length, 3))
    # cam_RGB = np.apply_along_axis(np.linalg.inv(R).dot, 1, pcd) - np.linalg.inv(R).dot(T)
    xx_rgb = (
        ((rgb_image[:, 0] * FX_RGB) / rgb_image[:, 2] + CX_RGB + width / 2)
        .astype(int)
        .clip(0, width - 1)
    )
    yy_rgb = (
        ((rgb_image[:, 1] * FY_RGB) / rgb_image[:, 2] + CY_RGB)
        .astype(int)
        .clip(0, height - 1)
    )
    # colors = rgb_image[yy_rgb, xx_rgb]/255
    return pcd  # , colors


def generate_PCL(image):
    depth_predictor = DepthPredictor()
    depth_result = depth_predictor.predict(image)
    image = np.array(image)
    pcd = array_PCL(image, depth_result)
    fig = px.scatter_3d(x=pcd[:, 0], y=pcd[:, 1], z=pcd[:, 2], size_max=0.01)
    return fig


def plot_PCL(rgb_image, depth_image):
    pcd, colors = array_PCL(rgb_image, depth_image)
    fig = px.scatter_3d(
        x=pcd[:, 0], y=pcd[:, 1], z=pcd[:, 2], color=colors, size_max=0.1
    )
    return fig


def PCL3(image):
    depth_predictor = DepthPredictor()
    depth_result = depth_predictor.predict(image)
    image = np.array(image)
    # Step 2: Create an RGBD image from the RGB and depth image
    depth_o3d = o3d.geometry.Image(depth_result)
    image_o3d = o3d.geometry.Image(image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False
    )
    # Step 3: Create a PointCloud from the RGBD image
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image,
        o3d.camera.PinholeCameraIntrinsic(
            o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault
        ),
    )
    vis = o3d.visualization.Visualizer()
    vis.add_geometry(pcd)
    # Step 4: Convert PointCloud data to a NumPy array
    points = np.asarray(pcd.points)
    colors = np.asarray(pcd.colors)
    sizes = np.zeros(colors.shape)
    sizes[:] = 0.01
    colors = [tuple(c) for c in colors]
    fig = plt.figure()
    # ax = fig.add_subplot(111, projection='3d')
    ax = Axes3D(fig)
    print("plotting...")
    ax.scatter(points[:, 0], points[:, 1], points[:, 2], c=colors, s=0.01)
    print("Plot Successful")
    # data = {'x': points[:, 0], 'y': points[:, 1], 'z': points[:, 2], 'sizes': sizes[:, 0]}
    # df = pd.DataFrame(data)
    # Step 6: Create a 3D scatter plot using Plotly Express
    # fig = px.scatter_3d(df, x='x', y='y', z='z', color=colors, size="sizes")

    return fig