Spaces: Running on Zero
xinjie.wang committed · Commit 885d2b7 · Parent(s): 916cfab
update
Browse files
- common.py +14 -2
- embodied_gen/data/backproject_v2.py +17 -10
- embodied_gen/data/differentiable_render.py +2 -2
- embodied_gen/data/utils.py +51 -10
- embodied_gen/models/delight_model.py +2 -0
- embodied_gen/models/texture_model.py +2 -1
- embodied_gen/scripts/gen_texture.py +123 -0
- embodied_gen/scripts/imageto3d.py +18 -3
- embodied_gen/scripts/render_gs.py +12 -23
- embodied_gen/scripts/texture_gen.sh +1 -0
- embodied_gen/utils/process_media.py +19 -2
- embodied_gen/validators/urdf_convertor.py +1 -1
common.py
CHANGED
@@ -503,7 +503,17 @@ def extract_3d_representations_v2(
         device="cpu",
     )
     color_path = os.path.join(user_dir, "color.png")
-    render_gs_api(…
+    render_gs_api(
+        input_gs=aligned_gs_path,
+        output_path=color_path,
+        elevation=[20, -10],
+    )
+    color_path2 = os.path.join(user_dir, "color2.png")
+    render_gs_api(
+        input_gs=aligned_gs_path,
+        output_path=color_path2,
+        elevation=[60, -50],
+    )

     mesh = trimesh.Trimesh(
         vertices=mesh_model.vertices.cpu().numpy(),
@@ -518,12 +528,14 @@ def extract_3d_representations_v2(
     mesh = backproject_api(
         delight_model=DELIGHT,
         imagesr_model=IMAGESR_MODEL,
-        color_path=color_path,
+        color_path=[color_path, color_path2],
         mesh_path=mesh_obj_path,
         output_path=mesh_obj_path,
         skip_fix_mesh=False,
         delight=enable_delight,
         texture_wh=[texture_size, texture_size],
+        elevation=[20, -10, 60, -50],
+        num_images=12,
     )

     mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
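The two render_gs_api calls above give backproject_api a total of 12 views: two 6-view grids rendered at elevation pairs [20, -10] and [60, -50]. A minimal sketch of the view-count arithmetic, mirroring _compute_az_el_by_camera_params in embodied_gen/data/utils.py (the standalone variables here are illustrative, not part of the commit):

    # Views per elevation ring, as computed in _compute_az_el_by_camera_params.
    elevation = [20, -10, 60, -50]  # two renders, two elevations each
    num_images = 12
    num_view = num_images // len(elevation)
    print(num_view)  # 3 azimuths per ring -> 4 x 3 = 12 views total
    # This commit also adds a num_view == 1 fallback that widens the azimuth
    # offset to pi/2 (see the utils.py hunk later in this commit).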
embodied_gen/data/backproject_v2.py
CHANGED
@@ -33,6 +33,7 @@ from embodied_gen.data.mesh_operator import MeshFixer
 from embodied_gen.data.utils import (
     CameraSetting,
     DiffrastRender,
+    as_list,
     get_images_from_grid,
     init_kal_camera,
     normalize_vertices_array,
@@ -41,6 +42,7 @@ from embodied_gen.data.utils import (
 )
 from embodied_gen.models.delight_model import DelightingModel
 from embodied_gen.models.sr_model import ImageRealESRGAN
+from embodied_gen.utils.process_media import vcat_pil_images

 logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
@@ -541,8 +543,9 @@ def parse_args():
     parser = argparse.ArgumentParser(description="Backproject texture")
     parser.add_argument(
         "--color_path",
+        nargs="+",
         type=str,
-        help="Multiview color image in 6x512x512 file…
+        help="Multiview color image in 6x512x512 file paths",
     )
     parser.add_argument(
         "--mesh_path",
@@ -559,7 +562,7 @@ def parse_args():
     )
     parser.add_argument(
         "--elevation",
-        nargs=…
+        nargs="+",
         type=float,
         default=[20.0, -10.0],
         help="Elevation angles for the camera (default: [20.0, -10.0])",
@@ -647,19 +650,23 @@ def entrypoint(
         fov=math.radians(args.fov),
         device=args.device,
     )
-    view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]

-    …
+    args.color_path = as_list(args.color_path)
+    if args.delight and delight_model is None:
+        delight_model = DelightingModel()
+
+    color_grid = [Image.open(color_path) for color_path in args.color_path]
+    color_grid = vcat_pil_images(color_grid, image_mode="RGBA")
     if args.delight:
-        if delight_model is None:
-            delight_model = DelightingModel()
-        save_dir = os.path.dirname(args.output_path)
-        os.makedirs(save_dir, exist_ok=True)
         color_grid = delight_model(color_grid)
         if not args.no_save_delight_img:
-            …
+            save_dir = os.path.dirname(args.output_path)
+            os.makedirs(save_dir, exist_ok=True)
+            color_grid.save(f"{save_dir}/color_delight.png")

     multiviews = get_images_from_grid(color_grid, img_size=512)
+    view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]
+    view_weights += [0.01] * (len(multiviews) - len(view_weights))

     # Use RealESRGAN_x4plus for x4 (512->2048) image super resolution.
     if imagesr_model is None:
@@ -688,7 +695,7 @@ def entrypoint(
     texture_backer = TextureBacker(
         camera_params=camera_params,
         view_weights=view_weights,
-        render_wh=…
+        render_wh=args.resolution_hw,
         texture_wh=args.texture_wh,
         smooth_texture=not args.no_smooth_texture,
     )
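--color_path now accepts several grid images: the entrypoint stacks them vertically with vcat_pil_images, splits the stack back into 512-pixel views, and pads view_weights for any views beyond the original six. A small sketch of that padding (weights copied from the hunk above; the multiviews list is a stand-in for the PIL images):

    multiviews = list(range(12))  # stand-in for 2 grids x 6 views
    view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]
    view_weights += [0.01] * (len(multiviews) - len(view_weights))
    assert view_weights[6:] == [0.01] * 6  # extra views get a small weight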
embodied_gen/data/differentiable_render.py
CHANGED
@@ -503,7 +503,7 @@ def parse_args():
         help="Whether to generate global normal .mp4 rendering file.",
     )
     parser.add_argument(
-        "--…
+        "--video_prompts",
         type=str,
         nargs="+",
         default=None,
@@ -579,7 +579,7 @@ def entrypoint(**kwargs) -> None:
         mesh_path=args.mesh_path,
         output_root=args.output_root,
         uuid=args.uuid,
-        prompts=args.…
+        prompts=args.video_prompts,
     )

     return
embodied_gen/data/utils.py
CHANGED
@@ -28,7 +28,7 @@ import numpy as np
 import nvdiffrast.torch as dr
 import torch
 import torch.nn.functional as F
-from PIL import Image
+from PIL import Image, ImageEnhance

 try:
     from kolors.models.modeling_chatglm import ChatGLMModel
@@ -698,6 +698,8 @@ def as_list(obj):
         return obj
     elif isinstance(obj, set):
         return list(obj)
+    elif obj is None:
+        return obj
     else:
         return [obj]

@@ -742,6 +744,8 @@ def _compute_az_el_by_camera_params(
 ):
     num_view = camera_params.num_images // len(camera_params.elevation)
     view_interval = 2 * np.pi / num_view / 2
+    if num_view == 1:
+        view_interval = np.pi / 2
     azimuths = []
     elevations = []
     for idx, el in enumerate(camera_params.elevation):
@@ -758,8 +762,13 @@
     return azimuths, elevations


-def init_kal_camera(…
+def init_kal_camera(
+    camera_params: CameraSetting,
+    flip_az: bool = False,
+) -> Camera:
+    azimuths, elevations = _compute_az_el_by_camera_params(
+        camera_params, flip_az
+    )
     cam_pts = _compute_cam_pts_by_az_el(
         azimuths, elevations, camera_params.distance
     )
@@ -856,13 +865,38 @@ def get_images_from_grid(
         image = Image.open(image)

     view_images = np.array(image)
-    …
+    height, width, _ = view_images.shape
+    rows = height // img_size
+    cols = width // img_size
+    blocks = []
+    for i in range(rows):
+        for j in range(cols):
+            block = view_images[
+                i * img_size : (i + 1) * img_size,
+                j * img_size : (j + 1) * img_size,
+                :,
+            ]
+            blocks.append(Image.fromarray(block))
+
+    return blocks


+def enhance_image(
+    image: Image.Image,
+    contrast_factor: float = 1.3,
+    color_factor: float = 1.2,
+    brightness_factor: float = 0.95,
+) -> Image.Image:
+    enhancer_contrast = ImageEnhance.Contrast(image)
+    img_contrasted = enhancer_contrast.enhance(contrast_factor)
+
+    enhancer_color = ImageEnhance.Color(img_contrasted)
+    img_colored = enhancer_color.enhance(color_factor)
+
+    enhancer_brightness = ImageEnhance.Brightness(img_colored)
+    enhanced_image = enhancer_brightness.enhance(brightness_factor)
+
+    return enhanced_image


 def post_process_texture(texture: np.ndarray, iter: int = 1) -> np.ndarray:
@@ -872,7 +906,14 @@ def post_process_texture(texture: np.ndarray, iter: int = 1) -> np.ndarray:
         texture, d=5, sigmaColor=20, sigmaSpace=20
     )

-    return texture
+    texture = enhance_image(
+        image=Image.fromarray(texture),
+        contrast_factor=1.3,
+        color_factor=1.2,
+        brightness_factor=0.95,
+    )
+
+    return np.array(texture)


 def quat_mult(q1, q2):
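A short usage sketch of the reworked grid splitter and the new enhance_image helper. The input path, grid size, and tile count are hypothetical; get_images_from_grid now slices any HxW grid into row-major img_size tiles instead of assuming a fixed layout:

    from embodied_gen.data.utils import enhance_image, get_images_from_grid

    blocks = get_images_from_grid("color.png", img_size=512)  # hypothetical grid
    print(len(blocks))  # e.g. 6 for a 1536x1024 grid of 512px tiles

    # Defaults match what post_process_texture now applies to baked textures.
    enhanced = enhance_image(blocks[0])
    enhanced.save("view0_enhanced.png")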
embodied_gen/models/delight_model.py
CHANGED
@@ -29,6 +29,7 @@ from diffusers import (
 from huggingface_hub import snapshot_download
 from PIL import Image
 from embodied_gen.models.segment_model import RembgRemover
+from embodied_gen.utils.log import logger

 __all__ = [
     "DelightingModel",
@@ -84,6 +85,7 @@ class DelightingModel(object):

     def _lazy_init_pipeline(self):
         if self.pipeline is None:
+            logger.info("Loading Delighting Model...")
             pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
                 self.model_path,
                 torch_dtype=torch.float16,
embodied_gen/models/texture_model.py
CHANGED
@@ -29,6 +29,7 @@ from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import (
 )
 from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
 from embodied_gen.models.text_model import download_kolors_weights
+from embodied_gen.utils.log import logger

 __all__ = [
     "build_texture_gen_pipe",
@@ -42,7 +43,7 @@ def build_texture_gen_pipe(
     device: str = "cuda",
 ) -> DiffusionPipeline:
     download_kolors_weights(f"{base_ckpt_dir}/Kolors")
-
+    logger.info(f"Load Kolors weights...")
     tokenizer = ChatGLMTokenizer.from_pretrained(
         f"{base_ckpt_dir}/Kolors/text_encoder"
     )
embodied_gen/scripts/gen_texture.py
ADDED
@@ -0,0 +1,123 @@
+import os
+import shutil
+from dataclasses import dataclass
+
+import tyro
+from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
+from embodied_gen.data.differentiable_render import entrypoint as drender_api
+from embodied_gen.data.utils import as_list
+from embodied_gen.models.delight_model import DelightingModel
+from embodied_gen.models.sr_model import ImageRealESRGAN
+from embodied_gen.scripts.render_mv import (
+    build_texture_gen_pipe,
+)
+from embodied_gen.scripts.render_mv import infer_pipe as render_mv_api
+from embodied_gen.utils.log import logger
+
+
+@dataclass
+class TextureGenConfig:
+    mesh_path: str | list[str]
+    prompt: str | list[str]
+    output_root: str
+    controlnet_cond_scale: float = 0.7
+    guidance_scale: float = 9
+    strength: float = 0.9
+    num_inference_steps: int = 40
+    delight: bool = True
+    seed: int = 0
+    base_ckpt_dir: str = "./weights"
+    texture_size: int = 2048
+    ip_adapt_scale: float = 0.0
+    ip_img_path: str | list[str] | None = None
+
+
+def entrypoint() -> None:
+    cfg = tyro.cli(TextureGenConfig)
+    cfg.mesh_path = as_list(cfg.mesh_path)
+    cfg.prompt = as_list(cfg.prompt)
+    cfg.ip_img_path = as_list(cfg.ip_img_path)
+    assert len(cfg.mesh_path) == len(cfg.prompt)
+
+    # Pre-load models.
+    if cfg.ip_adapt_scale > 0:
+        PIPELINE = build_texture_gen_pipe(
+            base_ckpt_dir="./weights",
+            ip_adapt_scale=cfg.ip_adapt_scale,
+            device="cuda",
+        )
+    else:
+        PIPELINE = build_texture_gen_pipe(
+            base_ckpt_dir="./weights",
+            ip_adapt_scale=0,
+            device="cuda",
+        )
+    DELIGHT = None
+    if cfg.delight:
+        DELIGHT = DelightingModel()
+    IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
+
+    for idx in range(len(cfg.mesh_path)):
+        mesh_path = cfg.mesh_path[idx]
+        prompt = cfg.prompt[idx]
+        uuid = os.path.splitext(os.path.basename(mesh_path))[0]
+        output_root = os.path.join(cfg.output_root, uuid)
+        drender_api(
+            mesh_path=mesh_path,
+            output_root=f"{output_root}/condition",
+            uuid=uuid,
+        )
+        render_mv_api(
+            index_file=f"{output_root}/condition/index.json",
+            controlnet_cond_scale=cfg.controlnet_cond_scale,
+            guidance_scale=cfg.guidance_scale,
+            strength=cfg.strength,
+            num_inference_steps=cfg.num_inference_steps,
+            ip_adapt_scale=cfg.ip_adapt_scale,
+            ip_img_path=(
+                None if cfg.ip_img_path is None else cfg.ip_img_path[idx]
+            ),
+            prompt=prompt,
+            save_dir=f"{output_root}/multi_view",
+            sub_idxs=[[0, 1, 2], [3, 4, 5]],
+            pipeline=PIPELINE,
+            seed=cfg.seed,
+        )
+        textured_mesh = backproject_api(
+            delight_model=DELIGHT,
+            imagesr_model=IMAGESR_MODEL,
+            mesh_path=mesh_path,
+            color_path=f"{output_root}/multi_view/color_sample0.png",
+            output_path=f"{output_root}/texture_mesh/{uuid}.obj",
+            save_glb_path=f"{output_root}/texture_mesh/{uuid}.glb",
+            skip_fix_mesh=True,
+            delight=cfg.delight,
+            no_save_delight_img=True,
+            texture_wh=[cfg.texture_size, cfg.texture_size],
+        )
+        drender_api(
+            mesh_path=f"{output_root}/texture_mesh/{uuid}.obj",
+            output_root=f"{output_root}/texture_mesh",
+            uuid=uuid,
+            num_images=90,
+            elevation=[20],
+            with_mtl=True,
+            gen_color_mp4=True,
+            pbr_light_factor=1.2,
+        )
+
+        # Re-organize folders.
+        shutil.rmtree(f"{output_root}/condition")
+        shutil.copy(
+            f"{output_root}/texture_mesh/{uuid}/color.mp4",
+            f"{output_root}/color.mp4",
+        )
+        shutil.rmtree(f"{output_root}/texture_mesh/{uuid}")
+
+        logger.info(
+            f"Successfully generate textured mesh in {output_root}/texture_mesh"
+        )
+
+
+if __name__ == "__main__":
+    entrypoint()
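The deprecation notice added to texture_gen.sh points to texture-cli, which presumably wraps this tyro entrypoint; assuming tyro's default flag mapping for the dataclass fields, a run could look like texture-cli --mesh_path demo.obj --prompt "rusty metal barrel" --output_root outputs/tex (paths and prompt are placeholders). Because the config fields accept scalars or lists, batch runs only need equal-length lists. A sketch of the as_list normalization this relies on:

    from embodied_gen.data.utils import as_list

    print(as_list("demo.obj"))               # ['demo.obj']
    print(as_list(["red mug", "blue mug"]))  # lists pass through unchanged
    print(as_list(None))                     # None (passthrough added in this commit)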
embodied_gen/scripts/imageto3d.py
CHANGED
@@ -108,6 +108,9 @@ def parse_args():
         default=2,
     )
     parser.add_argument("--disable_decompose_convex", action="store_true")
+    parser.add_argument(
+        "--texture_wh", type=int, nargs=2, default=[2048, 2048]
+    )
     args, unknown = parser.parse_known_args()

     return args
@@ -209,7 +212,17 @@ def entrypoint(**kwargs):
         device="cpu",
     )
     color_path = os.path.join(output_root, "color.png")
-    render_gs_api(…
+    render_gs_api(
+        input_gs=aligned_gs_path,
+        output_path=color_path,
+        elevation=[20, -10],
+    )
+    color_path2 = os.path.join(output_root, "color2.png")
+    render_gs_api(
+        input_gs=aligned_gs_path,
+        output_path=color_path2,
+        elevation=[60, -50],
+    )

     geo_flag, geo_result = GEO_CHECKER(
         [color_path], text=asset_node
@@ -241,12 +254,14 @@ def entrypoint(**kwargs):
     mesh = backproject_api(
         delight_model=DELIGHT,
         imagesr_model=IMAGESR_MODEL,
-        color_path=color_path,
+        color_path=[color_path, color_path2],
         mesh_path=mesh_obj_path,
         output_path=mesh_obj_path,
         skip_fix_mesh=False,
         delight=True,
-        texture_wh=…
+        texture_wh=args.texture_wh,
+        elevation=[20, -10, 60, -50],
+        num_images=12,
     )

     mesh_glb_path = os.path.join(output_root, f"{filename}.glb")
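A quick check of the new --texture_wh flag's parsing behavior (the argument values are illustrative): nargs=2 enforces a width/height pair, and parse_known_args tolerates the script's other flags.

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--texture_wh", type=int, nargs=2, default=[2048, 2048])
    args, unknown = parser.parse_known_args(["--texture_wh", "1024", "1024"])
    print(args.texture_wh)  # [1024, 1024]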
embodied_gen/scripts/render_gs.py
CHANGED
@@ -18,12 +18,11 @@
 import argparse
 import logging
 import math
-import os

 import cv2
-import numpy as np
 import spaces
 import torch
+from PIL import Image
 from tqdm import tqdm
 from embodied_gen.data.utils import (
     CameraSetting,
@@ -31,6 +30,7 @@ from embodied_gen.data.utils import (
     normalize_vertices_array,
 )
 from embodied_gen.models.gs_model import GaussianOperator
+from embodied_gen.utils.process_media import combine_images_to_grid

 logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
@@ -113,12 +113,11 @@ def load_gs_model(


 @spaces.GPU
-def entrypoint(…
+def entrypoint(**kwargs) -> None:
     args = parse_args()
-    …
-    args.output_path = output_path
+    for k, v in kwargs.items():
+        if hasattr(args, k) and v is not None:
+            setattr(args, k, v)

     # Setup camera parameters
     camera_params = CameraSetting(
@@ -129,7 +128,7 @@ def entrypoint(input_gs: str = None, output_path: str = None) -> None:
         fov=math.radians(args.fov),
         device=args.device,
     )
-    camera = init_kal_camera(camera_params)
+    camera = init_kal_camera(camera_params, flip_az=True)
     matrix_mv = camera.view_matrix()  # (n_cam 4 4) world2cam
     matrix_mv[:, :3, 3] = -matrix_mv[:, :3, 3]
     w2cs = matrix_mv.to(camera_params.device)
@@ -153,21 +152,11 @@ def entrypoint(input_gs: str = None, output_path: str = None) -> None:
             (args.image_size, args.image_size),
             interpolation=cv2.INTER_AREA,
         )
-        …
-    for row_idxs in select_idxs:
-        row_image = []
-        for row_idx in row_idxs:
-            row_image.append(images[row_idx])
-        row_image = np.concatenate(row_image, axis=1)
-        grid_image.append(row_image)
-
-    grid_image = np.concatenate(grid_image, axis=0)
-    os.makedirs(os.path.dirname(args.output_path), exist_ok=True)
-    cv2.imwrite(args.output_path, grid_image)
+        color = cv2.cvtColor(color, cv2.COLOR_BGRA2RGBA)
+        images.append(Image.fromarray(color))
+
+    combine_images_to_grid(images, image_mode="RGBA")[0].save(args.output_path)
+
     logger.info(f"Saved grid image to {args.output_path}")
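The reworked entrypoint(**kwargs) lets programmatic callers (the render_gs_api calls in common.py and imageto3d.py) override parsed CLI arguments without touching sys.argv. A minimal, self-contained reproduction of the pattern (parse_args stubbed out for the sketch):

    import argparse

    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--elevation", nargs="+", type=float, default=[20.0, -10.0]
        )
        return parser.parse_args([])  # defaults only, for the sketch

    def entrypoint(**kwargs):
        args = parse_args()
        for k, v in kwargs.items():
            if hasattr(args, k) and v is not None:
                setattr(args, k, v)  # keyword wins over the parsed default
        return args

    print(entrypoint(elevation=[60, -50]).elevation)  # [60, -50]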
embodied_gen/scripts/texture_gen.sh
CHANGED
@@ -28,6 +28,7 @@ if [[ -z "$mesh_path" || -z "$prompt" || -z "$output_root" ]]; then
     exit 1
 fi

+echo "Will be deprecated, recommended to use 'texture-cli' instead."
 uuid=$(basename "$output_root")
 # Step 1: drender-cli for condition rendering
 drender-cli --mesh_path ${mesh_path} \
embodied_gen/utils/process_media.py
CHANGED
@@ -49,6 +49,7 @@ __all__ = [
     "is_image_file",
     "parse_text_prompts",
     "check_object_edge_truncated",
+    "vcat_pil_images",
 ]


@@ -166,6 +167,7 @@ def combine_images_to_grid(
     images: list[str | Image.Image],
     cat_row_col: tuple[int, int] = None,
     target_wh: tuple[int, int] = (512, 512),
+    image_mode: str = "RGB",
 ) -> list[Image.Image]:
     n_images = len(images)
     if n_images == 1:
@@ -178,13 +180,13 @@
         n_row, n_col = cat_row_col

     images = [
-        Image.open(p).convert(…
+        Image.open(p).convert(image_mode) if isinstance(p, str) else p
         for p in images
     ]
     images = [img.resize(target_wh) for img in images]

     grid_w, grid_h = n_col * target_wh[0], n_row * target_wh[1]
-    grid = Image.new(…
+    grid = Image.new(image_mode, (grid_w, grid_h), (0, 0, 0))

     for idx, img in enumerate(images):
         row, col = divmod(idx, n_col)
@@ -435,6 +437,21 @@ def check_object_edge_truncated(
     return not (top or bottom or left or right)


+def vcat_pil_images(
+    images: list[Image.Image], image_mode: str = "RGB"
+) -> Image.Image:
+    widths, heights = zip(*(img.size for img in images))
+    total_height = sum(heights)
+    max_width = max(widths)
+    new_image = Image.new(image_mode, (max_width, total_height))
+    y_offset = 0
+    for image in images:
+        new_image.paste(image, (0, y_offset))
+        y_offset += image.size[1]
+
+    return new_image
+
+
 if __name__ == "__main__":
     image_paths = [
         "outputs/layouts_sim/task_0000/images/pen.png",
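A quick sketch of the new vcat_pil_images helper: it stacks images top-to-bottom on a canvas sized to the widest input. The grid dimensions below are assumptions based on the 3x2 view layout (sub_idxs=[[0, 1, 2], [3, 4, 5]]) used elsewhere in this commit:

    from PIL import Image
    from embodied_gen.utils.process_media import vcat_pil_images

    a = Image.new("RGBA", (1536, 1024))  # one 3x2 grid of 512px views
    b = Image.new("RGBA", (1536, 1024))
    stacked = vcat_pil_images([a, b], image_mode="RGBA")
    print(stacked.size)  # (1536, 2048)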
embodied_gen/validators/urdf_convertor.py
CHANGED
@@ -266,7 +266,7 @@ class URDFGenerator(object):
         if self.decompose_convex:
             try:
                 d_params = dict(
-                    threshold=0.05, max_convex_hull=…
+                    threshold=0.05, max_convex_hull=100, verbose=False
                 )
                 filename = f"{os.path.splitext(obj_name)[0]}_collision.ply"
                 output_path = os.path.join(mesh_folder, filename)