"""Convert Objaverse models listed in object_id.json into colored point clouds
and push the pickled result to a Hugging Face dataset repository."""

import json
import multiprocessing
import os
import pickle
import random

import numpy
import trimesh

import huggingface_hub
import objaverse
from huggingface_hub import Repository
from tqdm import tqdm

DATASET_REPO_URL = "https://huggingface.co/datasets/Gary3410/object_tmp"
DATA_FILENAME = "object_tmp_4.pkl"
DATA_FILE = os.path.join("data", DATA_FILENAME)


def random_sampling(points, num_samples):
    """Return exactly ``num_samples`` rows drawn uniformly from ``points``.

    If the cloud has fewer points than requested, points are drawn with
    replacement so the output always has ``num_samples`` rows.
    """
    replace = len(points) < num_samples
    indices = numpy.random.choice(len(points), num_samples, replace=replace)
    return points[indices]


def model_to_pc(mesh: trimesh.Trimesh, n_sample_points=10000):
    """Sample an evenly-spaced colored point cloud from a single mesh.

    Returns an ``(n_sample_points, 6)`` float32 array of XYZ + RGB (RGB
    normalized to [0, 1]).  Raises ``ValueError`` when even-surface sampling
    cannot produce enough points after 24 radius-shrinking attempts.
    """
    f32 = numpy.float32
    # Start with a radius derived from the surface area and shrink it until
    # sample_surface_even yields the full requested number of points.
    rad = numpy.sqrt(mesh.area / (3 * n_sample_points))
    for _ in range(24):
        pcd, face_idx = trimesh.sample.sample_surface_even(mesh, n_sample_points, rad)
        rad *= 0.85
        if len(pcd) == n_sample_points:
            break
    else:
        raise ValueError("Bad geometry, cannot finish sampling.", mesh.area)
    if isinstance(mesh.visual, trimesh.visual.ColorVisuals):
        rgba = mesh.visual.face_colors[face_idx]
    elif isinstance(mesh.visual, trimesh.visual.TextureVisuals):
        # Interpolate per-point UVs from barycentric coordinates of the hit faces.
        bc = trimesh.proximity.points_to_barycentric(mesh.triangles[face_idx], pcd)
        if mesh.visual.uv is None or len(mesh.visual.uv) < mesh.faces[face_idx].max():
            # Invalid UV, filling with zeroes.
            uv = numpy.zeros([len(bc), 2])
        else:
            uv = numpy.einsum('ntc,nt->nc', mesh.visual.uv[mesh.faces[face_idx]], bc)
        material = mesh.visual.material
        if hasattr(material, 'materials'):
            if len(material.materials) == 0:
                # Empty MultiMaterial found, falling back to light grey.
                rgba = numpy.ones_like(pcd) * 0.8
                texture = None
            else:
                material = material.materials[0]
        if hasattr(material, 'image'):
            texture = material.image
            if texture is None:
                rgba = numpy.zeros([len(uv), len(material.main_color)]) + material.main_color
        elif hasattr(material, 'baseColorTexture'):
            texture = material.baseColorTexture
            if texture is None:
                rgba = numpy.zeros([len(uv), len(material.main_color)]) + material.main_color
        else:
            # Unknown material, falling back to light grey.
            texture = None
            rgba = numpy.ones_like(pcd) * 0.8
        if texture is not None:
            rgba = trimesh.visual.uv_to_interpolated_color(uv, texture)
    # Normalize colors to [0, 1]: uint8-style values are divided by 255,
    # anything larger (e.g. 16-bit textures) by its own maximum.
    if rgba.max() > 1:
        if rgba.max() > 255:
            rgba = rgba.astype(f32) / rgba.max()
        else:
            rgba = rgba.astype(f32) / 255.0
    return numpy.concatenate([numpy.array(pcd, f32), numpy.array(rgba, f32)[:, :3]], axis=-1)


def trimesh_to_pc(scene_or_mesh):
    """Convert a loaded trimesh ``Scene`` or ``Trimesh`` into one point cloud.

    For a scene, each triangulated geometry is transformed into world space and
    sampled proportionally to its share of the total surface area (10000 points
    overall).  Raises ``ValueError`` on degenerate scenes (tiny total area or
    no triangle geometry).
    """
    if isinstance(scene_or_mesh, trimesh.Scene):
        meshes = []
        for node_name in scene_or_mesh.graph.nodes_geometry:
            # Which geometry does this node refer to?
            transform, geometry_name = scene_or_mesh.graph[node_name]
            # Get the actual potential mesh instance.
            geometry = scene_or_mesh.geometry[geometry_name].copy()
            if not hasattr(geometry, 'triangles'):
                continue
            geometry: trimesh.Trimesh
            geometry = geometry.apply_transform(transform)
            meshes.append(geometry)
        total_area = sum(geometry.area for geometry in meshes)
        if total_area < 1e-6:
            raise ValueError("Bad geometry: total area too small (< 1e-6)")
        pcs = []
        for geometry in meshes:
            # Allocate the 10000-point budget by relative surface area.
            pcs.append(model_to_pc(geometry, max(1, round(geometry.area / total_area * 10000))))
        if not len(pcs):
            raise ValueError("Unsupported mesh object: no triangles found")
        return numpy.concatenate(pcs)
    else:
        assert isinstance(scene_or_mesh, trimesh.Trimesh)
        return model_to_pc(scene_or_mesh, 10000)


def _load_uids(json_path='object_id.json'):
    """Collect the unique, sorted Objaverse uids referenced by object_id.json.

    The file maps instance keys to dicts whose "obj_id" entry maps names to
    Objaverse uids.
    """
    with open(json_path, 'r') as file:
        obj_data = json.load(file)
    uids = set()
    for ints_dict_one in obj_data.values():
        uids.update(ints_dict_one["obj_id"].values())
    return sorted(uids)


def main():
    """Download the models, sample 5000-point clouds, pickle, and push."""
    processes = multiprocessing.cpu_count()
    uids = _load_uids()
    # Debug limit: only the first two objects for now.  Earlier shard splits
    # used [:809], [809:1619], [1619:].
    uids = uids[:2]
    objects = objaverse.load_objects(
        uids=uids,
        download_processes=processes
    )
    # NOTE(security): the original script hard-coded a Hugging Face write token
    # here; that token is leaked and must be revoked.  Read it from the
    # environment instead (falls back to the cached `huggingface-cli login`).
    repo = Repository(
        local_dir="data",
        clone_from=DATASET_REPO_URL,
        use_auth_token=os.environ.get("HF_TOKEN")
    )
    object_id_tmp_dict = {}
    for objaid in tqdm(uids):
        # Reuse the already-downloaded model path instead of re-downloading
        # each object with a second objaverse.load_objects call.
        objamodel = objects[objaid]
        try:
            pc = trimesh_to_pc(trimesh.load(objamodel))
            pc = random_sampling(pc, num_samples=5000)
            object_id_tmp_dict[objaid] = pc
            print(pc.shape)
        except Exception as exc:
            # Best-effort: skip meshes that fail to load or sample, but say why
            # instead of silently swallowing every error.
            print(f"Skipping {objaid}: {exc}")
            continue
    with open(DATA_FILE, 'wb') as file:
        pickle.dump(object_id_tmp_dict, file)
    repo.push_to_hub()
    print("Done")


if __name__ == "__main__":
    main()