"""Download a shard of Objaverse models, convert each to a colored point
cloud, and push the pickled result to a Hugging Face dataset repository."""

import json
import multiprocessing
import os
import pickle

import numpy
import objaverse
import trimesh
from huggingface_hub import Repository
from tqdm import tqdm

DATASET_REPO_URL = "https://huggingface.co/datasets/Gary3410/object_tmp"
DATA_FILENAME = "object_tmp_22_3.pkl"
DATA_FILE = os.path.join("data", DATA_FILENAME)


def random_sampling(points, num_samples):
    """Return exactly `num_samples` rows sampled from `points`.

    If the cloud has fewer points than requested, sample with replacement so
    the output still has `num_samples` rows; otherwise sample without
    replacement.
    """
    replace = len(points) < num_samples
    indices = numpy.random.choice(len(points), num_samples, replace=replace)
    return points[indices]


def model_to_pc(mesh: trimesh.Trimesh, n_sample_points=10000):
    """Sample `n_sample_points` colored surface points from a mesh.

    Returns an (n_sample_points, 6) float32 array of XYZ + RGB, with RGB
    normalized into [0, 1].  Raises ValueError when even surface sampling
    cannot reach the requested point count (degenerate geometry).
    """
    f32 = numpy.float32
    # Start with a radius that roughly tiles the surface, then shrink it
    # until even sampling yields the full requested number of points.
    rad = numpy.sqrt(mesh.area / (3 * n_sample_points))
    for _ in range(24):
        pcd, face_idx = trimesh.sample.sample_surface_even(mesh, n_sample_points, rad)
        rad *= 0.85
        if len(pcd) == n_sample_points:
            break
    else:
        raise ValueError("Bad geometry, cannot finish sampling.", mesh.area)

    if isinstance(mesh.visual, trimesh.visual.ColorVisuals):
        # Per-face vertex colors: index directly by sampled face.
        rgba = mesh.visual.face_colors[face_idx]
    elif isinstance(mesh.visual, trimesh.visual.TextureVisuals):
        # Interpolate UV coordinates at each sample via barycentric weights.
        bc = trimesh.proximity.points_to_barycentric(mesh.triangles[face_idx], pcd)
        if mesh.visual.uv is None or len(mesh.visual.uv) < mesh.faces[face_idx].max():
            # Invalid UV, filling with zeroes
            uv = numpy.zeros([len(bc), 2])
        else:
            uv = numpy.einsum('ntc,nt->nc', mesh.visual.uv[mesh.faces[face_idx]], bc)
        material = mesh.visual.material
        if hasattr(material, 'materials'):
            if len(material.materials) == 0:
                # Empty MultiMaterial found, falling back to light grey
                rgba = numpy.ones_like(pcd) * 0.8
                texture = None
            else:
                material = material.materials[0]
        if hasattr(material, 'image'):
            texture = material.image
            if texture is None:
                rgba = numpy.zeros([len(uv), len(material.main_color)]) + material.main_color
        elif hasattr(material, 'baseColorTexture'):
            texture = material.baseColorTexture
            if texture is None:
                rgba = numpy.zeros([len(uv), len(material.main_color)]) + material.main_color
        else:
            # Unknown material, falling back to light grey
            texture = None
            rgba = numpy.ones_like(pcd) * 0.8
        if texture is not None:
            rgba = trimesh.visual.uv_to_interpolated_color(uv, texture)
    else:
        # Fix: visuals that are neither ColorVisuals nor TextureVisuals used
        # to leave `rgba` unbound (NameError); fall back to light grey.
        rgba = numpy.ones_like(pcd) * 0.8

    # Normalize colors into [0, 1]: 8-bit values are divided by 255, anything
    # wider is scaled by its own maximum.
    if rgba.max() > 1:
        if rgba.max() > 255:
            rgba = rgba.astype(f32) / rgba.max()
        else:
            rgba = rgba.astype(f32) / 255.0
    return numpy.concatenate([numpy.array(pcd, f32), numpy.array(rgba, f32)[:, :3]], axis=-1)


def trimesh_to_pc(scene_or_mesh):
    """Convert a trimesh Scene or Trimesh into one colored point cloud.

    Scenes are flattened: each triangulated geometry is transformed into the
    world frame and sampled in proportion to its share of the total surface
    area (~10000 points overall).  Raises ValueError on degenerate input.
    """
    if isinstance(scene_or_mesh, trimesh.Scene):
        meshes = []
        for node_name in scene_or_mesh.graph.nodes_geometry:
            # Which geometry does this node refer to
            transform, geometry_name = scene_or_mesh.graph[node_name]
            # Get the actual potential mesh instance
            geometry = scene_or_mesh.geometry[geometry_name].copy()
            if not hasattr(geometry, 'triangles'):
                # Skip non-mesh geometry (point clouds, paths, ...)
                continue
            geometry: trimesh.Trimesh
            geometry = geometry.apply_transform(transform)
            meshes.append(geometry)
        total_area = sum(geometry.area for geometry in meshes)
        if total_area < 1e-6:
            raise ValueError("Bad geometry: total area too small (< 1e-6)")
        pcs = []
        for geometry in meshes:
            # At least 1 point per mesh, proportional to surface area.
            n_points = max(1, round(geometry.area / total_area * 10000))
            pcs.append(model_to_pc(geometry, n_points))
        if not len(pcs):
            raise ValueError("Unsupported mesh object: no triangles found")
        return numpy.concatenate(pcs)
    else:
        assert isinstance(scene_or_mesh, trimesh.Trimesh)
        return model_to_pc(scene_or_mesh, 10000)


def main():
    processes = multiprocessing.cpu_count()
    object_id_tmp_dict = {}

    # Parse the JSON file mapping instance keys to object UIDs.
    # (Fix: the original bound the file contents to `str`, shadowing the
    # builtin; json.load reads the file directly.)
    with open('object_22.json', 'r') as file:
        obj_data = json.load(file)
    uids = []
    for ints_dict_one in obj_data.values():
        uids.extend(ints_dict_one.values())
    uids = sorted(set(uids))  # 2195 unique uids in total
    # This run processes the final shard only.
    uids = uids[1619:]

    # Download all objects up front with one parallel call; the loop below
    # then reads the already-local files instead of re-fetching per uid.
    objects = objaverse.load_objects(uids=uids, download_processes=processes)

    # SECURITY: the auth token was previously hard-coded in this file (a
    # leaked credential). Read it from the environment instead; revoke the
    # old token.
    repo = Repository(
        local_dir="data",
        clone_from=DATASET_REPO_URL,
        use_auth_token=os.environ.get("HF_TOKEN"),
    )

    for objaid in tqdm(uids):
        try:
            pc = trimesh_to_pc(trimesh.load(objects[objaid]))
            pc = random_sampling(pc, num_samples=5000)
            object_id_tmp_dict[objaid] = pc
        except Exception as exc:
            # Best effort: skip objects whose geometry cannot be sampled,
            # but log why instead of swallowing the error silently.
            print(f"Skipping {objaid}: {exc}")
            continue

    with open(DATA_FILE, 'wb') as file:
        pickle.dump(object_id_tmp_dict, file)
    commit_url = repo.push_to_hub()
    print("Done")


if __name__ == "__main__":
    main()