yanranxiaoxi committed on
Commit 33f62f4 · verified · 1 Parent(s): cd2aaf5

First commit
app.py ADDED
@@ -0,0 +1,290 @@
+ import torch
+ import spaces
+ import gradio as gr
+ import os
+ import numpy as np
+ import trimesh
+ import mcubes
+ import imageio
+ from torchvision.utils import save_image
+ from PIL import Image
+ from transformers import AutoModel, AutoConfig
+ from rembg import remove, new_session
+ from functools import partial
+ from kiui.op import recenter
+ import kiui
+ from gradio_litmodel3d import LitModel3D
+ import shutil
+
+ def find_cuda():
+     # Check whether the CUDA_HOME or CUDA_PATH environment variable is set
+     cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
+
+     if cuda_home and os.path.exists(cuda_home):
+         return cuda_home
+
+     # Search the system PATH for the nvcc executable
+     nvcc_path = shutil.which('nvcc')
+
+     if nvcc_path:
+         # Drop the trailing "bin/nvcc" to get the CUDA installation root
+         cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
+         return cuda_path
+
+     return None
+
+ cuda_path = find_cuda()
+
+ if cuda_path:
+     print(f"CUDA installation found at: {cuda_path}")
+ else:
+     print("No installed CUDA path found")
+
+ # Load the pretrained model from HF
+ class LRMGeneratorWrapper:
+     def __init__(self):
+         self.config = AutoConfig.from_pretrained("yanranxiaoxi/image-upscale", trust_remote_code=True)
+         self.model = AutoModel.from_pretrained("yanranxiaoxi/image-upscale", trust_remote_code=True)
+         self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+         self.model.to(self.device)
+         self.model.eval()
+
+     def forward(self, image, camera):
+         return self.model(image, camera)
+
+ model_wrapper = LRMGeneratorWrapper()
+
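+ # Preprocessing sketch: rembg (isnet-general-use) strips the background, kiui's
+ # recenter centers the subject, any alpha channel is composited over white, and
+ # the result is resized to source_size x source_size as a float NCHW tensor in [0, 1].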
+ # Preprocess the input image
+ def preprocess_image(image, source_size):
+     session = new_session("isnet-general-use")
+     rembg_remove = partial(remove, session=session)
+     image = np.array(image)
+     image = rembg_remove(image)
+     mask = rembg_remove(image, only_mask=True)
+     image = recenter(image, mask, border_ratio=0.20)
+     image = torch.tensor(image).permute(2, 0, 1).unsqueeze(0) / 255.0
+     if image.shape[1] == 4:
+         # Composite RGBA over a white background
+         image = image[:, :3, ...] * image[:, 3:, ...] + (1 - image[:, 3:, ...])
+     image = torch.nn.functional.interpolate(image, size=(source_size, source_size), mode='bicubic', align_corners=True)
+     image = torch.clamp(image, 0, 1)
+     return image
+
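+ # Camera utilities. Intrinsics are packed as a (N, 3, 2) tensor:
+ # [[fx, fy], [cx, cy], [width, height]], normalized to image size below.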
+ def get_normalized_camera_intrinsics(intrinsics: torch.Tensor):
+     fx, fy = intrinsics[:, 0, 0], intrinsics[:, 0, 1]
+     cx, cy = intrinsics[:, 1, 0], intrinsics[:, 1, 1]
+     width, height = intrinsics[:, 2, 0], intrinsics[:, 2, 1]
+     fx, fy = fx / width, fy / height
+     cx, cy = cx / width, cy / height
+     return fx, fy, cx, cy
+
+ def build_camera_principle(RT: torch.Tensor, intrinsics: torch.Tensor):
+     fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
+     return torch.cat([
+         RT.reshape(-1, 12),
+         fx.unsqueeze(-1), fy.unsqueeze(-1), cx.unsqueeze(-1), cy.unsqueeze(-1),
+     ], dim=-1)
+
+ def _default_intrinsics():
+     fx = fy = 384
+     cx = cy = 256
+     w = h = 512
+     intrinsics = torch.tensor([
+         [fx, fy],
+         [cx, cy],
+         [w, h],
+     ], dtype=torch.float32)
+     return intrinsics
+
+ def _default_source_camera(batch_size: int = 1):
+     canonical_camera_extrinsics = torch.tensor([[
+         [0, 0, 1, 1],
+         [1, 0, 0, 0],
+         [0, 1, 0, 0],
+     ]], dtype=torch.float32)
+     canonical_camera_intrinsics = _default_intrinsics().unsqueeze(0)
+     source_camera = build_camera_principle(canonical_camera_extrinsics, canonical_camera_intrinsics)
+     return source_camera.repeat(batch_size, 1)
+
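+ # Look-at pose construction: the camera z-axis points from the look-at target
+ # toward the camera position; the x and y axes follow from cross products with
+ # the world-up vector.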
+ def _center_looking_at_camera_pose(camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None):
+     """
+     camera_position: (M, 3)
+     look_at: (3)
+     up_world: (3)
+     return: (M, 3, 4)
+     """
+     # By default, look at the origin with +z as the world-up direction
+     if look_at is None:
+         look_at = torch.tensor([0, 0, 0], dtype=torch.float32)
+     if up_world is None:
+         up_world = torch.tensor([0, 0, 1], dtype=torch.float32)
+     look_at = look_at.unsqueeze(0).repeat(camera_position.shape[0], 1)
+     up_world = up_world.unsqueeze(0).repeat(camera_position.shape[0], 1)
+
+     z_axis = camera_position - look_at
+     z_axis = z_axis / z_axis.norm(dim=-1, keepdim=True)
+     x_axis = torch.cross(up_world, z_axis, dim=-1)
+     x_axis = x_axis / x_axis.norm(dim=-1, keepdim=True)
+     y_axis = torch.cross(z_axis, x_axis, dim=-1)
+     y_axis = y_axis / y_axis.norm(dim=-1, keepdim=True)
+     extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
+     return extrinsics
+
+ def compose_extrinsic_RT(RT: torch.Tensor):
+     """
+     Compose the standard-form (4x4) extrinsic matrix from RT.
+     Batched input/output.
+     """
+     return torch.cat([
+         RT,
+         torch.tensor([[[0, 0, 0, 1]]], dtype=torch.float32).repeat(RT.shape[0], 1, 1).to(RT.device)
+     ], dim=1)
+
+ def _build_camera_standard(RT: torch.Tensor, intrinsics: torch.Tensor):
+     """
+     RT: (N, 3, 4)
+     intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
+     """
+     E = compose_extrinsic_RT(RT)
+     fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)
+     I = torch.stack([
+         torch.stack([fx, torch.zeros_like(fx), cx], dim=-1),
+         torch.stack([torch.zeros_like(fy), fy, cy], dim=-1),
+         torch.tensor([[0, 0, 1]], dtype=torch.float32, device=RT.device).repeat(RT.shape[0], 1),
+     ], dim=1)
+     return torch.cat([
+         E.reshape(-1, 16),
+         I.reshape(-1, 9),
+     ], dim=-1)
+
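+ # Orbit cameras for video rendering: M = 80 positions on a radius-1.5 circle at
+ # zero elevation, offset by a random start angle of up to one degree (pi / 180).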
+ def _default_render_cameras(batch_size: int = 1):
+     M = 80
+     radius = 1.5
+     elevation = 0
+     camera_positions = []
+     rand_theta = np.random.uniform(0, np.pi / 180)
+     elevation = np.radians(elevation)
+     for i in range(M):
+         theta = 2 * np.pi * i / M + rand_theta
+         x = radius * np.cos(theta) * np.cos(elevation)
+         y = radius * np.sin(theta) * np.cos(elevation)
+         z = radius * np.sin(elevation)
+         camera_positions.append([x, y, z])
+     camera_positions = torch.tensor(camera_positions, dtype=torch.float32)
+     extrinsics = _center_looking_at_camera_pose(camera_positions)
+
+     render_camera_intrinsics = _default_intrinsics().unsqueeze(0).repeat(extrinsics.shape[0], 1, 1)
+     render_cameras = _build_camera_standard(extrinsics, render_camera_intrinsics)
+     return render_cameras.unsqueeze(0).repeat(batch_size, 1, 1)
+
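+ # End-to-end generation: preprocess the image, run the generator once to obtain
+ # plane features, then either extract a vertex-colored mesh with marching cubes
+ # (export_mesh) or render an orbit video through the synthesizer (export_video).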
+ @spaces.GPU
+ def generate_mesh(image, source_size=512, render_size=384, mesh_size=512, export_mesh=False, export_video=True, fps=30):
+     image = preprocess_image(image, source_size).to(model_wrapper.device)
+     source_camera = _default_source_camera(batch_size=1).to(model_wrapper.device)
+
+     with torch.no_grad():
+         planes = model_wrapper.forward(image, source_camera)
+
+         if export_mesh:
+             grid_out = model_wrapper.model.synthesizer.forward_grid(planes=planes, grid_size=mesh_size)
+             vtx, faces = mcubes.marching_cubes(grid_out['sigma'].float().squeeze(0).squeeze(-1).cpu().numpy(), 1.0)
+             vtx = vtx / (mesh_size - 1) * 2 - 1
+             vtx_tensor = torch.tensor(vtx, dtype=torch.float32, device=model_wrapper.device).unsqueeze(0)
+             vtx_colors = model_wrapper.model.synthesizer.forward_points(planes, vtx_tensor)['rgb'].float().squeeze(0).cpu().numpy()
+             vtx_colors = (vtx_colors * 255).astype(np.uint8)
+             mesh = trimesh.Trimesh(vertices=vtx, faces=faces, vertex_colors=vtx_colors)
+
+             mesh_path = "xiaoxis_mesh.obj"
+             mesh.export(mesh_path, 'obj')
+
+             return mesh_path, mesh_path
+
+         if export_video:
+             render_cameras = _default_render_cameras(batch_size=1).to(model_wrapper.device)
+             frames = []
+             chunk_size = 1
+             for i in range(0, render_cameras.shape[1], chunk_size):
+                 frame_chunk = model_wrapper.model.synthesizer(
+                     planes,
+                     render_cameras[:, i:i + chunk_size],
+                     render_size,
+                     render_size,
+                     0,
+                     0
+                 )
+                 frames.append(frame_chunk['images_rgb'])
+
+             frames = torch.cat(frames, dim=1)
+             frames = frames.squeeze(0)
+             frames = (frames.permute(0, 2, 3, 1).cpu().numpy() * 255).astype(np.uint8)
+
+             video_path = "xiaoxis_video.mp4"
+             imageio.mimwrite(video_path, frames, fps=fps)
+
+             return None, video_path
+
+     return None, None
+
+ def step_1_generate_obj(image):
+     mesh_path, _ = generate_mesh(image, export_mesh=True)
+     return mesh_path, mesh_path
+
+ def step_2_generate_video(image):
+     _, video_path = generate_mesh(image, export_video=True)
+     return video_path
+
+ def step_3_display_3d_model(mesh_file):
+     return mesh_file
+
+ # Collect example files from the assets folder, reading at most 10
+ # (.webp is included because the bundled example assets use it)
+ example_folder = "assets"
+ examples = [os.path.join(example_folder, f) for f in os.listdir(example_folder) if f.endswith(('.png', '.jpg', '.jpeg', '.webp'))][:10]
+
+ with gr.Blocks() as demo:
+     with gr.Row():
+
+         with gr.Column():
+             gr.Markdown("""
+             # Image Upscale Demo
+
+             Generate 3D point clouds and models from a single image
+
+             """)
+             img_input = gr.Image(type="pil", label="Input Image")
+             examples_component = gr.Examples(examples=examples, inputs=img_input, outputs=None, examples_per_page=3)
+             generate_mesh_button = gr.Button("Generate Model")
+             # generate_video_button = gr.Button("Generate Video")
+             obj_file_output = gr.File(label="Download .obj File")
+             # video_file_output = gr.File(label="Download Video")
+
+         with gr.Column():
+             model_output = LitModel3D(
+                 clear_color=[0, 0, 0, 0],  # Background color is adjustable for better contrast
+                 label="3D Model Visualization",
+                 scale=1.0,
+                 tonemapping="aces",  # ACES tone mapping for more realistic lighting
+                 exposure=1.0,  # Adjustable exposure to control brightness
+                 contrast=1.1,  # Slightly increased contrast for better depth
+                 camera_position=(0, 0, 2),  # Initial camera position, centering the model
+                 zoom_speed=0.5,  # Zoom speed tuned for finer control
+                 pan_speed=0.5,  # Pan speed tuned for finer control
+                 interactive=True  # Let users interact with the model
+             )
+
+     # Clear outputs
+     def clear_model_viewer():
+         """Reset the Model3D component before loading a new model."""
+         return gr.update(value=None)
+
+     def generate_and_visualize(image):
+         mesh_path, _ = step_1_generate_obj(image)
+         return mesh_path, mesh_path
+
+     # First, clear the existing 3D model
+     img_input.change(clear_model_viewer, inputs=None, outputs=model_output)
+
+     # Then generate the mesh (video wiring stays disabled, matching the commented-out button above)
+     generate_mesh_button.click(step_1_generate_obj, inputs=img_input, outputs=[obj_file_output, model_output])
+     # generate_video_button.click(step_2_generate_video, inputs=img_input, outputs=video_file_output)
+
+ demo.launch()
assets/asset_a box with Hello CCVR painted on it.webp ADDED
assets/asset_a cat holding a sign that says hello CCVR.webp ADDED
assets/asset_a tiny astronaut hatching from an egg on the moon.webp ADDED
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ torch==2.2.0
+ gradio==4.31.4
+ gradio-litmodel3d==0.0.1
+ numpy
+ trimesh==4.3.2
+ PyMCubes==0.1.4
+ imageio[ffmpeg]
+ rembg[gpu,cli]
+ kiui
+ Pillow
+ transformers
+ torchvision