import datetime
import io
import logging
import os
import shutil
import tempfile
import time
import uuid
from functools import lru_cache, partial
from io import BytesIO
from typing import Optional

import boto3
import gradio as gr
import imageio
import kiui
import mcubes
import numpy as np
import spaces
import torch
import trimesh
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.responses import JSONResponse, StreamingResponse
from gradio_litmodel3d import LitModel3D
from kiui.op import recenter
from PIL import Image
from pydantic import BaseModel, Field
from rembg import new_session, remove
from torchvision.utils import save_image
from transformers import AutoConfig, AutoModel

# FastAPI application object; routes in this module are registered on it.
app = FastAPI()

# AWS credentials come from the environment (None when a variable is unset).
ACCESS = os.getenv("ACCESS")
SECRET = os.getenv("SECRET")

# Keyword arguments shared by every boto3 client created below.
_AWS_CLIENT_KWARGS = {
    "aws_access_key_id": ACCESS,
    "aws_secret_access_key": SECRET,
    "region_name": "us-east-1",
}

bedrock = boto3.client(service_name="bedrock", **_AWS_CLIENT_KWARGS)
bedrock_runtime = boto3.client(service_name="bedrock-runtime", **_AWS_CLIENT_KWARGS)
s3_client = boto3.client("s3", **_AWS_CLIENT_KWARGS)

def find_cuda():
    """Locate the CUDA installation directory.

    ``CUDA_HOME`` is read first and ``CUDA_PATH`` only when ``CUDA_HOME`` is
    unset or empty; the chosen value is returned if it names an existing
    path. Otherwise the directory two levels above the ``nvcc`` executable
    found on ``PATH`` is returned.

    Returns:
        The CUDA directory as a string, or ``None`` if neither lookup
        succeeds.
    """
    env = os.environ
    candidate = env.get("CUDA_HOME") or env.get("CUDA_PATH")
    if candidate and os.path.exists(candidate):
        return candidate

    nvcc = shutil.which("nvcc")
    if not nvcc:
        return None

    # nvcc lives in <cuda>/bin/nvcc, so strip the file name and the bin dir.
    bin_dir = os.path.dirname(nvcc)
    return os.path.dirname(bin_dir)


# Report at import time whether a CUDA installation could be located.
cuda_path = find_cuda()
print(
    f"CUDA installation found at: {cuda_path}"
    if cuda_path
    else "CUDA installation not found"
)


class LRMGeneratorWrapper:
    """Holds the pretrained ``facebook/vfusion3d`` model in eval mode.

    Attributes set by ``__init__``:
        config: configuration loaded with ``AutoConfig.from_pretrained``.
        model: model loaded with ``AutoModel.from_pretrained``.
        device: CUDA device when available, otherwise CPU.
    """

    def __init__(self):
        repo_id = "facebook/vfusion3d"
        self.config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
        self.model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)

        device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device_name)

        self.model.to(self.device)
        self.model.eval()

    def forward(self, image, camera):
        """Call the wrapped model with ``image`` and ``camera`` and return its output."""
        output = self.model(image, camera)
        return output


# Module-level wrapper instance shared by the functions below; constructing
# it loads the pretrained model when this module is imported.
model_wrapper = LRMGeneratorWrapper()


@lru_cache(maxsize=1)
def _get_rembg_session():
    """Return the shared ``isnet-general-use`` rembg session, creating it on first use."""
    return new_session("isnet-general-use")


def preprocess_image(image, source_size):
    """Remove the background of ``image`` and turn it into a model input tensor.

    The image is passed through rembg, recentered with a 20% border, converted
    to a batched channel-first float tensor scaled by 1/255, composited over a
    white background when it has four channels, resized to a square with
    bicubic interpolation and clamped to ``[0, 1]``.

    Args:
        image: Any input accepted by ``np.array`` (a PIL image in this file).
        source_size: Side length in pixels of the square output.

    Returns:
        A tensor of shape ``(1, C, source_size, source_size)`` with values in
        ``[0, 1]``.
    """
    # Reuse one session across calls instead of building a new one each time.
    rembg_remove = partial(remove, session=_get_rembg_session())

    image = np.array(image)
    image = rembg_remove(image)
    mask = rembg_remove(image, only_mask=True)
    image = recenter(image, mask, border_ratio=0.20)

    # HWC uint8 -> 1xCxHxW float in [0, 1].
    image = torch.tensor(image).permute(2, 0, 1).unsqueeze(0) / 255.0
    if image.shape[1] == 4:
        # Alpha-blend the colour channels onto a white (value 1) background.
        rgb, alpha = image[:, :3, ...], image[:, 3:, ...]
        image = rgb * alpha + (1 - alpha)

    image = torch.nn.functional.interpolate(
        image,
        size=(source_size, source_size),
        mode='bicubic',
        align_corners=True,
    )
    # Bicubic interpolation can overshoot the input range.
    image = torch.clamp(image, 0, 1)
    return image


def get_normalized_camera_intrinsics(intrinsics: torch.Tensor):
    """
    Divide focal lengths and principal point by the image size.

    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    return: (fx, fy, cx, cy), each indexed along the batch dimension
    """
    focal = intrinsics[:, 0]
    principal = intrinsics[:, 1]
    image_size = intrinsics[:, 2]

    # Column 0 holds the x/width entries, column 1 the y/height entries.
    focal_norm = focal / image_size
    principal_norm = principal / image_size
    return focal_norm[:, 0], focal_norm[:, 1], principal_norm[:, 0], principal_norm[:, 1]


def build_camera_principle(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    Flatten each pose and append its normalized intrinsics.

    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    return: (N, 16), 12 pose values followed by fx, fy, cx, cy
    """
    normalized = get_normalized_camera_intrinsics(intrinsics)
    flat_pose = RT.reshape(-1, 12)
    intrinsic_columns = torch.stack(normalized, dim=-1)
    return torch.cat([flat_pose, intrinsic_columns], dim=-1)


def _default_intrinsics():
    """
    Default camera intrinsics.

    return: (3, 2) float32, [[fx, fy], [cx, cy], [width, height]]
    """
    focal, center, size = 384, 256, 512
    rows = [
        [focal, focal],
        [center, center],
        [size, size],
    ]
    return torch.tensor(rows, dtype=torch.float32)


def _default_source_camera(batch_size: int = 1):
    """
    Camera vector for the input view: a fixed pose plus default intrinsics.

    return: (batch_size, 16)
    """
    pose = torch.tensor([
        [0, 0, 1, 1],
        [1, 0, 0, 0],
        [0, 1, 0, 0],
    ], dtype=torch.float32).unsqueeze(0)
    intrinsics = _default_intrinsics().unsqueeze(0)
    camera = build_camera_principle(pose, intrinsics)
    return camera.repeat(batch_size, 1)


def _center_looking_at_camera_pose(camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None):
    """
    Build camera poses whose z axis points from ``look_at`` to the camera.

    camera_position: (M, 3)
    look_at: (3), defaults to the origin
    up_world: (3), defaults to +z
    return: (M, 3, 4), columns are x axis, y axis, z axis, camera position

    The result is undefined (NaN) when a viewing direction is parallel to
    ``up_world``, because the cross product used for the x axis is zero.
    """
    # Create the defaults on the same device as the camera positions.
    device = camera_position.device
    if look_at is None:
        look_at = torch.tensor([0, 0, 0], dtype=torch.float32, device=device)
    if up_world is None:
        up_world = torch.tensor([0, 0, 1], dtype=torch.float32, device=device)

    num_cameras = camera_position.shape[0]
    look_at = look_at.unsqueeze(0).repeat(num_cameras, 1)
    up_world = up_world.unsqueeze(0).repeat(num_cameras, 1)

    z_axis = camera_position - look_at
    z_axis = z_axis / z_axis.norm(dim=-1, keepdim=True)
    # dim=-1 is explicit: without it torch.cross uses the first dimension of
    # size 3, which is the batch dimension when there are exactly 3 cameras.
    x_axis = torch.cross(up_world, z_axis, dim=-1)
    x_axis = x_axis / x_axis.norm(dim=-1, keepdim=True)
    y_axis = torch.cross(z_axis, x_axis, dim=-1)
    y_axis = y_axis / y_axis.norm(dim=-1, keepdim=True)

    extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
    return extrinsics


def compose_extrinsic_RT(RT: torch.Tensor):
    """
    Append the row [0, 0, 0, 1] to every matrix in the batch.

    RT: (N, 3, 4)
    return: (N, 4, 4)
    """
    batch = RT.shape[0]
    bottom_row = torch.tensor([[[0, 0, 0, 1]]], dtype=torch.float32, device=RT.device)
    bottom_rows = bottom_row.expand(batch, -1, -1)
    return torch.cat([RT, bottom_rows], dim=1)


def _build_camera_standard(RT: torch.Tensor, intrinsics: torch.Tensor):
    """
    Flatten a 4x4 extrinsic and a 3x3 normalized intrinsic matrix per camera.

    RT: (N, 3, 4)
    intrinsics: (N, 3, 2), [[fx, fy], [cx, cy], [width, height]]
    return: (N, 25), 16 extrinsic values followed by 9 intrinsic values
    """
    extrinsic = compose_extrinsic_RT(RT)
    fx, fy, cx, cy = get_normalized_camera_intrinsics(intrinsics)

    zeros = torch.zeros_like(fx)
    row_x = torch.stack([fx, zeros, cx], dim=-1)
    row_y = torch.stack([zeros, fy, cy], dim=-1)
    row_w = torch.tensor([[0, 0, 1]], dtype=torch.float32, device=RT.device).repeat(RT.shape[0], 1)
    intrinsic_matrix = torch.stack([row_x, row_y, row_w], dim=1)

    flat_extrinsic = extrinsic.reshape(-1, 16)
    flat_intrinsic = intrinsic_matrix.reshape(-1, 9)
    return torch.cat([flat_extrinsic, flat_intrinsic], dim=-1)


def _default_render_cameras(batch_size: int = 1):
    """
    Cameras for rendering: 80 views on a circle of radius 1.5 at zero
    elevation, with a random azimuth offset of at most one degree.

    return: (batch_size, 80, 25)
    """
    num_views = 80
    radius = 1.5
    elevation = np.radians(0)

    # One random draw shifts the whole ring of cameras.
    azimuth_offset = np.random.uniform(0, np.pi/180)
    azimuths = [2 * np.pi * view / num_views + azimuth_offset for view in range(num_views)]

    cos_elevation = np.cos(elevation)
    height = radius * np.sin(elevation)
    camera_positions = [
        [
            radius * np.cos(azimuth) * cos_elevation,
            radius * np.sin(azimuth) * cos_elevation,
            height,
        ]
        for azimuth in azimuths
    ]
    camera_positions = torch.tensor(camera_positions, dtype=torch.float32)

    extrinsics = _center_looking_at_camera_pose(camera_positions)
    intrinsics = _default_intrinsics().unsqueeze(0).repeat(extrinsics.shape[0], 1, 1)
    cameras = _build_camera_standard(extrinsics, intrinsics)
    return cameras.unsqueeze(0).repeat(batch_size, 1, 1)


def _to_uint8(values):
    """Scale an array with nominal range [0, 1] to uint8, clipping values outside it."""
    # Without the clip, values slightly below 0 or above 1 wrap around in the cast.
    return (np.clip(values, 0.0, 1.0) * 255).astype(np.uint8)


def _export_mesh_obj(planes, mesh_size, mesh_path="awesome_mesh.obj"):
    """Extract a vertex-coloured mesh from ``planes`` and write it to ``mesh_path`` as OBJ."""
    synthesizer = model_wrapper.model.synthesizer
    grid_out = synthesizer.forward_grid(planes=planes, grid_size=mesh_size)
    sigma = grid_out['sigma'].float().squeeze(0).squeeze(-1).cpu().numpy()
    vtx, faces = mcubes.marching_cubes(sigma, 1.0)
    # Map grid indices [0, mesh_size - 1] to coordinates in [-1, 1].
    vtx = vtx / (mesh_size - 1) * 2 - 1
    vtx_tensor = torch.tensor(vtx, dtype=torch.float32, device=model_wrapper.device).unsqueeze(0)
    vtx_colors = synthesizer.forward_points(planes, vtx_tensor)['rgb'].float().squeeze(0).cpu().numpy()
    vtx_colors = _to_uint8(vtx_colors)
    mesh = trimesh.Trimesh(vertices=vtx, faces=faces, vertex_colors=vtx_colors)
    mesh.export(mesh_path, 'obj')
    return mesh_path


def _export_orbit_video(planes, render_size, fps, video_path="awesome_video.mp4"):
    """Render ``planes`` from the default render cameras and write the frames to ``video_path``."""
    render_cameras = _default_render_cameras(batch_size=1).to(model_wrapper.device)
    chunk_size = 1  # cameras rendered per synthesizer call
    rendered = []
    for start in range(0, render_cameras.shape[1], chunk_size):
        # NOTE(review): the two trailing zeros are passed positionally as in
        # the original call; their meaning is defined by the synthesizer.
        frame_chunk = model_wrapper.model.synthesizer(
            planes,
            render_cameras[:, start:start + chunk_size],
            render_size,
            render_size,
            0,
            0,
        )
        rendered.append(frame_chunk['images_rgb'])
    frames = torch.cat(rendered, dim=1).squeeze(0)
    frames = _to_uint8(frames.permute(0, 2, 3, 1).cpu().numpy())
    imageio.mimwrite(video_path, frames, fps=fps)
    return video_path


def generate_mesh(image, source_size=512, render_size=384, mesh_size=512, export_mesh=False, export_video=True, fps=30):
    """Run the model on ``image`` and export either a mesh or a video.

    Args:
        image: Input image, passed to ``preprocess_image``.
        source_size: Side length the input is resized to.
        render_size: Width and height of the rendered video frames.
        mesh_size: Grid resolution used for mesh extraction.
        export_mesh: When true, write ``awesome_mesh.obj``; takes precedence
            over ``export_video``.
        export_video: When true (and ``export_mesh`` is false), write
            ``awesome_video.mp4``.
        fps: Frame rate of the exported video.

    Returns:
        ``(mesh_path, mesh_path)`` when a mesh is exported,
        ``(None, video_path)`` when a video is exported, otherwise
        ``(None, None)``.
    """
    image = preprocess_image(image, source_size).to(model_wrapper.device)
    source_camera = _default_source_camera(batch_size=1).to(model_wrapper.device)

    # This is inference only, so every model call runs without autograd.
    with torch.no_grad():
        planes = model_wrapper.forward(image, source_camera)

        if export_mesh:
            mesh_path = _export_mesh_obj(planes, mesh_size)
            return mesh_path, mesh_path

        if export_video:
            video_path = _export_orbit_video(planes, render_size, fps)
            return None, video_path

    return None, None


def step_1_generate_obj(image):
    """Export a mesh for ``image`` and return its path twice."""
    obj_path, _unused = generate_mesh(image, export_mesh=True)
    return obj_path, obj_path


def step_2_generate_video(image):
    """Export a video for ``image`` and return its path."""
    outputs = generate_mesh(image, export_video=True)
    _unused, rendered_path = outputs
    return rendered_path


def step_3_display_3d_model(mesh_file):
    """Return ``mesh_file`` unchanged."""
    return mesh_file


def upload_file_to_s3(file_path, bucket_name, object_name):
    """Upload the local file ``file_path`` to ``bucket_name`` under ``object_name``.

    Returns:
        ``True`` once the client call returns; any exception raised by the
        client propagates to the caller.
    """
    s3_client.upload_file(Filename=file_path, Bucket=bucket_name, Key=object_name)
    return True


@app.post("/upload/")
async def upload_image(file: UploadFile = File(...)):
    """Generate an OBJ mesh from an uploaded image and store it in S3.

    Args:
        file: The uploaded image file.

    Returns:
        A dict with key ``obj_path`` holding the URL of the uploaded object
        in the ``framebucket3d`` bucket.

    Raises:
        HTTPException: 400 when the upload cannot be decoded as an image,
            500 when the upload helper reports failure.
    """
    image_bytes = await file.read()
    try:
        img_input = Image.open(BytesIO(image_bytes))
        # Image.open is lazy; decode now so corrupt data is rejected here.
        img_input.load()
    except OSError as exc:
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image.",
        ) from exc

    obj_file_output, _ = step_1_generate_obj(img_input)

    # A microsecond-resolution timestamp keeps object names distinct.
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
    object_name = f'frameobject_{timestamp}.obj'
    if not upload_file_to_s3(obj_file_output, 'framebucket3d', object_name):
        raise HTTPException(
            status_code=500,
            detail="Failed to upload the generated mesh.",
        )
    return {"obj_path": f"https://framebucket3d.s3.amazonaws.com/{object_name}"}


if __name__ == "__main__":
    # Serve the FastAPI app with uvicorn when this file is run as a script.
    import uvicorn

    uvicorn.run(app, host="", port=7860)