bartduis committed on
Commit
560e9b5
·
1 Parent(s): dc82be6
Files changed (1) hide show
  1. app.py +18 -7
app.py CHANGED
@@ -12,6 +12,7 @@ from PIL import Image
12
  import matplotlib.pyplot as plt
13
  from eval_wrapper.eval import EvalWrapper, eval_scene
14
  from torchvision import transforms
 
15
 
16
  outdir = "/tmp/rayst3r"
17
 
@@ -19,16 +20,16 @@ outdir = "/tmp/rayst3r"
19
  print("Loading DINOv2 model")
20
  dino_model = torch.hub.load('facebookresearch/dinov2', "dinov2_vitl14_reg")
21
  dino_model.eval()
22
- dino_model.to("cuda")
23
 
24
  print("Loading MoGe model")
25
- device = torch.device("cuda")
26
  # Load the model from huggingface hub (or load from local).
27
- moge_model = MoGeModel.from_pretrained("Ruicheng/moge-vitl").to(device)
28
 
29
  print("Loading RaySt3R model")
30
  rayst3r_checkpoint = hf_hub_download("bartduis/rayst3r", "rayst3r.pth")
31
- rayst3r_model = EvalWrapper(rayst3r_checkpoint,device=device)
32
 
33
  def depth2uint16(depth):
34
  return depth * torch.iinfo(torch.uint16).max / 10.0 # threshold is in m, convert to uint16 value
@@ -89,14 +90,19 @@ def prep_for_rayst3r(img,depth_dict,mask):
89
  depth = depth_dict["depth"].cpu()
90
  depth = depth2uint16(depth)
91
  save_tensor_as_png(depth, os.path.join(input_dir, "depth.png"),dtype=torch.uint16)
92
-
93
  # save mask as bool
94
  save_tensor_as_png(torch.from_numpy(mask).bool(), os.path.join(input_dir, "mask.png"),dtype=torch.bool)
95
  # save image
96
  save_tensor_as_png(torch.from_numpy(img), os.path.join(input_dir, "rgb.png"))
97
 
 
98
  def rayst3r_to_glb(img,depth_dict,mask,max_total_points=10e6,rotated=False):
99
  prep_for_rayst3r(img,depth_dict,mask)
 
 
 
 
100
  rayst3r_points = eval_scene(rayst3r_model,os.path.join(outdir, "input"),do_filter_all_masks=True,dino_model=dino_model).cpu()
101
 
102
  # subsample points
@@ -145,9 +151,11 @@ def input_to_glb(outdir,img,depth_dict,mask,rotated=False):
145
  scene.export(outfile)
146
  return outfile
147
 
 
148
  def depth_moge(input_img):
149
- input_img_torch = torch.tensor(input_img / 255, dtype=torch.float32, device=device).permute(2, 0, 1)
150
- output = moge_model.infer(input_img_torch)
 
151
  return output
152
 
153
  def mask_rembg(input_img):
@@ -168,7 +176,10 @@ def mask_rembg(input_img):
168
  rgb = output_np[:,:,:3]
169
  return mask, rgb
170
 
 
171
  def process_image(input_img):
 
 
172
  # resize the input image
173
  rotated = False
174
  #if input_img.shape[0] > input_img.shape[1]:
 
12
  import matplotlib.pyplot as plt
13
  from eval_wrapper.eval import EvalWrapper, eval_scene
14
  from torchvision import transforms
15
+ from spaces import GPU
16
 
17
  outdir = "/tmp/rayst3r"
18
 
 
20
print("Loading DINOv2 model")
dino_model = torch.hub.load('facebookresearch/dinov2', "dinov2_vitl14_reg")
dino_model.eval()
# NOTE(review): models are deliberately left on CPU at import time —
# presumably the @GPU (ZeroGPU) functions move them to CUDA on demand; confirm.

print("Loading MoGe model")
# Load the model from huggingface hub (or load from local).
moge_model = MoGeModel.from_pretrained("Ruicheng/moge-vitl")

print("Loading RaySt3R model")
rayst3r_checkpoint = hf_hub_download("bartduis/rayst3r", "rayst3r.pth")
rayst3r_model = EvalWrapper(rayst3r_checkpoint, device='cpu')
33
 
34
  def depth2uint16(depth):
35
  return depth * torch.iinfo(torch.uint16).max / 10.0 # threshold is in m, convert to uint16 value
 
90
  depth = depth_dict["depth"].cpu()
91
  depth = depth2uint16(depth)
92
  save_tensor_as_png(depth, os.path.join(input_dir, "depth.png"),dtype=torch.uint16)
93
+
94
  # save mask as bool
95
  save_tensor_as_png(torch.from_numpy(mask).bool(), os.path.join(input_dir, "mask.png"),dtype=torch.bool)
96
  # save image
97
  save_tensor_as_png(torch.from_numpy(img), os.path.join(input_dir, "rgb.png"))
98
 
99
+ @GPU
100
  def rayst3r_to_glb(img,depth_dict,mask,max_total_points=10e6,rotated=False):
101
  prep_for_rayst3r(img,depth_dict,mask)
102
+
103
+ dino_model.to("cuda")
104
+ rayst3r_model.to("cuda")
105
+
106
  rayst3r_points = eval_scene(rayst3r_model,os.path.join(outdir, "input"),do_filter_all_masks=True,dino_model=dino_model).cpu()
107
 
108
  # subsample points
 
151
  scene.export(outfile)
152
  return outfile
153
 
154
@GPU
def depth_moge(input_img):
    """Run MoGe depth inference on an image array, on the GPU.

    input_img: image array — presumably HxWx3 uint8 in [0, 255]
    (inferred from the /255 normalization and HWC->CHW permute; confirm).
    Returns MoGe's inference output moved back to CPU.
    """
    # ZeroGPU: the model lives on CPU until this @GPU call runs.
    moge_model.to("cuda")
    # Normalize to [0, 1], reorder HWC -> CHW, and place on the GPU.
    img_chw = torch.tensor(input_img / 255, dtype=torch.float32, device='cuda').permute(2, 0, 1)
    # NOTE(review): assumes infer(...) returns an object exposing .cpu() — confirm,
    # since callers elsewhere index the result like a dict (depth_dict["depth"]).
    return moge_model.infer(img_chw).cpu()
160
 
161
  def mask_rembg(input_img):
 
176
  rgb = output_np[:,:,:3]
177
  return mask, rgb
178
 
179
+ @GPU
180
  def process_image(input_img):
181
+
182
+
183
  # resize the input image
184
  rotated = False
185
  #if input_img.shape[0] > input_img.shape[1]: