joselobenitezg committed on
Commit
5f51879
·
1 Parent(s): 9930f16

set tf32 matmul

Files changed (3)
  1. inference/depth.py +3 -112
  2. inference/normal.py +3 -96
  3. inference/pose.py +0 -82
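The substantive change, applied identically in inference/depth.py and inference/normal.py, opportunistically enables TF32 tensor-core math before the TorchScript models are loaded. As a minimal standalone sketch of the pattern (the capability guard mirrors the diff below; TF32 is only available on compute capability 8.x, i.e. Ampere, and newer GPUs):

import torch

# Guard: TF32 tensor cores only exist on compute capability >= 8.0 (Ampere and newer).
if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
    # TF32 keeps FP32's dynamic range but reduces mantissa precision,
    # trading a small amount of matmul/conv accuracy for throughput.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True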
inference/depth.py CHANGED
@@ -1,115 +1,3 @@
- # # Example usage
- # import torch
- # import numpy as np
- # from PIL import Image
- # from torchvision import transforms
- # from config import LABELS_TO_IDS
- # from utils.vis_utils import visualize_mask_with_overlay
-
- # import torch
- # import torch.nn.functional as F
- # import numpy as np
- # import cv2
-
- # TASK = 'depth'
- # VERSION = 'sapiens_0.3b'
-
- # model_path = get_model_path(TASK, VERSION)
- # print(model_path)
-
- # model = torch.jit.load(model_path)
- # model.eval()
- # model.to("cuda")
-
-
- # def get_depth(image, depth_model, input_shape=(3, 1024, 768), device="cuda"):
- #     # Preprocess the image
- #     img = preprocess_image(image, input_shape)
-
- #     # Run the model
- #     with torch.no_grad():
- #         result = depth_model(img.to(device))
-
- #     # Post-process the output
- #     depth_map = post_process_depth(result, (image.shape[0], image.shape[1]))
-
- #     # Visualize the depth map
- #     depth_image = visualize_depth(depth_map)
-
- #     return depth_image, depth_map
-
- # def preprocess_image(image, input_shape):
- #     img = cv2.resize(image, (input_shape[2], input_shape[1]), interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)
- #     img = torch.from_numpy(img)
- #     img = img[[2, 1, 0], ...].float()
- #     mean = torch.tensor([123.5, 116.5, 103.5]).view(-1, 1, 1)
- #     std = torch.tensor([58.5, 57.0, 57.5]).view(-1, 1, 1)
- #     img = (img - mean) / std
- #     return img.unsqueeze(0)
-
- # def post_process_depth(result, original_shape):
- #     # Check the dimensionality of the result
- #     if result.dim() == 3:
- #         result = result.unsqueeze(0)
- #     elif result.dim() == 4:
- #         pass
- #     else:
- #         raise ValueError(f"Unexpected result dimension: {result.dim()}")
-
- #     # Ensure we're interpolating to the correct dimensions
- #     seg_logits = F.interpolate(result, size=original_shape, mode="bilinear", align_corners=False).squeeze(0)
- #     depth_map = seg_logits.data.float().cpu().numpy()
-
- #     # If depth_map has an extra dimension, squeeze it
- #     if depth_map.ndim == 3 and depth_map.shape[0] == 1:
- #         depth_map = depth_map.squeeze(0)
-
- #     return depth_map
-
- # def visualize_depth(depth_map):
- #     # Normalize the depth map
- #     min_val, max_val = np.nanmin(depth_map), np.nanmax(depth_map)
- #     depth_normalized = 1 - ((depth_map - min_val) / (max_val - min_val))
-
- #     # Convert to uint8
- #     depth_normalized = (depth_normalized * 255).astype(np.uint8)
-
- #     # Apply colormap
- #     depth_colored = cv2.applyColorMap(depth_normalized, cv2.COLORMAP_INFERNO)
-
- #     return depth_colored
-
- # # You can add the surface normal calculation if needed
- # def calculate_surface_normal(depth_map):
- #     kernel_size = 7
- #     grad_x = cv2.Sobel(depth_map.astype(np.float32), cv2.CV_32F, 1, 0, ksize=kernel_size)
- #     grad_y = cv2.Sobel(depth_map.astype(np.float32), cv2.CV_32F, 0, 1, ksize=kernel_size)
- #     z = np.full(grad_x.shape, -1)
- #     normals = np.dstack((-grad_x, -grad_y, z))
-
- #     normals_mag = np.linalg.norm(normals, axis=2, keepdims=True)
- #     with np.errstate(divide="ignore", invalid="ignore"):
- #         normals_normalized = normals / (normals_mag + 1e-5)
-
- #     normals_normalized = np.nan_to_num(normals_normalized, nan=-1, posinf=-1, neginf=-1)
- #     normal_from_depth = ((normals_normalized + 1) / 2 * 255).astype(np.uint8)
- #     normal_from_depth = normal_from_depth[:, :, ::-1]  # RGB to BGR for cv2
-
- #     return normal_from_depth
-
- # from utils.vis_utils import resize_image
-
- # pil_image = Image.open('/home/user/app/assets/image.webp')
-
- # # Load and process an image
- # image = cv2.imread('/home/user/app/assets/frame.png')
- # depth_image, depth_map = get_depth(image, model)
-
- # surface_normal = calculate_surface_normal(depth_map)
- # cv2.imwrite("output_surface_normal.jpg", surface_normal)
- # # Save the results
- # output_im = cv2.imwrite("output_depth_image2.jpg", depth_image)
-
  import torch
  import torch.nn.functional as F
  import numpy as np
@@ -121,6 +9,9 @@ def load_model(task, version):
      try:
          model_path = SAPIENS_LITE_MODELS_PATH[task][version]
          device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
+             torch.backends.cuda.matmul.allow_tf32 = True
+             torch.backends.cudnn.allow_tf32 = True
          model = torch.jit.load(model_path)
          model.eval()
          model.to(device)
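A hypothetical call into the updated loader, for illustration only: the task and version strings come from the example code deleted above, and it is assumed (not shown in this hunk) that load_model returns the prepared TorchScript module.

from inference.depth import load_model

# "depth" / "sapiens_0.3b" are the TASK/VERSION values from the removed example;
# load_model is assumed here to return the module it loads and moves to device.
model = load_model("depth", "sapiens_0.3b")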
inference/normal.py CHANGED
@@ -1,99 +1,3 @@
- # import torch
- # import torch.nn.functional as F
- # import numpy as np
- # import cv2
- # from PIL import Image
- # from config import SAPIENS_LITE_MODELS_PATH
-
- # # Example usage
- # TASK = 'normal'
- # VERSION = 'sapiens_0.3b'
-
- # model_path = get_model_path(TASK, VERSION)
- # print(model_path)
-
- # model = torch.jit.load(model_path)
- # model.eval()
- # model.to("cuda")
-
- # import torch
- # import torch.nn.functional as F
- # import numpy as np
- # import cv2
-
- # def get_normal(image, normal_model, input_shape=(3, 1024, 768), device="cuda"):
- #     # Preprocess the image
- #     img = preprocess_image(image, input_shape)
-
- #     # Run the model
- #     with torch.no_grad():
- #         result = normal_model(img.to(device))
-
- #     # Post-process the output
- #     normal_map = post_process_normal(result, (image.shape[0], image.shape[1]))
-
- #     # Visualize the normal map
- #     normal_image = visualize_normal(normal_map)
-
- #     return normal_image, normal_map
-
- # def preprocess_image(image, input_shape):
- #     img = cv2.resize(image, (input_shape[2], input_shape[1]), interpolation=cv2.INTER_LINEAR).transpose(2, 0, 1)
- #     img = torch.from_numpy(img)
- #     img = img[[2, 1, 0], ...].float()
- #     mean = torch.tensor([123.5, 116.5, 103.5]).view(-1, 1, 1)
- #     std = torch.tensor([58.5, 57.0, 57.5]).view(-1, 1, 1)
- #     img = (img - mean) / std
- #     return img.unsqueeze(0)
-
- # def post_process_normal(result, original_shape):
- #     # Check the dimensionality of the result
- #     if result.dim() == 3:
- #         result = result.unsqueeze(0)
- #     elif result.dim() == 4:
- #         pass
- #     else:
- #         raise ValueError(f"Unexpected result dimension: {result.dim()}")
-
- #     # Ensure we're interpolating to the correct dimensions
- #     seg_logits = F.interpolate(result, size=original_shape, mode="bilinear", align_corners=False).squeeze(0)
- #     normal_map = seg_logits.float().cpu().numpy().transpose(1, 2, 0)  # H x W x 3
- #     return normal_map
-
- # def visualize_normal(normal_map):
- #     normal_map_norm = np.linalg.norm(normal_map, axis=-1, keepdims=True)
- #     normal_map_normalized = normal_map / (normal_map_norm + 1e-5)  # Add a small epsilon to avoid division by zero
-
- #     # Convert to 0-255 range and BGR format for visualization
- #     normal_map_vis = ((normal_map_normalized + 1) / 2 * 255).astype(np.uint8)
- #     normal_map_vis = normal_map_vis[:, :, ::-1]  # RGB to BGR
-
- #     return normal_map_vis
-
- # def load_normal_model(checkpoint, use_torchscript=False):
- #     if use_torchscript:
- #         return torch.jit.load(checkpoint)
- #     else:
- #         model = torch.export.load(checkpoint).module()
- #         model = model.to("cuda")
- #         model = torch.compile(model, mode="max-autotune", fullgraph=True)
- #         return model
-
- # import cv2
- # import numpy as np
-
- # # Load the model
- # normal_model = load_normal_model(model_path, use_torchscript='_torchscript')
-
- # # Load the image
- # image = cv2.imread("/home/user/app/assets/image.webp")
-
- # # Get the normal map and visualization
- # normal_image, normal_map = get_normal(image, normal_model)
-
- # # Save the results
- # cv2.imwrite("output_normal_image.png", normal_image)
-
  import torch
  import torch.nn.functional as F
  import numpy as np
@@ -105,6 +9,9 @@ def load_model(task, version):
      try:
          model_path = SAPIENS_LITE_MODELS_PATH[task][version]
          device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
+             torch.backends.cuda.matmul.allow_tf32 = True
+             torch.backends.cudnn.allow_tf32 = True
          model = torch.jit.load(model_path)
          model.eval()
          model.to(device)
inference/pose.py CHANGED
@@ -1,85 +1,3 @@
- # import torch
- # import numpy as np
- # from PIL import Image
- # from torchvision import transforms
- # from config import LABELS_TO_IDS
- # from utils.vis_utils import visualize_mask_with_overlay
-
- # # Example usage
- # TASK = 'pose'
- # VERSION = 'sapiens_1b'
-
- # model_path = get_model_path(TASK, VERSION)
- # print(model_path)
-
- # model = torch.jit.load(model_path)
- # model.eval()
- # model.to("cuda")
-
- # def get_pose(image, pose_estimator, input_shape=(3, 1024, 768), device="cuda"):
- #     # Preprocess the image
- #     img = preprocess_image(image, input_shape)
-
- #     # Run the model
- #     with torch.no_grad():
- #         heatmap = pose_estimator(img.to(device))
-
- #     # Post-process the output
- #     keypoints, keypoint_scores = udp_decode(heatmap[0].cpu().float().numpy(),
- #                                             input_shape[1:],
- #                                             (input_shape[1] // 4, input_shape[2] // 4))
-
- #     # Scale keypoints to original image size
- #     scale_x = image.width / input_shape[2]
- #     scale_y = image.height / input_shape[1]
- #     keypoints[:, 0] *= scale_x
- #     keypoints[:, 1] *= scale_y
-
- #     # Visualize the keypoints on the original image
- #     pose_image = visualize_keypoints(image, keypoints, keypoint_scores)
- #     return pose_image
-
- # def preprocess_image(image, input_shape):
- #     # Resize and normalize the image
- #     img = image.resize((input_shape[2], input_shape[1]))
- #     img = np.array(img).transpose(2, 0, 1)
- #     img = torch.from_numpy(img).float()
- #     img = img[[2, 1, 0], ...]  # RGB to BGR
- #     mean = torch.tensor([123.675, 116.28, 103.53]).view(3, 1, 1)
- #     std = torch.tensor([58.395, 57.12, 57.375]).view(3, 1, 1)
- #     img = (img - mean) / std
- #     return img.unsqueeze(0)
-
- # def udp_decode(heatmap, img_size, heatmap_size):
- #     # This is a simplified version. You might need to implement the full UDP decode logic
- #     h, w = heatmap_size
- #     keypoints = np.zeros((heatmap.shape[0], 2))
- #     keypoint_scores = np.zeros(heatmap.shape[0])
-
- #     for i in range(heatmap.shape[0]):
- #         hm = heatmap[i]
- #         idx = np.unravel_index(np.argmax(hm), hm.shape)
- #         keypoints[i] = [idx[1] * img_size[1] / w, idx[0] * img_size[0] / h]
- #         keypoint_scores[i] = hm[idx]
-
- #     return keypoints, keypoint_scores
-
- # def visualize_keypoints(image, keypoints, keypoint_scores, threshold=0.3):
- #     draw = ImageDraw.Draw(image)
- #     for (x, y), score in zip(keypoints, keypoint_scores):
- #         if score > threshold:
- #             draw.ellipse([(x-2, y-2), (x+2, y+2)], fill='red', outline='red')
- #     return image
-
- # from utils.vis_utils import resize_image
- # pil_image = Image.open('/home/user/app/assets/image.webp')
-
- # if pil_image.mode == 'RGBA':
- #     pil_image = pil_image.convert('RGB')
-
- # output_pose = get_pose(resized_pil_image, model)
-
- # output_pose
  import torch
  import numpy as np
  from PIL import Image, ImageDraw