Spaces:

OttoYu
/

DPT-Segmentation

Runtime error

App Files Files Community

OttoYu committed on May 26, 2023

Commit

118a37c

•

1 Parent(s): a81318b

Create app.py

Browse files

Files changed (1) hide show

app.py +74 -0

app.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import gradio as gr
+from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+import torch
+import numpy as np
+from PIL import Image
+import cv2
+from sklearn.cluster import KMeans
+from matplotlib import pyplot as plt
+torch.hub.download_url_to_file('http://images.cocodataset.org/val2017/000000039769.jpg', 'cats.jpg')
+feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
+model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+def process_image(image):
+    # Prepare image for the model
+    encoding = feature_extractor(image, return_tensors="pt")
+    # Forward pass
+    with torch.no_grad():
+        outputs = model(**encoding)
+        predicted_depth = outputs.predicted_depth
+    # Interpolate to original size
+    prediction = torch.nn.functional.interpolate(
+        predicted_depth.unsqueeze(1),
+        size=image.size[::-1],
+        mode="bicubic",
+        align_corners=False,
+    ).squeeze()
+    depth_map_gray = (prediction.cpu().numpy() * 255).astype('uint8')
+    # Perform feature segmentation
+    rgb_image = np.array(image)
+    depth_threshold = 1000
+    binary_mask = np.where(depth_map_gray > depth_threshold, 255, 0).astype(np.uint8)
+    gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
+    pixels = gray_image.reshape((-1, 1))
+    num_clusters = 3
+    kmeans = KMeans(n_clusters=num_clusters)
+    kmeans.fit(pixels)
+    labels = kmeans.labels_
+    labels = labels.reshape(gray_image.shape)
+    cluster_features = []
+    for i in range(num_clusters):
+        mask = np.where(labels == i, 255, 0).astype(np.uint8)
+        cluster_image = cv2.bitwise_and(rgb_image, rgb_image, mask=mask)
+        cluster_features.append(cluster_image)
+    # Prepare output images
+    depth_image = Image.fromarray(depth_map_gray, mode='L')
+    cluster_images = [Image.fromarray(cluster) for cluster in cluster_features]
+    return depth_image, cluster_images
+title = "Demo: zero-shot depth estimation with DPT and feature segmentation"
+description = "Demo for Intel's DPT with feature segmentation, a Dense Prediction Transformer for state-of-the-art dense prediction tasks such as semantic segmentation and depth estimation."
+examples = [['cats.jpg']]
+iface = gr.Interface(
+    fn=process_image,
+    inputs=gr.inputs.Image(type="pil"),
+    outputs=[
+        gr.outputs.Image(type="pil", label="predicted depth"),
+        gr.outputs.Image(type="pil", label="cluster 1"),
+        gr.outputs.Image(type="pil", label="cluster 2"),
+        gr.outputs.Image(type="pil", label="cluster 3"),
+    ],
+    title=title,
+    description=description,
+    examples=examples,
+    enable_queue=True
+)
+iface.launch(debug=True)