OttoYu committed on
Commit
118a37c
1 Parent(s): a81318b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import DPTFeatureExtractor, DPTForDepthEstimation
3
+ import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ import cv2
7
+ from sklearn.cluster import KMeans
8
+ from matplotlib import pyplot as plt
9
+
10
# Fetch the sample image that the demo offers as its example input.
_SAMPLE_IMAGE_URL = 'http://images.cocodataset.org/val2017/000000039769.jpg'
_SAMPLE_IMAGE_PATH = 'cats.jpg'
torch.hub.download_url_to_file(_SAMPLE_IMAGE_URL, _SAMPLE_IMAGE_PATH)

# Load the preprocessing pipeline and the depth model from the same
# checkpoint, once at import time, so every request reuses them.
_DPT_CHECKPOINT = "Intel/dpt-large"
feature_extractor = DPTFeatureExtractor.from_pretrained(_DPT_CHECKPOINT)
model = DPTForDepthEstimation.from_pretrained(_DPT_CHECKPOINT)
14
+
15
def _estimate_depth(image):
    """Return an 8-bit depth map for ``image`` at the image's own resolution.

    Args:
        image: RGB ``PIL.Image`` to run through the module-level DPT model.

    Returns:
        2-D ``uint8`` NumPy array of shape (height, width).
    """
    # Prepare image for the model
    encoding = feature_extractor(image, return_tensors="pt")

    # Forward pass; gradients are not needed for inference.
    with torch.no_grad():
        predicted_depth = model(**encoding).predicted_depth

    # Interpolate to original size. PIL reports (width, height) whereas
    # interpolate expects (height, width), hence the reversal.
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

    depth = prediction.cpu().numpy()

    # Scale by the maximum before casting: multiplying the raw prediction by
    # 255 and casting straight to uint8 wraps around for any value above 1.
    peak = depth.max()
    if peak <= 0:
        return np.zeros(depth.shape, dtype=np.uint8)
    return np.clip(depth * (255.0 / peak), 0, 255).astype(np.uint8)


def _split_by_intensity(rgb_image, num_clusters=3):
    """Split ``rgb_image`` into ``num_clusters`` images by gray-level K-means.

    Args:
        rgb_image: (height, width, 3) ``uint8`` array in RGB order.
        num_clusters: Number of intensity clusters to produce.

    Returns:
        List of ``num_clusters`` arrays shaped like ``rgb_image``; each keeps
        the pixels assigned to one cluster and is black everywhere else.
    """
    gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
    pixels = gray_image.reshape((-1, 1))

    # A fixed seed keeps the cluster numbering stable between runs.
    kmeans = KMeans(n_clusters=num_clusters, random_state=0)
    labels = kmeans.fit(pixels).labels_.reshape(gray_image.shape)

    cluster_features = []
    for index in range(num_clusters):
        mask = np.where(labels == index, 255, 0).astype(np.uint8)
        cluster_features.append(cv2.bitwise_and(rgb_image, rgb_image, mask=mask))
    return cluster_features


def process_image(image):
    """Estimate depth for ``image`` and segment it into three intensity clusters.

    Args:
        image: Input ``PIL.Image``.

    Returns:
        A flat tuple of four ``PIL.Image`` objects: the predicted depth map
        followed by one image per cluster. The tuple is flat (not an image
        plus a list) so that it lines up one-to-one with an interface that
        declares four separate image outputs.
    """
    # Guard against non-RGB inputs (e.g. grayscale), which the RGB->gray
    # conversion used for clustering cannot handle.
    image = image.convert("RGB")

    depth_map_gray = _estimate_depth(image)
    cluster_features = _split_by_intensity(np.array(image), num_clusters=3)

    # Prepare output images. A 2-D uint8 array is read as mode 'L'.
    depth_image = Image.fromarray(depth_map_gray)
    cluster_images = [Image.fromarray(cluster) for cluster in cluster_features]

    return (depth_image, *cluster_images)
55
+
56
# Text shown at the top of the demo page.
title = "Demo: zero-shot depth estimation with DPT and feature segmentation"
description = "Demo for Intel's DPT with feature segmentation, a Dense Prediction Transformer for state-of-the-art dense prediction tasks such as semantic segmentation and depth estimation."

# One example row per entry; each row holds the path for the single image input.
examples = [['cats.jpg']]

# One image output per label, in display order.
_output_labels = ["predicted depth", "cluster 1", "cluster 2", "cluster 3"]

# NOTE(review): gr.inputs / gr.outputs and enable_queue look like legacy
# Gradio APIs -- confirm they exist in the Gradio version this app pins.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.inputs.Image(type="pil"),
    outputs=[gr.outputs.Image(type="pil", label=label) for label in _output_labels],
    title=title,
    description=description,
    examples=examples,
    enable_queue=True,
)

iface.launch(debug=True)