Spaces:

OttoYu
/

DPT-Segmentation

Runtime error

Create app.py

118a37c about 1 year ago

2.69 kB

	import gradio as gr
	from transformers import DPTFeatureExtractor, DPTForDepthEstimation
	import torch
	import numpy as np
	from PIL import Image
	import cv2
	from sklearn.cluster import KMeans
	from matplotlib import pyplot as plt

	# Fetch the sample picture that the demo offers as its example input.
	EXAMPLE_IMAGE_URL = 'http://images.cocodataset.org/val2017/000000039769.jpg'
	EXAMPLE_IMAGE_PATH = 'cats.jpg'
	torch.hub.download_url_to_file(EXAMPLE_IMAGE_URL, EXAMPLE_IMAGE_PATH)

	# The preprocessor and the depth model come from the same pretrained
	# checkpoint. Both are loaded once at import time and then used by
	# process_image on every call.
	DPT_CHECKPOINT = "Intel/dpt-large"
	feature_extractor = DPTFeatureExtractor.from_pretrained(DPT_CHECKPOINT)
	model = DPTForDepthEstimation.from_pretrained(DPT_CHECKPOINT)

	def process_image(image):
	    """Estimate depth for ``image`` and split it into three brightness clusters.

	    Args:
	        image: Input picture as a ``PIL.Image``. It is converted to grayscale
	            with ``cv2.COLOR_RGB2GRAY``, so an RGB image is expected.

	    Returns:
	        A flat 4-tuple ``(depth_image, cluster_1, cluster_2, cluster_3)`` of
	        PIL images, one per output component declared on the Gradio
	        interface. ``depth_image`` is an 8-bit grayscale ("L") depth map at
	        the input resolution. Each cluster image is the input with every
	        pixel outside that KMeans cluster blacked out.
	    """
	    # Prepare image for the model
	    encoding = feature_extractor(image, return_tensors="pt")

	    # Forward pass; inference only, so gradient tracking is switched off.
	    with torch.no_grad():
	        outputs = model(**encoding)
	        predicted_depth = outputs.predicted_depth

	    # Interpolate to original size. PIL reports size as (width, height)
	    # while interpolate() wants (height, width), hence the reversal.
	    prediction = torch.nn.functional.interpolate(
	        predicted_depth.unsqueeze(1),
	        size=image.size[::-1],
	        mode="bicubic",
	        align_corners=False,
	    ).squeeze()

	    # DPT's predicted depth is a relative quantity with no fixed [0, 1]
	    # range, so scale by the maximum before the 8-bit conversion; casting
	    # the raw values * 255 to uint8 would wrap around. The clip removes
	    # small negative undershoots that bicubic resampling can introduce.
	    depth = prediction.cpu().numpy()
	    peak = depth.max()
	    if peak > 0:
	        depth = depth / peak
	    depth_map_gray = (np.clip(depth, 0.0, 1.0) * 255).astype('uint8')

	    # Perform feature segmentation: cluster pixels by grayscale intensity.
	    rgb_image = np.array(image)
	    gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
	    pixels = gray_image.reshape((-1, 1))
	    num_clusters = 3  # must match the number of cluster outputs in the UI
	    kmeans = KMeans(n_clusters=num_clusters)
	    kmeans.fit(pixels)
	    labels = kmeans.labels_.reshape(gray_image.shape)

	    # One masked copy of the input per cluster. Cluster indices come from
	    # KMeans and carry no particular ordering.
	    cluster_images = []
	    for i in range(num_clusters):
	        mask = np.where(labels == i, 255, 0).astype(np.uint8)
	        cluster_pixels = cv2.bitwise_and(rgb_image, rgb_image, mask=mask)
	        cluster_images.append(Image.fromarray(cluster_pixels))

	    # Prepare output images
	    depth_image = Image.fromarray(depth_map_gray, mode='L')

	    # Return a flat tuple: Gradio maps return values one-to-one onto the
	    # four output components, so a nested list would not be accepted.
	    return (depth_image, *cluster_images)

	# Text shown at the top of the demo page.
	title = "Demo: zero-shot depth estimation with DPT and feature segmentation"
	description = "Demo for Intel's DPT with feature segmentation, a Dense Prediction Transformer for state-of-the-art dense prediction tasks such as semantic segmentation and depth estimation."
	# Each example row supplies one value per input component.
	examples = [['cats.jpg']]

	# Components are created with gr.Image directly: the gr.inputs / gr.outputs
	# namespaces were deprecated in Gradio 3 and removed in Gradio 4.
	iface = gr.Interface(
	    fn=process_image,
	    inputs=gr.Image(type="pil"),
	    outputs=[
	        gr.Image(type="pil", label="predicted depth"),
	        gr.Image(type="pil", label="cluster 1"),
	        gr.Image(type="pil", label="cluster 2"),
	        gr.Image(type="pil", label="cluster 3"),
	    ],
	    title=title,
	    description=description,
	    examples=examples,
	)
	# Request queuing is turned on with queue(); the enable_queue keyword that
	# used to do this is no longer accepted by recent Gradio releases.
	iface.queue()
	iface.launch(debug=True)